コード例 #1
0
    def test_forward_consistency(self):
        """Check that deconvolution_2d gives matching CPU and GPU outputs.

        ``F.deconvolution_2d`` is applied once to the host arrays and once
        to copies passed through ``cuda.to_gpu``; the output dtypes and
        values are then compared.
        """
        conv_kwargs = dict(
            stride=self.stride, pad=self.pad, outsize=self.outsize)

        # Reference result from the host arrays.
        cpu_inputs = [
            chainer.Variable(self.x),
            chainer.Variable(self.W),
            None if self.nobias else chainer.Variable(self.b),
        ]
        with chainer.using_config('cudnn_deterministic',
                                  self.cudnn_deterministic):
            y_cpu = F.deconvolution_2d(*cpu_inputs, **conv_kwargs)

        # Same computation on arrays transferred with cuda.to_gpu.
        gpu_inputs = [
            chainer.Variable(cuda.to_gpu(self.x)),
            chainer.Variable(cuda.to_gpu(self.W)),
            None if self.nobias else chainer.Variable(cuda.to_gpu(self.b)),
        ]
        with chainer.using_config('use_cudnn', self.use_cudnn), \
                chainer.using_config('cudnn_deterministic',
                                     self.cudnn_deterministic):
            y_gpu = F.deconvolution_2d(*gpu_inputs, **conv_kwargs)

        for y in (y_cpu, y_gpu):
            self.assertEqual(y.data.dtype, self.x_dtype)
        testing.assert_allclose(
            y_cpu.data, y_gpu.data.get(), **self.test_forward_options)
コード例 #2
0
    def test_call_cudnn_backward(self):
        """Check whether the backward pass calls the patched cuDNN routines.

        The forward pass always runs with ``cudnn_deterministic`` set to
        False; only the backward pass runs under
        ``self.cudnn_deterministic``.
        """
        # This test covers the case where cudnn < v3 and
        # cudnn_deterministic is True, in which the backward method of
        # chainer.functions.Deconvolution2D must raise an error.  The
        # forward method would raise as well in that case, so
        # cudnn_deterministic is deliberately forced to False here to let
        # the forward pass complete.
        with chainer.using_config('use_cudnn', self.use_cudnn), \
                chainer.using_config('cudnn_deterministic', False):
            y = self.forward()
        y.grad = self.gy

        patch_data = mock.patch('cupy.cuda.cudnn.convolutionForward')
        patch_filter = mock.patch(
            'cupy.cuda.cudnn.convolutionBackwardFilter_v3')

        with chainer.using_config('use_cudnn', self.use_cudnn), \
                chainer.using_config('cudnn_deterministic',
                                     self.cudnn_deterministic), \
                patch_data as data_func, patch_filter as filter_func:
            y.backward()
            for patched in (data_func, filter_func):
                self.assertEqual(patched.called, self.should_call_cudnn)
コード例 #3
0
 def test_call_cudnn_forward(self):
     """Check whether the forward pass calls the patched convolutionForward.

     The patched function's ``called`` flag must equal
     ``self.should_call_cudnn``.
     """
     target = 'cupy.cudnn.cudnn.convolutionForward'
     with chainer.using_config('use_cudnn', self.use_cudnn), \
             chainer.using_config('cudnn_deterministic',
                                  self.cudnn_deterministic), \
             mock.patch(target) as func:
         self.forward()
         self.assertEqual(func.called, self.should_call_cudnn)
コード例 #4
0
    def check_backward(self, x_data, W_data, b_data, y_grad,
                       use_cudnn='never'):
        """Run ``gradient_check.check_backward`` on ``DeconvolutionND``.

        When ``self.c_contiguous`` is false, every input is first replaced
        by a non-C-contiguous equivalent (asserted below).

        Args:
            x_data: Input array.
            W_data: Filter array.
            b_data: Bias array, or ``None`` to omit the bias input.
            y_grad: Upstream gradient array.
            use_cudnn: Value of the ``use_cudnn`` config during the check.
        """
        if not self.c_contiguous:
            xp = cuda.get_array_module(x_data)
            x_data, W_data, y_grad = [
                xp.asfortranarray(a) for a in (x_data, W_data, y_grad)]
            for converted in (x_data, W_data, y_grad):
                self.assertFalse(converted.flags.c_contiguous)
            if b_data is not None:
                # Take every other element of a buffer twice as long, so
                # the resulting view is not C-contiguous.
                strided = xp.empty((len(b_data) * 2,), dtype=self.b.dtype)
                strided[::2] = b_data
                b_data = strided[::2]
                self.assertFalse(b_data.flags.c_contiguous)

        if b_data is None:
            inputs = (x_data, W_data)
        else:
            inputs = (x_data, W_data, b_data)

        with chainer.using_config('use_cudnn', use_cudnn), \
                chainer.using_config('autotune', self.autotune):
            func = deconvolution_nd.DeconvolutionND(
                len(self.dims), self.stride, self.pad, self.outsize)
            gradient_check.check_backward(
                func, inputs, y_grad, **self.check_backward_options)
コード例 #5
0
ファイル: test_n_step_gru.py プロジェクト: unnonouno/chainer
 def test_forward_gpu_train(self):
     """Run ``check_forward`` on GPU arrays with cuDNN 'always', train=True."""
     self.rnn.to_gpu()
     cudnn_always = chainer.using_config('use_cudnn', 'always')
     train_mode = chainer.using_config('train', True)
     with cudnn_always, train_mode:
         h_gpu = cuda.to_gpu(self.h)
         xs_gpu = [cuda.to_gpu(x) for x in self.xs]
         self.check_forward(h_gpu, xs_gpu)
コード例 #6
0
ファイル: test_convolution_nd.py プロジェクト: jnishi/chainer
    def check_double_backward(self, x_data, W_data, b_data, y_grad,
                              x_grad_grad, W_grad_grad, b_grad_grad,
                              use_cudnn='always'):
        """Run ``gradient_check.check_double_backward`` on convolution_nd.

        When ``self.c_contiguous`` is false, all arrays are first passed
        through ``testing.array._as_noncontiguous_array``.  The bias and its
        second-order gradient are included only if ``b_data`` is not None.
        """
        arrays = (x_data, W_data, b_data, y_grad,
                  x_grad_grad, W_grad_grad, b_grad_grad)
        if not self.c_contiguous:
            arrays = testing.array._as_noncontiguous_array(arrays)
        (x_data, W_data, b_data, y_grad,
         x_grad_grad, W_grad_grad, b_grad_grad) = arrays

        if b_data is None:
            args = (x_data, W_data)
            grad_grads = (x_grad_grad, W_grad_grad)
        else:
            args = (x_data, W_data, b_data)
            grad_grads = (x_grad_grad, W_grad_grad, b_grad_grad)

        def convolve(*inputs):
            return F.convolution_nd(
                *inputs, stride=self.stride, pad=self.pad,
                cover_all=self.cover_all, dilate=self.dilate,
                groups=self.groups)

        with chainer.using_config('use_cudnn', use_cudnn), \
                chainer.using_config('autotune', self.autotune):
            gradient_check.check_double_backward(
                convolve, args, y_grad, grad_grads,
                dtype='d', atol=5e-3, rtol=5e-2)
コード例 #7
0
    def check_backward_consistency_regression(self, x_data, gy_data,
                                              use_cudnn='always'):
        """Regression test against the two-dimensional average pooling layer.

        Computes the input gradient once with ``AveragePoolingND`` and once
        with ``AveragePooling2D`` and asserts that both are close.  Returns
        immediately unless ``self.dims`` has exactly two entries.

        Args:
            x_data: Input array; a fresh copy is wrapped for each layer.
            gy_data: Upstream gradient assigned to each layer's output.
            use_cudnn: Value of the ``use_cudnn`` config that is active
                while both layers are applied and back-propagated.
        """
        if len(self.dims) != 2:
            return

        ksize = self.ksize
        stride = self.stride
        pad = self.pad
        xp = cuda.get_array_module(x_data)

        x_nd = chainer.Variable(xp.array(x_data))
        x_2d = chainer.Variable(xp.array(x_data))

        # Forward and backward both run inside the config scope.  Building
        # only the function objects inside it is not enough if the
        # configuration is consulted when the function executes, which
        # would leave ``use_cudnn`` without effect on the computation.
        with chainer.using_config('use_cudnn', use_cudnn):
            # Backward computation for N-dimensional average pooling layer.
            func_nd = functions.AveragePoolingND(self.ndim, ksize,
                                                 stride=stride, pad=pad)
            y_nd = func_nd(x_nd)
            y_nd.grad = gy_data
            y_nd.backward()

            # Backward computation for two-dimensional average pooling
            # layer.
            func_2d = functions.AveragePooling2D(ksize, stride=stride,
                                                 pad=pad, cover_all=False)
            y_2d = func_2d(x_2d)
            y_2d.grad = gy_data
            y_2d.backward()

        # Test that the two result gradients are close enough.
        testing.assert_allclose(x_nd.grad, x_2d.grad)
コード例 #8
0
    def check_double_backward(
            self, inputs, grad_outputs, grad_grad_inputs, use_cudnn='always'):
        """Run ``gradient_check.check_double_backward`` on deconvolution_nd.

        Args:
            inputs: Triple ``(x, W, b)``; ``b`` may be ``None``.
            grad_outputs: One-element sequence holding the output gradient.
            grad_grad_inputs: Triple of second-order gradients for
                ``(x, W, b)``.
            use_cudnn: Value of the ``use_cudnn`` config during the check.
        """
        if not self.c_contiguous:
            inputs, grad_outputs, grad_grad_inputs = [
                array._as_noncontiguous_array(group)
                for group in (inputs, grad_outputs, grad_grad_inputs)]

        x_data, W_data, b_data = inputs
        (y_grad,) = grad_outputs
        x_grad_grad, W_grad_grad, b_grad_grad = grad_grad_inputs

        # The bias and its second-order gradient take part only when a bias
        # is given.
        if b_data is None:
            args = (x_data, W_data)
            grad_grads = (x_grad_grad, W_grad_grad)
        else:
            args = (x_data, W_data, b_data)
            grad_grads = (x_grad_grad, W_grad_grad, b_grad_grad)

        def deconvolve(*xs):
            return F.deconvolution_nd(
                *xs, stride=self.stride, pad=self.pad, outsize=self.outsize,
                dilate=self.dilate, groups=self.groups)

        with chainer.using_config('use_cudnn', use_cudnn), \
                chainer.using_config('autotune', self.autotune):
            gradient_check.check_double_backward(
                deconvolve, args, y_grad, grad_grads,
                **self.check_double_backward_options)
コード例 #9
0
    def check_backward(self, x_data, W_data, b_data, y_grad,
                       use_cudnn='never'):
        """Run ``gradient_check.check_backward`` on ``F.deconvolution_nd``.

        When ``self.c_contiguous`` is false, every input is first replaced
        by a non-C-contiguous equivalent (asserted below).  ``b_data`` may
        be ``None``, in which case no bias is passed.
        """
        if not self.c_contiguous:
            xp = backend.get_array_module(x_data)
            x_data, W_data, y_grad = [
                xp.asfortranarray(a) for a in (x_data, W_data, y_grad)]
            for converted in (x_data, W_data, y_grad):
                self.assertFalse(converted.flags.c_contiguous)
            if b_data is not None:
                # Take every other element of a buffer twice as long, so
                # the resulting view is not C-contiguous.
                strided = xp.empty((len(b_data) * 2,), dtype=self.b.dtype)
                strided[::2] = b_data
                b_data = strided[::2]
                self.assertFalse(b_data.flags.c_contiguous)

        if b_data is None:
            args = (x_data, W_data)
        else:
            args = (x_data, W_data, b_data)

        def deconvolve(*inputs):
            return F.deconvolution_nd(
                *inputs, stride=self.stride, pad=self.pad,
                outsize=self.outsize, dilate=self.dilate,
                groups=self.groups)

        with chainer.using_config('use_cudnn', use_cudnn), \
                chainer.using_config('autotune', self.autotune):
            gradient_check.check_backward(
                deconvolve, args, y_grad, **self.check_backward_options)
コード例 #10
0
    def check_backward(self, x_data, W_data, b_data, y_grad):
        """Run ``gradient_check.check_backward`` on ``F.convolution_2d``.

        The check runs under the ``use_cudnn``, ``cudnn_deterministic`` and
        ``autotune`` values stored on the test case.  When
        ``self.c_contiguous`` is false the inputs are first replaced by
        non-C-contiguous equivalents (asserted below).
        """
        xp = cuda.get_array_module(x_data)

        if not self.c_contiguous:
            x_data, W_data, y_grad = [
                xp.asfortranarray(a) for a in (x_data, W_data, y_grad)]
            for converted in (x_data, W_data, y_grad):
                self.assertFalse(converted.flags.c_contiguous)
            if b_data is not None:
                # Take every other element of a buffer twice as long, so
                # the resulting view is not C-contiguous.
                strided = xp.empty((len(b_data) * 2,), dtype=self.b.dtype)
                strided[::2] = b_data
                b_data = strided[::2]
                self.assertFalse(b_data.flags.c_contiguous)

        args = (x_data, W_data) + (() if b_data is None else (b_data,))

        def convolve(*inputs):
            return F.convolution_2d(
                *inputs, stride=self.stride, pad=self.pad,
                cover_all=self.cover_all, dilate=self.dilate)

        with chainer.using_config('use_cudnn', self.use_cudnn), \
                chainer.using_config('cudnn_deterministic',
                                     self.cudnn_deterministic), \
                chainer.using_config('autotune', self.autotune):
            gradient_check.check_backward(
                convolve, args, y_grad, dtype='d', atol=5e-4, rtol=5e-3)
コード例 #11
0
    def check_backward(self, x_data, W_data, b_data, y_grad):
        """Run ``gradient_check.check_backward`` on ``Convolution2DFunction``.

        The check runs under the ``use_cudnn`` and ``cudnn_deterministic``
        values stored on the test case.  When ``self.c_contiguous`` is false
        the inputs are first replaced by non-C-contiguous equivalents
        (asserted below).
        """
        xp = cuda.get_array_module(x_data)

        if not self.c_contiguous:
            x_data, W_data, y_grad = [
                xp.asfortranarray(a) for a in (x_data, W_data, y_grad)]
            for converted in (x_data, W_data, y_grad):
                self.assertFalse(converted.flags.c_contiguous)
            if b_data is not None:
                # Take every other element of a buffer twice as long, so
                # the resulting view is not C-contiguous.
                strided = xp.empty((len(b_data) * 2,), dtype=self.b.dtype)
                strided[::2] = b_data
                b_data = strided[::2]
                self.assertFalse(b_data.flags.c_contiguous)

        args = (x_data, W_data) + (() if b_data is None else (b_data,))

        with chainer.using_config('use_cudnn', self.use_cudnn), \
                chainer.using_config('cudnn_deterministic',
                                     self.cudnn_deterministic):
            func = convolution_2d.Convolution2DFunction(
                self.stride, self.pad, self.cover_all)
            gradient_check.check_backward(
                func, args, y_grad, **self.check_backward_options)
コード例 #12
0
 def check_backward(self, args, y_grad, use_cudnn='always'):
     """Run ``gradient_check.check_backward`` on BatchNormalizationFunction.

     The function is built with ``mean=None`` and ``var=None`` and checked
     under the given ``use_cudnn`` value and ``self.train``.
     """
     with chainer.using_config('use_cudnn', use_cudnn):
         with chainer.using_config('train', self.train):
             func = batch_normalization.BatchNormalizationFunction(
                 mean=None, var=None, decay=self.decay, eps=self.eps)
             gradient_check.check_backward(
                 func, args, y_grad, **self.check_backward_options)
コード例 #13
0
ファイル: test_n_step_gru.py プロジェクト: delta2323/chainer
 def test_forward_gpu_volatile(self):
     """Run ``check_forward`` on GPU arrays with back-propagation disabled."""
     def to_gpu_nested(groups):
         # Transfer a list of lists of arrays, keeping the nesting.
         return [[cuda.to_gpu(a) for a in group] for group in groups]

     cudnn_mode = chainer.using_config('use_cudnn', self.use_cudnn)
     no_backprop = chainer.using_config('enable_backprop', False)
     with cudnn_mode, no_backprop:
         self.check_forward(
             cuda.to_gpu(self.hx),
             [cuda.to_gpu(x) for x in self.xs],
             to_gpu_nested(self.ws),
             to_gpu_nested(self.bs))
コード例 #14
0
 def test_call_cudnn_forward(self):
     """Check whether forward calls ``cupy.cudnn.convolution_backward_data``.

     The patch is active only around the forward call; the assertion runs
     after the patch is removed but still inside the config scope.
     """
     with chainer.using_config('use_cudnn', self.use_cudnn), \
             chainer.using_config('cudnn_deterministic',
                                  self.cudnn_deterministic):
         with testing.patch('cupy.cudnn.convolution_backward_data') as func:
             self.forward()
         self.assertEqual(func.called, self.should_call_cudnn)
コード例 #15
0
ファイル: test_init.py プロジェクト: MakotoSeto/chainer
    def test_invalid_config(self):
        """``should_use_cudnn`` must raise ValueError for these config values.

        ``True``, ``False`` and ``'on'`` are each set as ``use_cudnn`` in
        turn.
        """
        for bad_value in (True, False, 'on'):
            with chainer.using_config('use_cudnn', bad_value):
                with self.assertRaises(ValueError):
                    chainer.should_use_cudnn('>=auto')
コード例 #16
0
ファイル: test_n_step_gru.py プロジェクト: unnonouno/chainer
 def call_forward(self, train):
     """Apply ``functions.n_step_bigru`` to the GPU copies of the fixtures.

     ``train`` is used for both the ``enable_backprop`` and ``train``
     configuration values.
     """
     hx, xs, ws, bs = [
         _wrap_variable(_to_gpu(data))
         for data in (self.hx, self.xs, self.ws, self.bs)]
     backprop_mode = chainer.using_config('enable_backprop', train)
     train_mode = chainer.using_config('train', train)
     with backprop_mode, train_mode:
         return functions.n_step_bigru(
             self.n_layers, self.dropout, hx, ws, bs, xs)
コード例 #17
0
 def check_double_backward(self, args, y_grad, x_grad_grad,
                           use_cudnn='always'):
     """Run ``gradient_check.check_double_backward`` on batch normalization.

     The checked function is the square of ``self.batch_normalization``.
     """
     def squared_bn(*inputs):
         out = self.batch_normalization(*inputs)
         # Squaring makes the function nonlinear with respect to beta.
         return out * out

     with chainer.using_config('use_cudnn', use_cudnn):
         with chainer.using_config('train', self.train):
             gradient_check.check_double_backward(
                 squared_bn, args, y_grad, x_grad_grad,
                 **self.check_double_backward_options)
コード例 #18
0
 def test_call_cudnn_backward(self):
     """Check whether backward calls the patched convolutionBackwardData_v3.

     Forward and backward both run under ``self.use_cudnn`` and
     ``self.cudnn_deterministic``; only the backward call is patched.
     """
     target = 'cupy.cudnn.cudnn.convolutionBackwardData_v3'
     with chainer.using_config('use_cudnn', self.use_cudnn), \
             chainer.using_config('cudnn_deterministic',
                                  self.cudnn_deterministic):
         y = self.forward()
         y.grad = self.gy
         with mock.patch(target) as func:
             y.backward()
         self.assertEqual(func.called, self.should_call_cudnn)
コード例 #19
0
ファイル: test_n_step_lstm.py プロジェクト: asi1024/chainer
 def test_forward_nonzero_gpu_test(self):
     """Run ``check_forward`` on device 1 with cuDNN 'always', train=False."""
     # Issue #5347
     # to_gpu should work without setting the current device
     device_id = 1
     self.rnn.to_gpu(device_id)
     cudnn_always = chainer.using_config('use_cudnn', 'always')
     test_mode = chainer.using_config('train', False)
     with cudnn_always, test_mode:
         self.check_forward(
             cuda.to_gpu(self.h, device_id),
             cuda.to_gpu(self.c, device_id),
             [cuda.to_gpu(x, device_id) for x in self.xs])
コード例 #20
0
ファイル: test_inv.py プロジェクト: pfnet/chainer
    def test_singular_gpu(self):
        """``batch_inv`` on an all-zero matrix raises only in debug mode."""
        zeros = numpy.zeros((1, 2, 2), dtype=numpy.float32)
        x = chainer.Variable(cuda.to_gpu(zeros))

        # With debug disabled the call is expected to complete.
        with chainer.using_config('debug', False):
            functions.batch_inv(x)

        # With debug enabled the same call must raise ValueError.
        with chainer.using_config('debug', True), \
                self.assertRaises(ValueError):
            functions.batch_inv(x)
コード例 #21
0
 def __call__(self, cur_word):
     """Given the current word ID, predict the next word.

     The input passes through ``self.embed``, dropout, ``self.mid``,
     dropout again and finally ``self.out``.  The dropout ratio is read
     from the module-level ``args.dropout``.
     """
     # NOTE(review): both dropout calls run under train=True on every call,
     # so dropout presumably stays active during evaluation too -- confirm
     # this is intended.
     # Dropout reference:
     # https://docs.chainer.org/en/stable/reference/generated/chainer.functions.dropout.html?highlight=dropout
     embedded = self.embed(cur_word)
     with chainer.using_config('train', True):
         embedded = F.dropout(embedded, args.dropout)

     hidden = self.mid(embedded)
     with chainer.using_config('train', True):
         hidden = F.dropout(hidden, args.dropout)

     return self.out(hidden)
コード例 #22
0
ファイル: test_init.py プロジェクト: MakotoSeto/chainer
    def test_valid_case_combination(self):
        """Check ``should_use_cudnn`` for 'always', 'auto' and 'never'."""
        # (use_cudnn value, expected for '==always', expected for '>=auto')
        expectations = (
            ('always', True, True),
            ('auto', False, True),
            ('never', False, False),
        )
        for mode, eq_always, ge_auto in expectations:
            with chainer.using_config('use_cudnn', mode):
                for level, expected in (('==always', eq_always),
                                        ('>=auto', ge_auto)):
                    check = self.assertTrue if expected else self.assertFalse
                    check(chainer.should_use_cudnn(level))
コード例 #23
0
    def test_consistency_with_cudnn_cpu(self):
        """Compare ``_apply_backward`` on host arrays and on GPU arrays.

        The host run uses ``use_cudnn='never'`` and the GPU run uses
        ``use_cudnn='always'``; the output and both input gradients must
        be close.
        """
        with chainer.using_config('use_cudnn', 'never'):
            x_cpu, grid_cpu, y_cpu = self._apply_backward(
                self.x, self.grid, self.grads)

        with chainer.using_config('use_cudnn', 'always'):
            gpu_arrays = [
                cuda.to_gpu(a) for a in (self.x, self.grid, self.grads)]
            x_cudnn, grid_cudnn, y_cudnn = self._apply_backward(*gpu_arrays)

        comparisons = (
            (y_cpu.data, y_cudnn.data),
            (x_cpu.grad, x_cudnn.grad),
            (grid_cpu.grad, grid_cudnn.grad),
        )
        for expected, actual in comparisons:
            testing.assert_allclose(expected, actual)
コード例 #24
0
ファイル: test_n_step_gru.py プロジェクト: delta2323/chainer
 def forward(self, train):
     """Apply ``functions.n_step_bigru`` to the fixtures wrapped as Variables.

     ``train`` is used for both the ``enable_backprop`` and ``train``
     configuration values; ``use_cudnn`` comes from ``self.use_cudnn``.
     """
     def wrap_nested(groups):
         # Wrap every array of a list of lists, keeping the nesting.
         return [[chainer.Variable(a) for a in group] for group in groups]

     with chainer.using_config('use_cudnn', self.use_cudnn):
         with chainer.using_config('enable_backprop', train):
             with chainer.using_config('train', train):
                 h = chainer.Variable(self.hx)
                 xs = [chainer.Variable(x) for x in self.xs]
                 ws = wrap_nested(self.ws)
                 bs = wrap_nested(self.bs)
                 return functions.n_step_bigru(
                     self.n_layers, self.dropout, h, ws, bs, xs)
コード例 #25
0
    def _run(self):
        """Run forward and backward on the GPU with deterministic cuDNN.

        Returns:
            The ``(x, W, b, y)`` tuple produced by ``self._run_forward``,
            after ``y.backward()`` has been called.
        """
        with chainer.using_config('use_cudnn', 'always'), \
                chainer.using_config('cudnn_deterministic', True):
            # verify data continuity and move to gpu
            host_arrays = self._contiguous(
                self.x, self.W, self.b, self.gy)
            x_data, W_data, b_data, gy_data = [
                cuda.to_gpu(a) for a in host_arrays]

            x, W, b, y = self._run_forward(x_data, W_data, b_data)
            y.grad = gy_data
            y.backward()
            return x, W, b, y
コード例 #26
0
ファイル: backend.py プロジェクト: fukatani/chainer
 def __enter__(self):
     """Enter the three configuration scopes held by this object.

     The context managers are stored in ``self._contexts`` in the order
     they are entered.

     Returns:
         This object.
     """
     settings = (
         ('use_cudnn', self.use_cudnn),
         ('cudnn_deterministic', self.cudnn_deterministic),
         ('autotune', self.autotune),
     )
     self._contexts = [
         chainer.using_config(key, value) for key, value in settings]
     for context in self._contexts:
         context.__enter__()
     return self
コード例 #27
0
ファイル: eval.py プロジェクト: souravsingh/models
    def predict(model, test_iter):
        """Return the concatenated softmax outputs of ``model`` over an iterator.

        The iterator is reset first.  Each batch is converted with
        ``convert.concat_examples`` for device ``args.gpu`` and only its
        first array is fed to the model, with ``train`` and
        ``enable_backprop`` both disabled.
        """
        test_iter.reset()
        collected = []

        for batch in test_iter:
            in_arrays = convert.concat_examples(batch, args.gpu)
            with chainer.using_config('train', False):
                with chainer.using_config('enable_backprop', False):
                    scores = model(in_arrays[0])
                    prob = chainer.functions.softmax(scores)
            collected.append(prob.data)

        return concat_arrays(collected)
コード例 #28
0
ファイル: ch.py プロジェクト: seasky100/DL_benchmarks
 def __init__(self, model, ngpu, options,
              data_options=None, time_options=None):
     """Store the model (one copy per GPU) and apply benchmark options.

     Args:
         model: Model to run.  With ``ngpu >= 1`` it is deep-copied once
             per device and each copy is sent to that GPU.
         ngpu: Number of GPUs; values below 1 select the non-GPU path.
         options: Mapping that must contain ``'benchmark_mode'``; a truthy
             value turns on Chainer's ``autotune`` configuration.
         data_options: Accepted for interface compatibility; not used in
             the visible code.
         time_options: Stored on the instance as ``time_options``.
     """
     self.ngpu = ngpu
     self.gpu_mode = ngpu >= 1
     self.time_options = time_options

     if self.gpu_mode:
         # In GPU mode ``self.model`` is a list with one replica per device.
         self.model = [copy.deepcopy(model).to_gpu(i) for i in range(ngpu)]
     else:
         self.model = model
     if options['benchmark_mode']:
         # ``chainer.using_config`` only returns a context manager; calling
         # it without ``with`` changes nothing.  Assign the configuration
         # entry directly so that autotune really is switched on.
         chainer.config.autotune = True
コード例 #29
0
ファイル: train.py プロジェクト: souravsingh/models
def iter_predict(Xs, Ms):
    """Return the classifier logits for ``Xs``, computed batch by batch.

    Batches come from ``iter_data`` with ``n_batch_train`` items each and
    are run through the module-level ``model`` and ``clf_head`` with
    ``train`` and ``enable_backprop`` disabled.

    Args:
        Xs: Inputs handed to ``iter_data``.
        Ms: Second sequence handed to ``iter_data``; its batches are
            iterated alongside ``Xs`` but are not fed to the model.

    Returns:
        The per-batch logits copied to the host and concatenated along
        axis 0.
    """
    batch_logits = []
    with chainer.using_config('train', False), \
            chainer.using_config('enable_backprop', False):
        # The batch size and the array built from the second batch were
        # computed but never read, so they are no longer created here.
        for xmb, _mmb in iter_data(
                Xs, Ms, n_batch=n_batch_train, truncate=False, verbose=True):
            XMB = model.xp.asarray(xmb)
            h = model(XMB)
            clf_logits = clf_head(h, XMB)
            batch_logits.append(cuda.to_cpu(clf_logits.array))
    return np.concatenate(batch_logits, 0)
コード例 #30
0
ファイル: test_rsgcn.py プロジェクト: ir5/chainer-chemistry
def test_forward_cpu_graph_invariant(model, data):
    """The model output must not change when the atoms are permuted."""
    def forward_in_test_mode(atoms, adjs):
        # This RSGCN uses dropout, so we need to forward with test mode
        # to remove stochastic calculation.
        with chainer.using_config('train', False):
            return cuda.to_cpu(model(atoms, adjs).data)

    atom_data, adj_data = data[0], data[1]
    y_actual = forward_in_test_mode(atom_data, adj_data)

    permutation_index = numpy.random.permutation(atom_size)
    permute_atom_data = permute_node(atom_data, permutation_index)
    permute_adj_data = permute_adj(adj_data, permutation_index)
    permute_y_actual = forward_in_test_mode(
        permute_atom_data, permute_adj_data)

    assert numpy.allclose(y_actual, permute_y_actual, rtol=1.e-4, atol=1.e-5)
コード例 #31
0
 def test_call_cudnn_forward(self):
     """Check whether forward calls the patched ``softmaxForward``.

     The patched function's ``called`` flag must equal ``self.expect``.
     """
     with chainer.using_config('use_cudnn', self.use_cudnn), \
             testing.patch('cupy.cuda.cudnn.softmaxForward') as func:
         self.forward()
         self.assertEqual(func.called, self.expect)
コード例 #32
0
def main():
    """Evaluate a pretrained model on the test split and save the results.

    Parses the command line, derives the experiment settings from the model
    file name via ``extract_mode``, rebuilds the network, loads the weights
    from the ``.npz`` file, runs ``ActionUnitEvaluator`` over the test
    dataset and writes the returned observation as JSON next to the model
    file.  Returns without doing anything when the model path does not end
    with ``"model.npz"``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)'
                        )  # open_crf layer only works for CPU mode
    parser.add_argument(
        "--model", "-m",
        help="pretrained model file path")  # which contains pretrained target
    parser.add_argument("--pretrained_model", "-pre", default="resnet101")
    parser.add_argument("--memcached_host", default="127.0.0.1")
    parser.add_argument('--mean_rgb',
                        default=config.ROOT_PATH + "BP4D/idx/mean_rgb.npy",
                        help='image mean .npy file')
    parser.add_argument('--mean_flow',
                        default=config.ROOT_PATH + "BP4D/idx/mean_flow.npy",
                        help='image mean .npy file')
    parser.add_argument('--proc_num',
                        type=int,
                        default=10,
                        help="multiprocess fetch data process number")
    parser.add_argument('--batch',
                        '-b',
                        type=int,
                        default=10,
                        help='mini batch size')
    args = parser.parse_args()
    # NOTE(review): --model has no default and is not required, so
    # args.model is None when the option is omitted and the call below
    # raises AttributeError.
    if not args.model.endswith("model.npz"):
        return
    # The experiment settings are recovered from the model file name.
    model_info = extract_mode(args.model)
    database = model_info["database"]
    fold = model_info["fold"]
    split_idx = model_info["split_idx"]
    backbone = model_info["backbone"]
    use_paper_num_label = model_info["use_paper_num_label"]
    use_roi_align = model_info["use_roi_align"]
    two_stream_mode = model_info['two_stream_mode']
    T = model_info["T"]

    adaptive_AU_database(database)
    paper_report_label, class_num = squeeze_label_num_report(
        database, use_paper_num_label)
    paper_report_label_idx = list(paper_report_label.keys())
    # class_num returned above is overwritten in both branches below.
    if not paper_report_label_idx:
        paper_report_label_idx = None
        class_num = len(config.AU_SQUEEZE)
    else:
        class_num = len(paper_report_label_idx)

    # Print the recovered settings in a stable (sorted) order.
    model_print_dict = OrderedDict()
    for key, value in model_info.items():
        model_print_dict[key] = str(value)
    print("""
        {0}
        ======================================
        INFO:
        {1}
        ======================================
        """.format(args.model,
                   json.dumps(model_print_dict, sort_keys=True, indent=8)))

    # One extractor for a single stream, two (RGB then optical flow) for
    # the rgb_flow mode.  The list stays empty for any other backbone.
    au_rcnn_train_chain_list = []
    if backbone == 'resnet101':
        if two_stream_mode != TwoStreamMode.rgb_flow:
            pretrained_model = backbone
            au_rcnn = AU_RCNN_Resnet101(
                pretrained_model=pretrained_model,
                min_size=config.IMG_SIZE[0],
                max_size=config.IMG_SIZE[1],
                use_roi_align=use_roi_align,
                use_optical_flow_input=(
                    two_stream_mode == TwoStreamMode.optical_flow),
                temporal_length=T)
            au_rcnn_train_chain = AU_RCNN_ROI_Extractor(au_rcnn)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain)
        else:  # rgb_flow mode
            au_rcnn_rgb = AU_RCNN_Resnet101(pretrained_model=backbone,
                                            min_size=config.IMG_SIZE[0],
                                            max_size=config.IMG_SIZE[1],
                                            use_roi_align=use_roi_align,
                                            use_optical_flow_input=False,
                                            temporal_length=T)

            au_rcnn_optical_flow = AU_RCNN_Resnet101(
                pretrained_model=backbone,
                min_size=config.IMG_SIZE[0],
                max_size=config.IMG_SIZE[1],
                use_roi_align=use_roi_align,
                use_optical_flow_input=True,
                temporal_length=T)
            au_rcnn_train_chain_rgb = AU_RCNN_ROI_Extractor(au_rcnn_rgb)
            au_rcnn_train_chain_optical_flow = AU_RCNN_ROI_Extractor(
                au_rcnn_optical_flow)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_rgb)
            au_rcnn_train_chain_list.append(au_rcnn_train_chain_optical_flow)
            au_rcnn = au_rcnn_rgb

    model = Wrapper(au_rcnn_train_chain_list,
                    class_num,
                    database,
                    T,
                    two_stream_mode=two_stream_mode,
                    gpus=[args.gpu, args.gpu])

    # NOTE(review): this message is printed after the weights have been
    # loaded, not before.
    chainer.serializers.load_npz(args.model, model)
    print("loading {}".format(args.model))
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()

    mc_manager = PyLibmcManager(args.memcached_host)
    img_dataset = AUDataset(database=database,
                            L=T,
                            fold=fold,
                            split_name='test',
                            split_index=split_idx,
                            mc_manager=mc_manager,
                            train_all_data=False,
                            two_stream_mode=two_stream_mode,
                            paper_report_label_idx=paper_report_label_idx)

    video_dataset = TransformDataset(
        img_dataset,
        Transform(L=T,
                  mean_rgb_path=args.mean_rgb,
                  mean_flow_path=args.mean_flow))
    # A single process uses the serial iterator; otherwise batches are
    # fetched by args.proc_num worker processes.
    if args.proc_num == 1:
        test_iter = SerialIterator(video_dataset,
                                   batch_size=args.batch,
                                   repeat=False,
                                   shuffle=False)
    else:
        test_iter = MultiprocessIterator(video_dataset,
                                         batch_size=args.batch,
                                         n_processes=args.proc_num,
                                         repeat=False,
                                         shuffle=False,
                                         n_prefetch=10,
                                         shared_mem=10000000)

    # Evaluation runs without back-propagation, with deterministic cuDNN
    # and with the train flag off.
    with chainer.no_backprop_mode(), chainer.using_config(
            'cudnn_deterministic', True), chainer.using_config('train', False):
        # Predictions go to "pred_<prefix>.npz" beside the model file,
        # where <prefix> is the model file name up to its last underscore.
        predict_data_path = os.path.dirname(
            args.model) + os.path.sep + "pred_" + os.path.basename(
                args.model)[:os.path.basename(args.model).rindex("_")] + ".npz"
        print("npz_path: {}".format(predict_data_path))
        au_evaluator = ActionUnitEvaluator(
            test_iter,
            model,
            args.gpu,
            database=database,
            paper_report_label=paper_report_label,
            converter=lambda batch, device: concat_examples_not_labels(
                batch, device, padding=0),
            T=T,
            output_path=predict_data_path)
        observation = au_evaluator.evaluate()
        # The observation is written to "evaluation_result_<prefix>.json"
        # in the same directory.
        with open(os.path.dirname(args.model) + os.path.sep + "evaluation_result_{0}.json".format(os.path.basename(args.model)\
                                                                            [:os.path.basename(args.model).rindex("_")]
                                                           ), "w") as file_obj:
            file_obj.write(
                json.dumps(observation, indent=4, separators=(',', ': ')))
            file_obj.flush()
コード例 #33
0
 def predict(self, x):
     """Return the softmax of ``self._logits(x)`` computed with train=False."""
     with chainer.using_config('train', False):
         logits = self._logits(x)
         probabilities = F.softmax(logits)
         return probabilities
コード例 #34
0
def sample_once(encdec,
                src_batch,
                tgt_batch,
                src_mask,
                src_indexer,
                tgt_indexer,
                eos_idx,
                max_nb=None,
                s_unk_tag="#S_UNK#",
                t_unk_tag="#T_UNK#"):
    """Decode one batch twice and print source, target and both samples.

    ``encdec`` is called once with ``use_best_for_sample=True`` and once
    with ``use_best_for_sample=False``, both with a length argument of 50,
    under ``train=False`` and with back-propagation disabled.  For every
    sentence the index sequence, the raw de-indexed text and the
    post-processed text are printed for the source, the target and the two
    samples.  Nothing is returned.

    This block uses Python 2 syntax (``print`` statements, ``xrange``).

    Args:
        encdec: Callable model returning ``(sample, score, attn_list)``.
        src_batch: Batched source sequences.
        tgt_batch: Batched target sequences.
        src_mask: Mask passed to ``encdec`` and used to de-batch the source.
        src_indexer: Indexer used to de-index source sequences.
        tgt_indexer: Indexer used to de-index target and sampled sequences.
        eos_idx: End-of-sentence index used when de-batching the target and
            the samples.
        max_nb: If not ``None``, printing stops once the sentence number
            exceeds this value.
        s_unk_tag: Unknown-word tag passed to the source indexer.
        t_unk_tag: Unknown-word tag passed to the target indexer.
    """
    with chainer.using_config("train", False), chainer.no_backprop_mode():
        print "sample"
        # First pass, with use_best_for_sample=True.
        sample_greedy, score, attn_list = encdec(src_batch,
                                                 50,
                                                 src_mask,
                                                 use_best_for_sample=True,
                                                 need_score=True)

        #                 sample, score = encdec(src_batch, 50, src_mask, use_best_for_sample = False)
        # Sanity checks: source, target and sample must have the same
        # number of sentences.
        assert len(src_batch[0].data) == len(tgt_batch[0].data)
        assert len(sample_greedy[0]) == len(src_batch[0].data)

        # Split the batched arrays back into one sequence per sentence.
        debatched_src = de_batch(src_batch,
                                 mask=src_mask,
                                 eos_idx=None,
                                 is_variable=True)
        debatched_tgt = de_batch(tgt_batch, eos_idx=eos_idx, is_variable=True)
        debatched_sample = de_batch(sample_greedy, eos_idx=eos_idx)

        # Second pass, with use_best_for_sample=False.
        sample_random, score_random, attn_list_random = encdec(
            src_batch,
            50,
            src_mask,
            use_best_for_sample=False,
            need_score=True)

        debatched_sample_random = de_batch(sample_random, eos_idx=eos_idx)

        for sent_num in xrange(len(debatched_src)):
            # NOTE(review): with ``>`` the loop prints sentences 0..max_nb,
            # i.e. max_nb + 1 of them -- confirm this is intended.
            if max_nb is not None and sent_num > max_nb:
                break
            src_idx_seq = debatched_src[sent_num]
            tgt_idx_seq = debatched_tgt[sent_num]
            sample_idx_seq = debatched_sample[sent_num]
            sample_random_idx_seq = debatched_sample_random[sent_num]

            print "sent num", sent_num

            # Each of the four sequences is paired with its own unknown
            # tag, indexer and EOS index; the source uses no EOS index.
            for name, seq, unk_tag, indexer, this_eos_idx in zip(
                    "src tgt sample sample_random".split(" "), [
                        src_idx_seq, tgt_idx_seq, sample_idx_seq,
                        sample_random_idx_seq
                    ], [s_unk_tag, t_unk_tag, t_unk_tag, t_unk_tag],
                [src_indexer, tgt_indexer, tgt_indexer, tgt_indexer],
                [None, eos_idx, eos_idx, eos_idx]):
                print name, "idx:", seq
                print name, "raw:", " ".join(
                    indexer.deconvert_swallow(
                        seq, unk_tag=unk_tag,
                        eos_idx=this_eos_idx)).encode('utf-8')
                print name, "postp:", indexer.deconvert(
                    seq, unk_tag=unk_tag, eos_idx=this_eos_idx).encode('utf-8')
コード例 #35
0
ファイル: cnn_train.py プロジェクト: bekleyis95/RNN-RecSys
def main(params):
    """Train a CNN classifier with early stopping, then predict the test set.

    The run configuration is echoed to stdout and written to
    ``./CNN/LOG/configuration_<current_depth>.txt``.  During training the
    model with the lowest ``validation/main/loss`` is snapshotted; that file
    is moved to ``./CNN/PARAMS`` afterwards.  A fresh model is then built with
    ``mode='test-predict'`` and ``load_param_node_name=current_depth``
    (presumably so that it loads the snapshot -- confirm in the model class)
    and used to score the test split.  Per-sample probabilities are written
    to ``CNN/RESULT/probability_<current_depth>.csv``.

    Args:
        params (dict): Run settings.  Keys read here: ``gpu``, ``unit``,
            ``batchsize``, ``epoch``, ``output_dimensions``,
            ``embedding_dimensions``, ``current_depth`` (concatenated into
            file names, so it must be a ``str``), ``model_type``,
            ``embedding_weight``, ``out_channels``, ``fine_tuning``,
            ``upper_depth`` (only when ``fine_tuning == 1``),
            ``learning_categories`` and ``input_data`` (a mapping with
            ``x_trn``/``y_trn``, ``x_val``/``y_val`` and ``x_tst``/``y_tst``).

    Returns:
        numpy.ndarray: ``int8`` array of shape
        ``(len(x_tst), params["output_dimensions"])`` holding what
        ``select_function`` returns for each test batch.
    """
    use_gpu = params["gpu"] >= 0

    config_lines = [
        '# gpu: {}'.format(params["gpu"]),
        '# unit: {}'.format(params["unit"]),
        '# batch-size: {}'.format(params["batchsize"]),
        '# epoch: {}'.format(params["epoch"]),
        '# number of category: {}'.format(params["output_dimensions"]),
        '# embedding dimension: {}'.format(params["embedding_dimensions"]),
        '# current layer: {}'.format(params["current_depth"]),
        '# model-type: {}'.format(params["model_type"]),
    ]

    print("")
    for line in config_lines:
        print(line)
    print('')

    # Context manager so the log file is closed even if a write fails.
    with open('./CNN/LOG/configuration_' + params["current_depth"] + '.txt',
              'w') as f:
        for line in config_lines:
            f.write(line + "\n")
        f.write("\n")

    embedding_weight = params["embedding_weight"]
    embedding_dimensions = params["embedding_dimensions"]
    input_data = params["input_data"]
    x_train = input_data['x_trn']
    x_val = input_data['x_val']
    y_train = input_data['y_trn']
    y_val = input_data['y_val']

    cnn_params = {
        "cudnn": USE_CUDNN,
        "out_channels": params["out_channels"],
        "row_dim": embedding_dimensions,
        "batch_size": params["batchsize"],
        "hidden_dim": params["unit"],
        "n_classes": params["output_dimensions"],
        "embedding_weight": embedding_weight,
    }
    if params["fine_tuning"] == 0:
        cnn_params['mode'] = 'scratch'
    elif params["fine_tuning"] == 1:
        cnn_params['mode'] = 'fine-tuning'
        cnn_params['load_param_node_name'] = params['upper_depth']

    if params["model_type"] == "XML-CNN":
        model = xml_cnn_model.CNN(**cnn_params)
    else:
        model = cnn_model.CNN(**cnn_params)

    if use_gpu:
        chainer.cuda.get_device_from_id(params["gpu"]).use()
        model.to_gpu()

    # Learning CNN by training and validation data
    # =========================================================

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train = tuple_dataset.TupleDataset(x_train, y_train)
    val = tuple_dataset.TupleDataset(x_val, y_val)

    train_iter = chainer.iterators.SerialIterator(train,
                                                  params["batchsize"],
                                                  repeat=True,
                                                  shuffle=False)
    val_iter = chainer.iterators.SerialIterator(val,
                                                params["batchsize"],
                                                repeat=False,
                                                shuffle=False)

    # Early stopping monitors the loss on the validation data
    # (validation/main/loss); params["epoch"] is the upper bound.
    # =========================================================
    stop_trigger = training.triggers.EarlyStoppingTrigger(
        monitor='validation/main/loss', max_trigger=(params["epoch"], 'epoch'))

    updater = MyUpdater(train_iter,
                        optimizer,
                        params["output_dimensions"],
                        device=params["gpu"])
    trainer = training.Trainer(updater, stop_trigger, out='./CNN/')

    trainer.extend(
        MyEvaluator(val_iter,
                    model,
                    class_dim=params["output_dimensions"],
                    device=params["gpu"]))
    trainer.extend(extensions.dump_graph('main/loss'))

    # Snapshot the model only when the validation loss reaches a new minimum.
    snapshot_name = ('parameters_for_multi_label_model_' +
                     params["current_depth"] + '.npz')
    trainer.extend(extensions.snapshot_object(model, snapshot_name),
                   trigger=training.triggers.MinValueTrigger(
                       'validation/main/loss', trigger=(1, 'epoch')))

    trainer.extend(
        extensions.LogReport(log_name='LOG/log_' + params["current_depth"] +
                             ".txt",
                             trigger=(1, 'epoch')))

    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch',
                              file_name='LOG/loss_' + params["current_depth"] +
                              '.png'))

    trainer.run()

    src = './CNN/'
    dst = './CNN/PARAMS'
    shutil.move(os.path.join(src, snapshot_name),
                os.path.join(dst, snapshot_name))

    # Prediction process for test data.
    # =========================================================
    print("-" * 50)
    print("Testing...")

    x_tst = input_data['x_tst']
    n_eval = len(x_tst)

    cnn_params['mode'] = 'test-predict'
    cnn_params['load_param_node_name'] = params["current_depth"]

    if params["model_type"] == "XML-CNN":
        model = xml_cnn_model.CNN(**cnn_params)
    else:
        model = cnn_model.CNN(**cnn_params)

    # Mirror the training phase: only touch the GPU when one was requested.
    if use_gpu:
        model.to_gpu()

    output = np.zeros([n_eval, params["output_dimensions"]], dtype=np.int8)
    output_probability_file_name = "CNN/RESULT/probability_" + params[
        "current_depth"] + ".csv"
    test_batch_size = params["batchsize"]

    # One handle for the header and every batch instead of reopening the
    # file in append mode on each iteration.
    with open(output_probability_file_name, 'w') as f, \
            chainer.using_config('train', False), \
            chainer.no_backprop_mode():
        f.write(','.join(params["learning_categories"]) + "\n")
        for i in tqdm(six.moves.range(0, n_eval, test_batch_size),
                      desc="Predict Test loop"):
            x_batch = x_tst[i:i + test_batch_size]
            if use_gpu:
                x_batch = chainer.cuda.to_gpu(x_batch)
            net_output = F.sigmoid(model(chainer.Variable(x_batch)))
            output[i:i + test_batch_size] = select_function(net_output.data)

            # Probabilities below 0.001 are written as exactly 0.
            tmp = chainer.cuda.to_cpu(net_output.data)
            tmp[tmp < 0.001] = 0
            np.savetxt(f, tmp, fmt='%.4g', delimiter=",")
    return output
コード例 #36
0
def _unit_float(img):
    """Convert *img* (PIL image or array) to float32 and divide by 255."""
    return np.asarray(np.float32(img) / 255.0)


def _load_seg_tile(path, fill_value):
    """Load the tile at *path* as grayscale, scaled by 1/255.

    When the file does not exist a constant 50x50 tile of *fill_value*
    is used instead.
    """
    if os.path.isfile(path):
        tile = np.array(Image.open(path).convert('L'))
    else:
        tile = np.full((50, 50), fill_value, dtype=np.uint8)
    return _unit_float(tile)


def _seg_channels(prefix, suffix, file_name, num, fill_value):
    """Return the hall / shadow / hyouzi segmentation tiles for tile *num*."""
    return [
        _load_seg_tile(
            "./data/" + prefix + "_" + kind + suffix + "/" + file_name +
            "_" + str(num) + ".bmp", fill_value)
        for kind in ("hall", "shadow", "hyouzi")
    ]


def predict(filter_num=5, inpaint=1, save_file=""):
    """Classify overlapping 50x50 tiles of every image and report F1 scores.

    For each ``.bmp`` listed in ``./data/2.5m_half/`` the matching image in
    ``./data/2.5m_median41`` is cut into 50x50 tiles with a stride of 25
    pixels.  Each tile is fed to the loaded classifier; every tile predicted
    as class 1 brightens its region of a per-image mask by 63 (saturating at
    255).  The mask is post-processed by ``detection_crack``, scored by
    ``F1_measure`` and saved to ``CNN_output/<name>.bmp``.  Aggregate
    precision, recall, specificity and F1 are printed and written to
    ``./F1/F1.txt``.

    Args:
        filter_num: Not used by this function.
        inpaint: Not used by this function.
        save_file: Not used by this function.

    Returns:
        int: Always ``0``.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only load a model
    # file that comes from a trusted source.
    with open('./np_data/svm_over_rap.sav', 'rb') as svm_file:
        svm = pickle.load(svm_file)

    seg = 0
    # Define the model ...
    model = L.Classifier(CNN())
    # ... and load its trained weights.
    serializers.load_npz("./_snapshot_epoch-50", model,
                         path='updater/model:main/')

    TP = 0.0
    FP = 0.0
    FN = 0.0
    TN = 0.0
    # Input layout selector: 3 = colour, 33 = colour + segmentation,
    # 1 = grayscale, 13 = grayscale + segmentation.
    data_channels = 13
    data_dir_path1 = u"./data/2.5m_median41"
    data_dir_path2 = u"./data/2.5m_half"
    file_list = os.listdir(r'./data/2.5m_half/')

    # Tile geometry: overlap in pixels (also used as the stride) and size.
    over_rap = 25
    new_img_height = 50
    new_img_width = 50

    nnum = 0
    for file_name in file_list:
        _, ext = os.path.splitext(file_name)
        if ext != u'.bmp':
            continue

        nnum = nnum + 1
        print(file_name, nnum)
        abs_name1 = data_dir_path1 + '/' + file_name
        abs_name2 = data_dir_path2 + '/' + file_name
        file_name = file_name[:-4]

        if data_channels == 3 or data_channels == 33:
            src_img = cv2.imread(abs_name1)
            height, width, _ = src_img.shape

        if data_channels == 1 or data_channels == 13:
            src_img = cv2.imread(abs_name1, 0)
            height, width = src_img.shape

        f1_img = cv2.imread(abs_name2)

        mask = np.zeros((height, width), np.uint8)
        width_split = int(width / (new_img_width - over_rap)) - 1
        height_split = int(height / (new_img_height - over_rap)) - 1

        num = 0
        for h in range(height_split):
            height_start = h * over_rap
            height_end = height_start + new_img_height

            for w in range(width_split):
                width_start = w * over_rap
                width_end = width_start + new_img_width

                # 1-based tile index; also part of the segmentation file names.
                num = num + 1

                clp1 = src_img[height_start:height_end, width_start:width_end]
                PIL_data = Image.fromarray(clp1)

                if data_channels == 3:
                    # Colour image.
                    r, g, b = PIL_data.split()
                    x = np.asarray(
                        [_unit_float(r), _unit_float(g), _unit_float(b)])

                elif data_channels == 33:
                    # Colour image plus segmentation channels.
                    r, g, b = PIL_data.split()
                    x = np.asarray(
                        [_unit_float(b), _unit_float(g), _unit_float(r)] +
                        _seg_channels("seg", "_batch", file_name, num, 255))

                elif data_channels == 1:
                    # Median-filter-corrected grayscale image.
                    x = _unit_float(PIL_data)[None, ...]

                elif data_channels == 13:
                    # Median-filter-corrected grayscale image plus
                    # segmentation channels.
                    x = np.asarray(
                        [_unit_float(PIL_data)] +
                        _seg_channels("2.5m", "_over", file_name, num, 0))

                # A single forward pass in inference mode is enough to get
                # the predicted class.
                with chainer.using_config('train', False), \
                        chainer.using_config('enable_backprop', False):
                    scores = model.predictor(x[None, ...]).data
                label = scores.argmax(axis=1)[0]

                if label == 1:
                    # Brighten the mask by 63 for every positive tile; values
                    # above 250 are pinned to 255.  Work in int32 so the
                    # addition cannot wrap around in uint8.
                    region = mask[height_start:height_end,
                                  width_start:width_end].astype(np.int32) + 63
                    region[region > 250] = 255
                    mask[height_start:height_end,
                         width_start:width_end] = region.astype(np.uint8)

        crack_mask = detection_crack(mask, file_name, svm)
        a, b, c, d = F1_measure(f1_img, crack_mask, file_name, seg,
                                "./data/2.5m_gt_gray_own3/")
        TP = TP + a
        FP = FP + b
        FN = FN + c
        TN = TN + d

        cv2.imwrite('CNN_output/' + file_name + '.bmp', mask)

    # The 0.001 terms keep the ratios defined when a count is zero.
    Precision = (TP + 0.001) / (TP + FP + 0.001)
    Recall = (TP + 0.001) / (TP + FN + 0.001)
    F1 = 2 * Recall * Precision / (Recall + Precision)
    Specificity = (TN + 0.001) / (TN + FP + 0.001)

    print("\n\nTOTAL F1-measure")
    print("Precision={:.4}".format(Precision))
    print("Recall={:.4}".format(Recall))
    print("Specificity={:.4}".format(Specificity))
    print("F1={:.4}\n\n".format(F1))

    # Context manager so the report is flushed and closed even on error.
    with open("./F1/F1.txt", 'w') as f:
        f.write("Precision={:.4}".format(Precision) + '\n')
        f.write("Recall={:.4}".format(Recall) + "\n")
        f.write("F1={:.4}".format(F1) + '\n')
        f.write("Specificity={:.4}".format(Specificity) + '\n')

    return 0
コード例 #37
0
 def setUp(self):
     """Create random (2, 3) input/gradient arrays and the expected cuDNN flag.

     ``self.expect`` is what ``chainer.should_use_cudnn('>=auto')`` reports
     while ``use_cudnn`` is set to ``self.use_cudnn``.
     """
     shape = (2, 3)
     rng = cuda.cupy.random
     self.x = rng.uniform(-1, 1, shape).astype(self.dtype)
     self.gy = rng.uniform(-1, 1, shape).astype(self.dtype)
     cudnn_mode = chainer.using_config('use_cudnn', self.use_cudnn)
     with cudnn_mode:
         self.expect = chainer.should_use_cudnn('>=auto')
コード例 #38
0
def train(
    main_script_path,
    func_train_one_batch,
    param_dict,
    save_distance_matrix=False,
):
    """Train a ``ModifiedGoogLeNet``, evaluating and plotting after every epoch.

    Each epoch runs ``p.num_batches_per_epoch`` updates, evaluates the model
    on the train-eval and test streams, records the scores in a
    ``utils.Logger`` and keeps a deep copy of the model/optimizer whenever
    the first test "soft" score improves.  Training stops early on
    ``KeyboardInterrupt``; the logger and hyperparameters are saved either
    way.

    Args:
        main_script_path: Path of the calling script; its base name (without
            extension) becomes part of the output directory name.
        func_train_one_batch: Callable ``(model, p, batch) -> loss`` whose
            result supports ``.backward()`` and ``.data``.
        param_dict (dict): Hyperparameters, forwarded as keyword arguments to
            ``utils.Logger``.  Attributes read from the resulting object:
            ``method``, ``batch_size``, ``dataset``, ``crop_size``,
            ``out_dim``, ``normalize_output``, ``optimizer``,
            ``learning_rate``, ``l2_weight_decay``, ``num_epochs``,
            ``num_batches_per_epoch`` and ``distance_type``.
        save_distance_matrix (bool): Passed to ``evaluate`` as
            ``return_distance_matrix``; when true the distance matrices are
            also plotted each epoch.

    Returns:
        bool: ``True`` if training was interrupted by ``KeyboardInterrupt``,
        ``False`` otherwise.
    """
    script_filename = os.path.splitext(os.path.basename(main_script_path))[0]

    # Default to inference mode globally; training mode is switched on only
    # around the forward/backward pass below.
    chainer.config.train = False
    device = 0
    config_parser = six.moves.configparser.ConfigParser()
    config_parser.read('config')
    log_dir_path = os.path.expanduser(config_parser.get('logs', 'dir_path'))

    p = utils.Logger(log_dir_path, **param_dict)  # hyperparameters

    ##########################################################
    # load database
    ##########################################################
    if p.method == 'proxy_nca':
        iteration_scheme = 'clustering'
    else:
        iteration_scheme = p.method
    streams = data_provider.get_streams(p.batch_size,
                                        dataset=p.dataset,
                                        method=iteration_scheme,
                                        crop_size=p.crop_size)
    stream_train, stream_train_eval, stream_test = streams
    iter_train = stream_train.get_epoch_iterator()

    ##########################################################
    # construct the model
    ##########################################################
    if p.method == 'proxy_nca':
        dataset_class = data_provider.get_dataset_class(p.dataset)
        labels = dataset_class(['train'],
                               sources=['targets'],
                               load_in_memory=True).data_sources
        num_classes = len(np.unique(labels))
        model = ModifiedGoogLeNet(p.out_dim, p.normalize_output, num_classes)
    else:
        model = ModifiedGoogLeNet(p.out_dim, p.normalize_output)

    if device >= 0:
        model.to_gpu()
    model.cleargrads()
    xp = model.xp
    optimizer_class = getattr(chainer.optimizers, p.optimizer)
    optimizer = optimizer_class(p.learning_rate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(p.l2_weight_decay))

    print(p)
    stop = False
    logger = utils.Logger(log_dir_path)
    logger.soft_test_best = [0]
    time_origin = time.time()
    try:
        for epoch in range(p.num_epochs):
            time_begin = time.time()
            epoch_losses = []

            for i in tqdm(range(p.num_batches_per_epoch),
                          desc='# {}'.format(epoch)):
                with chainer.using_config('train', True):
                    loss = func_train_one_batch(model, p, next(iter_train))
                    loss.backward()
                optimizer.update()
                model.cleargrads()
                epoch_losses.append(loss.data)
                # Drop the reference so the computational graph can be freed.
                del loss

            loss_average = cuda.to_cpu(xp.array(
                xp.hstack(epoch_losses).mean()))

            # average accuracy and distance matrix for training data
            D, soft, hard, retrieval = evaluate(
                model,
                stream_train_eval.get_epoch_iterator(),
                p.distance_type,
                return_distance_matrix=save_distance_matrix)

            # average accuracy and distance matrix for testing data
            D_test, soft_test, hard_test, retrieval_test = evaluate(
                model,
                stream_test.get_epoch_iterator(),
                p.distance_type,
                return_distance_matrix=save_distance_matrix)

            time_end = time.time()
            epoch_time = time_end - time_begin
            total_time = time_end - time_origin

            logger.epoch = epoch
            logger.total_time = total_time
            logger.loss_log.append(loss_average)
            logger.train_log.append([soft[0], hard[0], retrieval[0]])
            logger.test_log.append(
                [soft_test[0], hard_test[0], retrieval_test[0]])

            # retain the model if it scored the best test acc. ever
            if soft_test[0] > logger.soft_test_best[0]:
                logger.model_best = copy.deepcopy(model)
                logger.optimizer_best = copy.deepcopy(optimizer)
                logger.epoch_best = epoch
                logger.D_best = D
                logger.D_test_best = D_test
                logger.soft_best = soft
                logger.soft_test_best = soft_test
                logger.hard_best = hard
                logger.hard_test_best = hard_test
                logger.retrieval_best = retrieval
                logger.retrieval_test_best = retrieval_test

            print("#", epoch)
            print("time: {} ({})".format(epoch_time, total_time))
            print("[train] loss:", loss_average)
            print("[train] soft:", soft)
            print("[train] hard:", hard)
            print("[train] retr:", retrieval)
            print("[test]  soft:", soft_test)
            print("[test]  hard:", hard_test)
            print("[test]  retr:", retrieval_test)
            print("[best]  soft: {} (at # {})".format(logger.soft_test_best,
                                                      logger.epoch_best))
            print(p)
            # print norms of the weights
            weight_norms = xp.hstack([
                xp.linalg.norm(param.data) for param in model.params()
            ]).tolist()
            # Build a real list: on Python 3 ``map`` is lazy and would print
            # as "<map object ...>" instead of the values.
            print("|W|", [float('%0.2f' % norm) for norm in weight_norms])
            print()

            # Draw plots
            if save_distance_matrix:
                plt.figure(figsize=(8, 4))
                plt.subplot(1, 2, 1)
                mat = plt.matshow(D, fignum=0, cmap=plt.cm.gray)
                plt.colorbar(mat, fraction=0.045)
                plt.subplot(1, 2, 2)
                mat = plt.matshow(D_test, fignum=0, cmap=plt.cm.gray)
                plt.colorbar(mat, fraction=0.045)
                plt.tight_layout()

            plt.figure(figsize=(8, 4))
            plt.subplot(1, 2, 1)
            plt.plot(logger.loss_log, label="tr-loss")
            plt.grid()
            plt.legend(loc='best')
            plt.subplot(1, 2, 2)
            plt.plot(logger.train_log)
            plt.plot(logger.test_log)
            plt.grid()
            plt.legend([
                "tr-soft", "tr-hard", "tr-retr", "te-soft", "te-hard",
                "te-retr"
            ],
                       bbox_to_anchor=(1.4, 1))
            plt.ylim([0.0, 1.0])
            plt.xlim([0, p.num_epochs])
            plt.tight_layout()
            plt.show()
            plt.draw()

            del D
            del D_test

    except KeyboardInterrupt:
        # Ctrl-C ends training early but the results are still saved below.
        stop = True

    dir_name = "-".join([
        p.dataset, script_filename,
        time.strftime("%Y%m%d%H%M%S"),
        str(logger.soft_test_best[0])
    ])

    logger.save(dir_name)
    p.save(dir_name)

    print("total epochs: {} ({} [s])".format(logger.epoch, logger.total_time))
    print("best test score (at # {})".format(logger.epoch_best))
    print("[test]  soft:", logger.soft_test_best)
    print("[test]  hard:", logger.hard_test_best)
    print("[test]  retr:", logger.retrieval_test_best)
    print(str(p).replace(', ', '\n'))
    print()

    return stop
コード例 #39
0
 def call_test(self, x):
     """Run ``call_orig`` on *x* with Chainer's ``train`` config set to False."""
     inference_mode = chainer.using_config('train', False)
     with inference_mode:
         outcome = call_orig(self, x)
     return outcome
コード例 #40
0
    chainer.cuda.get_device_from_id(0).use()
    chainer.cuda.check_cuda_available()
    # GPU用データ形式に変換
    model.to_gpu()

# Load the trained weights into the model
chainer.serializers.load_hdf5('genmodel.hdf5', model)

# Generate images
num_generate = 5  # number of images to generate
# Create the source vectors the images are generated from
rnd = random.uniform(-1, 1, (num_generate, 100, 1, 1))
rnd = cp.array(rnd, dtype=cp.float32)

# Generate everything at once as a single batch
# NOTE(review): 'gens' is not one of Chainer's documented config keys (the
# usual switch for inference is 'train') -- confirm this is intended.
with chainer.using_config('gens', False):
    result = model(rnd)

# Save the generated images and the vectors they were generated from
# NOTE(review): within this excerpt `f` is neither written to nor closed --
# check the remainder of the script.
f = codecs.open('vectors.txt', 'w', 'utf8')
for i in range(num_generate):
    # Save the image
    data = np.zeros((128, 128, 3), dtype=np.uint8)
    # Scale the output by 255 before it is stored as uint8 below
    dst = result.data[i] * 255.0
    if uses_device >= 0:
        dst = chainer.cuda.to_cpu(dst)
    # Copy the three leading planes of dst into the last axis of data
    data[:, :, 0] = dst[0]
    data[:, :, 1] = dst[1]
    data[:, :, 2] = dst[2]
    himg = Image.fromarray(data, 'RGB')
    himg.save('gens/gen-' + str(i) + '.png')
コード例 #41
0
                y_batch = spec_utils.crop_and_concat(mask, y_batch, False)

                abs_diff = F.absolute_error(X_batch * mask, y_batch)
                loss = F.mean(abs_diff)
                loss.backward()
                optimizer.update()

                il = abs_diff.data.mean(axis=(1, 2, 3))
                instance_loss[local_perm] += chainer.backends.cuda.to_cpu(il)
                sum_loss += float(loss.data) * len(X_batch)

            train_loss = sum_loss / len(X_train)

            sum_loss = 0
            perm = np.random.permutation(len(X_valid))
            with chainer.no_backprop_mode(), chainer.using_config('train', False):
                for i in range(0, len(X_valid), args.val_batchsize):
                    local_perm = perm[i: i + args.val_batchsize]
                    X_batch = xp.asarray(X_valid[local_perm])
                    y_batch = xp.asarray(y_valid[local_perm])

                    mask = model(X_batch)
                    X_batch = spec_utils.crop_and_concat(mask, X_batch, False)
                    y_batch = spec_utils.crop_and_concat(mask, y_batch, False)

                    loss = F.mean_absolute_error(X_batch * mask, y_batch)
                    sum_loss += float(loss.data) * len(X_batch)

            valid_loss = sum_loss / len(X_valid)
            print('    * training loss = {:.6f}, validation loss = {:.6f}'
                  .format(train_loss * 1000, valid_loss * 1000))
コード例 #42
0
 def test_cpu(self):
     """Run ``self._check()`` with the ``use_ideep`` config set to 'never'."""
     ideep_off = chainer.using_config('use_ideep', 'never')
     with ideep_off:
         self._check()
コード例 #43
0
    def predict(self, imgs):
        """Segment object instances from images.

        This method predicts instance-aware object regions for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images of shape
                :math:`(B, C, H, W)`.  All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(masks, labels, scores)`.

           * **masks**: A list of boolean arrays of shape :math:`(R, H, W)`, \
               where :math:`R` is the number of masks in an image. \
               Each pixel holds whether it is inside the object or not.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the masks. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """

        prepared_imgs = []
        sizes = []
        for img in imgs:
            # Remember the original (H, W) before self.prepare() is applied.
            size = img.shape[1:]
            img = self.prepare(img.astype(np.float32))
            prepared_imgs.append(img)
            sizes.append(size)

        masks = []
        labels = []
        scores = []

        for img, size in zip(prepared_imgs, sizes):
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                # inference
                img_var = chainer.Variable(self.xp.array(img[None]))
                # Ratio of the prepared width to the original width.
                scale = img_var.shape[3] / size[1]
                roi_ag_seg_scores, _, roi_cls_scores, bboxes, _ = \
                    self.__call__(img_var, scale)

            # We are assuming that batch size is 1.
            roi_ag_seg_score = roi_ag_seg_scores.array
            roi_cls_score = roi_cls_scores.array
            # Map the boxes back to the original image coordinates.
            bbox = bboxes / scale

            # shape: (n_rois, 4)
            bbox[:, 0::2] = self.xp.clip(bbox[:, 0::2], 0, size[0])
            bbox[:, 1::2] = self.xp.clip(bbox[:, 1::2], 0, size[1])

            # shape: (n_roi, roi_size, roi_size)
            roi_seg_prob = F.softmax(roi_ag_seg_score).array[:, 1]
            roi_cls_prob = F.softmax(roi_cls_score).array

            roi_seg_prob = chainer.cuda.to_cpu(roi_seg_prob)
            roi_cls_prob = chainer.cuda.to_cpu(roi_cls_prob)
            bbox = chainer.cuda.to_cpu(bbox)

            roi_seg_prob, bbox, label, roi_cls_prob = mask_voting(
                roi_seg_prob,
                bbox,
                roi_cls_prob,
                size,
                self.score_thresh,
                self.nms_thresh,
                self.mask_merge_thresh,
                self.binary_thresh,
                limit=self.limit,
                bg_label=0)

            # Keep only boxes larger than min_drop_size in both dimensions.
            height = bbox[:, 2] - bbox[:, 0]
            width = bbox[:, 3] - bbox[:, 1]
            keep_indices = np.where((height > self.min_drop_size)
                                    & (width > self.min_drop_size))[0]
            roi_seg_prob = roi_seg_prob[keep_indices]
            bbox = bbox[keep_indices]
            label = label[keep_indices]
            roi_cls_prob = roi_cls_prob[keep_indices]

            # Builtin ``bool`` instead of the ``np.bool`` alias, which was
            # deprecated in NumPy 1.20 and removed in 1.24.
            mask = np.zeros((len(roi_seg_prob), size[0], size[1]),
                            dtype=bool)
            for i, (roi_seg_pb, bb) in enumerate(zip(roi_seg_prob, bbox)):
                bb = np.round(bb).astype(np.int32)
                y_min, x_min, y_max, x_max = bb
                # Resize the ROI probability map to its box, then threshold
                # it and paste it into the full-size mask.
                roi_msk_pb = resize(
                    roi_seg_pb.astype(np.float32)[None],
                    (y_max - y_min, x_max - x_min))
                roi_msk = (roi_msk_pb > self.binary_thresh)[0]
                mask[i, y_min:y_max, x_min:x_max] = roi_msk

            masks.append(mask)
            labels.append(label)
            scores.append(roi_cls_prob)

        return masks, labels, scores
コード例 #44
0
def _save_reconstructions(model, data, no_images, prefix, name_suffix, args):
    """Save evenly spaced samples of ``data`` and their reconstructions.

    ``no_images`` indices are picked with ``np.linspace`` over the whole
    dataset. The selected inputs are written to ``<prefix><name_suffix>``
    and the model outputs (together with their latent codes) to
    ``<prefix><name_suffix>_rec``, both under ``args.out``.
    """
    indices = np.linspace(0, len(data) - 1, no_images, dtype=int)
    x = chainer.Variable(np.asarray(data[indices]))
    # Run the model in test mode and without building a backward graph.
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        x_rec = model(x)
        z = model.get_latent(x)

    base_name = prefix + name_suffix
    save_images(x=x.data,
                z=[],
                no_images=no_images,
                filename=os.path.join(args.out, base_name),
                args=args)
    save_images(x=x_rec.data,
                z=z.data,
                no_images=no_images,
                filename=os.path.join(args.out, base_name + "_rec"),
                args=args)


def perform_reconstructions(model=None,
                            train=None,
                            test=None,
                            unseen=None,
                            no_images=None,
                            name_suffix=None,
                            args=None):
    """Save input/reconstruction image pairs and images decoded from noise.

    Args:
        model: Callable model that also provides ``get_latent`` and
            ``decode``.
        train: Training dataset; must support ``len`` and index-array
            lookup.
        test: Testing dataset, same requirements as ``train``.
        unseen: Optional dataset of unseen examples. ``None`` or an empty
            dataset skips the unseen reconstructions.
        no_images: Number of examples to pick from each dataset and number
            of latent vectors to sample.
        name_suffix: Suffix appended to every output file name.
        args: Namespace providing ``out`` (output directory) and ``dimz``
            (latent dimensionality); it is also forwarded to
            ``save_images``.
    """
    _save_reconstructions(model, train, no_images, 'train_', name_suffix,
                          args)

    # reconstruct testing examples
    _save_reconstructions(model, test, no_images, 'test_', name_suffix, args)

    # reconstruct unseen examples; the default ``unseen=None`` previously
    # crashed in ``len(None)`` and is now treated as "nothing to do".
    if unseen is not None and len(unseen) != 0:
        _save_reconstructions(model, unseen, no_images, 'unseen_',
                              name_suffix, args)

    # draw images from randomly sampled z under a 'vanilla' normal distribution
    # NOTE(review): unlike the reconstructions above, this decode does not run
    # under using_config('train', False) / no_backprop_mode -- confirm whether
    # that is intentional.
    z = chainer.Variable(
        np.random.normal(0, 1, (no_images, args.dimz)).astype(np.float32))
    x = model.decode(z)
    save_images(x=x.data,
                z=z.data,
                no_images=no_images,
                filename=os.path.join(args.out, 'sampled_' + name_suffix),
                args=args)
コード例 #45
0
    def update_Z_speech(self, var_propose_distribution=1e-4):
        """Update the speech latent variables and the values derived from them.

        Depending on ``self.mode_update_Z`` the latent variables are refined
        with optimizer steps ("backprop"), with accept/reject sampling
        ("sampling"), or with both in that order.

        Parameters:
            var_propose_distribution: float
                the variance of the proposal distribution used in the
                "sampling" mode

        Results:
            self.Z_speech_DT: self.xp.array [ n_latent x T ]
                the latent variable of each speech
        """
        # Contribution of every source except index 0, summed over sources.
        noise_NFTM = self.lambda_NFT[1:][..., None] * self.G_NFM[1:, :, None]
        self.WHG_noise_FTM = noise_NFTM.sum(axis=0)
        # Gain applied to the decoded speech power (source index 0).
        gain_FT = self.U_F[:, None] * self.V_T[None]
        self.UVG_FTM = gain_FT[:, :, None] * self.G_NFM[0, :, None]

        if "backprop" in self.mode_update_Z:
            for _ in range(self.n_Z_iteration):
                with chainer.using_config('train', False):
                    self.z_optimizer_speech.update(self.loss_func_Z,
                                                   self.z_link_speech.z,
                                                   self.speech_VAE, 0)

            self.Z_speech_DT = self.z_link_speech.z.data.T
            self.power_speech_FT = self.speech_VAE.decode_cupy(
                self.Z_speech_DT)

        if "sampling" in self.mode_update_Z:
            xp = self.xp
            proposal_var = (
                xp.ones_like(self.Z_speech_DT).astype(xp.float32) *
                var_propose_distribution)
            log_var = xp.log(proposal_var)
            # Alias of self.Z_speech_DT: accepted proposals are written in
            # place.
            Z_current_DT = self.Z_speech_DT
            power_current_FTM = self.speech_VAE.decode_cupy(
                Z_current_DT)[:, :, None]

            for _ in range(self.n_Z_iteration):
                Z_proposed_DT = chf.gaussian(Z_current_DT, log_var).data
                lambda_current_FTM = (power_current_FTM * self.UVG_FTM +
                                      self.WHG_noise_FTM)
                power_proposed_FTM = self.speech_VAE.decode_cupy(
                    Z_proposed_DT)[:, :, None]
                lambda_proposed_FTM = (power_proposed_FTM * self.UVG_FTM +
                                       self.WHG_noise_FTM)

                # Both terms are reduced over the last axis and then over
                # the first axis, leaving one value per time frame.
                inverse_diff_FTM = (1 / lambda_current_FTM -
                                    1 / lambda_proposed_FTM)
                weighted_T = (self.Qx_power_FTM *
                              inverse_diff_FTM).sum(axis=2).sum(axis=0)
                ratio_T = (lambda_current_FTM /
                           lambda_proposed_FTM).prod(axis=2).prod(axis=0)
                acceptance_rate = xp.exp(weighted_T + xp.log(ratio_T))

                # Accept a frame when a uniform draw falls below its rate.
                accept_flag = xp.random.random([self.n_time
                                                ]) < acceptance_rate
                Z_current_DT[:, accept_flag] = Z_proposed_DT[:, accept_flag]
                power_current_FTM[:, accept_flag] = (
                    power_proposed_FTM[:, accept_flag])

            self.Z_speech_DT = Z_current_DT
            self.z_link_speech.z = chainer.Parameter(self.Z_speech_DT.T)
            self.power_speech_FT = self.speech_VAE.decode_cupy(
                self.Z_speech_DT)

        speech_lambda_FT = (self.U_F[:, None] * self.V_T[None] *
                            self.power_speech_FT)
        self.lambda_NFT[0] = speech_lambda_FT
        mixture_NFTM = self.lambda_NFT[..., None] * self.G_NFM[:, :, None]
        self.Y_FTM = mixture_NFTM.sum(axis=0)
コード例 #46
0
 def test_bias_invalid_argc2(self):
     # With debug mode on, calling a link built as
     # ``links.Bias(self.axis, None)`` with only ``self.x`` must raise
     # AssertionError.
     bias_link = links.Bias(self.axis, None)
     with chainer.using_config('debug', True), \
             self.assertRaises(AssertionError):
         bias_link(self.x)
コード例 #47
0
ファイル: test_communicator.py プロジェクト: wade1990/chainer
def test_mixed_dtype_communicator_gpu(param):
    """Run the mixed-dtype mean-grad check under ``param.global_dtype``."""
    mixed_model = ExampleMixedModel()
    dtype_scope = chainer.using_config('dtype', param.global_dtype)
    with dtype_scope:
        check_multi_node_mean_grad_mixed_dtype(param, mixed_model, True)
コード例 #48
0
def main():
    """Load a trained SPIRAL generator and run one evaluation episode.

    Command-line arguments select the YAML config file, the generator
    weights (``--load_generator``, required) and the input file name that
    is handed to the demo environment. Only the ``photo_enhancement``
    problem is supported; any other value raises ``NotImplementedError``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('config', help='YAML config file')
    parser.add_argument('--profile', action='store_true')
    parser.add_argument('--load_generator', type=str, default='')
    parser.add_argument('--logger_level', type=int, default=logging.INFO)
    parser.add_argument('--file_name', type=str, default='')
    args = parser.parse_args()
    print_args(args)

    # Validate the CLI up front with a usage error instead of an ``assert``
    # (asserts are stripped under ``python -O``).
    if not args.load_generator:
        parser.error('specify the weight of the model (--load_generator)')

    # init a logger
    logging.basicConfig(level=args.logger_level)

    # load yaml config file; safe_load avoids constructing arbitrary Python
    # objects and works on PyYAML versions where ``yaml.load`` requires an
    # explicit Loader.
    with open(args.config) as f:
        config = yaml.safe_load(f)

    # set random seed
    misc.set_random_seed(config['seed'])

    # define func to create env, target data sampler, and models
    if config['problem'] == 'photo_enhancement':

        def make_env(process_idx, test):
            assert test, "error: test should be True"
            env = PhotoEnhancementEnvDemo(
                batch_size=1,
                max_episode_steps=config['max_episode_steps'],
                imsize=config['imsize'],
                file_name=args.file_name)
            return env

        sample_env = make_env(0, True)

        gen = SpiralModel(config['imsize'], sample_env.num_parameters,
                          config['L_stages'], config['conditional'])
        dis = SpiralDiscriminator(config['imsize'], config['conditional'])

        dataset = PhotoEnhancementDataset()

    else:
        raise NotImplementedError()

    # initialize optimizers
    gen_opt = chainer.optimizers.Adam(alpha=config['lr'], beta1=0.5)
    dis_opt = chainer.optimizers.Adam(alpha=config['lr'], beta1=0.5)

    gen_opt.setup(gen)
    dis_opt.setup(dis)

    gen_opt.add_hook(chainer.optimizer.GradientClipping(40))
    dis_opt.add_hook(chainer.optimizer.GradientClipping(40))
    if config['weight_decay'] > 0:
        gen_opt.add_hook(NonbiasWeightDecay(config['weight_decay']))
        dis_opt.add_hook(NonbiasWeightDecay(config['weight_decay']))

    # load generator's weight
    serializers.load_npz(args.load_generator, gen)

    # init an spiral agent
    agent = SPIRAL(
        generator=gen,
        discriminator=dis,
        gen_optimizer=gen_opt,
        dis_optimizer=dis_opt,
        dataset=dataset,
        conditional=config['conditional'],
        reward_mode=config['reward_mode'],
        imsize=config['imsize'],
        max_episode_steps=config['max_episode_steps'],
        rollout_n=config['rollout_n'],
        gamma=config['gamma'],
        alpha=config['alpha'],
        beta=config['beta'],
        L_stages=config['L_stages'],
        U_update=config['U_update'],
        gp_lambda=config['gp_lambda'],
        n_save_final_obs_interval=config['n_save_final_obs_interval'],
        outdir=None,
        act_deterministically=True)

    max_episode_len = config['max_episode_steps'] * config['rollout_n']
    save_interval = config['processes'] * config[
        'n_save_interval'] * max_episode_len

    # The hook is not passed to anything below; it is still constructed
    # because its constructor may have side effects this function cannot see.
    step_hook = SpiralStepHook(config['max_episode_steps'], save_interval,
                               None)

    env = make_env(0, True)

    # Evaluate for a single episode of length one in test mode.
    with chainer.using_config('train', False):
        experiments.evaluator.run_evaluation_episodes(
            env=env,
            agent=agent,
            n_steps=None,
            n_episodes=1,
            max_episode_len=1)
コード例 #49
0
ファイル: make_score.py プロジェクト: Xilong-Zhang/RUSE
    xp = cupy
    model = L.Classifier(MLP(args))
    if args.opt == 'Adam':
        path_model = '../models/Trained_2015_2016_{}/Adam_l{}_u{}_b{}_dr{}.snapshot'.format(
            args.SR_models, args.layer, args.unit, args.batchsize,
            args.dropout_rate)
    serializers.load_npz(path_model, model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    features, labels = concat_npz(args)

    scores = []
    with chainer.using_config('train', False):
        if args.gpu >= 0:
            for feature in features:
                feature = feature[None, ...]
                feature = to_gpu(feature)
                pred = model.predictor(
                    chainer.Variable(feature.astype(xp.float32)))
                score = to_cpu(pred.data)[0][0]
                scores.append(score)
                print(score)
        else:
            for feature in features:
                feature = feature[None, ...]
                pred = model.predictor(
                    chainer.Variable(feature.astype(np.float32)))
                score = pred.data[0][0]
コード例 #50
0
 def test_param_layout(self):
     # A link created while compute_mode is 'cudnn_fast' must store W in the
     # CUDNN_CHANNEL_LAST_W layout.
     fast_mode = chainer.using_config('compute_mode', 'cudnn_fast')
     with fast_mode:
         created_link = self.create_link()
     weight_layout = created_link.W.layout
     assert weight_layout == memory_layouts.CUDNN_CHANNEL_LAST_W
コード例 #51
0
def evaluate(model,
             epoch_iterator,
             distance='euclidean',
             normalize=False,
             batch_size=10,
             return_distance_matrix=False):
    """Compute retrieval/clustering scores from pairwise embedding distances.

    The embeddings and class labels are obtained from ``iterate_forward``
    in test mode without backprop. Squared distances between every example
    and all examples are then computed batch by batch and scored with
    ``compute_soft_hard_retrieval``; the per-batch scores are averaged with
    weights equal to the batch sizes.

    Args:
        model: Model forwarded by ``iterate_forward``.
        epoch_iterator: Iterator forwarded to ``iterate_forward``.
        distance (str): ``'euclidean'`` or ``'cosine'``. For ``'cosine'``
            the embeddings are scaled to unit L2 norm before the squared
            distances are computed.
        normalize (bool): Forwarded to ``iterate_forward``.
        batch_size (int): Number of query rows per distance batch.
        return_distance_matrix (bool): If ``True``, the stacked distance
            matrix is returned (moved to CPU).

    Returns:
        tuple: ``(D, avg_softs, avg_hards, avg_retrievals)`` where ``D`` is
        the distance matrix or ``None``.

    Raises:
        ValueError: If ``distance`` is not a supported name.
    """
    if distance not in ('cosine', 'euclidean'):
        raise ValueError("distance must be 'euclidean' or 'cosine'.")

    with chainer.no_backprop_mode():
        with chainer.using_config('train', False):
            y_data, c_data = iterate_forward(model,
                                             epoch_iterator,
                                             normalize=normalize)

    add_epsilon = True
    xp = cuda.get_array_module(y_data)
    num_examples = len(y_data)

    D_batches = []
    softs = []
    hards = []
    retrievals = []
    yy = xp.sum(y_data**2.0, axis=1)

    if distance == 'cosine':
        # L2 normalization: divide by the norm (sqrt of the squared norm),
        # not by the squared norm itself, and refresh the squared norms so
        # the distance formula below matches the rescaled embeddings.
        y_data = y_data / xp.sqrt(yy)[:, None]
        yy = xp.sum(y_data**2.0, axis=1)

    for start in range(0, num_examples, batch_size):
        end = min(start + batch_size, num_examples)
        y_batch = y_data[start:end]
        yy_batch = yy[start:end]
        c_batch = c_data[start:end]

        # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, clipped at zero to absorb
        # negative round-off.
        D_batch = yy + yy_batch[:, None] - 2.0 * xp.dot(y_batch, y_data.T)
        xp.maximum(D_batch, 0, out=D_batch)
        if add_epsilon:
            D_batch += 1e-40
        # ensure the diagonal components are zero
        xp.fill_diagonal(D_batch[:, start:end], 0)

        soft, hard, retr = compute_soft_hard_retrieval(D_batch, c_data,
                                                       c_batch)

        # Weight by batch size so the final division yields a per-example
        # average even when the last batch is shorter.
        softs.append(len(y_batch) * soft)
        hards.append(len(y_batch) * hard)
        retrievals.append(len(y_batch) * retr)
        if return_distance_matrix:
            D_batches.append(D_batch)

    avg_softs = xp.sum(softs, axis=0) / num_examples
    avg_hards = xp.sum(hards, axis=0) / num_examples
    avg_retrievals = xp.sum(retrievals, axis=0) / num_examples

    if return_distance_matrix:
        D = cuda.to_cpu(xp.vstack(D_batches))
    else:
        D = None
    return D, avg_softs, avg_hards, avg_retrievals
コード例 #52
0
 def test_call_cudnn_forward(self):
     # The patched cuDNN pooling entry point must be called exactly when
     # chainer reports that cuDNN should be used at the '>=auto' level.
     with chainer.using_config('use_cudnn', self.use_cudnn), \
             mock.patch('cupy.cudnn.cudnn.poolingForward') as pooling_forward:
         self.forward()
         was_called = pooling_forward.called
         self.assertEqual(was_called, chainer.should_use_cudnn('>=auto'))
コード例 #53
0
ファイル: function_node.py プロジェクト: otamot/chainer
def grad(outputs,
         inputs,
         grad_outputs=None,
         grad_inputs=None,
         set_grad=False,
         retain_grad=False,
         enable_double_backprop=False,
         loss_scale=None):
    """Computes the gradient of output variables w.r.t.\\  the input variables.

    This function implements the backpropagation algorithm. While
    :meth:`Variable.backward` also implements backprop, this function selects
    the smallest paths in the computational graph needed to compute the
    gradients w.r.t. inputs. The error is backpropagated only through these
    selected paths, which may reduce the overall computational cost.

    This function also differs from :meth:`Variable.backward` in the way to
    return the gradients; it directly returns the gradient variables as a list
    instead of setting gradients to the :attr:`Variable.grad_var` attribute of
    the original variable. It means users do not need to clear the gradient
    w.r.t. each variable before computing the gradient using this function.
    If ``set_grad`` option is set to ``True``, the computed gradient is also
    stored in the :attr:`Variable.grad_var` attribute of each variable, in
    which case any original value of :attr:`Variable.grad_var` will be updated
    even if it had already been set.

    Args:
        outputs (tuple or list of :class:`~chainer.Variable`):
            A sequence of output variables from which backprop starts.
        inputs (tuple or list of :class:`~chainer.Variable`):
            A sequence of input variables each of which this function computes
            the gradient w.r.t.
        grad_outputs (tuple or list of :class:`~chainer.Variable` or None):
            A sequence of variables that gives the initial value of each output
            gradient.
            If an element is set to ``None``, an array filled with 1 is used.
            If this argument itself is ``None``, it is treated as a sequence of
            ``None``\\ s.
        grad_inputs (tuple or list of :class:`~chainer.Variable` or None):
            A sequence of variables that gives the initial value of each input
            gradient. The gradients computed by the backprop
            algorithm are accumulated to them (not in-place). If an element
            is set to ``None``, the gradient is not accumulated to this value.
            If this argument itself is ``None``, it is treated as a sequence of
            ``None``\\ s.
        set_grad (bool): If it is ``True``, the :attr:`Variable.grad_var`
            attribute of each input variable is set to the corresponding
            computed gradient variable.
        retain_grad (bool): If it is ``True``, the gradients w.r.t. all the
            intermediate variables are stored in the :attr:`Variable.grad_var`
            attribute. In this case, the ``set_grad`` option is ignored.
        enable_double_backprop (bool): If it is ``True``, the computed
            gradients can be further backpropagated. Enabling it may increase
            the memory consumption (and possibly the computational time) to
            remember the intermediate gradient values for the second
            backpropagation.
        loss_scale (float): Loss scaling factor. Loss scaling is a useful
            technique to mitigate vanishing gradient issue that tends to happen
            when low precision data type like float16 is used during training.
            If you set loss scaling factor, gradients of loss values are to be
            multiplied by the factor before backprop starts. The factor is
            propagated to whole gradients in a computational graph along the
            backprop. The gradients of parameters are divided by the factor
            just before the parameters are to be updated.

    Returns:
        A list of gradient variables w.r.t. the inputs.

    Raises:
        TypeError: If ``outputs`` or ``inputs`` is not a tuple or a list, or
            if ``grad_outputs`` or ``grad_inputs`` is neither ``None`` nor a
            tuple or a list.

    """
    # Validate the container types up front so that a single Variable passed
    # by mistake fails with a clear message.
    if not isinstance(outputs, (tuple, list)):
        raise TypeError('outputs must be a tuple or a list, not {}.'.format(
            type(outputs)))
    if not isinstance(inputs, (tuple, list)):
        raise TypeError('inputs must be a tuple or a list, not {}.'.format(
            type(inputs)))
    if not (grad_outputs is None or isinstance(grad_outputs, (tuple, list))):
        raise TypeError(
            'grad_outputs must be a tuple or a list or None, not {}.'.format(
                type(grad_outputs)))
    if not (grad_inputs is None or isinstance(grad_inputs, (tuple, list))):
        raise TypeError(
            'grad_inputs must be a tuple or a list or None, not {}.'.format(
                type(grad_inputs)))

    for v in outputs:
        # Raise error here if v is created by Function.backward.
        # In such case, we don't know exact inputs of the creator.
        v.node._check_old_style_gradient()

    # The implementation consists of three steps.

    # 1. Backward enumeration: all the nodes reachable backward from the output
    #    nodes are enumerated. The forward direction links are collected in
    #    this step. Note that the variable nodes whose requires_grad is false
    #    are ignored and their creators are not searched.
    candidate_funcs = [
        v.creator_node for v in outputs if v.creator_node is not None
    ]
    visited_funcs = set()
    # Maps a variable node to the function nodes that take it as an input.
    forward_graph = collections.defaultdict(list)
    while candidate_funcs:
        func = candidate_funcs.pop()
        if func in visited_funcs:
            continue
        visited_funcs.add(func)
        for x in func.inputs:
            # Raise error here if x is created by Function.backward.
            # In such case, we don't know exact inputs of the creator.
            x._check_old_style_gradient()

            if not x.requires_grad:
                continue
            forward_graph[x].append(func)
            creator = x.creator_node
            if creator is not None and creator not in visited_funcs:
                candidate_funcs.append(creator)

    # 2. Forward enumeration: all the nodes in the subgraph reachable from the
    #    input nodes are enumerated. The extracted (sub-)subgraph is the union
    #    of all paths that backpropagation will visit.
    candidate_vars = [x.node for x in inputs]
    visited_funcs = set()
    grad_required = set()
    while candidate_vars:
        x = candidate_vars.pop()
        grad_required.add(x)
        for func in forward_graph[x]:
            if func in visited_funcs:
                continue
            visited_funcs.add(func)
            for y_ref in func.outputs:
                # func.outputs holds callables; a None result means the
                # output node is no longer available and is skipped.
                y = y_ref()
                if y is not None and y in forward_graph:
                    candidate_vars.append(y)

    # 3. Backpropagation: the backpropagation is executed along the
    #    (sub-)subgraph. It uses the topological order of the subgraph which is
    #    induced by the reversed order of function applications ("rank").
    grads = _backprop_utils.GradTable()

    # Initialize the gradient mapping.
    if grad_outputs is None:
        grad_outputs = (None, ) * len(outputs)
    for y, gy in zip(outputs, grad_outputs):
        if gy is None:
            # Default initial gradient: ones with the same shape/dtype as the
            # output, created with the array module matching y's device.
            with cuda.get_device_from_array(y.data) as device:
                if device is cuda.DummyDevice:
                    gy_data = numpy.ones_like(y.data)
                else:
                    gy_data = cuda.cupy.ones_like(y.data)
                gy = variable.Variable(gy_data, requires_grad=False)
            # The loss scale is applied only to these default gradients;
            # user-supplied grad_outputs are used as given.
            if loss_scale is not None:
                gy.data *= loss_scale
        grads[y.node] = gy

    if grad_inputs is not None:
        for x, gx in zip(inputs, grad_inputs):
            if gx is not None:
                grads[x.node] = gx

    # Backprop implementation. It edits grads which will only contain the
    # gradients w.r.t. the inputs.
    with chainer.using_config('enable_backprop', enable_double_backprop):
        ret_dict = _backprop(outputs, inputs, grad_required, retain_grad,
                             grads, loss_scale)

    # Extract the gradients w.r.t. the inputs and return them.
    ret = [ret_dict[x.node] for x in inputs]
    if set_grad:
        for x, gx in zip(inputs, ret):
            x.grad_var = gx

    return ret
コード例 #54
0
def _scale_learning_rate(optim, factor):
    """Multiply the optimizer's step size by ``factor``.

    Adam instances are scaled through ``eta``; all other optimizers through
    ``lr``.
    """
    if isinstance(optim, optims.Adam):
        optim.eta *= factor
    else:
        optim.lr *= factor


def _save_snapshot(optim, outdir):
    """Save the optimizer's target model and the optimizer as ``.npz`` files."""
    for prefix, obj in (('model', optim.target), ('optim', optim)):
        save_dst_path = os.path.join(
            outdir, '{0}_iter_{1:07d}.npz'.format(prefix, optim.t))
        S.save_npz(save_dst_path, obj)
        print('save ' + save_dst_path)


def _mean_validation_loss(loss_func, val_queue, gpu_id, ignore_labels):
    """Return the validation loss averaged over the counted labels.

    Batches are read from ``val_queue`` until one reports ``epoch_done``.
    Each batch loss is weighted by the number of labels counted for it: the
    labels different from -1 when ``ignore_labels`` is non-empty, otherwise
    every label.
    """
    val_loss = 0.
    val_valid_size = 0

    with using_config('train', False), \
            using_config('enable_backprop', False):
        while True:
            x_batch, t_batch, epoch_done = val_queue.get()

            for ignore_label in ignore_labels:
                t_batch[t_batch == ignore_label] = -1

            if len(ignore_labels) > 0:
                valid_size = (t_batch != -1).sum()
            else:
                valid_size = t_batch.size

            val_valid_size += valid_size

            if gpu_id >= 0:
                x_batch = cuda.to_gpu(x_batch, device=gpu_id)
                t_batch = cuda.to_gpu(t_batch, device=gpu_id)

            loss = cuda.to_cpu(loss_func(x_batch, t_batch).array)
            loss *= valid_size

            val_loss += loss

            if epoch_done:
                break

    return val_loss / val_valid_size


def train(loss_func,
          optim,
          train_queue,
          max_iter,
          mean_interval,
          save_interval,
          val_queue=None,
          lr_decay_iter_dict=None,
          gpu_id=-1,
          ignore_labels=None,
          outdir='./'):
    """Run the training loop until ``optim.t`` reaches ``max_iter``.

    Args:
        loss_func: Callable link returning the loss for ``(x, t)``; it must
            provide ``cleargrads`` and ``to_gpu``.
        optim: Optimizer already set up for the model. Its ``t`` counter
            drives the loop, so training can resume from a loaded state.
        train_queue: Object whose ``get()`` returns
            ``(x_batch, t_batch, epoch_done)``.
        max_iter (int): Iteration count at which training stops.
        mean_interval (int): Interval (in iterations) for printing the mean
            training loss and running validation.
        save_interval (int): Interval (in iterations) for saving snapshots.
        val_queue: Optional queue with the same interface as
            ``train_queue``; validation is skipped when ``None``.
        lr_decay_iter_dict (dict or None): Maps an iteration number to the
            factor applied to the learning rate at that iteration. ``None``
            means no decay.
        gpu_id (int): GPU device id, or a negative value for CPU.
        ignore_labels (sequence or None): Label values replaced by -1 in
            every target batch. ``None`` means no label is ignored.
        outdir (str): Directory receiving the snapshot files.
    """
    # Use None sentinels instead of mutable default arguments.
    if lr_decay_iter_dict is None:
        lr_decay_iter_dict = {}
    if ignore_labels is None:
        ignore_labels = ()

    chainer.global_config.train = True
    chainer.global_config.enable_backprop = True

    if gpu_id >= 0:
        loss_func.to_gpu(device=gpu_id)

    # When resuming, re-apply every decay step that is already in the past.
    for key, value in lr_decay_iter_dict.items():
        if optim.t >= key:
            _scale_learning_rate(optim, value)

    sum_loss = 0.
    while optim.t < max_iter:
        x_batch, t_batch, epoch_done = train_queue.get()

        for ignore_label in ignore_labels:
            t_batch[t_batch == ignore_label] = -1

        if gpu_id >= 0:
            x_batch = cuda.to_gpu(x_batch, device=gpu_id)
            t_batch = cuda.to_gpu(t_batch, device=gpu_id)

        loss = loss_func(x_batch, t_batch)
        loss_func.cleargrads()
        loss.backward()

        optim.update()
        sum_loss += float(loss.array)

        if epoch_done:
            optim.new_epoch()

        print(dt.now())
        print('epoch: {0:04d}, iter: {1:07d}, lr: {2:e}'.format(
            optim.epoch, optim.t, optim.lr))
        print('train/loss: {}'.format(float(loss.array)))

        if optim.t in lr_decay_iter_dict:
            _scale_learning_rate(optim, lr_decay_iter_dict[optim.t])

        if optim.t % mean_interval == 0:
            print('mean train/loss: {}'.format(sum_loss / mean_interval))
            sum_loss = 0.

            if val_queue is not None:
                print('val/loss: {}'.format(
                    _mean_validation_loss(loss_func, val_queue, gpu_id,
                                          ignore_labels)))

        if optim.t % save_interval == 0:
            _save_snapshot(optim, outdir)

        print()

    # Report and save whatever is left over since the last full interval.
    if optim.t % mean_interval > 0:
        print('mean train/loss: {}'.format(sum_loss /
                                           (optim.t % mean_interval)))

    if optim.t % save_interval > 0:
        _save_snapshot(optim, outdir)
コード例 #55
0
 def check_backward(self, x_data, y_grad, use_cudnn='always'):
     # Gradient-check AveragePoolingND under the requested use_cudnn setting.
     cudnn_scope = chainer.using_config('use_cudnn', use_cudnn)
     with cudnn_scope:
         pooling = functions.AveragePoolingND(
             self.ndim, self.ksize, self.stride, self.pad)
         options = self.check_backward_options
         gradient_check.check_backward(pooling, x_data, y_grad, **options)
コード例 #56
0
ファイル: test_log_softmax.py プロジェクト: zachmayer/chainer
 def check_backward(self, x_data, gy_data, use_cudnn='always'):
     # Gradient-check LogSoftmax under the requested use_cudnn setting.
     cudnn_scope = chainer.using_config('use_cudnn', use_cudnn)
     with cudnn_scope:
         log_softmax = functions.LogSoftmax()
         options = self.check_backward_options
         gradient_check.check_backward(
             log_softmax, x_data, gy_data, **options)
コード例 #57
0
 def check_forward_gpu(self, use_cudnn):
     # Pass hx, xs, ws and bs through _to_gpu and run the shared forward
     # check under the requested use_cudnn setting.
     with chainer.using_config('use_cudnn', use_cudnn):
         hx_gpu = _to_gpu(self.hx)
         xs_gpu = _to_gpu(self.xs)
         ws_gpu = _to_gpu(self.ws)
         bs_gpu = _to_gpu(self.bs)
         self.check_forward(hx_gpu, xs_gpu, ws_gpu, bs_gpu)
コード例 #58
0
def greedy_batch_translate(encdec,
                           eos_idx,
                           src_data,
                           batch_size=80,
                           gpu=None,
                           get_attention=False,
                           nb_steps=50,
                           reverse_src=False,
                           reverse_tgt=False):
    """Greedily translate ``src_data`` in mini-batches.

    Everything runs with chainer's ``train`` config set to ``False`` and
    with backprop disabled.

    Args:
        encdec: Translation model. If ``encdec.encdec_type()`` returns
            ``"ff"``, the work is delegated to
            ``encdec.greedy_batch_translate``; otherwise the model is called
            directly on each mini-batch.
        eos_idx: End-of-sentence index, forwarded to ``de_batch`` and used to
            keep a trailing EOS in last position when ``reverse_tgt`` is set.
        src_data: Sized, sliceable collection of source sequences.
        batch_size: Number of source sequences per mini-batch.
        gpu: Forwarded to ``make_batch_src`` (ignored on the ``"ff"`` path).
        get_attention: When true, attention values are returned as well.
        nb_steps: Number of decoding steps requested from the model.
        reverse_src: When true, each source sequence is reversed before
            batching (ignored on the ``"ff"`` path).
        reverse_tgt: When true, each translation is reversed, leaving a
            trailing ``eos_idx`` in place (ignored on the ``"ff"`` path).

    Returns:
        The list of translations, or a ``(translations, attentions)`` pair
        when ``get_attention`` is true. On the ``"ff"`` path the attentions
        are all-zero ``float32`` arrays of shape ``(len(src), len(tgt))``.

    Raises:
        AssertionError: If both ``get_attention`` and ``reverse_tgt`` are
            set on the non-``"ff"`` path (combination not implemented).
    """
    with chainer.using_config("train", False), chainer.no_backprop_mode():
        if encdec.encdec_type() == "ff":
            result = encdec.greedy_batch_translate(src_data,
                                                   mb_size=batch_size,
                                                   nb_steps=nb_steps)
            if not get_attention:
                return result
            # This path produces no attention; return zero placeholders with
            # one (source length x target length) matrix per sentence pair.
            dummy_attention = [
                np.zeros((len(src), len(tgt)), dtype=np.float32)
                for src, tgt in zip(src_data, result)
            ]
            return result, dummy_attention

        nb_ex = len(src_data)
        # Floor division: with "/" the count becomes a float on Python 3 and
        # range() below raises TypeError.
        nb_batch = nb_ex // batch_size + (1 if nb_ex % batch_size != 0 else 0)
        res = []
        attn_all = []
        for i in range(nb_batch):
            current_batch_raw_data = src_data[i * batch_size:(i + 1) *
                                              batch_size]

            if reverse_src:
                current_batch_raw_data = [
                    src_side[::-1] for src_side in current_batch_raw_data
                ]

            src_batch, src_mask = make_batch_src(current_batch_raw_data,
                                                 gpu=gpu)
            # The second returned value (score) is not needed here.
            sample_greedy, _, attn_list = encdec(
                src_batch,
                nb_steps,
                src_mask,
                use_best_for_sample=True,
                keep_attn_values=get_attention)
            res += de_batch(sample_greedy,
                            mask=None,
                            eos_idx=eos_idx,
                            is_variable=False)
            if get_attention:
                attn_all += de_batch(attn_list,
                                     mask=None,
                                     eos_idx=None,
                                     is_variable=True,
                                     raw=True,
                                     reverse=reverse_tgt)

        if reverse_tgt:
            new_res = []
            for t in res:
                # Guard against an empty translation before looking at t[-1].
                if len(t) > 0 and t[-1] == eos_idx:
                    # Reverse the tokens but keep the EOS marker at the end.
                    new_res.append(t[:-1][::-1] + [t[-1]])
                else:
                    new_res.append(t[::-1])

            res = new_res

        if get_attention:
            assert not reverse_tgt, "not implemented"
            return res, attn_all
        return res
コード例 #59
0
 def test_backward_gpu(self):
     """Run ``self.check_backward`` on ``_to_gpu``-converted fixtures.

     The check runs with chainer's ``use_cudnn`` config set to ``'always'``.
     """
     with chainer.using_config('use_cudnn', 'always'):
         hx = _to_gpu(self.hx)
         xs = _to_gpu(self.xs)
         ws = _to_gpu(self.ws)
         bs = _to_gpu(self.bs)
         dhy = _to_gpu(self.dhy)
         dys = _to_gpu(self.dys)
         self.check_backward(hx, xs, ws, bs, dhy, dys)
コード例 #60
0
ファイル: elmo.py プロジェクト: GAIMJKP/models-2
    def forward(self, inputs):
        """
        Compute ELMo representations for a batch of inputs.

        Parameters
        ----------
        inputs : array-like exposing ``shape`` and ``reshape``
            When ``self.use_character_inputs`` is set: character ids of shape
            ``(batch_size, timesteps, 50)``. Inputs with additional
            dimensions, ``(batch_size, dim0, ..., dimn, timesteps, 50)``,
            are flattened to three dimensions before the biLM runs.
            Otherwise: token ids of shape ``(batch_size, timesteps)``.
            Inputs with additional dimensions are flattened to two
            dimensions, and a warning is emitted because that usage is
            untested.

        Returns
        -------
        Dict with keys:
        ``'elmo_representations'``: list
            One representation per scalar mix in ``self._scalar_mixes``,
            each passed through dropout. Each representation is shape
            ``(batch_size, timesteps, embedding_dim)``, with the extra
            leading dimensions of ``inputs`` restored when there were any.
        ``'mask'``:
            Sequence mask without sentence boundaries, shape
            ``(batch_size, timesteps)`` (leading dimensions restored in the
            same way).
        ``'elmo_layers'``: list
            Each biLM layer activation with sentence boundaries removed.
            These are not reshaped back to the leading dimensions of
            ``inputs``.
        """
        original_shape = inputs.shape
        char_level = self.use_character_inputs

        # Collapse any extra leading dimensions so the biLM sees a plain batch.
        if char_level:
            timesteps, num_characters = original_shape[-2:]
            needs_flatten = len(original_shape) > 3
            flat_shape = (-1, timesteps, num_characters)
        else:
            timesteps = original_shape[-1]
            needs_flatten = len(original_shape) > 2
            flat_shape = (-1, timesteps)
            if needs_flatten:
                warnings.warn(
                    'It is not tested to use input with shape (batch_size, dim0, ..., timesteps) to token-input Elmo.\n'
                    'Input with shape (batch_size, timesteps) is recommended.')
        reshaped_inputs = inputs.reshape(flat_shape) if needs_flatten else inputs

        # Run the biLM in test mode and without building a backward graph.
        with chainer.using_config("train", False):
            with chainer.no_backprop_mode():
                bilm_output = self._elmo_lstm.forward(reshaped_inputs)
        layer_activations = bilm_output['activations']
        mask_with_bos_eos = bilm_output['mask']

        # One mixed representation per scalar mix, boundaries stripped.
        representations = []
        for mix_index in range(len(self._scalar_mixes)):
            mixer = getattr(self, 'scalar_mix_{}'.format(mix_index))
            mixed = mixer.forward(layer_activations, mask_with_bos_eos)
            trimmed, mask_without_bos_eos = remove_sentence_boundaries(
                mixed, mask_with_bos_eos)
            representations.append(
                F.dropout(trimmed, ratio=self._dropout_ratio))

        # Restore the leading dimensions that were collapsed above.
        if needs_flatten:
            # Character inputs carry one extra trailing axis (the characters).
            leading_shape = original_shape[:-1] if char_level else original_shape
            mask = mask_without_bos_eos.reshape(leading_shape)
            elmo_representations = [
                rep.reshape(leading_shape + (-1, )) for rep in representations
            ]
        else:
            mask = mask_without_bos_eos
            elmo_representations = representations

        layer_activations_without_bos_eos = []
        for activation in layer_activations:
            stripped = remove_sentence_boundaries_for_variable(
                activation, mask_with_bos_eos)
            layer_activations_without_bos_eos.append(stripped[0])

        return {
            'elmo_representations': elmo_representations,
            'mask': mask,
            'elmo_layers': layer_activations_without_bos_eos
        }