Example #1
def test_parametric_function_api():
    """
    Testing :function:`nnabla.parametric_functions.parametric_function_api`.
    """
    import nnabla as nn
    import inspect
    nn.clear_parameters()
    shape = (2, 3, 4)

    # Signature check
    spec = inspect.getargspec(dummy_parametric_function)
    assert spec.args == ['shape', 'f', 'i', 's', 'name']
    assert spec.defaults == (10, 1, 'dummy', None)
    assert dummy_parametric_function.__doc__.splitlines()[0] == 'Doc'

    # Verify that the two different ways do the same thing.
    # Using name argument
    v = dummy_parametric_function(shape, name='group1')
    # Using parameter_scope
    with nn.parameter_scope('group1'):
        v = dummy_parametric_function(shape)

    params = nn.get_parameters()
    assert len(params) == 2
    assert list(iterkeys(params)) == ['group1/dummy/p1', 'group1/dummy/p2']

    # No scope
    v = dummy_parametric_function(shape)

    params = nn.get_parameters()
    assert len(params) == 4
    assert list(iterkeys(params)) == ['group1/dummy/p1', 'group1/dummy/p2',
                                      'dummy/p1', 'dummy/p2']
    nn.clear_parameters()
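
The test above references a dummy_parametric_function fixture defined elsewhere in nnabla's test suite. A minimal sketch of how such a fixture could look (the initializers and the returned expression are assumptions for illustration, not the actual fixture):

import nnabla as nn
from nnabla.initializer import ConstantInitializer
from nnabla.parameter import get_parameter_or_create
from nnabla.parametric_functions import parametric_function_api


# Hypothetical sketch: the decorator wraps the function in a "dummy" parameter
# scope and appends the trailing `name` keyword checked by the signature test.
@parametric_function_api("dummy")
def dummy_parametric_function(shape, f=10, i=1, s='dummy'):
    """Doc"""
    p1 = get_parameter_or_create("p1", shape, ConstantInitializer())
    p2 = get_parameter_or_create("p2", shape, ConstantInitializer())
    return p1 + p2
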
Example #2
def main():
    batch_size, m, h, w = 4, 3, 32, 32
    extension_module = "cpu"
    device_id = 0
    ctx = extension_context(extension_module, device_id=device_id)

    x_l_data = np.random.randn(batch_size, m, h, w)
    y_l_data = (np.random.rand(batch_size, 1) * 10).astype(np.int32)
    x_l = nn.Variable(x_l_data.shape)
    y_l = nn.Variable(y_l_data.shape)
    x_l.d = x_l_data
    y_l.d = y_l_data

    # CNN
    print("# CNN")
    pred = cnn_model_003(ctx, x_l)
    s = 0
    for n, v in nn.get_parameters().items():
        n_params = np.prod(v.shape)
        print(n, n_params)
        s += n_params
    print("n_params={}".format(s))
    nn.clear_parameters()
    
    # Resnet
    print("# Resnet")
    inmaps = 256
    pred = resnet_model(ctx, x_l, inmaps=inmaps)
    s = 0
    for n, v in nn.get_parameters().items():
        n_params = np.prod(v.shape)
        print(n, n_params)
        s += n_params
    print("n_params={}".format(s))
    nn.clear_parameters()
def test_save_load_parameters():
    v = nn.Variable([64, 1, 28, 28], need_grad=False)
    with nn.parameter_scope("param1"):
        with nn.parameter_scope("conv1"):
            h = PF.convolution(v, 32, (3, 3))
            b = PF.batch_normalization(h, batch_stat=True)
        with nn.parameter_scope("conv2"):
            h1 = PF.convolution(v, 32, (3, 3))
            b2 = PF.batch_normalization(h1, batch_stat=True)

    for k, v in iteritems(nn.get_parameters(grad_only=False)):
        v.data.cast(np.float32)[...] = np.random.randn(*v.shape)

    with nn.parameter_scope("param1"):
        param1 = nn.get_parameters(grad_only=False)
        nn.save_parameters("tmp.h5")
        nn.save_parameters("tmp.protobuf")

    with nn.parameter_scope("param2"):
        nn.load_parameters('tmp.h5')
        param2 = nn.get_parameters(grad_only=False)

    with nn.parameter_scope("param3"):
        nn.load_parameters('tmp.protobuf')
        param3 = nn.get_parameters(grad_only=False)

    for par2 in [param2, param3]:
        assert param1.keys() == par2.keys()  # Check order
        for (n1, p1), (n2, p2) in zip(sorted(param1.items()), sorted(par2.items())):
            assert n1 == n2
            assert np.all(p1.d == p2.d)
            assert p1.data.dtype == p2.data.dtype
            assert p1.need_grad == p2.need_grad
Example #4
def test_graph_model(model, seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    nn.clear_parameters()
    if model == "mlp":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
        z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    elif model == "recurrent":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
            z2 = F.relu(z, inplace=True)
        h = z2
        for _ in range(2):
            with nn.parameter_scope('fc2'):
                h = PF.affine(h, 3)
                h = F.relu(h, inplace=True)
        with nn.parameter_scope('fc3'):
            z3 = PF.affine(h, 5)
    elif model == "convolution":
        with nn.parameter_scope('conv1'):
            z = PF.convolution(x, 3, (2, 2))
            z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    else:
        raise ValueError()
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    L.forward(clear_no_need_grad=True)

    # Backprop
    # Gradients should be zero-initialized since they are always accumulated
    x.grad.zero()
    L.backward(clear_buffer=True)
    x.g = rng.randn(*x.shape)
    parameters = nn.get_parameters()
    for param in parameters.values():
        param.grad.zero()
    inputs = [x] + list(parameters.values())

    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L, inputs, 1e-3)
    assert np.allclose(ngrad, agrad, atol=1.05e-2)
Example #5
def test_parametric_function_1d(inshape, kernel, multiplier, outshape):
    base_axis = len(inshape) - 2
    sample_channels = inshape[base_axis]
    outmap_channels = sample_channels * multiplier
    x = nn.Variable(inshape)
    y = PF.depthwise_convolution(x, kernel, multiplier=multiplier)
    p = nn.get_parameters()
    assert y.shape == outshape
    assert p['depthwise_conv/W'].shape == (outmap_channels,) + kernel
    assert p['depthwise_conv/b'].shape == (outmap_channels,)
    nn.clear_parameters()
Example #6
def decompose_network_and_set_params(model_load_path,
                                     reference, slim, rrate=0.75):
    # Parameters are loaded globally, but we call this here for consistency
    nn.load_parameters(model_load_path)

    # Decompose
    with nn.parameter_scope(reference):
        trained_params = nn.get_parameters()
    # original parameter
    W = trained_params["fc3/affine/W"].d
    # original maps
    inmaps = W.shape[0]
    outmaps0 = W.shape[1]
    # new maps, R < N*M / (N+M) * rrate
    outmaps1 = reduce_maps(inmaps, outmaps0, rrate)
    # singular value decomposition
    U, s, V = np.linalg.svd(W, full_matrices=False)
    S = np.diag(s)
    SV = S.dot(V)
    U_approx = U[:, :outmaps1]
    SV_approx = SV[:outmaps1, :outmaps0]

    # Set trained parameters and decomposed parameters
    # set trained parameters
    with nn.parameter_scope(slim):
        slim_params = nn.get_parameters()
    for n, v in trained_params.items():
        if n not in slim_params:
            continue
        v_slim = slim_params[n]
        v_slim.d = v.d
    # set decomposed parameters and original bias
    # a new bias is introduced due to decomposition
    slim_params["fc-d0/affine/W"].d = U_approx
    slim_params["fc-d1/affine/W"].d = SV_approx
    b = trained_params["fc3/affine/b"]
    slim_params["fc-d1/affine/b"].d = b.d

    # Clear the parameters of the reference net
    with nn.parameter_scope(reference):
        nn.clear_parameters()
Example #7
def test_graph_rewire(seed, clear_buffer):
    nn.clear_parameters()

    # Define a graph-building utility.
    def mlp2(x, scope):
        with nn.parameter_scope(scope):
            h = F.tanh(PF.affine(x, 10, name='a1'))
            h = F.tanh(PF.affine(h, 10, name='a1'))
            return h

    # A. Create a graph A.
    xa = nn.Variable((2, 10), need_grad=True)
    ya = mlp2(xa, 'a')

    # B. Create a graph B.
    xb = nn.Variable((2, 10), need_grad=True)
    yb = mlp2(xb, 'b')

    # C. Create directly connected graph.
    xc = nn.Variable((2, 10))
    yc = mlp2(mlp2(xc, 'a'), 'b')

    # D. Rewire the graphs A and B.
    xb.rewire_on(ya)

    # E. Check whether the results are the same.
    rng = np.random.RandomState(seed)
    data = rng.randn(*xa.shape)
    xa.d = data
    xc.d = data
    params = nn.get_parameters()

    def zero_grad():
        for p in params.values():
            p.grad.zero()

    def backup_params():
        return [p.g.copy() for p in params.values()]

    # Checking forward
    yb.forward(clear_no_need_grad=clear_buffer)
    yc.forward(clear_no_need_grad=clear_buffer)
    assert_allclose(yb.d, yc.d)
    # Checking backward
    zero_grad()
    yb.backward(clear_buffer=clear_buffer)
    gb = backup_params()
    zero_grad()
    yc.backward(clear_buffer=clear_buffer)
    gc = backup_params()
    assert_allclose(xa.d, xc.d)
    for b, c in zip(gb, gc):
        assert_allclose(b, c)
Example #8
    def load_parameters(self, path, extension=".h5"):
        """Load parameters from a file into this module.

        Args:
            path: str or file-like object

        """
        scope = OrderedDict()
        with nn.parameter_scope('', scope):
            nn.load_parameters(path, extension=extension)
            params = nn.get_parameters()
        self.set_parameters(params)
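
A short usage sketch for the method above (the module class and file name are placeholders; any nnabla module exposing set_parameters/get_parameters works the same way):

# Hypothetical usage sketch: `MyModule` and "weights.h5" are assumptions.
module = MyModule()
module.load_parameters("weights.h5")      # load only into this module
for name, var in module.get_parameters().items():
    print(name, var.shape)
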
Example #9
    def modify(self, f, inputs):
        params = [v.data for v in nn.get_parameters(grad_only=False).values()]
        inputs_ = []
        for inp in inputs:
            if inp.data not in params:
                inputs_.append(inp)
            else:
                inp = inp.get_unlinked_variable(need_grad=False)
                inputs_.append(inp)

        o = self._call_function(f.info.type_name, inputs_, f.info.args)
        return o
Example #10
def test_load_save_parameters():
    module = MyModule(shape=(5, 5))
    params = module.get_parameters()

    if not os.path.exists('__nnabla_nas__'):
        os.makedirs('__nnabla_nas__')
    nn.save_parameters('__nnabla_nas__/params.h5', params)
    nn.load_parameters('__nnabla_nas__/params.h5')

    params0 = nn.get_parameters()
    for k, v in params.items():
        assert_allclose(v.d, params0[k].d)
Example #11
    def load_parameters(self, path, extension=".h5", raise_if_missing=True):
        """Load parameters from a file into this module.

        Args:
            path: str or file-like object

        """
        scope = OrderedDict()
        with nn.parameter_scope('', scope):
            nn.load_parameters(path, extension=extension)
            params = nn.get_parameters(grad_only=False)
        self.set_parameters(params, raise_if_missing=raise_if_missing)
Example #12
    def load_parameters(self, path, raise_if_missing=False):
        r"""Loads parameters from a file with the specified format.

        Args:
            path (str): The path to file.
            raise_if_missing (bool, optional): Raise exception if some
                parameters are missing. Defaults to `False`.
        """
        with nn.parameter_scope('', OrderedDict()):
            nn.load_parameters(path)
            params = nn.get_parameters(grad_only=False)
        self.set_parameters(params, raise_if_missing=raise_if_missing)
Example #13
    def train(self):
        # variables for training
        tx_in = nn.Variable(
            [self._batch_size, self._x_input_length, self._cols_size])
        tx_out = nn.Variable(
            [self._batch_size, self._x_output_length, self._cols_size])
        tpred = self.network(tx_in, self._lstm_unit_name, self._lstm_units)
        tpred.persistent = True
        loss = F.mean(F.squared_error(tpred, tx_out))
        solver = S.Adam(self._learning_rate)
        solver.set_parameters(nn.get_parameters())

        # variables for validation
        vx_in = nn.Variable(
            [self._batch_size, self._x_input_length, self._cols_size])
        vx_out = nn.Variable(
            [self._batch_size, self._x_output_length, self._cols_size])
        vpred = self.network(vx_in, self._lstm_unit_name, self._lstm_units)

        # data iterators
        tdata = self._load_dataset(self._training_dataset_path,
                                   self._batch_size,
                                   shuffle=True)
        vdata = self._load_dataset(self._validation_dataset_path,
                                   self._batch_size,
                                   shuffle=True)

        # monitors
        from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
        monitor = Monitor(self._monitor_path)
        monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
        monitor_err = MonitorSeries("Training error", monitor, interval=10)
        monitor_time = MonitorTimeElapsed("Training time",
                                          monitor,
                                          interval=100)
        monitor_verr = MonitorSeries("Validation error", monitor, interval=10)

        # Training loop
        for i in range(self._max_iter):
            if i % self._val_interval == 0:
                ve = self._validate(vpred, vx_in, vx_out, vdata,
                                    self._val_iter)
                monitor_verr.add(i, ve / self._val_iter)
            te = self._train(tpred, solver, loss, tx_in, tx_out, tdata.next(),
                             self._weight_decay)
            monitor_loss.add(i, loss.d.copy())
            monitor_err.add(i, te)
            monitor_time.add(i)
        ve = self._validate(vpred, vx_in, vx_out, vdata, self._val_iter)
        monitor_verr.add(i, ve / self._val_iter)

        # Save the best model parameters
        nn.save_parameters(self._model_params_path)
Example #14
    def _get_variable_or_create(self, v, callback, current_scope):

        if v.variable is not None:
            return v.variable

        v = callback._apply_generate_variable(v)

        if v.variable is not None:
            return v.variable

        pvar = v.proto
        name = pvar.name
        shape = list(pvar.shape.dim)
        if shape[0] < 0:
            shape[0] = self.batch_size
        shape = tuple(shape)
        assert np.all(np.array(shape) > 0
                      ), "Shape must be positive. Given {}.".format(shape)

        if pvar.type != 'Parameter':
            # Create a new variable and returns.
            var = nn.Variable(shape)
            v.variable = var
            var.name = name
            return var

        # Trying to load the parameter from .nnp file.
        callback.verbose('Loading parameter `{}` from .nnp.'.format(name))
        try:
            param = get_parameter(name)
            if param is None:
                logger.info(
                    'Parameter `{}` is not found. Initializing.'.format(name))
                tmp = _create_variable(pvar, name, shape, self.rng)
                param = tmp.variable_instance
                set_parameter(name, param)
            # Always copy param to current scope even if it already exists.
            with nn.parameter_scope('', current_scope):
                set_parameter(name, param)
        except:
            import sys
            import traceback
            raise ValueError(
                'An error occurs during creation of a variable `{}` as a'
                ' parameter variable. The error was:\n----\n{}\n----\n'
                'The parameters registered was {}'.format(
                    name, traceback.format_exc(), '\n'.join(
                        list(nn.get_parameters(grad_only=False).keys()))))
        assert shape == param.shape
        param = param.get_unlinked_variable(need_grad=v.need_grad)
        v.variable = param
        param.name = name
        return param
Example #15
def test_graph_clear_buffer(seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4])
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    # Network definition
    nn.set_default_context(nn.Context())
    nn.clear_parameters()
    x1 = x + 1
    x2 = x1 - 1
    with nn.parameter_scope('conv1'):
        z = PF.convolution(x2, 3, (2, 2))
        z2 = F.relu(z, inplace=True)
    with nn.parameter_scope('fc2'):
        z3 = PF.affine(z2, 5)
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    import tempfile
    import os
    tmpd = tempfile.mkdtemp()
    nn.save_parameters(os.path.join(tmpd, 'parameter.h5'))
    first = False
    for cnng in [False, True]:
        for cb in [False, True]:
            _ = nn.load_parameters(os.path.join(tmpd, 'parameter.h5'))
            for v in nn.get_parameters().values():
                v.grad.zero()
            L.forward(clear_no_need_grad=cnng)
            L.backward(clear_buffer=cb)
            if not first:
                first = True
                g = list(nn.get_parameters().values())[0].g.copy()
            else:
                g2 = list(nn.get_parameters().values())[0].g.copy()
                assert np.all(g == g2)
Example #17
def encode_text(text):
    param_dict = nn.get_parameters()

    embed_dim = param_dict['text_projection'].shape[1]
    context_length = param_dict['positional_embedding'].shape[0]
    vocab_size = param_dict['token_embedding/W'].shape[0]
    transformer_width = param_dict['ln_final/W'].shape[0]
    transformer_heads = transformer_width // 64
    transformer_layers = len(
        set(
            k.split('/')[2] for k in param_dict.keys()
            if k.startswith(f'transformer/resblocks')))

    token_embedding = nn.parameter.get_parameter_or_create(
        name='token_embedding/W', shape=(vocab_size, transformer_width))
    x = F.embed(text, token_embedding)  # [batch_size, n_ctx, d_model]

    positional_embedding = nn.parameter.get_parameter_or_create(
        name='positional_embedding',
        shape=(context_length, transformer_width)).reshape(
            (1, context_length, transformer_width))
    x = x + positional_embedding

    x = F.transpose(x, (1, 0, 2))  # NLD -> LND

    x = transformer(x,
                    transformer_width,
                    transformer_layers,
                    transformer_heads,
                    attn_mask=build_attn_mask(context_length))

    x = F.transpose(x, (1, 0, 2))  # LND -> NLD

    ln_final_W = nn.parameter.get_parameter_or_create(
        name='ln_final/W', shape=(transformer_width, )).reshape(
            (1, 1, transformer_width))
    ln_final_b = nn.parameter.get_parameter_or_create(
        name='ln_final/b', shape=(transformer_width, )).reshape(
            (1, 1, transformer_width))
    x = F.layer_normalization(x, ln_final_b, ln_final_W, batch_axis=(0, 1))

    idx = F.max(text, axis=-1, only_index=True)
    idx.forward()
    x = x[list(range(x.shape[0])), idx.d].reshape((1, x.shape[0], -1))
    text_projection = nn.parameter.get_parameter_or_create(
        name='text_projection', shape=(transformer_width, embed_dim)).reshape(
            (1, transformer_width, embed_dim))
    x = F.batch_matmul(x, text_projection)

    x = x.reshape((-1, embed_dim))

    return x
Example #18
    def get_parameters(self, recursive=True, grad_only=False, memo=None):
        """Obtain an OrderedDict object of all parameters in current Module.

        For example,

        .. code-block:: python

            x = nn.Variable.from_numpy_array((np.random.random((8, 32, 256, 256))))
            conv_bn = ConvBn(2)
            y = conv_bn(x)

            params = conv_bn.get_parameters()
            for parameter_name, parameter_value in params.items():
                print("{}:{}".format(parameter_name, parameter_value.shape))

        The output looks like:

        .. code-block:: none

            conv/W:(2, 32, 1, 1)
            bn/beta:(1, 2, 1, 1)
            bn/gamma:(1, 2, 1, 1)
            bn/mean:(1, 2, 1, 1)
            bn/var:(1, 2, 1, 1)

        Notice that each parameter name looks like a file path, with slash-separated
        nested scope names. In addition, the module name is used by default, with a prefix ``@``.

        Args:
            recursive (bool, optional, default=True):
                Whether to obtain the parameters of the current module's submodules. Default is True.
            grad_only (bool, optional, default=False):
                Whether to obtain only the parameters that require gradients. Default is False.

        Returns:
            OrderedDict:
                Flattened name-value pairs of the parameters in the current Module.
        """
        params = OrderedDict()
        if memo is None:
            memo = ParamMemo()
        if recursive:
            for name, module in self.submodules.items():
                params.update(
                    insert_parent_name(
                        name,
                        module.get_parameters(recursive=recursive, grad_only=grad_only, memo=memo)))
        with nn.parameter_scope('', self.parameter_scope):
            found_params = nn.get_parameters(grad_only=grad_only)
            filtered_params = memo.filter_and_update(found_params)
            params.update(filtered_params)
        return params
Example #19
    def __init__(self, black_list=[], params=None, name="identity"):
        self.graph_info = None
        self.entry_variables = None

        self.black_list = black_list
        self.params = params if params is not None else nn.get_parameters(
            grad_only=False)
        self.name = name

        self.end_variable = None
        self.outputs = []
        # output of ref graph to output of new graph (TODO: change name)
        self.input_map = {}
Example #20
def training(steps, learning_rate):
    solver = S.Sgd(learning_rate)
    solver.set_parameters(
        nn.get_parameters())  # Set parameter variables to be updated.
    for i in range(steps):
        x.d, t.d = data.next()
        loss.forward()
        solver.zero_grad()  # Initialize gradients of all parameters to zero.
        loss.backward()
        solver.weight_decay(1e-5)  # Apply weight decay as a regularization
        solver.update()
        if i % 100 == 0:  # Print every 100 iterations
            print(i, loss.d)
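
The loop above relies on x, t, loss, and data being defined as globals elsewhere. A minimal sketch of such a setup (the toy data iterator and the two-layer network are assumptions for illustration):

import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF
import nnabla.solvers as S
from nnabla.utils.data_iterator import data_iterator_simple

batch_size = 32

# Toy data source: random images with labels in [0, 10).
def load_func(index):
    return (np.random.randn(1, 28, 28).astype(np.float32),
            np.array([index % 10], dtype=np.int32))

data = data_iterator_simple(load_func, 1000, batch_size, shuffle=True)

# Simple two-layer classifier whose parameters training() will update.
x = nn.Variable((batch_size, 1, 28, 28))
t = nn.Variable((batch_size, 1))
h = F.relu(PF.affine(x, 100, name='fc1'))
y = PF.affine(h, 10, name='fc2')
loss = F.mean(F.softmax_cross_entropy(y, t, 1))
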
Example #21
def save_all_params(params_dict, c, k, j, bundle_size, step_size, save_dir,
                    epoch):
    params_dict[c] = nn.get_parameters(grad_only=False).copy()
    c += 1
    if c == bundle_size or j == step_size - 1:
        dn = os.path.join(save_dir, 'epoch%02d' % (epoch), 'weights')
        ensure_dir(dn)
        for cc, params in params_dict.items():
            fn = '%s/model_step%04d.h5' % (dn, k + cc)
            nn.save_parameters(fn, params=params, extension=".h5")
        k += c
        c = 0
        params_dict = {}
    return params_dict, c, k
Example #22
    def __init__(self,
                 batch_size=32,
                 learning_rate=1e-4,
                 max_iter=5086,
                 total_epochs=20,
                 monitor_path=None,
                 val_weight=None,
                 model_load_path=None):
        """
        Construct all the necessary attributes for the attribute classifier.
        Args:
            batch_size (int): number of samples contained in each generated batch
            learning_rate (float) : learning rate
            max_iter (int) : maximum iterations for an epoch
            total_epochs (int) : total epochs to train the model
            val_weight : sample weights
            monitor_path (str) : path where model parameters are saved
            model_load_path (str) : path to the model parameters to load
        """
        self.batch_size = batch_size
        # Resnet 50
        # training graph
        model = ResNet50()
        self.input_image = nn.Variable((self.batch_size, ) + model.input_shape)
        self.label = nn.Variable([self.batch_size, 1])
        # fine tuning
        pool = model(self.input_image, training=True, use_up_to='pool')
        self.clf = clf_resnet50(pool)
        self.clf.persistent = True
        # loss
        self.loss = F.mean(F.sigmoid_cross_entropy(self.clf, self.label))
        # hyper parameters
        self.solver = S.Adam(learning_rate)
        self.solver.set_parameters(nn.get_parameters())

        # validation graph
        self.x_v = nn.Variable((self.batch_size, ) + model.input_shape)
        pool_v = model(self.x_v, training=False, use_up_to='pool')
        self.v_clf = clf_resnet50(pool_v, train=False)
        self.v_clf_out = F.sigmoid(self.v_clf)
        self.print_freq = 100
        self.validation_weight = val_weight
        # val params
        self.acc = 0.0
        self.total_epochs = total_epochs
        self.max_iter = max_iter
        self.monitor_path = monitor_path

        if model_load_path is not None:
            _ = nn.load_parameters(model_load_path)
Example #23
def sample_arch_and_train(args, data_dict, controller_weights_dict):
    """
        Execute the following steps.
        1. For a certain number of times, let the controller sample architectures
           and test their performance (by calling get_sample_and_feedback).
        2. Train the controller using the performance obtained in the previous step.
        3. Select the architecture with the best validation accuracy and train its parameters.
    """

    solver = S.Momentum(args.control_lr)  # create solver for the controller
    solver.set_parameters(controller_weights_dict,
                          reset=False,
                          retain_state=True)
    solver.zero_grad()

    val_list = list()
    arch_list = list()

    with nn.auto_forward():
        for c in range(args.num_candidate):
            output_line = " Architecture {} / {} ".format((c + 1),
                                                          args.num_candidate)
            print("{0:-^80s}".format(output_line))

            # sample one architecture and get its feedback for RL as loss
            loss, val_acc, sample_arch = get_sample_and_feedback(
                args, data_dict)

            val_list.append(val_acc)
            arch_list.append(sample_arch)
            loss.backward()  # accumulate gradient each time

        print("{0:-^80s}\n".format(" Reinforcement Learning Phase "))
        print("current accumulated loss:", loss.d)

        solver.weight_decay(0.025)
        solver.update()  # train the controller

        print("\n{0:-^80s}\n".format(" CNN Learning Phase "))
        best_idx = np.argmax(val_list)
        sample_arch = arch_list[best_idx]
        print("Train the model whose architecture is:")
        show_arch(sample_arch)
        print("and its accuracy is: {:.2f} %\n".format(100 * np.max(val_list)))
        print("Learnable Parameters:", params_count(nn.get_parameters()))

    # train a child network which achieves the best validation accuracy.
    val_acc = CNN_run(args, sample_arch, data_dict, with_train=True)

    return sample_arch, val_acc
def test_pf_prelu_execution(g_rng, inshape, base_axis, shared, slope_init,
                            fix_parameters):

    slope_shape = tuple() if shared else (inshape[base_axis], )
    slope_init = process_param_init(slope_init, slope_shape, g_rng)

    kw = {}
    insert_if_not_none(kw, 'slope_init', slope_init)
    insert_if_not_default(kw, 'base_axis', base_axis, 1)
    insert_if_not_default(kw, 'shared', shared, True)
    insert_if_not_default(kw, 'fix_parameters', fix_parameters, False)

    x = nn.Variable.from_numpy_array(g_rng.randn(*inshape))

    # Check execution
    y = PF.prelu(x, **kw)
    y.forward()
    y.backward()

    # Check values
    # TODO

    # Check args
    assert y.parent.info.type_name == 'PReLU'
    args = y.parent.info.args
    assert args['base_axis'] == base_axis

    # Check created parameters
    assert y.parent.inputs[0] == x
    assert len(y.parent.inputs) == 2
    assert len(nn.get_parameters()) == 1
    slope = nn.get_parameters()['prelu/slope']
    assert slope.shape == slope_shape
    assert slope.need_grad
    assert y.parent.inputs[1].need_grad == (not fix_parameters)
    if isinstance(slope_init, np.ndarray):
        assert np.allclose(slope_init, slope.d)
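
The PReLU test above calls small helpers from nnabla's parametric-function test utilities (process_param_init, insert_if_not_none, insert_if_not_default). Rough sketches of the two keyword helpers, shown with assumed behavior for readability rather than as the actual implementations:

# Assumed behavior: add a keyword argument only when it carries information.
def insert_if_not_none(kw, key, value):
    if value is not None:
        kw[key] = value


def insert_if_not_default(kw, key, value, default):
    if value != default:
        kw[key] = value
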
Example #25
def main():
    args = get_args()

    nn.load_parameters(args.input)
    params = nn.get_parameters(grad_only=False)

    processed = False

    # Convert memory layout
    layout = get_memory_layout(params)
    if args.memory_layout is None:
        pass
    if args.affine_to_conv:
        rm_list = []
        ret = affine_to_conv(params, args.memory_layout, rm_list)
        for r in rm_list:
            print(r)
            nn.parameter.pop_parameter(r)
        if ret:
            logger.info('Converted affine to conv.')
        processed |= ret

    if args.memory_layout != layout:
        logger.info(f'Converting memory layout to {args.memory_layout}.')
        convert_memory_layout(params, args.memory_layout)
        processed |= True
    else:
        logger.info('No need to convert memory layout.')
    if args.force_4_channels:
        ret = force_4_channels(params, args.memory_layout)
        if ret:
            logger.info('Converted first conv to 4-channel input.')
        processed |= ret
    if args.force_3_channels:
        ret = force_3_channels(params, args.memory_layout)
        if ret:
            logger.info('Converted first conv to 3-channel input.')
        processed |= ret
    nn.clear_parameters()
    for key, param in params.items():
        print(key)
        print(param.shape)
        nn.parameter.set_parameter(key, param)
    if not processed:
        logger.info(
            'No change has been made for the input. Not saving a new parameter file.')
        return
    logger.info(f'Save a new parameter file at {args.output}')
    nn.save_parameters(args.output)
def test_save_load_parameters():
    v = nn.Variable([64, 1, 28, 28], need_grad=False)
    with nn.parameter_scope("param1"):
        with nn.parameter_scope("conv1"):
            h = PF.convolution(v, 32, (3, 3))
            b = PF.batch_normalization(h, batch_stat=True)
        with nn.parameter_scope("conv2"):
            h1 = PF.convolution(v, 32, (3, 3))
            b2 = PF.batch_normalization(h1, batch_stat=True)

    for k, v in iteritems(nn.get_parameters(grad_only=False)):
        v.data.cast(np.float32)[...] = np.random.randn(*v.shape)

    with nn.parameter_scope("param1"):
        param1 = nn.get_parameters(grad_only=False)
        nn.save_parameters("tmp.h5")
        nn.save_parameters("tmp.protobuf")

    with nn.parameter_scope("param2"):
        nn.load_parameters('tmp.h5')
        param2 = nn.get_parameters(grad_only=False)

    with nn.parameter_scope("param3"):
        nn.load_parameters('tmp.protobuf')
        param3 = nn.get_parameters(grad_only=False)

    for par2 in [param2, param3]:
        assert param1.keys() == par2.keys()  # Check order
        for (n1, p1), (n2, p2) in zip(sorted(param1.items()),
                                      sorted(par2.items())):
            assert n1 == n2
            assert np.all(p1.d == p2.d)
            if par2 is not param3:
                # NOTE: data is automatically casted to fp32 in Protobuf
                assert p1.data.dtype == p2.data.dtype
            assert p1.need_grad == p2.need_grad
def create_nnabla_net(resnext=False):
    # Create nnabla graph
    from models import senet
    x = nn.Variable((1, 3, 224, 224), need_grad=False)
    if resnext:
        y = senet.se_resnext50(x, 1000, test=True)
    else:
        y = senet.se_resnet50(x, 1000, test=True)
    params = nn.get_parameters(grad_only=False)
    param_dims = 0
    for k, v in params.items():
        param_dims += np.prod(v.shape)
        print(k, v.shape, param_dims)
    print('total parameters: ', param_dims)
    return (x, y), params, param_dims
Example #28
def test_compute_simple_hessian(ctx):
    nn.clear_parameters()

    # Network
    state = nn.Variable((1, 2))
    output = PF.affine(state,
                       1,
                       w_init=I.ConstantInitializer(value=1.),
                       b_init=I.ConstantInitializer(value=1.))
    loss = F.sum(output**2)
    # Input
    state_array = np.array([[1.0, 0.5]])
    state.d = state_array

    # Grad of network
    params = nn.get_parameters().values()
    for param in params:
        param.grad.zero()
    grads = nn.grad([loss], params)
    flat_grads = F.concatenate(*[F.reshape(grad, (-1,)) for grad in grads]) if len(grads) > 1 \
        else F.reshape(grads[0], (-1,))

    # Compute hessian
    hessian = np.zeros((flat_grads.shape[0], flat_grads.shape[0]),
                       dtype=np.float32)
    for i in range(flat_grads.shape[0]):
        flat_grads_i = flat_grads[i]
        flat_grads_i.forward()
        for param in params:
            param.grad.zero()
        flat_grads_i.backward()
        num_index = 0
        for param in params:
            grad = param.g.flatten()  # grad of grad so this is hessian
            hessian[i, num_index:num_index + len(grad)] = grad
            num_index += len(grad)

    actual = hessian
    s1, s2 = state_array[0, 0], state_array[0, 1]
    expected = np.array([[2 * s1**2, 2 * s1 * s2, 2 * s1],
                         [2 * s1 * s2, 2 * s2**2, 2 * s2],
                         [2 * s1, 2 * s2, 2.]])
    assert_allclose(actual, expected)
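
For reference, the expected matrix in the test above follows directly from the network: with a single affine output y = w_1 s_1 + w_2 s_2 + b and loss L = y^2, the Hessian with respect to (w_1, w_2, b) is

    \nabla^2 L = 2 \begin{pmatrix} s_1 \\ s_2 \\ 1 \end{pmatrix}
                   \begin{pmatrix} s_1 & s_2 & 1 \end{pmatrix}
               = 2 \begin{pmatrix} s_1^2 & s_1 s_2 & s_1 \\
                                   s_1 s_2 & s_2^2 & s_2 \\
                                   s_1 & s_2 & 1 \end{pmatrix},

which, with s_1 = 1.0 and s_2 = 0.5, is exactly the array compared against above.
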
def data_distill(model, uniform_data_iterator, num_iter):
    generated_img = []
    for _ in range(uniform_data_iterator.size //
                   uniform_data_iterator.batch_size):
        img, _ = uniform_data_iterator.next()
        dst_img = nn.Variable(img.shape, need_grad=True)
        dst_img.d = img
        img_params = OrderedDict()
        img_params['img'] = dst_img

        init_lr = 0.5
        solver = S.Adam(alpha=init_lr)
        solver.set_parameters(img_params)
        #scheduler = lr_scheduler.CosineScheduler(init_lr=0.5, max_iter=num_iter)
        scheduler = ReduceLROnPlateauScheduler(init_lr=init_lr,
                                               min_lr=1e-4,
                                               verbose=False,
                                               patience=100)
        dummy_solver = S.Sgd(lr=0)
        dummy_solver.set_parameters(nn.get_parameters())

        for it in tqdm(range(num_iter)):
            lr = scheduler.get_learning_rate()
            solver.set_learning_rate(lr)

            global outs
            outs = []
            global batch_stats
            batch_stats = []

            y = model(denormalize(dst_img),
                      force_global_pooling=True,
                      training=False)  # denormalize to U(0, 255)
            y.forward(function_post_hook=get_output)
            assert len(outs) == len(batch_stats)
            loss = zeroq_loss(batch_stats, outs, dst_img)
            loss.forward()
            solver.zero_grad()
            dummy_solver.zero_grad()
            loss.backward()
            solver.weight_decay(1e-6)
            solver.update()

            scheduler.update_lr(loss.d)

        generated_img.append(dst_img.d)

    return generated_img
    def mNextParam(self, idx):
        if np.random.rand() > 0.8:
            with nn.parameter_scope(net(7)):
                param = nn.get_parameters()
                for i, j in param.items():
                    self.mParam[net(idx)]["pre"][i] = self.mParam[net(idx)]["next"][i].copy()
                    self.mParam[net(idx)]["next"][i] = param.get(i).d
        else:
            for i in self.mParam[net(idx)]["next"].keys():
                self.mParam[net(idx)]["pre"][i] = self.mParam[net(idx)]["next"][i].copy()
                self.mParam[net(idx)]["next"][i] = np.random.randn(
                    *(self.mParam[net(idx)]["next"][i].shape))
            return
        return
Example #31
    def __init__(self, graph, device_id, ext_name, solver=None, n_run=100, max_measure_execution_time=1,
                 time_scale="m"):
        self.graph = graph
        # if solver is None, training time (forward + backward + update) is not calculated
        self.solver = solver
        self.n_run = n_run
        self.device_id = str(device_id)
        self.ext_name = ext_name
        self.ext_module = import_extension_module(self.ext_name)
        self.max_measure_execution_time = max_measure_execution_time
        self.time_scale = time_scale
        self.result = dict()
        self.name2val = {v: k for k, v in nn.get_parameters().items()}

        if self.n_run < 1:
            raise AssertionError("n_run must be bigger than 1")
Example #32
    def create_graphviz_digraph(self, vleaf, format=None):
        '''
        Create a :obj:`graphviz.Digraph` object given the leaf variable of a
        computation graph.

        One of the nice things about getting a ``Digraph`` directly is that the drawn
        graph can be displayed inline in a Jupyter notebook, as described in the
        `Graphviz documentation <https://graphviz.readthedocs.io/en/stable/manual.html#jupyter-notebooks>`_.

        Args:
            vleaf (`nnabla.Variable`):
                End variable. All variables and functions which can be
                traversed from this variable are shown in the result.
            format (str):
                Force overwrite the ``format`` (``'pdf'``, ``'png'``, ...) configuration.

        Returns: graphviz.Digraph

        '''
        from nnabla import get_parameters
        import copy
        try:
            from graphviz import Digraph
        except:
            raise ImportError("Install graphviz. `pip install graphviz.`")
        if format is None:
            format = self._format
        graph = Digraph(format=format)
        graph.attr("node", style="filled")

        params = get_parameters(grad_only=False)
        var2name = {v.data: k for k, v in params.items()}
        fun2scope = {}
        var2postname = copy.copy(var2name)

        def fscope(f):
            names = [var2name[v.data] for v in f.inputs if v.data in var2name]
            if names:
                c = os.path.commonprefix(names)
                fun2scope[f] = c
                for n in names:
                    var2postname[params[n].data] = n[len(c):]
        vleaf.visit(fscope)
        func = self.functor(graph, self._verbose,
                            fun2scope=fun2scope, var2name=var2postname)
        vleaf.visit(func)
        return graph
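
A short usage sketch for the method above, assuming it belongs to nnabla's SimpleGraph viewer (nnabla.experimental.viewers); adjust the class if yours differs:

import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF
import nnabla.experimental.viewers as V

# Build a tiny graph to visualize.
x = nn.Variable((1, 3, 32, 32))
h = F.relu(PF.convolution(x, 8, (3, 3), name='conv'))
y = PF.affine(h, 10, name='fc')

viewer = V.SimpleGraph()
digraph = viewer.create_graphviz_digraph(y, format='png')
digraph.render('graph')  # renders graph.png via the graphviz package
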
Example #33
    def get_parameters(self, recursive=True, grad_only=False, memo=None):
        params = OrderedDict()
        if memo is None:
            memo = ParamMemo()
        if recursive:
            for name, module in self.submodules.items():
                params.update(
                    insert_parent_name(
                        name,
                        module.get_parameters(recursive=recursive,
                                              grad_only=grad_only,
                                              memo=memo)))
        with nn.parameter_scope('', self.parameter_scope):
            found_params = nn.get_parameters(grad_only=grad_only)
            filtered_params = memo.filter_and_update(found_params)
            params.update(filtered_params)
        return params
Example #34
def train(max_iter=60000):
    # Initialize data provider
    di_l = I.data_iterator_mnist(batch_size, True)
    di_t = I.data_iterator_mnist(batch_size, False)

    # Network
    shape_x = (1, 28, 28)
    shape_z = (50, )
    x = nn.Variable((batch_size, ) + shape_x)
    loss_l = I.vae(x, shape_z, test=False)
    loss_t = I.vae(x, shape_z, test=True)

    # Create solver
    solver = S.Adam(learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitors for training and validation
    path = cache_dir(os.path.join(I.name, "monitor"))
    monitor = M.Monitor(path)
    monitor_train_loss = M.MonitorSeries("train_loss", monitor, interval=600)
    monitor_val_loss = M.MonitorSeries("val_loss", monitor, interval=600)
    monitor_time = M.MonitorTimeElapsed("time", monitor, interval=600)

    # Training Loop.
    for i in range(max_iter):

        # Initialize gradients
        solver.zero_grad()

        # Forward, backward and update
        x.d, _ = di_l.next()
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(weight_decay)
        solver.update()

        # Forward for test
        x.d, _ = di_t.next()
        loss_t.forward(clear_no_need_grad=True)

        # Monitor for logging
        monitor_train_loss.add(i, loss_l.d.copy())
        monitor_val_loss.add(i, loss_t.d.copy())
        monitor_time.add(i)

    return path
Example #35
    def test_propagate(self):
        """

        The graph used below is from Fig. 1 in the original paper (https://arxiv.org/pdf/1511.05493.pdf)
        """
        edges = {"B": [(0, 1), (3, 2)], "C": [(2, 1), (1, 3)]}

        with nn.parameter_scope("test_propagate"):
            vertices = nn.Variable((4, 1))
            outputs = L.propagate(vertices, edges)
            params = nn.get_parameters()

            self.assertEqual((4, 1), outputs.shape)
            self.assertEqual(8, len(params))
            self.assertEqual((1, 3, 1), params["W_zr/affine/W"].shape)
            self.assertEqual((1, 2, 1), params["U_zr/affine/W"].shape)
            self.assertEqual((1, 1), params["U/affine/W"].shape)
Example #36
def main(args):
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = 4000
    n_train_data = 50000
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = 300
    act = F.relu
    iter_epoch = int(n_train_data / batch_size)
    n_iter = n_epoch * iter_epoch
    extension_module = args.context
    alpha = args.alpha

    # Supervised Model 
    ## ERM
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l_0 = nn.Variable((batch_size, m, h, w))
    y_l_0 = nn.Variable((batch_size, 1))
    pred = cnn_model_003(ctx, x_l_0)
    loss_ce = ce_loss(ctx, pred, y_l_0)
    loss_er = er_loss(ctx, pred)
    loss_supervised = loss_ce + loss_er
    ## VRM (mixup)
    x_l_1 = nn.Variable((batch_size, m, h, w))
    y_l_1 = nn.Variable((batch_size, 1))
    coef = nn.Variable()
    coef_b = F.broadcast(coef.reshape([1]*x_l_0.ndim, unlink=True), x_l_0.shape)
    x_l_m = coef_b * x_l_0 + (1 - coef_b) * x_l_1
    coef_b = F.broadcast(coef.reshape([1]*pred.ndim, unlink=True), pred.shape)
    y_l_m = coef_b * F.one_hot(y_l_0, (n_cls, )) \
            + (1-coef_b) * F.one_hot(y_l_1, (n_cls, ))
    x_l_m.need_grad, y_l_m.need_grad = False, False
    pred_m = cnn_model_003(ctx, x_l_m)
    loss_er_m = er_loss(ctx, pred_m)  #todo: need?
    loss_ce_m = ce_loss_soft(ctx, pred, y_l_m)
    loss_supervised_m = loss_ce_m #+ loss_er_m
    
    # Semi-Supervised Model
    ## ERM
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u1 = nn.Variable((batch_size, m, h, w))
    pred_x_u0 = cnn_model_003(ctx, x_u0)
    pred_x_u1 = cnn_model_003(ctx, x_u1)
    pred_x_u0.persistent, pred_x_u1.persistent = True, True
    loss_sr = sr_loss(ctx, pred_x_u0, pred_x_u1)
    loss_er0 = er_loss(ctx, pred_x_u0)
    loss_er1 = er_loss(ctx, pred_x_u1)
    loss_unsupervised = loss_sr + loss_er0 + loss_er1
    ## VRM (mixup)
    x_u2 = nn.Variable((batch_size, m, h, w))  # not to overwrite x_u1.d
    coef_u = nn.Variable()
    coef_u_b = F.broadcast(coef_u.reshape([1]*x_u0.ndim, unlink=True), x_u0.shape)
    x_u_m = coef_u_b * x_u0 + (1-coef_u_b) * x_u2
    pred_x_u0_ = nn.Variable(pred_x_u0.shape)  # unlink forward pass but reuse result
    pred_x_u1_ = nn.Variable(pred_x_u1.shape)
    pred_x_u0_.data = pred_x_u0.data
    pred_x_u1_.data = pred_x_u1.data
    coef_u_b = F.broadcast(coef_u.reshape([1]*pred_x_u0.ndim, unlink=True), pred_x_u0.shape)
    y_u_m = coef_u_b * pred_x_u0_ + (1-coef_u_b) * pred_x_u1_
    x_u_m.need_grad, y_u_m.need_grad = False, False
    pred_x_u_m = cnn_model_003(ctx, x_u_m)
    loss_er_u_m = er_loss(ctx, pred_x_u_m)  #todo: need?
    loss_ce_u_m = ce_loss_soft(ctx, pred_x_u_m, y_u_m)
    loss_unsupervised_m = loss_ce_u_m #+ loss_er_u_m
    
    # Evaluation Model
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval = cnn_model_003(ctx, x_eval, test=True)
    
    # Solver
    with nn.context_scope(ctx):
        solver = S.Adam(alpha=learning_rate)
        solver.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz")
    u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")

    # data reader
    data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path,
                                  batch_size=batch_size,
                                  n_cls=n_cls,
                                  da=True,
                                  shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    acc_prev = 0.
    ve_best = 1.
    save_path_prev = ""
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()
        
        x_l_0.d, _ , y_l_0.d= x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d= x_u0_data, x_u1_data

        # Train
        ## forward (supervised and its mixup)
        loss_supervised.forward(clear_no_need_grad=True)
        coef_data = np.random.beta(alpha, alpha)
        coef.d = coef_data
        x_l_1.d = np.random.permutation(x_l0_data)
        y_l_1.d = np.random.permutation(y_l_data)
        loss_supervised_m.forward(clear_no_need_grad=True)
        ## forward (unsupervised and its mixup)
        loss_unsupervised.forward(clear_no_need_grad=True)
        coef_data = np.random.beta(alpha, alpha)
        coef_u.d = coef_data
        x_u2.d = np.random.permutation(x_u1_data)
        loss_unsupervised_m.forward(clear_no_need_grad=True)
        
        ## backward
        solver.zero_grad()
        loss_supervised.backward(clear_buffer=False)
        loss_supervised_m.backward(clear_buffer=False)
        loss_unsupervised.backward(clear_buffer=False)
        loss_unsupervised_m.backward(clear_buffer=True)
        solver.update()
        
        # Evaluate
        if int((i+1) % iter_epoch) == 0:
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            ve /= iter_val                
            msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st, 
                (1. - ve) * 100)
            print(msg)
            if ve < ve_best:
                if not os.path.exists(args.model_save_path):
                    os.makedirs(args.model_save_path)
                if save_path_prev != "":
                    os.remove(save_path_prev)
                save_path = os.path.join(
                    args.model_save_path, 'params_%06d.h5' % epoch)
                nn.save_parameters(save_path)
                save_path_prev = save_path
                ve_best = ve
            st = time.time()
            epoch +=1
Example #37
def train():
    """
    Main script.
    """

    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Dataset
    # We use Tiny ImageNet from Stanford CS231N class.
    # https://tiny-imagenet.herokuapp.com/
    # Tiny ImageNet consists of 200 categories, each category has 500 images
    # in training set. The image size is 64x64. To adapt ResNet into 64x64
    # image inputs, the input image size of ResNet is set as 56x56, and
    # the stride in the first conv and the first max pooling are removed.
    data = data_iterator_tiny_imagenet(args.batch_size, 'train')
    vdata = data_iterator_tiny_imagenet(args.batch_size, 'val')

    num_classes = 200
    tiny = True  # TODO: Switch ILSVRC2012 dataset and TinyImageNet.
    t_model = get_model(
        args, num_classes, test=False, tiny=tiny)
    t_model.pred.persistent = True  # Not clearing buffer of pred in backward
    v_model = get_model(
        args, num_classes, test=True, tiny=tiny)
    v_model.pred.persistent = True  # Not clearing buffer of pred in forward

    # Create Solver.
    solver = S.Momentum(args.learning_rate, 0.9)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = M.MonitorSeries("Training error", monitor, interval=10)
    monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10)
    monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10)

    # Training loop.
    for i in range(args.max_iter):
        # Save parameters
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'param_%06d.h5' % i))

        # Validation
        if i % args.val_interval == 0:

            # Clear all intermediate memory to save memory.
            # t_model.loss.clear_recursive()

            l = 0.0
            e = 0.0
            for j in range(args.val_iter):
                images, labels = vdata.next()
                v_model.image.d = images
                v_model.label.d = labels
                v_model.image.data.cast(np.uint8, ctx)
                v_model.label.data.cast(np.int32, ctx)
                v_model.loss.forward(clear_buffer=True)
                l += v_model.loss.d
                e += categorical_error(v_model.pred.d, v_model.label.d)
            monitor_vloss.add(i, l / args.val_iter)
            monitor_verr.add(i, e / args.val_iter)

            # Clear all intermediate memory to save memory.
            # v_model.loss.clear_recursive()

        # Training
        l = 0.0
        e = 0.0
        solver.zero_grad()

        # Gradient accumulation loop
        for j in range(args.accum_grad):
            images, labels = data.next()
            t_model.image.d = images
            t_model.label.d = labels
            t_model.image.data.cast(np.uint8, ctx)
            t_model.label.data.cast(np.int32, ctx)
            t_model.loss.forward(clear_no_need_grad=True)
            t_model.loss.backward(clear_buffer=True)  # Accumulating gradients
            l += t_model.loss.d
            e += categorical_error(t_model.pred.d, t_model.label.d)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, l / args.accum_grad)
        monitor_err.add(i, e / args.accum_grad)
        monitor_time.add(i)

        # Learning rate decay at scheduled iter
        if i in args.learning_rate_decay_at:
            solver.set_learning_rate(solver.learning_rate() * 0.1)
    nn.save_parameters(os.path.join(args.model_save_path,
                                    'param_%06d.h5' % args.max_iter))
Example #38
def main(args):
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = 4000
    n_train_data = 50000
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = 300
    act = F.relu
    iter_epoch = int(n_train_data / batch_size)
    n_iter = n_epoch * iter_epoch
    extension_module = args.context
    rampups_x = np.linspace(0, 1, 50)
    rampups = np.exp(-5 * (1 - rampups_x)**2)

    # Model
    ## supervised 
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l = nn.Variable((batch_size, m, h, w))
    y_l = nn.Variable((batch_size, 1))
    pred = cnn_model_003(ctx, x_l)
    loss_ce = ce_loss(ctx, pred, y_l)
    loss_er = er_loss(ctx, pred)
    loss_supervised = loss_ce + loss_er

    ## stochastic regularization
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u1 = nn.Variable((batch_size, m, h, w))
    pred_x_u0 = cnn_model_003(ctx, x_u0)
    pred_x_u1 = cnn_model_003(ctx, x_u1)
    loss_sr = sr_loss(ctx, pred_x_u0, pred_x_u1)
    loss_er0 = er_loss(ctx, pred_x_u0)
    loss_er1 = er_loss(ctx, pred_x_u1)
    coef = nn.Variable()
    coef.d = 0
    loss_unsupervised = coef * loss_sr + loss_er0 + loss_er1

    ## evaluate
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval = cnn_model_003(ctx, x_eval, test=True)
    
    # Solver
    with nn.context_scope(ctx):
        solver = S.Adam(alpha=learning_rate)
        solver.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz")
    u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")

    # data reader
    data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path,
                                  batch_size=batch_size,
                                  n_cls=n_cls,
                                  da=True,
                                  shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    acc_prev = 0.
    ve_best = 1.
    save_path_prev = ""
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()
        
        x_l.d, _ , y_l.d= x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d= x_u0_data, x_u1_data

        # Train
        loss_supervised.forward(clear_no_need_grad=True)
        loss_unsupervised.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss_supervised.backward(clear_buffer=True)
        loss_unsupervised.backward(clear_buffer=True)
        solver.update()
        
        # Evaluate
        if int((i+1) % iter_epoch) == 0:
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            ve /= iter_val                
            msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st, 
                (1. - ve) * 100)
            print(msg)
            st = time.time()
            epoch +=1
            coef.d = rampups[min(epoch, len(rampups) - 1)]
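
For reference, a minimal NumPy sketch of the sigmoid-shaped ramp-up schedule that the loop above feeds into coef.d for the unsupervised loss. The helper name rampup_coefficient and the clamping at the end of the window are illustrative assumptions, not part of the original script.

import numpy as np

def rampup_coefficient(epoch, rampup_length=50):
    # exp(-5 * (1 - t)^2) climbs from ~0.007 to 1.0 as t goes from 0 to 1; after the
    # ramp-up window the coefficient is held at 1.0.
    t = min(epoch, rampup_length - 1) / float(rampup_length - 1)
    return float(np.exp(-5.0 * (1.0 - t) ** 2))

for e in (0, 10, 25, 49, 100):
    print(e, rampup_coefficient(e))
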
Beispiel #39
0
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN

    # Fake path
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Keep the buffer from being cleared during backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(F.sigmoid_cross_entropy(
        pred_fake, F.constant(1, pred_fake.shape)))
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(F.sigmoid_cross_entropy(
        pred_fake_dis, F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(F.sigmoid_cross_entropy(pred_real,
                                               F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries(
        "Discriminator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile(
        "Fake images", monitor, normalize_method=lambda x: x + 1 / 2.)

    data = data_iterator_mnist(args.batch_size, True)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    nnp = os.path.join(
        args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Generator',
             'batch_size': args.batch_size,
             'outputs': {'G': fake},
             'names': {'z': z}},
            {'name': 'Discriminator',
             'batch_size': args.batch_size,
             'outputs': {'D': pred_real},
             'names': {'x': x}}],
        'executors': [
            {'name': 'Generator',
             'network': 'Generator',
             'data': ['z'],
             'output': ['G']},
            {'name': 'Discriminator',
             'network': 'Discriminator',
             'data': ['x'],
             'output': ['D']}]}

    save.save(nnp, runtime_contents)
    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp, "Generator")
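
As a side note, here is a small NumPy sketch of the non-saturating GAN objective that the graph above expresses with F.sigmoid_cross_entropy and constant targets; it is only a numerical illustration of the loss terms, not the nnabla implementation.

import numpy as np

def sigmoid_cross_entropy(logits, target):
    # Numerically stable binary cross-entropy on raw logits against a constant target.
    return np.maximum(logits, 0) - logits * target + np.log1p(np.exp(-np.abs(logits)))

pred_fake = np.random.randn(8, 1)  # discriminator logits for generated images
pred_real = np.random.randn(8, 1)  # discriminator logits for real images
loss_gen = sigmoid_cross_entropy(pred_fake, 1.0).mean()          # generator: fakes should be judged real (1)
loss_dis = (sigmoid_cross_entropy(pred_fake, 0.0).mean() +
            sigmoid_cross_entropy(pred_real, 1.0).mean())        # discriminator: fake -> 0, real -> 1
print(loss_gen, loss_dis)
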
Beispiel #40
0
def train(args):
    """
    Main script.
    """

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    margin = 1.0  # Margin for contrastive loss.

    # TRAIN
    # Create input variables.
    image0 = nn.Variable([args.batch_size, 1, 28, 28])
    image1 = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size])
    # Create prediction graph.
    pred = mnist_lenet_siamese(image0, image1, test=False)
    # Create loss function.
    loss = F.mean(contrastive_loss(pred, label, margin))

    # TEST
    # Create input variables.
    vimage0 = nn.Variable([args.batch_size, 1, 28, 28])
    vimage1 = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size])
    # Create prediction graph.
    vpred = mnist_lenet_siamese(vimage0, vimage1, test=True)
    vloss = F.mean(contrastive_loss(vpred, vlabel, margin))

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_vloss = M.MonitorSeries("Test loss", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    rng = np.random.RandomState(313)
    data = siamese_data_iterator(args.batch_size, True, rng)
    vdata = siamese_data_iterator(args.batch_size, False, rng)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage0.d, vimage1.d, vlabel.d = vdata.next()
                vloss.forward(clear_buffer=True)
                ve += vloss.d
            monitor_vloss.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
        image0.d, image1.d, label.d = data.next()
        solver.zero_grad()
        # Training forward, backward and update
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        monitor_loss.add(i, loss.d.copy())
        monitor_time.add(i)

    parameter_file = os.path.join(
        args.model_save_path, 'params_%06d.h5' % args.max_iter)
    nn.save_parameters(parameter_file)

    nnp_file = os.path.join(
        args.model_save_path, 'siamese_%06d.nnp' % (args.max_iter))
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': args.batch_size,
             'outputs': {'y': vpred},
             'names': {'x0': vimage0, 'x1': vimage1}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x0', 'x1'],
             'output': ['y']}]}
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [vimage0.d, vimage1.d], [
                      vimage0, vimage1], vpred, nnp_file)
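
For orientation, a NumPy sketch of a common contrastive-loss formulation (Hadsell et al.) of the kind used above; the exact form and label convention of contrastive_loss in this example are not shown, so treat the details below as assumptions.

import numpy as np

def contrastive_loss(dist, label, margin=1.0):
    # Hadsell-style contrastive loss: similar pairs (label=1) are pulled together,
    # dissimilar pairs (label=0) are pushed at least `margin` apart.
    sim = label * dist ** 2
    dissim = (1 - label) * np.maximum(margin - dist, 0.0) ** 2
    return 0.5 * (sim + dissim)

dist = np.array([0.2, 1.5, 0.9])   # pairwise embedding distances
label = np.array([1, 0, 0])        # 1 = same-class pair, 0 = different-class pair
print(contrastive_loss(dist, label).mean())
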
def test_data_parallel_communicator():
    try:
        import nnabla_ext
        import nnabla_ext.cuda
        from nnabla.contrib.context import extension_context

    except:
        pytest.skip("DataParallelCommunicator are only supported in CUDA now.")

    n_devices = nnabla_ext.cuda.init.get_device_count()
    if n_devices < 2:
        pytest.skip("Number of cuda devices is less than 2.")

    # Contexts and Computation Graph
    extension_module = "cuda"
    ctxs = []
    for d in range(n_devices):
        ctx = extension_context(extension_module,
                                device_id="{}".format(d))
        ctxs.append(ctx)
        with nn.context_scope(ctx):
            x_data = np.random.rand(4, 5)
            x = nn.Variable(x_data.shape)
            with nn.parameter_scope("gpu{}".format(d)):
                with nn.parameter_scope("affine1"):
                    z = PF.affine(x, 6)
                with nn.parameter_scope("affine2"):
                    y = PF.affine(z, 5)

    # Init w.g
    grads = []
    for d in range(n_devices):
        with nn.parameter_scope("gpu{}".format(d)):
            params = nn.get_parameters()
            grad = []
            for i, elm in enumerate(params.items()):
                k, v = elm
                grad_ = np.random.randn(*v.shape)
                v.g = grad_
                v.grad.cast(np.float32, ctxs[d])
                grad.append(grad_)
            grads.append(grad)

    # Reference
    ref_grads = []
    with nn.parameter_scope("gpu{}".format(d)):
        params = nn.get_parameters()
        for i in range(len(params)):
            ave_grad = 0
            for d in range(n_devices):
                ave_grad += grads[d][i]
            ave_grad /= n_devices
            ref_grads.append(ave_grad)

    # Communicator
    try:
        comm = C.DataParalellCommunicator(ctxs[0])
    except:
        pytest.skip(
            "DataParalellCommunicator is not supported in cpu or not linux platform.")

    for d in range(n_devices):
        with nn.parameter_scope("gpu{}".format(d)):
            comm.add_context_and_parameters(
                (ctxs[d], nn.get_parameters()))
    comm.init()
    comm.allreduce(division=True)

    # Check
    atol = 1e-6
    for d in range(n_devices):
        with nn.parameter_scope("gpu{}".format(d)):
            params = nn.get_parameters()
            for i, elm in enumerate(params.items()):
                k, v = elm
                assert np.allclose(ref_grads[i], v.g, atol=atol)
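
The reference computation in the test above amounts to averaging per-device gradients; a standalone NumPy sketch of what allreduce(division=True) is expected to leave on every replica:

import numpy as np

n_devices = 4
# One parameter, one gradient per device; after the averaging allreduce every
# replica should hold the mean of these gradients.
grads = [np.random.randn(6, 5) for _ in range(n_devices)]
reduced = np.sum(grads, axis=0) / n_devices
assert np.allclose(reduced, np.mean(grads, axis=0), atol=1e-6)
print(reduced.shape)
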
def train():
    """
    Naive Multi-Device Training

    NOTE: the communicator exposes low-level interfaces

    * Parse command line arguments.
    * Instantiate a communicator and set parameter variables.
    * Specify contexts for computation.
    * Initialize DataIterator.
    * Construct a computation graph for training and one for validation.
    * Initialize solver and set parameter variables to that.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Compute error rate for validation data (periodically)
      * Get the next minibatch.
      * Execute forwardprop
      * Set parameter gradients to zero
      * Execute backprop.
      * In-place allreduce (THIS IS THE MAIN difference from single-device training)
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    # Parse args
    args = get_args()
    n_train_samples = 50000
    bs_valid = args.batch_size

    # Communicator and Context
    extension_module = "cuda.cudnn"
    ctx = extension_context(extension_module)
    comm = C.MultiProcessDataParalellCommunicator(ctx)
    comm.init()
    n_devices = comm.size
    mpi_rank = comm.rank
    device_id = mpi_rank
    ctx = extension_context(extension_module, device_id=device_id)

    # Create training graphs
    test = False
    image_train = nn.Variable((args.batch_size, 3, 32, 32))
    label_train = nn.Variable((args.batch_size, 1))
    pred_train = cifar100_resnet23_prediction(
        image_train, ctx, test)
    loss_train = cifar100_resnet32_loss(pred_train, label_train)
    input_image_train = {"image": image_train, "label": label_train}

    # add parameters to communicator
    comm.add_context_and_parameters((ctx, nn.get_parameters()))

    # Create validation graph
    test = True
    image_valid = nn.Variable((bs_valid, 3, 32, 32))
    pred_valid = cifar100_resnet23_prediction(
        image_valid, ctx, test)
    input_image_valid = {"image": image_valid}

    # Solvers
    solver = S.Adam()
    solver.set_parameters(nn.get_parameters())
    base_lr = args.learning_rate
    warmup_iter = int(1. * n_train_samples /
                      args.batch_size / n_devices) * args.warmup_epoch
    warmup_slope = 1. * n_devices / warmup_iter

    # Create monitor
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)
    with data_iterator_cifar100(args.batch_size, True) as tdata, \
            data_iterator_cifar100(bs_valid, False) as vdata:
        # Training-loop
        for i in range(int(args.max_iter / n_devices)):
            # Validation
            if mpi_rank == 0:
                if i % int(n_train_samples / args.batch_size / n_devices) == 0:
                    ve = 0.
                    for j in range(args.val_iter):
                        image, label = vdata.next()
                        input_image_valid["image"].d = image
                        pred_valid.forward()
                        ve += categorical_error(pred_valid.d, label)
                    ve /= args.val_iter
                    monitor_verr.add(i * n_devices, ve)
                if i % int(args.model_save_interval / n_devices) == 0:
                    nn.save_parameters(os.path.join(
                        args.model_save_path, 'params_%06d.h5' % i))

            # Forward/Zerograd/Backward
            image, label = tdata.next()
            input_image_train["image"].d = image
            input_image_train["label"].d = label
            loss_train.forward()
            solver.zero_grad()
            loss_train.backward()

            # In-place Allreduce
            comm.allreduce(division=True)

            # Solvers update
            solver.update()

            # Linear Warmup
            if i < warmup_iter:
                lr = base_lr * n_devices * warmup_slope * i
                solver.set_learning_rate(lr)
            else:
                lr = base_lr * n_devices
                solver.set_learning_rate(lr)

            if mpi_rank == 0:
                e = categorical_error(
                    pred_train.d, input_image_train["label"].d)
                monitor_loss.add(i * n_devices, loss_train.d.copy())
                monitor_err.add(i * n_devices, e)
                monitor_time.add(i * n_devices)
    if mpi_rank == 0:
        nn.save_parameters(os.path.join(
            args.model_save_path,
            'params_%06d.h5' % (args.max_iter / n_devices)))
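
A plain-Python sketch of a generic linear warm-up schedule like the one in the loop above; note that it reaches the scaled rate base_lr * n_devices exactly at the end of warm-up, whereas the loop above scales by warmup_slope = n_devices / warmup_iter, so the values during warm-up differ. The helper name warmup_lr is an assumption for illustration.

def warmup_lr(i, base_lr, n_devices, warmup_iter):
    # Ramp the learning rate linearly from 0 to base_lr * n_devices over
    # `warmup_iter` iterations, then hold it there.
    target = base_lr * n_devices
    if i < warmup_iter:
        return target * float(i) / warmup_iter
    return target

for step in (0, 250, 500, 1000, 2000):
    print(step, warmup_lr(step, base_lr=1e-3, n_devices=4, warmup_iter=1000))
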
Beispiel #43
0
def train():
    """
    Main script.

    Steps:

    * Parse command line arguments.
    * Specify a context for computation.
    * Initialize DataIterator for MNIST.
    * Construct a computation graph for training and validation.
    * Initialize a solver and set parameter variables to it.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Compute error rate for validation data (periodically)
      * Get the next minibatch.
      * Execute forwardprop on the training graph.
      * Compute training error
      * Set parameter gradients to zero
      * Execute backprop.
      * Solver updates parameters by using gradients computed by backprop.
    """
    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    mnist_cnn_prediction = mnist_lenet_prediction
    if args.net == 'resnet':
        mnist_cnn_prediction = mnist_resnet_prediction

    # TRAIN
    # Create input variables.
    image = nn.Variable([args.batch_size, 1, 28, 28])
    label = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    pred = mnist_cnn_prediction(image, test=False)
    pred.persistent = True
    # Create loss function.
    loss = F.mean(F.softmax_cross_entropy(pred, label))

    # TEST
    # Create input variables.
    vimage = nn.Variable([args.batch_size, 1, 28, 28])
    vlabel = nn.Variable([args.batch_size, 1])
    # Create prediction graph.
    vpred = mnist_cnn_prediction(vimage, test=True)

    # Create Solver.
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)

    # Initialize DataIterator for MNIST.
    data = data_iterator_mnist(args.batch_size, True)
    vdata = data_iterator_mnist(args.batch_size, False)
    # Training loop.
    for i in range(args.max_iter):
        if i % args.val_interval == 0:
            # Validation
            ve = 0.0
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
            nn.save_parameters(os.path.join(
                args.model_save_path, 'params_%06d.h5' % i))
        # Training forward
        image.d, label.d = data.next()
        solver.zero_grad()
        loss.forward(clear_no_need_grad=True)
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
        monitor_time.add(i)

    ve = 0.0
    for j in range(args.val_iter):
        vimage.d, vlabel.d = vdata.next()
        vpred.forward(clear_buffer=True)
        ve += categorical_error(vpred.d, vlabel.d)
    monitor_verr.add(i, ve / args.val_iter)

    parameter_file = os.path.join(
        args.model_save_path, '{}_params_{:06}.h5'.format(args.net, args.max_iter))
    nn.save_parameters(parameter_file)
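
A NumPy sketch of what a categorical_error helper like the one used in the loops above typically computes (argmax disagreement rate); the actual helper in this repository is not shown, so the signature below is an assumption.

import numpy as np

def categorical_error(pred, label):
    # Error rate: fraction of rows whose argmax over class scores disagrees with the label.
    pred_label = pred.argmax(axis=1)
    return (pred_label != label.ravel()).mean()

pred = np.random.randn(8, 10)                  # (batch, classes) scores
label = np.random.randint(0, 10, size=(8, 1))  # integer class labels, shape (batch, 1)
print(categorical_error(pred, label))
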
Beispiel #44
0
def main(args):
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = 4000
    n_train_data = 50000
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = 300
    act = F.relu
    iter_epoch = int(n_train_data / batch_size)
    n_iter = n_epoch * iter_epoch
    extension_module = args.context

    # Model
    ## supervised cnn
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l = nn.Variable((batch_size, m, h, w))
    x_l.persistent = True
    y_l = nn.Variable((batch_size, 1))
    y_l.persistent = True
    pred = cnn_model_003(ctx, "cnn", x_l)
    loss_ce = ce_loss(ctx, pred, y_l)
    loss_er = er_loss(ctx, pred)
    loss_supervised = loss_ce + loss_er

    ## supervised resnet
    pred_res = cifar10_resnet23_prediction(ctx, "resnet", x_l)
    loss_res_ce = ce_loss(ctx, pred_res, y_l)
    loss_res_supervised = loss_res_ce
    
    ## stochastic regularization for cnn
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u0.persistent = True
    x_u1 = nn.Variable((batch_size, m, h, w))
    pred_x_u0 = cnn_model_003(ctx, "cnn", x_u0)
    pred_x_u0.persistent = True
    pred_x_u1 = cnn_model_003(ctx, "cnn", x_u1)
    loss_sr = sr_loss(ctx, pred_x_u0, pred_x_u1)
    loss_er0 = er_loss(ctx, pred_x_u0)
    loss_er1 = er_loss(ctx, pred_x_u1)
    loss_unsupervised = loss_sr + loss_er0 + loss_er1

    ## knowledge transfer for resnet
    pred_res_x_u0 = cifar10_resnet23_prediction(ctx, "resnet", x_u0)
    loss_res_unsupervised = kl_divergence(ctx, pred_res_x_u0, pred_x_u0)

    ## evaluate
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    x_eval.persistent = True  # reused
    pred_eval = cnn_model_003(ctx, "cnn", x_eval, test=True)
    pred_res_eval = cifar10_resnet23_prediction(ctx, "resnet", x_eval, test=True)
    
    # Solver
    with nn.context_scope(ctx):
        with nn.parameter_scope("cnn"):
            solver = S.Adam(alpha=learning_rate)
            solver.set_parameters(nn.get_parameters())
        with nn.parameter_scope("resnet"):
            solver_res = S.Adam(alpha=learning_rate)
            solver_res.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz")
    u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")

    # data reader
    data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path,
                                  batch_size=batch_size,
                                  n_cls=n_cls,
                                  da=True,
                                  shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    acc_prev = 0.
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()
        
        x_l.d, _ , y_l.d= x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d= x_u0_data, x_u1_data

        # Train for cnn
        loss_supervised.forward(clear_no_need_grad=True)
        loss_unsupervised.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss_supervised.backward(clear_buffer=True)
        loss_unsupervised.backward(clear_buffer=True)
        solver.update()

        # Train for resnet
        loss_res_supervised.forward(clear_no_need_grad=True)
        loss_res_unsupervised.forward(clear_no_need_grad=True)
        solver_res.zero_grad()
        loss_res_supervised.backward(clear_buffer=True)
        pred_x_u0.need_grad = False  # no need grad for teacher
        loss_res_unsupervised.backward(clear_buffer=True)
        solver_res.update()
        pred_x_u0.need_grad = True
        
        # Evaluate
        if (i+1) % iter_epoch == 0:
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop for cnn
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            msg = "Model:cnn,Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st, 
                (1. - ve / iter_val) * 100)
            print(msg)

            # Evaluation loop for resnet
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_res_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_res_eval.d, label)
                iter_val += 1
            msg = "Model:resnet,Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st, 
                (1. - ve / iter_val) * 100)
            print(msg)

            st = time.time()
            epoch +=1
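
A NumPy sketch of a batch-averaged KL divergence of the kind kl_divergence is used for above, with the fixed CNN prediction as the teacher distribution and the ResNet prediction as the student; the exact form and argument order in the repository's helper are assumptions here.

import numpy as np

def softmax(x, axis=1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def kl_divergence(student_logits, teacher_logits, eps=1e-8):
    # KL(teacher || student), averaged over the batch: the fixed teacher
    # distribution is the target the student is trained to match.
    p = softmax(teacher_logits)
    q = softmax(student_logits)
    return float(np.mean(np.sum(p * (np.log(p + eps) - np.log(q + eps)), axis=1)))

teacher_logits = np.random.randn(4, 10)  # stands in for the CNN teacher prediction
student_logits = np.random.randn(4, 10)  # stands in for the ResNet student prediction
print(kl_divergence(student_logits, teacher_logits))
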
Beispiel #45
0
def main():

    # Get arguments
    args = get_args()
    data_file = "https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.train.txt"
    model_file = args.work_dir + "model.h5"

    # Load Dataset
    itow, wtoi, dataset = load_ptbset(data_file)

    # Computation environment settings
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create data provider
    n_word = len(wtoi)
    n_dim = args.embed_dim
    batchsize = args.batchsize
    half_window = args.half_window_length
    n_negative = args.n_negative_sample

    di = DataIteratorForEmbeddingLearning(
        batchsize=batchsize,
        half_window=half_window,
        n_negative=n_negative,
        dataset=dataset)

    # Create model
    # - Real batch size including context samples and negative samples
    size = batchsize * (1 + n_negative) * (2 * (half_window - 1))

    # Model for learning
    # - input variables
    xl = nn.Variable((size,))  # variable for word
    yl = nn.Variable((size,))  # variable for context

    # Embed layers for the word embedding function
    # - f_embed: maps a word index x to y, an n_dim feature vector,
    #   for each sample in a minibatch
    hx = PF.embed(xl, n_word, n_dim, name="e1")  # feature vector for word
    hy = PF.embed(yl, n_word, n_dim, name="e1")  # feature vector for context
    hl = F.sum(hx * hy, axis=1)

    # -- Approximated likelihood of context prediction
    # pos: word context, neg: negative samples
    tl = nn.Variable([size, ], need_grad=False)
    loss = F.sigmoid_cross_entropy(hl, tl)
    loss = F.mean(loss)

    # Model for test of searching similar words
    xr = nn.Variable((1,), need_grad=False)
    hr = PF.embed(xr, n_word, n_dim, name="e1")  # feature vector for test

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Create monitor.
    monitor = M.Monitor(args.work_dir)
    monitor_loss = M.MonitorSeries(
        "Training loss", monitor, interval=args.monitor_interval)
    monitor_time = M.MonitorTimeElapsed(
        "Training time", monitor, interval=args.monitor_interval)

    # Do training
    max_epoch = args.max_epoch
    for epoch in range(max_epoch):

        # iteration per epoch
        for i in range(di.n_batch):

            # get minibatch
            xi, yi, ti = di.next()

            # learn
            solver.zero_grad()
            xl.d, yl.d, tl.d = xi, yi, ti
            loss.forward(clear_no_need_grad=True)
            loss.backward(clear_buffer=True)
            solver.update()

            # monitor
            itr = epoch * di.n_batch + i
            monitor_loss.add(itr, loss.d)
            monitor_time.add(itr)

    # Save model
    nn.save_parameters(model_file)

    # Evaluate by similarity
    max_check_words = args.max_check_words
    for i in range(max_check_words):

        # prediction
        xr.d = i
        hr.forward(clear_buffer=True)
        h = hr.d

        # similarity calculation
        w = nn.get_parameters()['e1/embed/W'].d
        s = np.sqrt((w * w).sum(1))
        w /= s.reshape((s.shape[0], 1))
        similarity = w.dot(h[0]) / s[i]

        # for understanding
        output_similar_words(itow, i, similarity)
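
The similarity lookup above boils down to a cosine similarity between the queried word vector and all rows of the embedding matrix; a standalone NumPy sketch with a random stand-in matrix:

import numpy as np

# Stand-in embedding matrix; in the script above W comes from
# nn.get_parameters()['e1/embed/W'].d.
W = np.random.randn(1000, 100)            # (n_word, n_dim)
query_id = 3
norms = np.sqrt((W * W).sum(axis=1))
W_normed = W / norms[:, None]
similarity = W_normed.dot(W[query_id]) / norms[query_id]  # cosine similarity to every word
top5 = np.argsort(-similarity)[:5]
print(top5, similarity[top5])
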
Beispiel #46
0
def main(args):
    # Settings
    device_id = args.device_id
    batch_size = 100
    batch_size_eval = 100
    n_l_train_data = 4000
    n_train_data = 50000
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = 300
    act = F.relu
    iter_epoch = int(n_train_data / batch_size)
    n_iter = n_epoch * iter_epoch
    extension_module = args.context

    # Model
    ## supervised 
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l = nn.Variable((batch_size, m, h, w))
    y_l = nn.Variable((batch_size, 1))
    pred = cnn_model_003(ctx, x_l)
    loss_ce = ce_loss(ctx, pred, y_l)
    loss_er = er_loss(ctx, pred)
    loss_supervised = loss_ce + loss_er

    ## stochastic regularization
    x_u0 = nn.Variable((batch_size, m, h, w), need_grad=False)
    x_u1 = nn.Variable((batch_size, m, h, w), need_grad=False)
    pred_x_u0 = cnn_model_003(ctx, x_u0)
    pred_x_u1 = cnn_model_003(ctx, x_u1)
    loss_sr = sr_loss(ctx, pred_x_u0, pred_x_u1)
    loss_er0 = er_loss(ctx, pred_x_u0)
    loss_er1 = er_loss(ctx, pred_x_u1)
    loss_unsupervised = loss_sr + loss_er0 + loss_er1

    ## autoencoder
    path = args.model_path
    nn.load_parameters(path)
    x_u0_rc = cnn_ae_model_000(ctx, x_u0, act=F.relu, test=True)
    x_u1_rc = cnn_ae_model_000(ctx, x_u1, act=F.relu, test=True)
    x_u0_rc.need_grad = False
    x_u1_rc.need_grad = False
    pred_x_u0_rc = cnn_model_003(ctx, x_u0_rc, test=False)
    pred_x_u1_rc = cnn_model_003(ctx, x_u1_rc, test=False)
    loss_sr_rc = sr_loss(ctx, pred_x_u0_rc, pred_x_u1_rc)
    loss_er0_rc = er_loss(ctx, pred_x_u0_rc)
    loss_er1_rc = er_loss(ctx, pred_x_u1_rc)
    loss_unsupervised_rc = loss_sr_rc + loss_er0_rc + loss_er1_rc
    loss_unsupervised += loss_unsupervised_rc

    ## evaluate
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval = cnn_model_003(ctx, x_eval, test=True)
    
    # Solver
    with nn.context_scope(ctx):
        solver = S.Adam(alpha=learning_rate)
        solver.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz")
    u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")

    # data reader
    data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path,
                                  batch_size=batch_size,
                                  n_cls=n_cls,
                                  da=True,
                                  shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    acc_prev = 0.
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()
        
        x_l.d, _ , y_l.d= x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d= x_u0_data, x_u1_data

        # Train
        loss_supervised.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss_supervised.backward(clear_buffer=True)
        solver.update()
        loss_unsupervised.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss_unsupervised.backward(clear_buffer=True)
        solver.update()
        
        # Evaluate
        if (i+1) % iter_epoch == 0:
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = x_data[k:k+batch_size_eval, :]
                label = y_data[k:k+batch_size_eval, :]
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st, 
                (1. - ve / iter_val) * 100)
            print(msg)
            st = time.time()
            epoch +=1
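
For reference, a NumPy sketch of a consistency (stochastic-regularization) term of the kind sr_loss computes above between the predictions for two perturbed copies of the same unlabeled input; the repository's sr_loss may differ (for instance by operating on logits), so this is an assumed formulation.

import numpy as np

def softmax(x, axis=1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def consistency_loss(pred0, pred1):
    # Mean squared difference between the class distributions predicted for two
    # perturbed copies of the same unlabeled input.
    return float(np.mean((softmax(pred0) - softmax(pred1)) ** 2))

pred0 = np.random.randn(4, 10)
pred1 = pred0 + 0.1 * np.random.randn(4, 10)
print(consistency_loss(pred0, pred1))
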
def train():
    """
    Naive Multi-Device Training

    NOTE: the communicator exposes low-level interfaces

    * Parse command line arguments.
    * Specify contexts for computation.
    * Initialize DataIterator.
    * Construct computation graphs for training and one for validation.
    * Initialize solvers and set parameter variables to those.
    * Instantiate a communicator and set parameter variables.
    * Create monitor instances for saving and displaying training stats.
    * Training loop
      * Compute error rate for validation data (periodically)
      * Get the next minibatch.
      * Execute forwardprops
      * Set parameter gradients to zero
      * Execute backprop.
      * In-place allreduce (THIS IS THE MAIN difference from single-device training)
      * Solver updates parameters by using gradients computed by backprop.
      * Compute training error
    """
    # Parse args
    args = get_args()
    n_train_samples = 50000
    bs_valid = args.batch_size

    # Create contexts
    extension_module = args.context
    if extension_module != "cuda" and \
            extension_module != "cuda.cudnn":
        raise Exception("Use `cuda` or `cuda.cudnn` extension_module.")
    n_devices = args.n_devices
    ctxs = []
    for i in range(n_devices):
        ctx = extension_context(extension_module, device_id=i)
        ctxs.append(ctx)
    ctx = ctxs[-1]

    # Create training graphs
    input_image_train = []
    preds_train = []
    losses_train = []
    test = False
    for i in range(n_devices):
        image = nn.Variable((args.batch_size, 3, 32, 32))
        label = nn.Variable((args.batch_size, 1))
        device_scope_name = "device{}".format(i)

        pred = cifar100_resnet23_prediction(
            image, ctxs[i], device_scope_name, test)
        loss = cifar100_resnet32_loss(pred, label)

        input_image_train.append({"image": image, "label": label})
        preds_train.append(pred)
        losses_train.append(loss)

    # Create validation graph
    test = True
    device_scope_name = "device{}".format(0)
    image_valid = nn.Variable((bs_valid, 3, 32, 32))
    pred_valid = cifar100_resnet23_prediction(
        image_valid, ctxs[i], device_scope_name, test)
    input_image_valid = {"image": image_valid}

    # Solvers
    solvers = []
    for i in range(n_devices):
        with nn.context_scope(ctxs[i]):
            solver = S.Adam()
            device_scope_name = "device{}".format(i)
            with nn.parameter_scope(device_scope_name):
                params = nn.get_parameters()
                solver.set_parameters(params)
            solvers.append(solver)

    # Communicator
    comm = C.DataParalellCommunicator(ctx)
    for i in range(n_devices):
        device_scope_name = "device{}".format(i)
        with nn.parameter_scope(device_scope_name):
            ctx = ctxs[i]
            params = nn.get_parameters()
            comm.add_context_and_parameters((ctx, params))
    comm.init()

    # Create threadpools with one thread
    pools = []
    for _ in range(n_devices):
        pool = ThreadPool(processes=1)
        pools.append(pool)

    # Run forward/backward once to safely allocate memory
    for device_id in range(n_devices):
        data, label = \
            (np.random.randn(*input_image_train[device_id]["image"].shape),
             (np.random.rand(*input_image_train[device_id]["label"].shape) * 10).astype(np.int32))

        ret = pools[device_id].apply_async(forward_backward,
                                           (input_image_train[device_id]["image"], data,
                                            input_image_train[device_id]["label"], label,
                                               losses_train[device_id], solvers[device_id]))
        ret.get()
        losses_train[device_id].d  # sync to host

    # Create monitor.
    from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed
    monitor = Monitor(args.monitor_path)
    monitor_loss = MonitorSeries("Training loss", monitor, interval=10)
    monitor_err = MonitorSeries("Training error", monitor, interval=10)
    monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100)
    monitor_verr = MonitorSeries("Test error", monitor, interval=10)
    with data_iterator_cifar100(args.batch_size, True) as tdata, \
            data_iterator_cifar100(bs_valid, False) as vdata:
        # Training-loop
        for i in range(int(args.max_iter / n_devices)):
            # Validation
            if i % int(n_train_samples / args.batch_size / n_devices) == 0:
                ve = 0.
                for j in range(args.val_iter):
                    image, label = vdata.next()
                    input_image_valid["image"].d = image
                    pred_valid.forward()
                    ve += categorical_error(pred_valid.d, label)
                ve /= args.val_iter
                monitor_verr.add(i * n_devices, ve)
            if i % int(args.model_save_interval / n_devices) == 0:
                nn.save_parameters(os.path.join(
                    args.model_save_path, 'params_%06d.h5' % i))

            # Forwards/Zerograd/Backwards
            fb_results = []
            for device_id in range(n_devices):
                image, label = tdata.next()

                res = pools[device_id].apply_async(forward_backward,
                                                   (input_image_train[device_id]["image"], image,
                                                    input_image_train[device_id]["label"], label,
                                                    losses_train[device_id], solvers[device_id]))
                fb_results.append(res)
            for device_id in range(n_devices):
                fb_results[device_id].get()

            # In-place Allreduce
            comm.allreduce()

            # Solvers update
            for device_id in range(n_devices):
                solvers[device_id].update()

            e = categorical_error(
                preds_train[-1].d, input_image_train[-1]["label"].d)
            monitor_loss.add(i * n_devices, losses_train[-1].d.copy())
            monitor_err.add(i * n_devices, e)
            monitor_time.add(i * n_devices)

    nn.save_parameters(os.path.join(
        args.model_save_path,
        'params_%06d.h5' % (args.max_iter / n_devices)))
Beispiel #48
0
def main(args):
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = args.n_label
    n_train_data = 73257
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = args.epoch
    act = F.relu
    iter_epoch = n_train_data / batch_size
    n_iter = int(n_epoch * iter_epoch)
    extension_module = args.context

    # Model
    ## supervised 
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l = nn.Variable((batch_size, m, h, w))
    y_l = nn.Variable((batch_size, 1))
    pred = cnn_model_003(ctx, x_l)
    loss_ce = ce_loss(ctx, pred, y_l)
    loss_er = er_loss(ctx, pred)
    loss_supervised = loss_ce + loss_er

    ## stochastic regularization
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u1 = nn.Variable((batch_size, m, h, w))
    pred_x_u0 = cnn_model_003(ctx, x_u0)
    pred_x_u1 = cnn_model_003(ctx, x_u1)
    loss_sr = sr_loss(ctx, pred_x_u0, pred_x_u1)
    loss_er0 = er_loss(ctx, pred_x_u0)
    loss_er1 = er_loss(ctx, pred_x_u1)
    loss_unsupervised = loss_sr + loss_er0 + loss_er1

    ## evaluate
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval = cnn_model_003(ctx, x_eval, test=True)
    
    # Solver
    with nn.context_scope(ctx):
        solver = S.Adam(alpha=learning_rate)
        solver.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/svhn/train.mat")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/svhn/l_train.mat")
    u_train_path = os.path.join(home, "datasets/svhn/u_train.mat")
    test_path = os.path.join(home, "datasets/svhn/test.mat")

    # data reader
    data_reader = SVHNDataReader(l_train_path, u_train_path, test_path,
                                  batch_size=batch_size,
                                  n_cls=n_cls,
                                  da=False,
                                  shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    acc_prev = 0.
    ve_best = 1.
    save_path_prev = ""
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()
        
        x_l.d, _ , y_l.d= x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d= x_u0_data, x_u1_data

        # Train
        loss_supervised.forward(clear_no_need_grad=True)
        loss_unsupervised.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss_supervised.backward(clear_buffer=True)
        loss_unsupervised.backward(clear_buffer=True)
        solver.update()

        # Evaluate
        if int((i+1) % iter_epoch) == 0:
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            ve /= iter_val                
            msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st, 
                (1. - ve) * 100)
            print(msg)
            if ve < ve_best:
                if not os.path.exists(args.model_save_path):
                    os.makedirs(args.model_save_path)
                if save_path_prev != "":
                    os.remove(save_path_prev)
                save_path = os.path.join(
                    args.model_save_path, 'params_%06d.h5' % epoch)
                nn.save_parameters(save_path)
                save_path_prev = save_path
                ve_best = ve
            st = time.time()
            epoch +=1
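
A NumPy sketch of an entropy-regularization term of the kind er_loss adds above, which pushes predictions on unlabeled data toward confident, low-entropy distributions; the exact definition of er_loss in this repository is not shown, so the form below is an assumption.

import numpy as np

def softmax(x, axis=1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def entropy_regularization(pred, eps=1e-8):
    # Mean entropy of the predicted class distributions; minimizing it favours
    # confident (low-entropy) predictions on unlabeled inputs.
    p = softmax(pred)
    return float(-np.mean(np.sum(p * np.log(p + eps), axis=1)))

print(entropy_regularization(np.random.randn(4, 10)))
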
Beispiel #49
0
def main(args):
    # Settings
    device_id = args.device_id
    batch_sizes = [16, 32, 64]
    batch_size_eval = 64
    c, h, w = 3, 32, 32
    n_l_train_data = 4000
    n_train_data = 50000
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = 300
    act = F.relu
    iter_epoch = int(n_train_data / np.mean(batch_sizes))  # approximate epoch
    n_iter = n_epoch * iter_epoch
    extension_module = args.context

    # Model (Batch-Stochastic)
    ctx = extension_context(extension_module, device_id=device_id)
    ## supervised
    x_list, y_list, preds, losses_ce = batch_stochastic_supervised_network(
        ctx, batch_sizes, c, h, w)
    
    ## stochastic regularization
    x0_list, x1_list, _, losses_sr = batch_stochastic_unsupervised_network(
        ctx, batch_sizes, c, h, w)

    ## evaluate
    batch_size_eval, m, h, w = batch_size_eval, c, h, w
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval = cnn_model_003(ctx, x_eval, test=True)
    
    # Solver
    with nn.context_scope(ctx):
        solver = S.Adam(alpha=learning_rate)
        solver.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz")
    u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")

    # data reader
    data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path,
                                  batch_size=batch_sizes[0],
                                  n_cls=n_cls,
                                  da=True,
                                  shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    acc_prev = 0.
    iter_ = 0
    for i in range(n_iter):
        idx = np.random.choice(np.arange(0, len(batch_sizes)))
        idx_u = np.random.choice(np.arange(0, len(batch_sizes)))
        # Get data
        bs = batch_sizes[idx]
        bs_u = batch_sizes[idx_u]
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch(bs)
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch(bs_u)

        # Set them to the variables
        x_l = x_list[idx]
        y_l = y_list[idx]
        x_u0 = x0_list[idx_u]
        x_u1 = x1_list[idx_u]
        x_l.d, _ , y_l.d= x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d= x_u0_data, x_u1_data

        # Train
        loss_ce = losses_ce[idx]
        loss_sr = losses_sr[idx_u]
        loss_ce.forward(clear_no_need_grad=True)
        loss_sr.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss_ce.backward(clear_buffer=True)
        loss_sr.backward(clear_buffer=True)
        solver.update()
        
        # Evaluate
        if (i+1) % iter_epoch == 0:  # approximate epoch
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st, 
                (1. - ve / iter_val) * 100)
            print(msg)
            st = time.time()
            epoch +=1
Beispiel #50
0
def main():
    """
    Main script.

    Steps:
    * Get and set context.
    * Load Dataset
    * Initialize DataIterator.
    * Create Networks
      * Net for Labeled Data
      * Net for Unlabeled Data
      * Net for Test Data
    * Create Solver.
    * Training Loop.
      * Test
      * Training
        * by Labeled Data
          * Calculate Cross Entropy Loss
        * by Unlabeled Data
          * Estimate Adversarial Direction
          * Calculate LDS Loss
    """

    args = get_args()

    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    shape_x = (1, 28, 28)
    n_h = args.n_units
    n_y = args.n_class

    # Load MNIST dataset
    from mnist_data import MnistDataSource
    with MnistDataSource(train=True) as d:
        x_t = d.images
        t_t = d.labels
    with MnistDataSource(train=False) as d:
        x_v = d.images
        t_v = d.labels
    x_t = np.array(x_t / 256.0).astype(np.float32)
    x_t, t_t = x_t[:args.n_train], t_t[:args.n_train]
    x_v, t_v = x_v[:args.n_valid], t_v[:args.n_valid]

    # Create Semi-supervised Datasets
    x_l, t_l, x_u, _ = split_dataset(x_t, t_t, args.n_labeled, args.n_class)
    x_u = np.r_[x_l, x_u]
    x_v = np.array(x_v / 256.0).astype(np.float32)

    # Create DataIterators for datasets of labeled, unlabeled and validation
    di_l = DataIterator(args.batchsize_l, [x_l, t_l])
    di_u = DataIterator(args.batchsize_u, [x_u])
    di_v = DataIterator(args.batchsize_v, [x_v, t_v])

    # Create networks
    # feed-forward-net building function
    def forward(x, test=False):
        return mlp_net(x, n_h, n_y, test)

    # Net for learning labeled data
    xl = nn.Variable((args.batchsize_l,) + shape_x, need_grad=False)
    hl = forward(xl, test=False)
    tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
    loss_l = F.mean(F.softmax_cross_entropy(hl, tl))

    # Net for learning unlabeled data
    xu = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    r = nn.Variable((args.batchsize_u,) + shape_x, need_grad=True)
    eps = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
    loss_u, yu = vat(xu, r, eps, forward, distance)

    # Net for evaluating validation data
    xv = nn.Variable((args.batchsize_v,) + shape_x, need_grad=False)
    hv = forward(xv, test=True)
    tv = nn.Variable((args.batchsize_v, 1), need_grad=False)

    # Create solver
    solver = S.Adam(args.learning_rate)
    solver.set_parameters(nn.get_parameters())

    # Monitor training and validation stats.
    import nnabla.monitor as M
    monitor = M.Monitor(args.model_save_path)
    monitor_verr = M.MonitorSeries("Test error", monitor, interval=240)
    monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240)

    # Training Loop.
    t0 = time.time()

    for i in range(args.max_iter):

        # Validation Test
        if i % args.val_interval == 0:
            n_error = calc_validation_error(
                di_v, xv, tv, hv, args.val_iter)
            monitor_verr.add(i, n_error)

        #################################
        ## Training by Labeled Data #####
        #################################

        # input minibatch of labeled data into variables
        xl.d, tl.d = di_l.next()

        # initialize gradients
        solver.zero_grad()

        # forward, backward and update
        loss_l.forward(clear_no_need_grad=True)
        loss_l.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        #################################
        ## Training by Unlabeled Data ###
        #################################

        # input minibatch of unlabeled data into variables
        xu.d, = di_u.next()

        ##### Calculate Adversarial Noise #####

        # Sample random noise
        n = np.random.normal(size=xu.shape).astype(np.float32)

        # Normalize noise vector and input to variable
        r.d = get_direction(n)

        # Set xi, the power-method scaling parameter.
        eps.data.fill(args.xi_for_vat)

        # Calculate y without noise, only once.
        yu.forward(clear_buffer=True)

        # Do power method iteration
        for k in range(args.n_iter_for_power_method):
            # Initialize gradient to receive value
            r.grad.zero()

            # forward, backward, without update
            loss_u.forward(clear_no_need_grad=True)
            loss_u.backward(clear_buffer=True)

            # Normalize gradient vector and input to variable
            r.d = get_direction(r.g)

        ##### Calculate loss for unlabeled data #####

        # Clear remained gradients
        solver.zero_grad()

        # Set epsilon, the adversarial noise scaling parameter.
        eps.data.fill(args.eps_for_vat)

        # forward, backward and update
        loss_u.forward(clear_no_need_grad=True)
        loss_u.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()

        ##### Learning rate update #####
        if i % args.iter_per_epoch == 0:
            solver.set_learning_rate(
                solver.learning_rate() * args.learning_rate_decay)
        monitor_time.add(i)

    # Evaluate the final model by the error rate with validation dataset
    valid_error = calc_validation_error(di_v, xv, tv, hv, args.val_iter)
    monitor_verr.add(i, valid_error)
    monitor_time.add(i)

    # Save the model.
    nnp_file = os.path.join(
        args.model_save_path, 'vat_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Validation',
             'batch_size': args.batchsize_v,
             'outputs': {'y': hv},
             'names': {'x': xv}}],
        'executors': [
            {'name': 'Runtime',
             'network': 'Validation',
             'data': ['x'],
             'output': ['y']}]}
    save.save(nnp_file, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [xv.d], [xv], hv, nnp_file)
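
The power-method step above repeatedly renormalizes a perturbation to unit length per sample; here is a NumPy sketch of a get_direction-style normalization (the actual helper is not shown in this example, so this is an assumed implementation):

import numpy as np

def get_direction(v, eps=1e-12):
    # Normalize each sample's perturbation to unit L2 norm, as done with the
    # sampled noise and with r.g in the power-method iterations above.
    flat = v.reshape(v.shape[0], -1)
    norm = np.sqrt((flat ** 2).sum(axis=1, keepdims=True)) + eps
    return (flat / norm).reshape(v.shape)

noise = np.random.normal(size=(4, 1, 28, 28)).astype(np.float32)
d = get_direction(noise)
print(np.sqrt((d.reshape(4, -1) ** 2).sum(axis=1)))  # each close to 1.0
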
Beispiel #51
0
def main(args):
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = 4000
    n_train_data = 50000
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = 300
    act = F.relu
    iter_epoch = int(n_train_data / batch_size)
    n_iter = n_epoch * iter_epoch
    extension_module = args.context

    # Model
    views = [global_view, spatial_view, feature_view]
    ## supervised 
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l = nn.Variable((batch_size, m, h, w))
    y_l = nn.Variable((batch_size, 1))
    feature = cnn_model_003(ctx, x_l)
    loss_supervised = []
    for view in views:
        pred = view(ctx, feature)
        loss_ce = ce_loss(ctx, pred, y_l)
        loss_er = er_loss(ctx, pred)
        loss_supervised += [loss_ce, loss_er]
    loss_supervised = reduce(lambda x, y: x+y, loss_supervised)

    ## cross view loss
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u1 = nn.Variable((batch_size, m, h, w))
    feature_x_u0 = cnn_model_003(ctx, x_u0)
    feature_x_u1 = cnn_model_003(ctx, x_u1)
    pred_x_u0 = []
    pred_x_u1 = []
    loss_er = []
    loss_unsupervised = []    
    for view in views:
        pred = view(ctx, feature_x_u0)
        pred_x_u0 += [pred]
        loss_er +=[er_loss(ctx, pred)]
        pred = view(ctx, feature_x_u1)
        pred_x_u1 += [pred]
        loss_er += [er_loss(ctx, pred)]
    for pred_a, pred_b in itertools.product(pred_x_u0, pred_x_u1): # multi-view
        if pred_a == pred_b:
            continue
        loss_unsupervised += [sr_loss(ctx, pred_a, pred_b)]
    loss_unsupervised = reduce(lambda x, y: x+y, loss_unsupervised) \
                        + reduce(lambda x, y: x+y, loss_er)

    ## evaluate
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    feature_eval = cnn_model_003(ctx, x_eval, test=True)
    pred_eval = []
    for view in views:
        pred_eval += [view(ctx, feature_eval)]
        
    # Solver
    with nn.context_scope(ctx):
        solver = S.Adam(alpha=learning_rate)
        solver.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz")
    u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")

    # data reader
    data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path,
                                  batch_size=batch_size,
                                  n_cls=n_cls,
                                  da=True,
                                  shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    acc_prev = 0.
    ve_best = 1.
    save_path_prev = ""
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()
        
        x_l.d, _, y_l.d = x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d = x_u0_data, x_u1_data

        # Train
        loss_supervised.forward(clear_no_need_grad=True)
        loss_unsupervised.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss_supervised.backward(clear_buffer=True)
        loss_unsupervised.backward(clear_buffer=True)
        solver.update()
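        # Both losses are backpropagated after a single zero_grad, so their
        # gradients accumulate in the same parameter buffers and one update
        # applies the combined supervised + unsupervised gradient.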
        
        # Evaluate
        if int((i + 1) % iter_epoch) == 0:
            # Get data and set it to the variables
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop
            ve = [0., 0., 0.]
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                feature_eval.forward(clear_buffer=True)
                # Use separate loop variables so the outer iteration counter
                # `i` is not shadowed inside the evaluation block.
                for j in range(len(pred_eval)):
                    pred_eval[j].forward()
                    ve[j] += categorical_error(pred_eval[j].d, label)
                iter_val += 1
            for vi, e in enumerate(ve):
                e /= iter_val
                msg = "Epoch-{}:{},ElapsedTime:{},Acc:{:02f}".format(
                    vi,
                    epoch,
                    time.time() - st, 
                    (1. - e) * 100)
                print(msg)
            st = time.time()
            epoch += 1
Beispiel #52
0
def main(args):
    # Settings
    device_id = args.device_id
    batch_size = args.batch_size
    batch_size_eval = args.batch_size_eval
    n_l_train_data = 4000
    n_train_data = 50000
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = 300
    act = F.relu
    iter_epoch = int(n_train_data / batch_size)
    n_iter = n_epoch * iter_epoch
    extension_module = args.context

    # Model
    ## supervised 
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_l = nn.Variable((batch_size, m, h, w))
    y_l = nn.Variable((batch_size, 1))
    pred, log_var = cnn_model_003(ctx, x_l)
    one = F.constant(1., log_var.shape)
    loss_ce = ce_loss_with_uncertainty(ctx, pred, y_l, log_var)
    reg_sigma = sigma_regularization(ctx, log_var, one)
    loss_supervised = loss_ce + reg_sigma
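    # Here the cross-entropy is weighted by a predicted (log-)variance, and
    # sigma_regularization is assumed to pull the predicted sigma toward the
    # constant `one` defined above, e.g. roughly
    # F.mean(F.squared_error(F.exp(log_var), one)) in nnabla terms.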

    ## stochastic regularization
    x_u0 = nn.Variable((batch_size, m, h, w))
    x_u1 = nn.Variable((batch_size, m, h, w))
    pred_x_u0, log_var0 = cnn_model_003(ctx, x_u0)
    pred_x_u1, log_var1 = cnn_model_003(ctx, x_u1)
    loss_sr = sr_loss_with_uncertainty(ctx, 
                                       pred_x_u0, pred_x_u1, log_var0, log_var1)
    loss_er0 = er_loss(ctx, pred_x_u0)
    loss_er1 = er_loss(ctx, pred_x_u1)
    reg_sigma0 = sigma_regularization(ctx, log_var0, one)
    reg_sigma1 = sigma_regularization(ctx, log_var1, one)
    loss_unsupervised = loss_sr + loss_er0 + loss_er1 \
                        + reg_sigma0 + reg_sigma1
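    # The unsupervised objective combines an uncertainty-weighted consistency
    # loss between the two augmented views, entropy regularization on each
    # prediction, and the same sigma regularization on both variance heads.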

    ## evaluate
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval, _ = cnn_model_003(ctx, x_eval, test=True)
    
    # Solver
    with nn.context_scope(ctx):
        solver_l = S.Adam(alpha=learning_rate)
        solver_l.set_parameters(nn.get_parameters())
        solver_u = S.Adam(alpha=learning_rate)
        solver_u.set_parameters(nn.get_parameters())
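    # Two Adam solvers are registered on the same parameter set: the weights
    # are shared, but each solver keeps its own moment estimates, and the
    # training loop below applies one update per loss in every iteration.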

    # Dataset
    ## separate dataset
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz")
    u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")

    # data reader
    data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path,
                                  batch_size=batch_size,
                                  n_cls=n_cls,
                                  da=True,
                                  shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    acc_prev = 0.
    for i in range(n_iter):
        # Get data and set it to the variables
        x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch()
        x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch()
        
        x_l.d, _, y_l.d = x_l0_data, x_l1_data, y_l_data
        x_u0.d, x_u1.d = x_u0_data, x_u1_data

        # Train
        ## for supervised loss
        loss_supervised.forward(clear_no_need_grad=True)
        solver_l.zero_grad()
        loss_supervised.backward(clear_buffer=True)
        solver_l.update()
        ## for unsupervised loss
        loss_unsupervised.forward(clear_no_need_grad=True)
        solver_u.zero_grad()
        loss_unsupervised.backward(clear_buffer=True)
        solver_u.update()
        
        # Evaluate
        if (i+1) % iter_epoch == 0:
            # Get data and set it to the varaibles
            x_data, y_data = data_reader.get_test_batch()

            # Evaluation loop
            ve = 0.
            iter_val = 0
            for k in range(0, len(x_data), batch_size_eval):
                x_eval.d = get_test_data(x_data, k, batch_size_eval)
                label = get_test_data(y_data, k, batch_size_eval)
                pred_eval.forward(clear_buffer=True)
                ve += categorical_error(pred_eval.d, label)
                iter_val += 1
            msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format(
                epoch,
                time.time() - st, 
                (1. - ve / iter_val) * 100)
            print(msg)
            st = time.time()
            epoch += 1
Beispiel #53
0
def main(args):
    # Settings
    device_id = args.device_id
    batch_size = 100
    batch_size_eval = 100
    n_l_train_data = 4000
    n_train_data = 50000
    n_cls = 10
    learning_rate = 1. * 1e-3
    n_epoch = 50
    act = F.relu
    iter_epoch = int(n_train_data / batch_size)
    n_iter = n_epoch * iter_epoch
    extension_module = args.context
    n_images = args.n_images 
    fname, _ = os.path.splitext(__file__)
    dpath = "./{}_images_{}".format(fname, int(time.time()))

    # Model
    batch_size, m, h, w = batch_size, 3, 32, 32
    ctx = extension_context(extension_module, device_id=device_id)
    x_u = nn.Variable((batch_size, m, h, w))
    pred = cnn_ae_model_001(ctx, x_u)
    loss_recon = recon_loss(ctx, pred, x_u)
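    # recon_loss is assumed to be a pixel-wise reconstruction objective between
    # the autoencoder output and its input; a minimal sketch with nnabla
    # functions (not necessarily the original implementation) could be:
    #
    #     def recon_loss(ctx, pred, x):
    #         with nn.context_scope(ctx):
    #             loss = F.mean(F.squared_error(pred, x))
    #         return loss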

    ## evaluate
    batch_size_eval, m, h, w = batch_size, 3, 32, 32
    x_eval = nn.Variable((batch_size_eval, m, h, w))
    pred_eval = cnn_ae_model_001(ctx, x_eval, test=True)
    
    # Solver
    with nn.context_scope(ctx):
        solver = S.Adam(alpha=learning_rate)
        solver.set_parameters(nn.get_parameters())

    # Dataset
    ## separate dataset
    home = os.environ.get("HOME")
    fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    separator = Separator(n_l_train_data)
    separator.separate_then_save(fpath)

    l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz")
    u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")
    test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz")

    # data reader
    data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path,
                                  batch_size=batch_size,
                                  n_cls=n_cls,
                                  da=True,
                                  shape=True)

    # Training loop
    print("# Training loop")
    epoch = 1
    st = time.time()
    acc_prev = 0.
    for i in range(n_iter):
        # Get data and set it to the variables
        x_u_data, _, _ = data_reader.get_u_train_batch()
        x_u.d = x_u_data

        # Train
        loss_recon.forward(clear_no_need_grad=True)
        solver.zero_grad()
        loss_recon.backward(clear_buffer=True)
        solver.update()
        
        # Evaluate
        if (i+1) % iter_epoch == 0:
            # Get test data, feed a batch to the evaluation input, and forward
            x_data, y_data = data_reader.get_test_batch()
            x_eval.d = get_test_data(x_data, 0, batch_size_eval)
            pred_eval.forward(clear_buffer=True)
            images = pred_eval.d

            # Save n images
            if not os.path.exists(dpath):
                os.makedirs(dpath)
            save_images(dpath, epoch, images[:n_images])
            fpath = os.path.join(dpath, "epoch_{:05d}.h5".format(epoch))
            nn.save_parameters(fpath)

            st = time.time()
            epoch += 1