def test_parametric_function_api():
    """
    Testing :func:`nnabla.parametric_functions.parametric_function_api`.
    """
    import nnabla as nn
    import inspect
    nn.clear_parameters()
    shape = (2, 3, 4)

    # Signature check
    spec = inspect.getargspec(dummy_parametric_function)
    assert spec.args == ['shape', 'f', 'i', 's', 'name']
    assert spec.defaults == (10, 1, 'dummy', None)
    assert dummy_parametric_function.__doc__.splitlines()[0] == 'Doc'

    # Verify that the two different ways do the same thing.
    # Using name argument
    v = dummy_parametric_function(shape, name='group1')
    # Using parameter_scope
    with nn.parameter_scope('group1'):
        v = dummy_parametric_function(shape)

    params = nn.get_parameters()
    assert len(params) == 2
    assert list(iterkeys(params)) == ['group1/dummy/p1', 'group1/dummy/p2']

    # No scope
    v = dummy_parametric_function(shape)

    params = nn.get_parameters()
    assert len(params) == 4
    assert list(iterkeys(params)) == ['group1/dummy/p1', 'group1/dummy/p2',
                                      'dummy/p1', 'dummy/p2']

    nn.clear_parameters()
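# The test above relies on two equivalent ways of scoping parameters. A minimal
# sketch with a built-in parametric function (PF.affine is used here only for
# illustration): passing `name=` is shorthand for wrapping the call in
# `nn.parameter_scope`, so both calls below register 'fc/affine/W' and
# 'fc/affine/b', and the second call reuses the first call's parameters.
import nnabla as nn
import nnabla.parametric_functions as PF

nn.clear_parameters()
x = nn.Variable((4, 8))
y1 = PF.affine(x, 10, name='fc')
with nn.parameter_scope('fc'):
    y2 = PF.affine(x, 10)
assert sorted(nn.get_parameters().keys()) == ['fc/affine/W', 'fc/affine/b']
nn.clear_parameters()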
def main():
    batch_size, m, h, w = 4, 3, 32, 32
    extension_module = "cpu"
    device_id = 0
    ctx = extension_context(extension_module, device_id=device_id)

    x_l_data = np.random.randn(batch_size, m, h, w)
    y_l_data = (np.random.rand(batch_size, 1) * 10).astype(np.int32)
    x_l = nn.Variable(x_l_data.shape)
    y_l = nn.Variable(y_l_data.shape)
    x_l.d = x_l_data
    y_l.d = y_l_data

    # CNN
    print("# CNN")
    pred = cnn_model_003(ctx, x_l)
    s = 0
    for n, v in nn.get_parameters().items():
        n_params = np.prod(v.shape)
        print(n, n_params)
        s += n_params
    print("n_params={}".format(s))
    nn.clear_parameters()

    # Resnet
    print("# Resnet")
    inmaps = 256
    pred = resnet_model(ctx, x_l, inmaps=inmaps)
    s = 0
    for n, v in nn.get_parameters().items():
        n_params = np.prod(v.shape)
        print(n, n_params)
        s += n_params
    print("n_params={}".format(s))
    nn.clear_parameters()
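# A compact helper doing what the loops above do by hand. This is a sketch;
# `count_parameters` is not part of the script above, and it assumes the graph
# of interest has already been built so its parameters are registered in the
# global scope.
import numpy as np
import nnabla as nn

def count_parameters(grad_only=False):
    """Total number of scalars in the currently registered parameters."""
    return int(sum(np.prod(v.shape)
                   for v in nn.get_parameters(grad_only=grad_only).values()))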
def test_graph_model(model, seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    nn.clear_parameters()
    if model == "mlp":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
        z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    elif model == "recurrent":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
        z2 = F.relu(z, inplace=True)
        h = z2
        for _ in range(2):
            with nn.parameter_scope('fc2'):
                h = PF.affine(h, 3)
            h = F.relu(h, inplace=True)
        with nn.parameter_scope('fc3'):
            z3 = PF.affine(h, 5)
    elif model == "convolution":
        with nn.parameter_scope('conv1'):
            z = PF.convolution(x, 3, (2, 2))
        z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    else:
        raise ValueError()
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    L.forward(clear_no_need_grad=True)

    # Backprop
    # Diff should be initialized since they are always accumulated
    x.grad.zero()
    L.backward(clear_buffer=True)
    x.g = rng.randn(*x.shape)
    parameters = nn.get_parameters()
    for param in parameters.values():
        param.grad.zero()
    inputs = [x] + list(parameters.values())
    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L, inputs, 1e-3)
    assert np.allclose(ngrad, agrad, atol=1.05e-2)
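# For reference, a self-contained central-difference gradient check. The test
# above delegates this to nbla_test_utils, so this sketch only illustrates the
# idea; `numerical_grad` and its epsilon are not part of that helper's API.
import numpy as np
import nnabla as nn

def numerical_grad(loss, var, eps=1e-3):
    """Finite-difference estimate of d(loss)/d(var), flattened."""
    g = np.zeros(var.size)
    for i in range(var.size):
        orig = var.d.flat[i]            # var.d references the data (no copy)
        var.d.flat[i] = orig + eps
        loss.forward()
        lp = float(loss.d)
        var.d.flat[i] = orig - eps
        loss.forward()
        lm = float(loss.d)
        var.d.flat[i] = orig
        g[i] = (lp - lm) / (2 * eps)
    return g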
def test_parametric_function_1d(inshape, kernel, multiplier, outshape):
    base_axis = len(inshape) - 2
    sample_channels = inshape[base_axis]
    outmap_channels = sample_channels * multiplier
    x = nn.Variable(inshape)
    y = PF.depthwise_convolution(x, kernel, multiplier=multiplier)
    p = nn.get_parameters()
    assert y.shape == outshape
    assert p['depthwise_conv/W'].shape == (outmap_channels,) + kernel
    assert p['depthwise_conv/b'].shape == (outmap_channels,)
    nn.clear_parameters()
def decompose_network_and_set_params(model_load_path, reference, slim, rrate=0.75):
    # Parameters are loaded into the global scope, but call here for consistency
    nn.load_parameters(model_load_path)

    # Decompose
    with nn.parameter_scope(reference):
        trained_params = nn.get_parameters()
    # original parameter
    W = trained_params["fc3/affine/W"].d
    # original maps
    inmaps = W.shape[0]
    outmaps0 = W.shape[1]
    # new maps, R < N*M / (N+M) * rrate
    outmaps1 = reduce_maps(inmaps, outmaps0, rrate)
    # singular value decomposition
    U, s, V = np.linalg.svd(W, full_matrices=False)
    S = np.diag(s)
    SV = S.dot(V)
    U_approx = U[:, :outmaps1]
    SV_approx = SV[:outmaps1, :outmaps0]

    # Set trained parameters and decomposed parameters
    # set trained parameters
    with nn.parameter_scope(slim):
        slim_params = nn.get_parameters()
    for n, v in trained_params.items():
        if n not in slim_params.keys():
            continue
        v_slim = slim_params[n]
        v_slim.d = v.d
    # set decomposed parameters and the original bias
    # (a new bias is introduced by the decomposition)
    slim_params["fc-d0/affine/W"].d = U_approx
    slim_params["fc-d1/affine/W"].d = SV_approx
    b = trained_params["fc3/affine/b"]
    slim_params["fc-d1/affine/b"].d = b.d

    # Clear the parameters of the reference net
    with nn.parameter_scope(reference):
        nn.clear_parameters()
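# A numpy-only sketch of the factorization applied above: an (N, M) affine
# weight W is approximated by two affines of shapes (N, R) and (R, M). The
# shapes and rrate below are made up for illustration; per the comment above,
# this only reduces parameters when R < N * M / (N + M).
import numpy as np

N, M, rrate = 128, 64, 0.75
R = int(N * M / (N + M) * rrate)
W = np.random.randn(N, M)
U, s, V = np.linalg.svd(W, full_matrices=False)
SV = np.diag(s).dot(V)
W_approx = U[:, :R].dot(SV[:R, :])            # rank-R reconstruction
print('params: {} -> {}'.format(N * M, N * R + R * M))
print('relative error:', np.linalg.norm(W - W_approx) / np.linalg.norm(W))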
def test_graph_rewire(seed, clear_buffer): nn.clear_parameters() # A. defining graph definition utility def mlp2(x, scope): with nn.parameter_scope(scope): h = F.tanh(PF.affine(x, 10, name='a1')) h = F.tanh(PF.affine(h, 10, name='a1')) return h # A. Create a graph A. xa = nn.Variable((2, 10), need_grad=True) ya = mlp2(xa, 'a') # B. Create a graph B. xb = nn.Variable((2, 10), need_grad=True) yb = mlp2(xb, 'b') # C. Create directly connected graph. xc = nn.Variable((2, 10)) yc = mlp2(mlp2(xc, 'a'), 'b') # D. Rewire the graphs A and B. xb.rewire_on(ya) # E. Check whether the results are the same. rng = np.random.RandomState(seed) data = rng.randn(*xa.shape) xa.d = data xc.d = data params = nn.get_parameters() def zero_grad(): for p in params.values(): p.grad.zero() def backup_params(): return [p.g.copy() for p in params.values()] # Checking forward yb.forward(clear_no_need_grad=clear_buffer) yc.forward(clear_no_need_grad=clear_buffer) assert_allclose(yb.d, yc.d) # Checking backward zero_grad() yb.backward(clear_buffer=clear_buffer) gb = backup_params() zero_grad() yc.backward(clear_buffer=clear_buffer) gc = backup_params() assert_allclose(xa.d, xc.d) for b, c in zip(gb, gc): assert_allclose(b, c)
def load_parameters(self, path, extension=".h5"): """Load parameters from a file into this module. Args: path: str or file-like object """ scope = OrderedDict() with nn.parameter_scope('', scope): nn.load_parameters(path, extension=extension) params = nn.get_parameters() self.set_parameters(params)
def modify(self, f, inputs): params = [v.data for v in nn.get_parameters(grad_only=False).values()] inputs_ = [] for inp in inputs: if inp.data not in params: inputs_.append(inp) else: inp = inp.get_unlinked_variable(need_grad=False) inputs_.append(inp) o = self._call_function(f.info.type_name, inputs_, f.info.args) return o
def test_load_save_parameters(): module = MyModule(shape=(5, 5)) params = module.get_parameters() if not os.path.exists('__nnabla_nas__'): os.makedirs('__nnabla_nas__') nn.save_parameters('__nnabla_nas__/params.h5', params) nn.load_parameters('__nnabla_nas__/params.h5') params0 = nn.get_parameters() for k, v in params.items(): assert_allclose(v.d, params0[k].d)
def load_parameters(self, path, extension=".h5", raise_if_missing=True): """Load parameters from a file into this module. Args: path: str or file-like object """ scope = OrderedDict() with nn.parameter_scope('', scope): nn.load_parameters(path, extension=extension) params = nn.get_parameters(grad_only=False) self.set_parameters(params, raise_if_missing=raise_if_missing)
def load_parameters(self, path, raise_if_missing=False):
    r"""Loads parameters from a file with the specified format.

    Args:
        path (str): The path to file.
        raise_if_missing (bool, optional): Raise exception if some
            parameters are missing. Defaults to `False`.
    """
    with nn.parameter_scope('', OrderedDict()):
        nn.load_parameters(path)
        params = nn.get_parameters(grad_only=False)
    self.set_parameters(params, raise_if_missing=raise_if_missing)
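# A minimal sketch of the scope-isolation idiom used by the loaders above:
# loading inside a local, unnamed parameter scope keeps the file's parameters
# out of the global scope, so the caller decides where they end up.
# 'some_params.h5' is a placeholder path.
from collections import OrderedDict
import nnabla as nn

nn.clear_parameters()                        # start from an empty global scope
local_scope = OrderedDict()
with nn.parameter_scope('', local_scope):
    nn.load_parameters('some_params.h5')     # placeholder path
    loaded = nn.get_parameters(grad_only=False)
# Parameters live only in `local_scope` / `loaded`; the global scope stays empty.
assert len(nn.get_parameters(grad_only=False)) == 0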
def train(self): # variables for training tx_in = nn.Variable( [self._batch_size, self._x_input_length, self._cols_size]) tx_out = nn.Variable( [self._batch_size, self._x_output_length, self._cols_size]) tpred = self.network(tx_in, self._lstm_unit_name, self._lstm_units) tpred.persistent = True loss = F.mean(F.squared_error(tpred, tx_out)) solver = S.Adam(self._learning_rate) solver.set_parameters(nn.get_parameters()) # variables for validation vx_in = nn.Variable( [self._batch_size, self._x_input_length, self._cols_size]) vx_out = nn.Variable( [self._batch_size, self._x_output_length, self._cols_size]) vpred = self.network(vx_in, self._lstm_unit_name, self._lstm_units) # data iterators tdata = self._load_dataset(self._training_dataset_path, self._batch_size, shuffle=True) vdata = self._load_dataset(self._validation_dataset_path, self._batch_size, shuffle=True) # monitors from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed monitor = Monitor(self._monitor_path) monitor_loss = MonitorSeries("Training loss", monitor, interval=10) monitor_err = MonitorSeries("Training error", monitor, interval=10) monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100) monitor_verr = MonitorSeries("Validation error", monitor, interval=10) # Training loop for i in range(self._max_iter): if i % self._val_interval == 0: ve = self._validate(vpred, vx_in, vx_out, vdata, self._val_iter) monitor_verr.add(i, ve / self._val_iter) te = self._train(tpred, solver, loss, tx_in, tx_out, tdata.next(), self._weight_decay) monitor_loss.add(i, loss.d.copy()) monitor_err.add(i, te) monitor_time.add(i) ve = self._validate(vpred, vx_in, vx_out, vdata, self._val_iter) monitor_verr.add(i, ve / self._val_iter) # Save a best model parameters nn.save_parameters(self._model_params_path)
def _get_variable_or_create(self, v, callback, current_scope): if v.variable is not None: return v.variable v = callback._apply_generate_variable(v) if v.variable is not None: return v.variable pvar = v.proto name = pvar.name shape = list(pvar.shape.dim) if shape[0] < 0: shape[0] = self.batch_size shape = tuple(shape) assert np.all(np.array(shape) > 0 ), "Shape must be positive. Given {}.".format(shape) if pvar.type != 'Parameter': # Create a new variable and returns. var = nn.Variable(shape) v.variable = var var.name = name return var # Trying to load the parameter from .nnp file. callback.verbose('Loading parameter `{}` from .nnp.'.format(name)) try: param = get_parameter(name) if param is None: logger.info( 'Parameter `{}` is not found. Initializing.'.format(name)) tmp = _create_variable(pvar, name, shape, self.rng) param = tmp.variable_instance set_parameter(name, param) # Always copy param to current scope even if it already exists. with nn.parameter_scope('', current_scope): set_parameter(name, param) except: import sys import traceback raise ValueError( 'An error occurs during creation of a variable `{}` as a' ' parameter variable. The error was:\n----\n{}\n----\n' 'The parameters registered was {}'.format( name, traceback.format_exc(), '\n'.join( list(nn.get_parameters(grad_only=False).keys())))) assert shape == param.shape param = param.get_unlinked_variable(need_grad=v.need_grad) v.variable = param param.name = name return param
def test_graph_clear_buffer(seed): np.random.seed(313) rng = np.random.RandomState(seed) x = nn.Variable([2, 3, 4, 4]) t = nn.Variable([2, 1]) x.d = rng.randn(*x.shape) t.d = rng.randint(0, 5, size=t.shape) # Network definition nn.set_default_context(nn.Context()) nn.clear_parameters() x1 = x + 1 x2 = x1 - 1 with nn.parameter_scope('conv1'): z = PF.convolution(x2, 3, (2, 2)) z2 = F.relu(z, inplace=True) with nn.parameter_scope('fc2'): z3 = PF.affine(z2, 5) l = F.softmax_cross_entropy(z3, t, 1) L = F.mean(l) # Forwardprop import tempfile import os tmpd = tempfile.mkdtemp() nn.save_parameters(os.path.join(tmpd, 'parameter.h5')) first = False for cnng in [False, True]: for cb in [False, True]: _ = nn.load_parameters(os.path.join(tmpd, 'parameter.h5')) for v in nn.get_parameters().values(): v.grad.zero() L.forward(clear_no_need_grad=cnng) L.backward(clear_buffer=cb) if not first: first = True g = list(nn.get_parameters().values())[0].g.copy() else: g2 = list(nn.get_parameters().values())[0].g.copy() assert np.all(g == g2)
def encode_text(text): param_dict = nn.get_parameters() embed_dim = param_dict['text_projection'].shape[1] context_length = param_dict['positional_embedding'].shape[0] vocab_size = param_dict['token_embedding/W'].shape[0] transformer_width = param_dict['ln_final/W'].shape[0] transformer_heads = transformer_width // 64 transformer_layers = len( set( k.split('/')[2] for k in param_dict.keys() if k.startswith(f'transformer/resblocks'))) token_embedding = nn.parameter.get_parameter_or_create( name='token_embedding/W', shape=(vocab_size, transformer_width)) x = F.embed(text, token_embedding) # [batch_size, n_ctx, d_model] positional_embedding = nn.parameter.get_parameter_or_create( name='positional_embedding', shape=(context_length, transformer_width)).reshape( (1, context_length, transformer_width)) x = x + positional_embedding x = F.transpose(x, (1, 0, 2)) # NLD -> LND x = transformer(x, transformer_width, transformer_layers, transformer_heads, attn_mask=build_attn_mask(context_length)) x = F.transpose(x, (1, 0, 2)) # LND -> NLD ln_final_W = nn.parameter.get_parameter_or_create( name='ln_final/W', shape=(transformer_width, )).reshape( (1, 1, transformer_width)) ln_final_b = nn.parameter.get_parameter_or_create( name='ln_final/b', shape=(transformer_width, )).reshape( (1, 1, transformer_width)) x = F.layer_normalization(x, ln_final_b, ln_final_W, batch_axis=(0, 1)) idx = F.max(text, axis=-1, only_index=True) idx.forward() x = x[list(range(x.shape[0])), idx.d].reshape((1, x.shape[0], -1)) text_projection = nn.parameter.get_parameter_or_create( name='text_projection', shape=(transformer_width, embed_dim)).reshape( (1, transformer_width, embed_dim)) x = F.batch_matmul(x, text_projection) x = x.reshape((-1, embed_dim)) return x
def get_parameters(self, recursive=True, grad_only=False, memo=None):
    """Obtain an OrderedDict object of all parameters in the current Module.

    For example,

    .. code-block:: python

        x = nn.Variable.from_numpy_array((np.random.random((8, 32, 256, 256))))
        conv_bn = ConvBn(2)
        y = conv_bn(x)
        params = conv_bn.get_parameters()
        for parameter_name, parameter_value in params.items():
            print("{}:{}".format(parameter_name, parameter_value.shape))

    The output looks like:

    .. code-block:: none

        conv/W:(2, 32, 1, 1)
        bn/beta:(1, 2, 1, 1)
        bn/gamma:(1, 2, 1, 1)
        bn/mean:(1, 2, 1, 1)
        bn/var:(1, 2, 1, 1)

    Notice that each parameter name looks like a file path, with slash-separated
    nested scope names. In addition, the module name is used by default with a
    prefix ``@``.

    Args:
        recursive (bool, optional, default=True): Whether to also obtain the
            parameters of the current module's submodules. Default is True.
        grad_only (bool, optional, default=False): Whether to return only
            parameters with ``need_grad=True``. Default is False.

    Returns:
        OrderedDict: Flattened name-value pairs of the current Module's parameters.
    """
    params = OrderedDict()
    if memo is None:
        memo = ParamMemo()
    if recursive:
        for name, module in self.submodules.items():
            params.update(
                insert_parent_name(
                    name,
                    module.get_parameters(recursive=recursive,
                                          grad_only=grad_only,
                                          memo=memo)))
    with nn.parameter_scope('', self.parameter_scope):
        found_params = nn.get_parameters(grad_only=grad_only)
        filtered_params = memo.filter_and_update(found_params)
        params.update(filtered_params)
    return params
def __init__(self, black_list=[], params=None, name="identity"): self.graph_info = None self.entry_variables = None self.black_list = black_list self.params = params if params is not None else nn.get_parameters( grad_only=False) self.name = name self.end_variable = None self.outputs = [] # output of ref graph to output of new graph (TODO: change name) self.input_map = {}
def training(steps, learning_rate):
    solver = S.Sgd(learning_rate)
    # Set parameter variables to be updated.
    solver.set_parameters(nn.get_parameters())
    for i in range(steps):
        x.d, t.d = data.next()
        loss.forward()
        solver.zero_grad()  # Initialize gradients of all parameters to zero.
        loss.backward()
        solver.weight_decay(1e-5)  # Apply weight decay as regularization.
        solver.update()
        if i % 100 == 0:  # Print every 100 iterations.
            print(i, loss.d)
def save_all_params(params_dict, c, k, j, bundle_size, step_size, save_dir, epoch): params_dict[c] = nn.get_parameters(grad_only=False).copy() c += 1 if c == bundle_size or j == step_size - 1: dn = os.path.join(save_dir, 'epoch%02d' % (epoch), 'weights') ensure_dir(dn) for cc, params in params_dict.items(): fn = '%s/model_step%04d.h5' % (dn, k + cc) nn.save_parameters(fn, params=params, extension=".h5") k += c c = 0 params_dict = {} return params_dict, c, k
def __init__(self, batch_size=32, learning_rate=1e-4, max_iter=5086, total_epochs=20, monitor_path=None, val_weight=None, model_load_path=None): """ Construct all the necessary attributes for the attribute classifier. Args: batch_size (int): number of samples contained in each generated batch learning_rate (float) : learning rate max_iter (int) : maximum iterations for an epoch total_epochs (int) : total epochs to train the model val_weight : sample weights monitor_path (str) : model parameter to be saved model_load_path (str) : load the model """ self.batch_size = batch_size # Resnet 50 # training graph model = ResNet50() self.input_image = nn.Variable((self.batch_size, ) + model.input_shape) self.label = nn.Variable([self.batch_size, 1]) # fine tuning pool = model(self.input_image, training=True, use_up_to='pool') self.clf = clf_resnet50(pool) self.clf.persistent = True # loss self.loss = F.mean(F.sigmoid_cross_entropy(self.clf, self.label)) # hyper parameters self.solver = S.Adam(learning_rate) self.solver.set_parameters(nn.get_parameters()) # validation graph self.x_v = nn.Variable((self.batch_size, ) + model.input_shape) pool_v = model(self.x_v, training=False, use_up_to='pool') self.v_clf = clf_resnet50(pool_v, train=False) self.v_clf_out = F.sigmoid(self.v_clf) self.print_freq = 100 self.validation_weight = val_weight # val params self.acc = 0.0 self.total_epochs = total_epochs self.max_iter = max_iter self.monitor_path = monitor_path if model_load_path is not None: _ = nn.load_parameters(model_load_path)
def sample_arch_and_train(args, data_dict, controller_weights_dict): """ Execute these process. 1. For a certain number of times, let the controller construct sample architectures and test their performances. (By calling get_sample_and_feedback) 2. By using the performances acquired by the previous process, train the controller. 3. Select one architecture with the best validation accuracy and train its parameters. """ solver = S.Momentum(args.control_lr) # create solver for the controller solver.set_parameters(controller_weights_dict, reset=False, retain_state=True) solver.zero_grad() val_list = list() arch_list = list() with nn.auto_forward(): for c in range(args.num_candidate): output_line = " Architecture {} / {} ".format((c + 1), args.num_candidate) print("{0:-^80s}".format(output_line)) # sample one architecture and get its feedback for RL as loss loss, val_acc, sample_arch = get_sample_and_feedback( args, data_dict) val_list.append(val_acc) arch_list.append(sample_arch) loss.backward() # accumulate gradient each time print("{0:-^80s}\n".format(" Reinforcement Learning Phase ")) print("current accumulated loss:", loss.d) solver.weight_decay(0.025) solver.update() # train the controller print("\n{0:-^80s}\n".format(" CNN Learning Phase ")) best_idx = np.argmax(val_list) sample_arch = arch_list[best_idx] print("Train the model whose architecture is:") show_arch(sample_arch) print("and its accuracy is: {:.2f} %\n".format(100 * np.max(val_list))) print("Learnable Parameters:", params_count(nn.get_parameters())) # train a child network which achieves the best validation accuracy. val_acc = CNN_run(args, sample_arch, data_dict, with_train=True) return sample_arch, val_acc
def test_pf_prelu_execution(g_rng, inshape, base_axis, shared, slope_init, fix_parameters): slope_shape = tuple() if shared else (inshape[base_axis], ) slope_init = process_param_init(slope_init, slope_shape, g_rng) kw = {} insert_if_not_none(kw, 'slope_init', slope_init) insert_if_not_default(kw, 'base_axis', base_axis, 1) insert_if_not_default(kw, 'shared', shared, True) insert_if_not_default(kw, 'fix_parameters', fix_parameters, False) x = nn.Variable.from_numpy_array(g_rng.randn(*inshape)) # Check execution y = PF.prelu(x, **kw) y.forward() y.backward() # Check values # TODO # Check args assert y.parent.info.type_name == 'PReLU' args = y.parent.info.args assert args['base_axis'] == base_axis # Check created parameters assert y.parent.inputs[0] == x assert len(y.parent.inputs) == 2 assert len(nn.get_parameters()) == 1 slope = nn.get_parameters()['prelu/slope'] assert slope.shape == slope_shape assert slope.need_grad assert y.parent.inputs[1].need_grad == (not fix_parameters) if isinstance(slope_init, np.ndarray): assert np.allclose(slope_init, slope.d)
def main(): args = get_args() nn.load_parameters(args.input) params = nn.get_parameters(grad_only=False) processed = False # Convert memory layout layout = get_memory_layout(params) if args.memory_layout is None: pass if args.affine_to_conv: rm_list = [] ret = affine_to_conv(params, args.memory_layout, rm_list) for r in rm_list: print(r) nn.parameter.pop_parameter(r) if ret: logger.info('Converted affine to conv.') processed |= ret if args.memory_layout != layout: logger.info(f'Converting memory layout to {args.memory_layout}.') convert_memory_layout(params, args.memory_layout) processed |= True else: logger.info('No need to convert memory layout.') if args.force_4_channels: ret = force_4_channels(params, args.memory_layout) if ret: logger.info('Converted first conv to 4-channel input.') processed |= ret if args.force_3_channels: ret = force_3_channels(params, args.memory_layout) if ret: logger.info('Converted first conv to 3-channel input.') processed |= ret nn.clear_parameters() for key, param in params.items(): print(key) print(param.shape) nn.parameter.set_parameter(key, param) if not processed: logger.info( 'No change has been made for the input. Not saving a new parameter file.') return logger.info(f'Save a new parameter file at {args.output}') nn.save_parameters(args.output)
def test_save_load_parameters(): v = nn.Variable([64, 1, 28, 28], need_grad=False) with nn.parameter_scope("param1"): with nn.parameter_scope("conv1"): h = PF.convolution(v, 32, (3, 3)) b = PF.batch_normalization(h, batch_stat=True) with nn.parameter_scope("conv2"): h1 = PF.convolution(v, 32, (3, 3)) b2 = PF.batch_normalization(h1, batch_stat=True) for k, v in iteritems(nn.get_parameters(grad_only=False)): v.data.cast(np.float32)[...] = np.random.randn(*v.shape) with nn.parameter_scope("param1"): param1 = nn.get_parameters(grad_only=False) nn.save_parameters("tmp.h5") nn.save_parameters("tmp.protobuf") with nn.parameter_scope("param2"): nn.load_parameters('tmp.h5') param2 = nn.get_parameters(grad_only=False) with nn.parameter_scope("param3"): nn.load_parameters('tmp.protobuf') param3 = nn.get_parameters(grad_only=False) for par2 in [param2, param3]: assert param1.keys() == par2.keys() # Check order for (n1, p1), (n2, p2) in zip(sorted(param1.items()), sorted(par2.items())): assert n1 == n2 assert np.all(p1.d == p2.d) if par2 is not param3: # NOTE: data is automatically casted to fp32 in Protobuf assert p1.data.dtype == p2.data.dtype assert p1.need_grad == p2.need_grad
def create_nnabla_net(resnext=False): # Create nnabla graph from models import senet x = nn.Variable((1, 3, 224, 224), need_grad=False) if resnext: y = senet.se_resnext50(x, 1000, test=True) else: y = senet.se_resnet50(x, 1000, test=True) params = nn.get_parameters(grad_only=False) param_dims = 0 for k, v in params.items(): param_dims += np.prod(v.shape) print(k, v.shape, param_dims) print('total parameters: ', param_dims) return (x, y), params, param_dims
def test_compute_simple_hessian(ctx): nn.clear_parameters() # Network state = nn.Variable((1, 2)) output = PF.affine(state, 1, w_init=I.ConstantInitializer(value=1.), b_init=I.ConstantInitializer(value=1.)) loss = F.sum(output**2) # Input state_array = np.array([[1.0, 0.5]]) state.d = state_array # Grad of network params = nn.get_parameters().values() for param in params: param.grad.zero() grads = nn.grad([loss], params) flat_grads = F.concatenate(*[F.reshape(grad, (-1,)) for grad in grads]) if len(grads) > 1 \ else F.reshape(grads[0], (-1,)) # Compute hessian hessian = np.zeros((flat_grads.shape[0], flat_grads.shape[0]), dtype=np.float32) for i in range(flat_grads.shape[0]): flat_grads_i = flat_grads[i] flat_grads_i.forward() for param in params: param.grad.zero() flat_grads_i.backward() num_index = 0 for param in params: grad = param.g.flatten() # grad of grad so this is hessian hessian[i, num_index:num_index + len(grad)] = grad num_index += len(grad) actual = hessian expected = np.array([[ 2 * state_array[0, 0]**2, 2 * state_array[0, 0] * state_array[0, 1], 2 * state_array[0, 0] ], [ 2 * state_array[0, 0] * state_array[0, 1], 2 * state_array[0, 1]**2, 2 * state_array[0, 1] ], [2 * state_array[0, 0], 2 * state_array[0, 1], 2.]]) assert_allclose(actual, expected)
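# A quick numpy cross-check of `expected` above: with a single affine output
# w.s + b and loss (w.s + b)^2, the Hessian w.r.t. theta = [w1, w2, b] is
# 2 * v v^T where v = [s1, s2, 1]. Same numbers, numpy only.
import numpy as np

s = np.array([1.0, 0.5])
v = np.append(s, 1.0)                  # [s1, s2, 1]
hessian_closed_form = 2.0 * np.outer(v, v)
print(hessian_closed_form)
# [[2.  1.  2. ]
#  [1.  0.5 1. ]
#  [2.  1.  2. ]]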
def data_distill(model, uniform_data_iterator, num_iter): generated_img = [] for _ in range(uniform_data_iterator.size // uniform_data_iterator.batch_size): img, _ = uniform_data_iterator.next() dst_img = nn.Variable(img.shape, need_grad=True) dst_img.d = img img_params = OrderedDict() img_params['img'] = dst_img init_lr = 0.5 solver = S.Adam(alpha=init_lr) solver.set_parameters(img_params) #scheduler = lr_scheduler.CosineScheduler(init_lr=0.5, max_iter=num_iter) scheduler = ReduceLROnPlateauScheduler(init_lr=init_lr, min_lr=1e-4, verbose=False, patience=100) dummy_solver = S.Sgd(lr=0) dummy_solver.set_parameters(nn.get_parameters()) for it in tqdm(range(num_iter)): lr = scheduler.get_learning_rate() solver.set_learning_rate(lr) global outs outs = [] global batch_stats batch_stats = [] y = model(denormalize(dst_img), force_global_pooling=True, training=False) # denormalize to U(0, 255) y.forward(function_post_hook=get_output) assert len(outs) == len(batch_stats) loss = zeroq_loss(batch_stats, outs, dst_img) loss.forward() solver.zero_grad() dummy_solver.zero_grad() loss.backward() solver.weight_decay(1e-6) solver.update() scheduler.update_lr(loss.d) generated_img.append(dst_img.d) return generated_img
def mNextParam(self, idx): if np.random.rand() > 0.8: with nn.parameter_scope(net(7)): param = nn.get_parameters() for i, j in param.items(): self.mParam[net(idx)]["pre"][i] = self.mParam[net( idx)]["next"][i].copy() self.mParam[net(idx)]["next"][i] = param.get(i).d else: for i in self.mParam[net(idx)]["next"].keys(): self.mParam[net(idx)]["pre"][i] = self.mParam[net( idx)]["next"][i].copy() self.mParam[net(idx)]["next"][i] = np.random.randn( *(self.mParam[net(idx)]["next"][i].shape)) return return
def __init__(self, graph, device_id, ext_name, solver=None, n_run=100,
             max_measure_execution_time=1, time_scale="m"):
    self.graph = graph
    # if solver is None, training time (forward + backward + update) is not calculated
    self.solver = solver
    self.n_run = n_run
    self.device_id = str(device_id)
    self.ext_name = ext_name
    self.ext_module = import_extension_module(self.ext_name)
    self.max_measure_execution_time = max_measure_execution_time
    self.time_scale = time_scale
    self.result = dict()
    self.name2val = {v: k for k, v in nn.get_parameters().items()}

    if self.n_run < 1:
        raise AssertionError("n_run must be at least 1")
def create_graphviz_digraph(self, vleaf, format=None):
    '''
    Create a :obj:`graphviz.Digraph` object given the leaf variable of a
    computation graph.

    One of the nice things about getting a ``Digraph`` directly is that the
    drawn graph can be displayed inline in a Jupyter notebook, as described in
    `Graphviz documentation <https://graphviz.readthedocs.io/en/stable/manual.html#jupyter-notebooks>`_.

    Args:
        vleaf (`nnabla.Variable`):
            End variable. All variables and functions which can be traversed
            from this variable are shown in the result.
        format (str):
            Force overwrite the ``format`` (``'pdf'``, ``'png'``, ...)
            configuration.

    Returns:
        graphviz.Digraph

    '''
    from nnabla import get_parameters
    import copy
    try:
        from graphviz import Digraph
    except:
        raise ImportError("Install graphviz. `pip install graphviz`.")
    if format is None:
        format = self._format
    graph = Digraph(format=format)
    graph.attr("node", style="filled")

    params = get_parameters(grad_only=False)
    var2name = {v.data: k for k, v in params.items()}
    fun2scope = {}
    var2postname = copy.copy(var2name)

    def fscope(f):
        names = [var2name[v.data] for v in f.inputs if v.data in var2name]
        if names:
            c = os.path.commonprefix(names)
            fun2scope[f] = c
            for n in names:
                var2postname[params[n].data] = n[len(c):]

    vleaf.visit(fscope)

    func = self.functor(graph, self._verbose, fun2scope=fun2scope,
                        var2name=var2postname)
    vleaf.visit(func)
    return graph
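# A minimal usage sketch for the method above. The owning class is assumed to
# be nnabla's simple graph viewer (nnabla.experimental.viewers.SimpleGraph);
# that assumption, and the tiny graph built here, are for illustration only.
import nnabla as nn
import nnabla.parametric_functions as PF
import nnabla.experimental.viewers as V

x = nn.Variable((4, 16))
y = PF.affine(x, 8, name='fc')
graph = V.SimpleGraph().create_graphviz_digraph(y, format='png')
graph.render('graph_example', cleanup=True)  # writes graph_example.png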
def train(max_iter=60000): # Initialize data provider di_l = I.data_iterator_mnist(batch_size, True) di_t = I.data_iterator_mnist(batch_size, False) # Network shape_x = (1, 28, 28) shape_z = (50, ) x = nn.Variable((batch_size, ) + shape_x) loss_l = I.vae(x, shape_z, test=False) loss_t = I.vae(x, shape_z, test=True) # Create solver solver = S.Adam(learning_rate) solver.set_parameters(nn.get_parameters()) # Monitors for training and validation path = cache_dir(os.path.join(I.name, "monitor")) monitor = M.Monitor(path) monitor_train_loss = M.MonitorSeries("train_loss", monitor, interval=600) monitor_val_loss = M.MonitorSeries("val_loss", monitor, interval=600) monitor_time = M.MonitorTimeElapsed("time", monitor, interval=600) # Training Loop. for i in range(max_iter): # Initialize gradients solver.zero_grad() # Forward, backward and update x.d, _ = di_l.next() loss_l.forward(clear_no_need_grad=True) loss_l.backward(clear_buffer=True) solver.weight_decay(weight_decay) solver.update() # Forward for test x.d, _ = di_t.next() loss_t.forward(clear_no_need_grad=True) # Monitor for logging monitor_train_loss.add(i, loss_l.d.copy()) monitor_val_loss.add(i, loss_t.d.copy()) monitor_time.add(i) return path
def test_propagate(self):
    """
    The graph used below is from Fig. 1 in the original paper
    (https://arxiv.org/pdf/1511.05493.pdf)
    """
    edges = {"B": [(0, 1), (3, 2)], "C": [(2, 1), (1, 3)]}
    with nn.parameter_scope("test_propagate"):
        vertices = nn.Variable((4, 1))
        outputs = L.propagate(vertices, edges)
        params = nn.get_parameters()
    self.assertEqual((4, 1), outputs.shape)
    self.assertEqual(8, len(params))
    self.assertEqual((1, 3, 1), params["W_zr/affine/W"].shape)
    self.assertEqual((1, 2, 1), params["U_zr/affine/W"].shape)
    self.assertEqual((1, 1), params["U/affine/W"].shape)
def main(args): # Settings device_id = args.device_id batch_size = args.batch_size batch_size_eval = args.batch_size_eval n_l_train_data = 4000 n_train_data = 50000 n_cls = 10 learning_rate = 1. * 1e-3 n_epoch = 300 act = F.relu iter_epoch = int(n_train_data / batch_size) n_iter = n_epoch * iter_epoch extension_module = args.context alpha = args.alpha # Supervised Model ## ERM batch_size, m, h, w = batch_size, 3, 32, 32 ctx = extension_context(extension_module, device_id=device_id) x_l_0 = nn.Variable((batch_size, m, h, w)) y_l_0 = nn.Variable((batch_size, 1)) pred = cnn_model_003(ctx, x_l_0) loss_ce = ce_loss(ctx, pred, y_l_0) loss_er = er_loss(ctx, pred) loss_supervised = loss_ce + loss_er ## VRM (mixup) x_l_1 = nn.Variable((batch_size, m, h, w)) y_l_1 = nn.Variable((batch_size, 1)) coef = nn.Variable() coef_b = F.broadcast(coef.reshape([1]*x_l_0.ndim, unlink=True), x_l_0.shape) x_l_m = coef_b * x_l_0 + (1 - coef_b) * x_l_1 coef_b = F.broadcast(coef.reshape([1]*pred.ndim, unlink=True), pred.shape) y_l_m = coef_b * F.one_hot(y_l_0, (n_cls, )) \ + (1-coef_b) * F.one_hot(y_l_1, (n_cls, )) x_l_m.need_grad, y_l_m.need_grad = False, False pred_m = cnn_model_003(ctx, x_l_m) loss_er_m = er_loss(ctx, pred_m) #todo: need? loss_ce_m = ce_loss_soft(ctx, pred, y_l_m) loss_supervised_m = loss_ce_m #+ loss_er_m # Semi-Supervised Model ## ERM x_u0 = nn.Variable((batch_size, m, h, w)) x_u1 = nn.Variable((batch_size, m, h, w)) pred_x_u0 = cnn_model_003(ctx, x_u0) pred_x_u1 = cnn_model_003(ctx, x_u1) pred_x_u0.persistent, pred_x_u1.persistent = True, True loss_sr = sr_loss(ctx, pred_x_u0, pred_x_u1) loss_er0 = er_loss(ctx, pred_x_u0) loss_er1 = er_loss(ctx, pred_x_u1) loss_unsupervised = loss_sr + loss_er0 + loss_er1 ## VRM (mixup) x_u2 = nn.Variable((batch_size, m, h, w)) # not to overwrite x_u1.d coef_u = nn.Variable() coef_u_b = F.broadcast(coef_u.reshape([1]*x_u0.ndim, unlink=True), x_u0.shape) x_u_m = coef_u_b * x_u0 + (1-coef_u_b) * x_u2 pred_x_u0_ = nn.Variable(pred_x_u0.shape) # unlink forward pass but reuse result pred_x_u1_ = nn.Variable(pred_x_u1.shape) pred_x_u0_.data = pred_x_u0.data pred_x_u1_.data = pred_x_u1.data coef_u_b = F.broadcast(coef_u.reshape([1]*pred_x_u0.ndim, unlink=True), pred_x_u0.shape) y_u_m = coef_u_b * pred_x_u0_ + (1-coef_u_b) * pred_x_u1_ x_u_m.need_grad, y_u_m.need_grad = False, False pred_x_u_m = cnn_model_003(ctx, x_u_m) loss_er_u_m = er_loss(ctx, pred_x_u_m) #todo: need? loss_ce_u_m = ce_loss_soft(ctx, pred_x_u_m, y_u_m) loss_unsupervised_m = loss_ce_u_m #+ loss_er_u_m # Evaluatation Model batch_size_eval, m, h, w = batch_size, 3, 32, 32 x_eval = nn.Variable((batch_size_eval, m, h, w)) pred_eval = cnn_model_003(ctx, x_eval, test=True) # Solver with nn.context_scope(ctx): solver = S.Adam(alpha=learning_rate) solver.set_parameters(nn.get_parameters()) # Dataset ## separate dataset home = os.environ.get("HOME") fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz") separator = Separator(n_l_train_data) separator.separate_then_save(fpath) l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz") u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") # data reader data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path, batch_size=batch_size, n_cls=n_cls, da=True, shape=True) # Training loop print("# Training loop") epoch = 1 st = time.time() acc_prev = 0. ve_best = 1. 
save_path_prev = "" for i in range(n_iter): # Get data and set it to the varaibles x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch() x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch() x_l_0.d, _ , y_l_0.d= x_l0_data, x_l1_data, y_l_data x_u0.d, x_u1.d= x_u0_data, x_u1_data # Train ## forward (supervised and its mixup) loss_supervised.forward(clear_no_need_grad=True) coef_data = np.random.beta(alpha, alpha) coef.d = coef_data x_l_1.d = np.random.permutation(x_l0_data) y_l_1.d = np.random.permutation(y_l_data) loss_supervised_m.forward(clear_no_need_grad=True) ## forward (unsupervised and its mixup) loss_unsupervised.forward(clear_no_need_grad=True) coef_data = np.random.beta(alpha, alpha) coef_u.d = coef_data x_u2.d = np.random.permutation(x_u1_data) loss_unsupervised_m.forward(clear_no_need_grad=True) ## backward solver.zero_grad() loss_supervised.backward(clear_buffer=False) loss_supervised_m.backward(clear_buffer=False) loss_unsupervised.backward(clear_buffer=False) loss_unsupervised_m.backward(clear_buffer=True) solver.update() # Evaluate if int((i+1) % iter_epoch) == 0: # Get data and set it to the varaibles x_data, y_data = data_reader.get_test_batch() # Evaluation loop ve = 0. iter_val = 0 for k in range(0, len(x_data), batch_size_eval): x_eval.d = get_test_data(x_data, k, batch_size_eval) label = get_test_data(y_data, k, batch_size_eval) pred_eval.forward(clear_buffer=True) ve += categorical_error(pred_eval.d, label) iter_val += 1 ve /= iter_val msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format( epoch, time.time() - st, (1. - ve) * 100) print(msg) if ve < ve_best: if not os.path.exists(args.model_save_path): os.makedirs(args.model_save_path) if save_path_prev != "": os.remove(save_path_prev) save_path = os.path.join( args.model_save_path, 'params_%06d.h5' % epoch) nn.save_parameters(save_path) save_path_prev = save_path ve_best = ve st = time.time() epoch +=1
def train(): """ Main script. """ args = get_args() # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Dataset # We use Tiny ImageNet from Stanford CS231N class. # https://tiny-imagenet.herokuapp.com/ # Tiny ImageNet consists of 200 categories, each category has 500 images # in training set. The image size is 64x64. To adapt ResNet into 64x64 # image inputs, the input image size of ResNet is set as 56x56, and # the stride in the first conv and the first max pooling are removed. data = data_iterator_tiny_imagenet(args.batch_size, 'train') vdata = data_iterator_tiny_imagenet(args.batch_size, 'val') num_classes = 200 tiny = True # TODO: Switch ILSVRC2012 dataset and TinyImageNet. t_model = get_model( args, num_classes, test=False, tiny=tiny) t_model.pred.persistent = True # Not clearing buffer of pred in backward v_model = get_model( args, num_classes, test=True, tiny=tiny) v_model.pred.persistent = True # Not clearing buffer of pred in forward # Create Solver. solver = S.Momentum(args.learning_rate, 0.9) solver.set_parameters(nn.get_parameters()) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10) monitor_err = M.MonitorSeries("Training error", monitor, interval=10) monitor_vloss = M.MonitorSeries("Validation loss", monitor, interval=10) monitor_verr = M.MonitorSeries("Validation error", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=10) # Training loop. for i in range(args.max_iter): # Save parameters if i % args.model_save_interval == 0: nn.save_parameters(os.path.join( args.model_save_path, 'param_%06d.h5' % i)) # Validation if i % args.val_interval == 0: # Clear all intermediate memory to save memory. # t_model.loss.clear_recursive() l = 0.0 e = 0.0 for j in range(args.val_iter): images, labels = vdata.next() v_model.image.d = images v_model.label.d = labels v_model.image.data.cast(np.uint8, ctx) v_model.label.data.cast(np.int32, ctx) v_model.loss.forward(clear_buffer=True) l += v_model.loss.d e += categorical_error(v_model.pred.d, v_model.label.d) monitor_vloss.add(i, l / args.val_iter) monitor_verr.add(i, e / args.val_iter) # Clear all intermediate memory to save memory. # v_model.loss.clear_recursive() # Training l = 0.0 e = 0.0 solver.zero_grad() # Gradient accumulation loop for j in range(args.accum_grad): images, labels = data.next() t_model.image.d = images t_model.label.d = labels t_model.image.data.cast(np.uint8, ctx) t_model.label.data.cast(np.int32, ctx) t_model.loss.forward(clear_no_need_grad=True) t_model.loss.backward(clear_buffer=True) # Accumulating gradients l += t_model.loss.d e += categorical_error(t_model.pred.d, t_model.label.d) solver.weight_decay(args.weight_decay) solver.update() monitor_loss.add(i, l / args.accum_grad) monitor_err.add(i, e / args.accum_grad) monitor_time.add(i) # Learning rate decay at scheduled iter if i in args.learning_rate_decay_at: solver.set_learning_rate(solver.learning_rate() * 0.1) nn.save_parameters(os.path.join(args.model_save_path, 'param_%06d.h5' % args.max_iter))
def main(args): # Settings device_id = args.device_id batch_size = args.batch_size batch_size_eval = args.batch_size_eval n_l_train_data = 4000 n_train_data = 50000 n_cls = 10 learning_rate = 1. * 1e-3 n_epoch = 300 act = F.relu iter_epoch = int(n_train_data / batch_size) n_iter = n_epoch * iter_epoch extension_module = args.context rampups_x = np.linspace(0, 1, 50) rampups = np.exp(-5 * (1 - rampups_x)**2) # Model ## supervised batch_size, m, h, w = batch_size, 3, 32, 32 ctx = extension_context(extension_module, device_id=device_id) x_l = nn.Variable((batch_size, m, h, w)) y_l = nn.Variable((batch_size, 1)) pred = cnn_model_003(ctx, x_l) loss_ce = ce_loss(ctx, pred, y_l) loss_er = er_loss(ctx, pred) loss_supervised = loss_ce + loss_er ## stochastic regularization x_u0 = nn.Variable((batch_size, m, h, w)) x_u1 = nn.Variable((batch_size, m, h, w)) pred_x_u0 = cnn_model_003(ctx, x_u0) pred_x_u1 = cnn_model_003(ctx, x_u1) loss_sr = sr_loss(ctx, pred_x_u0, pred_x_u1) loss_er0 = er_loss(ctx, pred_x_u0) loss_er1 = er_loss(ctx, pred_x_u1) coef = nn.Variable() coef.d = 0 loss_unsupervised = coef * loss_sr + loss_er0 + loss_er1 ## evaluate batch_size_eval, m, h, w = batch_size, 3, 32, 32 x_eval = nn.Variable((batch_size_eval, m, h, w)) pred_eval = cnn_model_003(ctx, x_eval, test=True) # Solver with nn.context_scope(ctx): solver = S.Adam(alpha=learning_rate) solver.set_parameters(nn.get_parameters()) # Dataset ## separate dataset home = os.environ.get("HOME") fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz") separator = Separator(n_l_train_data) separator.separate_then_save(fpath) l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz") u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") # data reader data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path, batch_size=batch_size, n_cls=n_cls, da=True, shape=True) # Training loop print("# Training loop") epoch = 1 st = time.time() acc_prev = 0. ve_best = 1. save_path_prev = "" for i in range(n_iter): # Get data and set it to the varaibles x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch() x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch() x_l.d, _ , y_l.d= x_l0_data, x_l1_data, y_l_data x_u0.d, x_u1.d= x_u0_data, x_u1_data # Train loss_supervised.forward(clear_no_need_grad=True) loss_unsupervised.forward(clear_no_need_grad=True) solver.zero_grad() loss_supervised.backward(clear_buffer=True) loss_unsupervised.backward(clear_buffer=True) solver.update() # Evaluate if int((i+1) % iter_epoch) == 0: # Get data and set it to the varaibles x_data, y_data = data_reader.get_test_batch() # Evaluation loop ve = 0. iter_val = 0 for k in range(0, len(x_data), batch_size_eval): x_eval.d = get_test_data(x_data, k, batch_size_eval) label = get_test_data(y_data, k, batch_size_eval) pred_eval.forward(clear_buffer=True) ve += categorical_error(pred_eval.d, label) iter_val += 1 ve /= iter_val msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format( epoch, time.time() - st, (1. - ve) * 100) print(msg) st = time.time() epoch +=1 coef.d = rampups[epoch]
def train(args): """ Main script. """ # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Create CNN network for both training and testing. # TRAIN # Fake path z = nn.Variable([args.batch_size, 100, 1, 1]) fake = generator(z) fake.persistent = True # Not to clear at backward pred_fake = discriminator(fake) loss_gen = F.mean(F.sigmoid_cross_entropy( pred_fake, F.constant(1, pred_fake.shape))) fake_dis = fake.unlinked() pred_fake_dis = discriminator(fake_dis) loss_dis = F.mean(F.sigmoid_cross_entropy( pred_fake_dis, F.constant(0, pred_fake_dis.shape))) # Real path x = nn.Variable([args.batch_size, 1, 28, 28]) pred_real = discriminator(x) loss_dis += F.mean(F.sigmoid_cross_entropy(pred_real, F.constant(1, pred_real.shape))) # Create Solver. solver_gen = S.Adam(args.learning_rate, beta1=0.5) solver_dis = S.Adam(args.learning_rate, beta1=0.5) with nn.parameter_scope("gen"): solver_gen.set_parameters(nn.get_parameters()) with nn.parameter_scope("dis"): solver_dis.set_parameters(nn.get_parameters()) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10) monitor_loss_dis = M.MonitorSeries( "Discriminator loss", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100) monitor_fake = M.MonitorImageTile( "Fake images", monitor, normalize_method=lambda x: x + 1 / 2.) data = data_iterator_mnist(args.batch_size, True) # Training loop. for i in range(args.max_iter): if i % args.model_save_interval == 0: with nn.parameter_scope("gen"): nn.save_parameters(os.path.join( args.model_save_path, "generator_param_%06d.h5" % i)) with nn.parameter_scope("dis"): nn.save_parameters(os.path.join( args.model_save_path, "discriminator_param_%06d.h5" % i)) # Training forward image, _ = data.next() x.d = image / 255. - 0.5 # [0, 255] to [-1, 1] z.d = np.random.randn(*z.shape) # Generator update. solver_gen.zero_grad() loss_gen.forward(clear_no_need_grad=True) loss_gen.backward(clear_buffer=True) solver_gen.weight_decay(args.weight_decay) solver_gen.update() monitor_fake.add(i, fake) monitor_loss_gen.add(i, loss_gen.d.copy()) # Discriminator update. solver_dis.zero_grad() loss_dis.forward(clear_no_need_grad=True) loss_dis.backward(clear_buffer=True) solver_dis.weight_decay(args.weight_decay) solver_dis.update() monitor_loss_dis.add(i, loss_dis.d.copy()) monitor_time.add(i) nnp = os.path.join( args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter) runtime_contents = { 'networks': [ {'name': 'Generator', 'batch_size': args.batch_size, 'outputs': {'G': fake}, 'names': {'z': z}}, {'name': 'Discriminator', 'batch_size': args.batch_size, 'outputs': {'D': pred_real}, 'names': {'x': x}}], 'executors': [ {'name': 'Generator', 'network': 'Generator', 'data': ['z'], 'output': ['G']}, {'name': 'Discriminator', 'network': 'Discriminator', 'data': ['x'], 'output': ['D']}]} save.save(nnp, runtime_contents) from cpp_forward_check import check_cpp_forward check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp, "Generator")
def train(args): """ Main script. """ # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Create CNN network for both training and testing. margin = 1.0 # Margin for contrastive loss. # TRAIN # Create input variables. image0 = nn.Variable([args.batch_size, 1, 28, 28]) image1 = nn.Variable([args.batch_size, 1, 28, 28]) label = nn.Variable([args.batch_size]) # Create predition graph. pred = mnist_lenet_siamese(image0, image1, test=False) # Create loss function. loss = F.mean(contrastive_loss(pred, label, margin)) # TEST # Create input variables. vimage0 = nn.Variable([args.batch_size, 1, 28, 28]) vimage1 = nn.Variable([args.batch_size, 1, 28, 28]) vlabel = nn.Variable([args.batch_size]) # Create predition graph. vpred = mnist_lenet_siamese(vimage0, vimage1, test=True) vloss = F.mean(contrastive_loss(vpred, vlabel, margin)) # Create Solver. solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Create monitor. import nnabla.monitor as M monitor = M.Monitor(args.monitor_path) monitor_loss = M.MonitorSeries("Training loss", monitor, interval=10) monitor_time = M.MonitorTimeElapsed("Training time", monitor, interval=100) monitor_vloss = M.MonitorSeries("Test loss", monitor, interval=10) # Initialize DataIterator for MNIST. rng = np.random.RandomState(313) data = siamese_data_iterator(args.batch_size, True, rng) vdata = siamese_data_iterator(args.batch_size, False, rng) # Training loop. for i in range(args.max_iter): if i % args.val_interval == 0: # Validation ve = 0.0 for j in range(args.val_iter): vimage0.d, vimage1.d, vlabel.d = vdata.next() vloss.forward(clear_buffer=True) ve += vloss.d monitor_vloss.add(i, ve / args.val_iter) if i % args.model_save_interval == 0: nn.save_parameters(os.path.join( args.model_save_path, 'params_%06d.h5' % i)) image0.d, image1.d, label.d = data.next() solver.zero_grad() # Training forward, backward and update loss.forward(clear_no_need_grad=True) loss.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() monitor_loss.add(i, loss.d.copy()) monitor_time.add(i) parameter_file = os.path.join( args.model_save_path, 'params_%06d.h5' % args.max_iter) nn.save_parameters(parameter_file) nnp_file = os.path.join( args.model_save_path, 'siamese_%06d.nnp' % (args.max_iter)) runtime_contents = { 'networks': [ {'name': 'Validation', 'batch_size': args.batch_size, 'outputs': {'y': vpred}, 'names': {'x0': vimage0, 'x1': vimage1}}], 'executors': [ {'name': 'Runtime', 'network': 'Validation', 'data': ['x0', 'x1'], 'output': ['y']}]} save.save(nnp_file, runtime_contents) from cpp_forward_check import check_cpp_forward check_cpp_forward(args.model_save_path, [vimage0.d, vimage1.d], [ vimage0, vimage1], vpred, nnp_file)
def test_data_parallel_communicator(): try: import nnabla_ext import nnabla_ext.cuda from nnabla.contrib.context import extension_context except: pytest.skip("DataParallelCommunicator are only supported in CUDA now.") n_devices = nnabla_ext.cuda.init.get_device_count() if n_devices < 2: pytest.skip("Number of cuda devices is less than 2.") # Contexts and Computation Graph extension_module = "cuda" ctxs = [] for d in range(n_devices): ctx = extension_context(extension_module, device_id="{}".format(d)) ctxs.append(ctx) with nn.context_scope(ctx): x_data = np.random.rand(4, 5) x = nn.Variable(x_data.shape) with nn.parameter_scope("gpu{}".format(d)): with nn.parameter_scope("affine1"): z = PF.affine(x, 6) with nn.parameter_scope("affine2"): y = PF.affine(z, 5) # Init w.g grads = [] for d in range(n_devices): with nn.parameter_scope("gpu{}".format(d)): params = nn.get_parameters() grad = [] for i, elm in enumerate(params.items()): k, v = elm grad_ = np.random.randn(*v.shape) v.g = grad_ v.grad.cast(np.float32, ctxs[d]) grad.append(grad_) grads.append(grad) # Reference ref_grads = [] with nn.parameter_scope("gpu{}".format(d)): params = nn.get_parameters() for i in range(len(params)): ave_grad = 0 for d in range(n_devices): ave_grad += grads[d][i] ave_grad /= n_devices ref_grads.append(ave_grad) # Communicator try: comm = C.DataParalellCommunicator(ctxs[0]) except: pytest.skip( "DataParalellCommunicator is not supported in cpu or not linux platform.") for d in range(n_devices): with nn.parameter_scope("gpu{}".format(d)): comm.add_context_and_parameters( (ctxs[d], nn.get_parameters())) comm.init() comm.allreduce(division=True) # Check atol = 1e-6 for d in range(n_devices): with nn.parameter_scope("gpu{}".format(d)): params = nn.get_parameters() for i, elm in enumerate(params.items()): k, v = elm assert np.allclose(ref_grads[i], v.g, atol=atol)
def train(): """ Naive Multi-Device Training NOTE: the communicator exposes low-level interfaces * Parse command line arguments. * Instantiate a communicator and set parameter variables. * Specify contexts for computation. * Initialize DataIterator. * Construct a computation graph for training and one for validation. * Initialize solver and set parameter variables to that. * Create monitor instances for saving and displaying training stats. * Training loop * Computate error rate for validation data (periodically) * Get a next minibatch. * Execute forwardprop * Set parameter gradients zero * Execute backprop. * Inplace allreduce (THIS IS THE MAIN difference from a single device training) * Solver updates parameters by using gradients computed by backprop. * Compute training error """ # Parse args args = get_args() n_train_samples = 50000 bs_valid = args.batch_size # Communicator and Context extension_module = "cuda.cudnn" ctx = extension_context(extension_module) comm = C.MultiProcessDataParalellCommunicator(ctx) comm.init() n_devices = comm.size mpi_rank = comm.rank device_id = mpi_rank ctx = extension_context(extension_module, device_id=device_id) # Create training graphs test = False image_train = nn.Variable((args.batch_size, 3, 32, 32)) label_train = nn.Variable((args.batch_size, 1)) pred_train = cifar100_resnet23_prediction( image_train, ctx, test) loss_train = cifar100_resnet32_loss(pred_train, label_train) input_image_train = {"image": image_train, "label": label_train} # add parameters to communicator comm.add_context_and_parameters((ctx, nn.get_parameters())) # Create validation graph test = True image_valid = nn.Variable((bs_valid, 3, 32, 32)) pred_valid = cifar100_resnet23_prediction( image_valid, ctx, test) input_image_valid = {"image": image_valid} # Solvers solver = S.Adam() solver.set_parameters(nn.get_parameters()) base_lr = args.learning_rate warmup_iter = int(1. * n_train_samples / args.batch_size / n_devices) * args.warmup_epoch warmup_slope = 1. * n_devices / warmup_iter # Create monitor from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed monitor = Monitor(args.monitor_path) monitor_loss = MonitorSeries("Training loss", monitor, interval=10) monitor_err = MonitorSeries("Training error", monitor, interval=10) monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100) monitor_verr = MonitorSeries("Test error", monitor, interval=10) with data_iterator_cifar100(args.batch_size, True) as tdata, \ data_iterator_cifar100(bs_valid, False) as vdata: # Training-loop for i in range(int(args.max_iter / n_devices)): # Validation if mpi_rank == 0: if i % int(n_train_samples / args.batch_size / n_devices) == 0: ve = 0. 
for j in range(args.val_iter): image, label = vdata.next() input_image_valid["image"].d = image pred_valid.forward() ve += categorical_error(pred_valid.d, label) ve /= args.val_iter monitor_verr.add(i * n_devices, ve) if i % int(args.model_save_interval / n_devices) == 0: nn.save_parameters(os.path.join( args.model_save_path, 'params_%06d.h5' % i)) # Forward/Zerograd/Backward image, label = tdata.next() input_image_train["image"].d = image input_image_train["label"].d = label loss_train.forward() solver.zero_grad() loss_train.backward() # In-place Allreduce comm.allreduce(division=True) # Solvers update solver.update() # Linear Warmup if i < warmup_iter: lr = base_lr * n_devices * warmup_slope * i solver.set_learning_rate(lr) else: lr = base_lr * n_devices solver.set_learning_rate(lr) if mpi_rank == 0: e = categorical_error( pred_train.d, input_image_train["label"].d) monitor_loss.add(i * n_devices, loss_train.d.copy()) monitor_err.add(i * n_devices, e) monitor_time.add(i * n_devices) if mpi_rank == 0: nn.save_parameters(os.path.join( args.model_save_path, 'params_%06d.h5' % (args.max_iter / n_devices)))
def train(): """ Main script. Steps: * Parse command line arguments. * Specify a context for computation. * Initialize DataIterator for MNIST. * Construct a computation graph for training and validation. * Initialize a solver and set parameter variables to it. * Create monitor instances for saving and displaying training stats. * Training loop * Computate error rate for validation data (periodically) * Get a next minibatch. * Execute forwardprop on the training graph. * Compute training error * Set parameter gradients zero * Execute backprop. * Solver updates parameters by using gradients computed by backprop. """ args = get_args() # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Create CNN network for both training and testing. mnist_cnn_prediction = mnist_lenet_prediction if args.net == 'resnet': mnist_cnn_prediction = mnist_resnet_prediction # TRAIN # Create input variables. image = nn.Variable([args.batch_size, 1, 28, 28]) label = nn.Variable([args.batch_size, 1]) # Create prediction graph. pred = mnist_cnn_prediction(image, test=False) pred.persistent = True # Create loss function. loss = F.mean(F.softmax_cross_entropy(pred, label)) # TEST # Create input variables. vimage = nn.Variable([args.batch_size, 1, 28, 28]) vlabel = nn.Variable([args.batch_size, 1]) # Create predition graph. vpred = mnist_cnn_prediction(vimage, test=True) # Create Solver. solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Create monitor. from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed monitor = Monitor(args.monitor_path) monitor_loss = MonitorSeries("Training loss", monitor, interval=10) monitor_err = MonitorSeries("Training error", monitor, interval=10) monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100) monitor_verr = MonitorSeries("Test error", monitor, interval=10) # Initialize DataIterator for MNIST. data = data_iterator_mnist(args.batch_size, True) vdata = data_iterator_mnist(args.batch_size, False) # Training loop. for i in range(args.max_iter): if i % args.val_interval == 0: # Validation ve = 0.0 for j in range(args.val_iter): vimage.d, vlabel.d = vdata.next() vpred.forward(clear_buffer=True) ve += categorical_error(vpred.d, vlabel.d) monitor_verr.add(i, ve / args.val_iter) if i % args.model_save_interval == 0: nn.save_parameters(os.path.join( args.model_save_path, 'params_%06d.h5' % i)) # Training forward image.d, label.d = data.next() solver.zero_grad() loss.forward(clear_no_need_grad=True) loss.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() e = categorical_error(pred.d, label.d) monitor_loss.add(i, loss.d.copy()) monitor_err.add(i, e) monitor_time.add(i) ve = 0.0 for j in range(args.val_iter): vimage.d, vlabel.d = vdata.next() vpred.forward(clear_buffer=True) ve += categorical_error(vpred.d, vlabel.d) monitor_verr.add(i, ve / args.val_iter) parameter_file = os.path.join( args.model_save_path, '{}_params_{:06}.h5'.format(args.net, args.max_iter)) nn.save_parameters(parameter_file)
def main(args): # Settings device_id = args.device_id batch_size = args.batch_size batch_size_eval = args.batch_size_eval n_l_train_data = 4000 n_train_data = 50000 n_cls = 10 learning_rate = 1. * 1e-3 n_epoch = 300 act = F.relu iter_epoch = n_train_data / batch_size n_iter = n_epoch * iter_epoch extension_module = args.context # Model ## supervised cnn batch_size, m, h, w = batch_size, 3, 32, 32 ctx = extension_context(extension_module, device_id=device_id) x_l = nn.Variable((batch_size, m, h, w)) x_l.persistent = True y_l = nn.Variable((batch_size, 1)) y_l.persistent = True pred = cnn_model_003(ctx, "cnn", x_l) loss_ce = ce_loss(ctx, pred, y_l) loss_er = er_loss(ctx, pred) loss_supervised = loss_ce + loss_er ## supervised resnet pred_res = cifar10_resnet23_prediction(ctx, "resnet", x_l) loss_res_ce = ce_loss(ctx, pred_res, y_l) loss_res_supervised = loss_res_ce ## stochastic regularization for cnn x_u0 = nn.Variable((batch_size, m, h, w)) x_u0.persistent = True x_u1 = nn.Variable((batch_size, m, h, w)) pred_x_u0 = cnn_model_003(ctx, "cnn", x_u0) pred_x_u0.persistent = True pred_x_u1 = cnn_model_003(ctx, "cnn", x_u1) loss_sr = sr_loss(ctx, pred_x_u0, pred_x_u1) loss_er0 = er_loss(ctx, pred_x_u0) loss_er1 = er_loss(ctx, pred_x_u1) loss_unsupervised = loss_sr + loss_er0 + loss_er1 ## knowledge transfer for resnet pred_res_x_u0 = cifar10_resnet23_prediction(ctx, "resnet", x_u0) loss_res_unsupervised = kl_divergence(ctx, pred_res_x_u0, pred_x_u0) ## evaluate batch_size_eval, m, h, w = batch_size, 3, 32, 32 x_eval = nn.Variable((batch_size_eval, m, h, w)) x_eval.persistent = True # reused pred_eval = cnn_model_003(ctx, "cnn", x_eval, test=True) pred_res_eval = cifar10_resnet23_prediction(ctx, "resnet", x_eval, test=True) # Solver with nn.context_scope(ctx): with nn.parameter_scope("cnn"): solver = S.Adam(alpha=learning_rate) solver.set_parameters(nn.get_parameters()) with nn.parameter_scope("resnet"): solver_res = S.Adam(alpha=learning_rate) solver_res.set_parameters(nn.get_parameters()) # Dataset ## separate dataset home = os.environ.get("HOME") fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz") separator = Separator(n_l_train_data) separator.separate_then_save(fpath) l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz") u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") # data reader data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path, batch_size=batch_size, n_cls=n_cls, da=True, shape=True) # Training loop print("# Training loop") epoch = 1 st = time.time() acc_prev = 0. 
for i in range(n_iter): # Get data and set it to the variables x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch() x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch() x_l.d, _, y_l.d = x_l0_data, x_l1_data, y_l_data x_u0.d, x_u1.d = x_u0_data, x_u1_data # Train for cnn loss_supervised.forward(clear_no_need_grad=True) loss_unsupervised.forward(clear_no_need_grad=True) solver.zero_grad() loss_supervised.backward(clear_buffer=True) loss_unsupervised.backward(clear_buffer=True) solver.update() # Train for resnet loss_res_supervised.forward(clear_no_need_grad=True) loss_res_unsupervised.forward(clear_no_need_grad=True) solver_res.zero_grad() loss_res_supervised.backward(clear_buffer=True) pred_x_u0.need_grad = False # the teacher does not need gradients loss_res_unsupervised.backward(clear_buffer=True) solver_res.update() pred_x_u0.need_grad = True # Evaluate if (i+1) % iter_epoch == 0: # Get data and set it to the variables x_data, y_data = data_reader.get_test_batch() # Evaluation loop for cnn ve = 0. iter_val = 0 for k in range(0, len(x_data), batch_size_eval): x_eval.d = get_test_data(x_data, k, batch_size_eval) label = get_test_data(y_data, k, batch_size_eval) pred_eval.forward(clear_buffer=True) ve += categorical_error(pred_eval.d, label) iter_val += 1 msg = "Model:cnn,Epoch:{},ElapsedTime:{},Acc:{:02f}".format( epoch, time.time() - st, (1. - ve / iter_val) * 100) print(msg) # Evaluation loop for resnet ve = 0. iter_val = 0 for k in range(0, len(x_data), batch_size_eval): x_eval.d = get_test_data(x_data, k, batch_size_eval) label = get_test_data(y_data, k, batch_size_eval) pred_res_eval.forward(clear_buffer=True) ve += categorical_error(pred_res_eval.d, label) iter_val += 1 msg = "Model:resnet,Epoch:{},ElapsedTime:{},Acc:{:02f}".format( epoch, time.time() - st, (1. - ve / iter_val) * 100) print(msg) st = time.time() epoch += 1
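# `kl_divergence(ctx, pred_res_x_u0, pred_x_u0)` above transfers knowledge from the
# CNN (teacher) to the ResNet (student) on unlabeled data; the call order suggests
# (student, teacher), though the actual helper lives elsewhere in the repo and may
# differ. The sketch below only illustrates one standard way to build such a loss
# from logits with nnabla ops (names ending in `_sketch` are illustrative):
import nnabla as nn
import nnabla.functions as F

def kl_divergence_sketch(ctx, student_logits, teacher_logits):
    """KL(teacher || student) averaged over the batch, built from raw logits."""
    with nn.context_scope(ctx):
        p = F.softmax(teacher_logits)                     # teacher distribution
        log_q = F.log(F.softmax(student_logits) + 1e-8)   # student log-probabilities
        loss = F.mean(F.sum(p * (F.log(p + 1e-8) - log_q), axis=1))
    return loss

# In the loop above, the teacher branch is frozen during the unsupervised backward
# pass by toggling `pred_x_u0.need_grad`, so only the ResNet parameters receive
# gradients from this term.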
def main(): # Get arguments args = get_args() data_file = "https://raw.githubusercontent.com/tomsercu/lstm/master/data/ptb.train.txt" model_file = args.work_dir + "model.h5" # Load Dataset itow, wtoi, dataset = load_ptbset(data_file) # Computation environment settings from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) # Create data provider n_word = len(wtoi) n_dim = args.embed_dim batchsize = args.batchsize half_window = args.half_window_length n_negative = args.n_negative_sample di = DataIteratorForEmbeddingLearning( batchsize=batchsize, half_window=half_window, n_negative=n_negative, dataset=dataset) # Create model # - Real batch size including context samples and negative samples size = batchsize * (1 + n_negative) * (2 * (half_window - 1)) # Model for learning # - input variables xl = nn.Variable((size,)) # variable for word yl = nn.Variable((size,)) # variable for context # Embed layers for word embedding function # - f_embed : word index x to get y, the n_dim vector # -- for each sample in a minibatch hx = PF.embed(xl, n_word, n_dim, name="e1") # feature vector for word hy = PF.embed(yl, n_word, n_dim, name="e1") # feature vector for context hl = F.sum(hx * hy, axis=1) # -- Approximated likelihood of context prediction # pos: word context, neg negative samples tl = nn.Variable([size, ], need_grad=False) loss = F.sigmoid_cross_entropy(hl, tl) loss = F.mean(loss) # Model for test of searching similar words xr = nn.Variable((1,), need_grad=False) hr = PF.embed(xr, n_word, n_dim, name="e1") # feature vector for test # Create solver solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Create monitor. monitor = M.Monitor(args.work_dir) monitor_loss = M.MonitorSeries( "Training loss", monitor, interval=args.monitor_interval) monitor_time = M.MonitorTimeElapsed( "Training time", monitor, interval=args.monitor_interval) # Do training max_epoch = args.max_epoch for epoch in range(max_epoch): # iteration per epoch for i in range(di.n_batch): # get minibatch xi, yi, ti = di.next() # learn solver.zero_grad() xl.d, yl.d, tl.d = xi, yi, ti loss.forward(clear_no_need_grad=True) loss.backward(clear_buffer=True) solver.update() # monitor itr = epoch * di.n_batch + i monitor_loss.add(itr, loss.d) monitor_time.add(itr) # Save model nn.save_parameters(model_file) # Evaluate by similarity max_check_words = args.max_check_words for i in range(max_check_words): # prediction xr.d = i hr.forward(clear_buffer=True) h = hr.d # similarity calculation w = nn.get_parameters()['e1/embed/W'].d s = np.sqrt((w * w).sum(1)) w /= s.reshape((s.shape[0], 1)) similarity = w.dot(h[0]) / s[i] # for understanding output_similar_words(itow, i, similarity)
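# The similarity search above normalizes the embedding matrix in place and scores
# every vocabulary entry against the query vector. The same computation as a small,
# side-effect-free numpy helper (illustrative only; `top_k_similar` is not part of
# the original script):
import numpy as np

def top_k_similar(embed_w, query_index, k=10):
    """Return indices of the k words whose embeddings have the highest cosine
    similarity to the word at `query_index`."""
    norms = np.sqrt((embed_w * embed_w).sum(axis=1, keepdims=True))
    w = embed_w / norms                  # row-normalized copy of the embedding matrix
    sims = w.dot(w[query_index])         # cosine similarity to the query word
    order = np.argsort(-sims)
    return [j for j in order if j != query_index][:k]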
def main(args): # Settings device_id = args.device_id batch_size = 100 batch_size_eval = 100 n_l_train_data = 4000 n_train_data = 50000 n_cls = 10 learning_rate = 1. * 1e-3 n_epoch = 300 act = F.relu iter_epoch = n_train_data / batch_size n_iter = n_epoch * iter_epoch extension_module = args.context # Model ## supervised batch_size, m, h, w = batch_size, 3, 32, 32 ctx = extension_context(extension_module, device_id=device_id) x_l = nn.Variable((batch_size, m, h, w)) y_l = nn.Variable((batch_size, 1)) pred = cnn_model_003(ctx, x_l) loss_ce = ce_loss(ctx, pred, y_l) loss_er = er_loss(ctx, pred) loss_supervised = loss_ce + loss_er ## stochastic regularization x_u0 = nn.Variable((batch_size, m, h, w), need_grad=False) x_u1 = nn.Variable((batch_size, m, h, w), need_grad=False) pred_x_u0 = cnn_model_003(ctx, x_u0) pred_x_u1 = cnn_model_003(ctx, x_u1) loss_sr = sr_loss(ctx, pred_x_u0, pred_x_u1) loss_er0 = er_loss(ctx, pred_x_u0) loss_er1 = er_loss(ctx, pred_x_u1) loss_unsupervised = loss_sr + loss_er0 + loss_er1 ## autoencoder path = args.model_path nn.load_parameters(path) x_u0_rc = cnn_ae_model_000(ctx, x_u0, act=F.relu, test=True) x_u1_rc = cnn_ae_model_000(ctx, x_u1, act=F.relu, test=True) x_u0_rc.need_grad = False x_u1_rc.need_grad = False pred_x_u0_rc = cnn_model_003(ctx, x_u0_rc, test=False) pred_x_u1_rc = cnn_model_003(ctx, x_u1_rc, test=False) loss_sr_rc = sr_loss(ctx, pred_x_u0_rc, pred_x_u1_rc) loss_er0_rc = er_loss(ctx, pred_x_u0_rc) loss_er1_rc = er_loss(ctx, pred_x_u1_rc) loss_unsupervised_rc = loss_sr_rc + loss_er0_rc + loss_er1_rc loss_unsupervised += loss_unsupervised_rc ## evaluate batch_size_eval, m, h, w = batch_size, 3, 32, 32 x_eval = nn.Variable((batch_size_eval, m, h, w)) pred_eval = cnn_model_003(ctx, x_eval, test=True) # Solver with nn.context_scope(ctx): solver = S.Adam(alpha=learning_rate) solver.set_parameters(nn.get_parameters()) # Dataset ## separate dataset home = os.environ.get("HOME") fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz") separator = Separator(n_l_train_data) separator.separate_then_save(fpath) l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz") u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") # data reader data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path, batch_size=batch_size, n_cls=n_cls, da=True, shape=True) # Training loop print("# Training loop") epoch = 1 st = time.time() acc_prev = 0. for i in range(n_iter): # Get data and set it to the varaibles x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch() x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch() x_l.d, _ , y_l.d= x_l0_data, x_l1_data, y_l_data x_u0.d, x_u1.d= x_u0_data, x_u1_data # Train loss_supervised.forward(clear_no_need_grad=True) solver.zero_grad() loss_supervised.backward(clear_buffer=True) solver.update() loss_unsupervised.forward(clear_no_need_grad=True) solver.zero_grad() loss_unsupervised.backward(clear_buffer=True) solver.update() # Evaluate if (i+1) % iter_epoch == 0: # Get data and set it to the varaibles x_data, y_data = data_reader.get_test_batch() # Evaluation loop ve = 0. iter_val = 0 for k in range(0, len(x_data), batch_size_eval): x_eval.d = x_data[k:k+batch_size_eval, :] label = y_data[k:k+batch_size_eval, :] pred_eval.forward(clear_buffer=True) ve += categorical_error(pred_eval.d, label) iter_val += 1 msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format( epoch, time.time() - st, (1. 
- ve / iter_val) * 100) print(msg) st = time.time() epoch += 1
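# `sr_loss(ctx, pred_x_u0, pred_x_u1)` above is the stochastic-regularization
# (consistency) term between two stochastic forward passes of the same unlabeled
# image. The helper is defined elsewhere in the repo; a minimal sketch of one common
# choice, the squared distance between the two softmax outputs (illustrative only):
import nnabla as nn
import nnabla.functions as F

def sr_loss_sketch(ctx, logits0, logits1):
    """Squared distance between two predictive distributions, averaged over the batch."""
    with nn.context_scope(ctx):
        p0 = F.softmax(logits0)
        p1 = F.softmax(logits1)
        loss = F.mean(F.sum(F.squared_error(p0, p1), axis=1))
    return loss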
def train(): """ Naive Multi-Device Training NOTE: the communicator exposes low-level interfaces * Parse command line arguments. * Specify contexts for computation. * Initialize DataIterator. * Construct computation graphs for training and one for validation. * Initialize solvers and set parameter variables to those. * Instantiate a communicator and set parameter variables. * Create monitor instances for saving and displaying training stats. * Training loop * Computate error rate for validation data (periodically) * Get a next minibatch. * Execute forwardprops * Set parameter gradients zero * Execute backprop. * Inplace allreduce (THIS IS THE MAIN difference from a single device training) * Solver updates parameters by using gradients computed by backprop. * Compute training error """ # Parse args args = get_args() n_train_samples = 50000 bs_valid = args.batch_size # Create contexts extension_module = args.context if extension_module != "cuda" and \ extension_module != "cuda.cudnn": raise Exception("Use `cuda` or `cuda.cudnn` extension_module.") n_devices = args.n_devices ctxs = [] for i in range(n_devices): ctx = extension_context(extension_module, device_id=i) ctxs.append(ctx) ctx = ctxs[-1] # Create training graphs input_image_train = [] preds_train = [] losses_train = [] test = False for i in range(n_devices): image = nn.Variable((args.batch_size, 3, 32, 32)) label = nn.Variable((args.batch_size, 1)) device_scope_name = "device{}".format(i) pred = cifar100_resnet23_prediction( image, ctxs[i], device_scope_name, test) loss = cifar100_resnet32_loss(pred, label) input_image_train.append({"image": image, "label": label}) preds_train.append(pred) losses_train.append(loss) # Create validation graph test = True device_scope_name = "device{}".format(0) image_valid = nn.Variable((bs_valid, 3, 32, 32)) pred_valid = cifar100_resnet23_prediction( image_valid, ctxs[i], device_scope_name, test) input_image_valid = {"image": image_valid} # Solvers solvers = [] for i in range(n_devices): with nn.context_scope(ctxs[i]): solver = S.Adam() device_scope_name = "device{}".format(i) with nn.parameter_scope(device_scope_name): params = nn.get_parameters() solver.set_parameters(params) solvers.append(solver) # Communicator comm = C.DataParalellCommunicator(ctx) for i in range(n_devices): device_scope_name = "device{}".format(i) with nn.parameter_scope(device_scope_name): ctx = ctxs[i] params = nn.get_parameters() comm.add_context_and_parameters((ctx, params)) comm.init() # Create threadpools with one thread pools = [] for _ in range(n_devices): pool = ThreadPool(processes=1) pools.append(pool) # Once forward/backward to safely secure memory for device_id in range(n_devices): data, label = \ (np.random.randn(*input_image_train[device_id]["image"].shape), (np.random.rand(*input_image_train[device_id]["label"].shape) * 10).astype(np.int32)) ret = pools[device_id].apply_async(forward_backward, (input_image_train[device_id]["image"], data, input_image_train[device_id]["label"], label, losses_train[device_id], solvers[device_id])) ret.get() losses_train[device_id].d # sync to host # Create monitor. 
from nnabla.monitor import Monitor, MonitorSeries, MonitorTimeElapsed monitor = Monitor(args.monitor_path) monitor_loss = MonitorSeries("Training loss", monitor, interval=10) monitor_err = MonitorSeries("Training error", monitor, interval=10) monitor_time = MonitorTimeElapsed("Training time", monitor, interval=100) monitor_verr = MonitorSeries("Test error", monitor, interval=10) with data_iterator_cifar100(args.batch_size, True) as tdata, \ data_iterator_cifar100(bs_valid, False) as vdata: # Training-loop for i in range(int(args.max_iter / n_devices)): # Validation if i % int(n_train_samples / args.batch_size / n_devices) == 0: ve = 0. for j in range(args.val_iter): image, label = vdata.next() input_image_valid["image"].d = image pred_valid.forward() ve += categorical_error(pred_valid.d, label) ve /= args.val_iter monitor_verr.add(i * n_devices, ve) if i % int(args.model_save_interval / n_devices) == 0: nn.save_parameters(os.path.join( args.model_save_path, 'params_%06d.h5' % i)) # Forwards/Zerograd/Backwards fb_results = [] for device_id in range(n_devices): image, label = tdata.next() res = pools[device_id].apply_async(forward_backward, (input_image_train[device_id]["image"], image, input_image_train[device_id]["label"], label, losses_train[device_id], solvers[device_id])) fb_results.append(res) for device_id in range(n_devices): fb_results[device_id].get() # In-place Allreduce comm.allreduce() # Solvers update for device_id in range(n_devices): solvers[device_id].update() e = categorical_error( preds_train[-1].d, input_image_train[-1]["label"].d) monitor_loss.add(i * n_devices, losses_train[-1].d.copy()) monitor_err.add(i * n_devices, e) monitor_time.add(i * n_devices) nn.save_parameters(os.path.join( args.model_save_path, 'params_%06d.h5' % (args.max_iter / n_devices)))
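# Each thread in the pools above runs `forward_backward`, which is defined elsewhere
# in this script/repo. Based on how it is called (input variable, input data, label
# variable, label data, loss, solver), a minimal sketch of such a worker looks like
# the following (illustrative; the actual helper may differ):
def forward_backward_sketch(image_var, image_data, label_var, label_data, loss, solver):
    """Run one forward/backward pass for a single device's training graph."""
    image_var.d = image_data
    label_var.d = label_data
    loss.forward(clear_no_need_grad=True)
    solver.zero_grad()
    loss.backward(clear_buffer=True)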
def main(args): # Settings device_id = args.device_id batch_size = args.batch_size batch_size_eval = args.batch_size_eval n_l_train_data = args.n_label n_train_data = 73257 n_cls = 10 learning_rate = 1. * 1e-3 n_epoch = args.epoch act = F.relu iter_epoch = n_train_data / batch_size n_iter = int(n_epoch * iter_epoch) extension_module = args.context # Model ## supervised batch_size, m, h, w = batch_size, 3, 32, 32 ctx = extension_context(extension_module, device_id=device_id) x_l = nn.Variable((batch_size, m, h, w)) y_l = nn.Variable((batch_size, 1)) pred = cnn_model_003(ctx, x_l) loss_ce = ce_loss(ctx, pred, y_l) loss_er = er_loss(ctx, pred) loss_supervised = loss_ce + loss_er ## stochastic regularization x_u0 = nn.Variable((batch_size, m, h, w)) x_u1 = nn.Variable((batch_size, m, h, w)) pred_x_u0 = cnn_model_003(ctx, x_u0) pred_x_u1 = cnn_model_003(ctx, x_u1) loss_sr = sr_loss(ctx, pred_x_u0, pred_x_u1) loss_er0 = er_loss(ctx, pred_x_u0) loss_er1 = er_loss(ctx, pred_x_u1) loss_unsupervised = loss_sr + loss_er0 + loss_er1 ## evaluate batch_size_eval, m, h, w = batch_size, 3, 32, 32 x_eval = nn.Variable((batch_size_eval, m, h, w)) pred_eval = cnn_model_003(ctx, x_eval, test=True) # Solver with nn.context_scope(ctx): solver = S.Adam(alpha=learning_rate) solver.set_parameters(nn.get_parameters()) # Dataset ## separate dataset home = os.environ.get("HOME") fpath = os.path.join(home, "datasets/svhn/train.mat") separator = Separator(n_l_train_data) separator.separate_then_save(fpath) l_train_path = os.path.join(home, "datasets/svhn/l_train.mat") u_train_path = os.path.join(home, "datasets/svhn/u_train.mat") test_path = os.path.join(home, "datasets/svhn/test.mat") # data reader data_reader = SVHNDataReader(l_train_path, u_train_path, test_path, batch_size=batch_size, n_cls=n_cls, da=False, shape=True) # Training loop print("# Training loop") epoch = 1 st = time.time() acc_prev = 0. ve_best = 1. save_path_prev = "" for i in range(n_iter): # Get data and set it to the varaibles x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch() x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch() x_l.d, _ , y_l.d= x_l0_data, x_l1_data, y_l_data x_u0.d, x_u1.d= x_u0_data, x_u1_data # Train loss_supervised.forward(clear_no_need_grad=True) loss_unsupervised.forward(clear_no_need_grad=True) solver.zero_grad() loss_supervised.backward(clear_buffer=True) loss_unsupervised.backward(clear_buffer=True) solver.update() # Evaluate if int((i+1) % iter_epoch) == 0: # Get data and set it to the varaibles x_data, y_data = data_reader.get_test_batch() # Evaluation loop ve = 0. iter_val = 0 for k in range(0, len(x_data), batch_size_eval): x_eval.d = get_test_data(x_data, k, batch_size_eval) label = get_test_data(y_data, k, batch_size_eval) pred_eval.forward(clear_buffer=True) ve += categorical_error(pred_eval.d, label) iter_val += 1 ve /= iter_val msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format( epoch, time.time() - st, (1. - ve) * 100) print(msg) if ve < ve_best: if not os.path.exists(args.model_save_path): os.makedirs(args.model_save_path) if save_path_prev != "": os.remove(save_path_prev) save_path = os.path.join( args.model_save_path, 'params_%06d.h5' % epoch) nn.save_parameters(save_path) save_path_prev = save_path ve_best = ve st = time.time() epoch +=1
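# `er_loss(ctx, pred)` appears throughout these semi-supervised scripts as an entropy
# regularizer that sharpens the predictive distribution. Its definition lives
# elsewhere in the repo; a minimal sketch of the usual entropy-minimization term
# (illustrative only):
import nnabla as nn
import nnabla.functions as F

def er_loss_sketch(ctx, logits):
    """Mean entropy of the softmax predictions; minimizing it encourages confident outputs."""
    with nn.context_scope(ctx):
        p = F.softmax(logits)
        loss = F.mean(F.sum(-p * F.log(p + 1e-8), axis=1))
    return loss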
def main(args): # Settings device_id = args.device_id batch_sizes = [16, 32, 64] batch_size_eval = 64 c, h, w = 3, 32, 32 n_l_train_data = 4000 n_train_data = 50000 n_cls = 10 learning_rate = 1. * 1e-3 n_epoch = 300 act = F.relu iter_epoch = n_train_data / int(np.mean(batch_sizes)) # approximate epoch n_iter = n_epoch * iter_epoch extension_module = args.context # Model (Batch-Stochastic) ctx = extension_context(extension_module, device_id=device_id) ## supervised x_list, y_list, preds, losses_ce = batch_stochastic_supervised_network( ctx, batch_sizes, c, h, w) ## stochastic regularization x0_list, x1_list, _, losses_sr = batch_stochastic_unsupervised_network( ctx, batch_sizes, c, h, w) ## evaluate batch_size_eval, m, h, w = batch_size_eval, c, h, w x_eval = nn.Variable((batch_size_eval, m, h, w)) pred_eval = cnn_model_003(ctx, x_eval, test=True) # Solver with nn.context_scope(ctx): solver = S.Adam(alpha=learning_rate) solver.set_parameters(nn.get_parameters()) # Dataset ## separate dataset home = os.environ.get("HOME") fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz") separator = Separator(n_l_train_data) separator.separate_then_save(fpath) l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz") u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") # data reader data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path, batch_size=batch_sizes[0], n_cls=n_cls, da=True, shape=True) # Training loop print("# Training loop") epoch = 1 st = time.time() acc_prev = 0. iter_ = 0 for i in range(n_iter): idx = np.random.choice(np.arange(0, len(batch_sizes))) idx_u = np.random.choice(np.arange(0, len(batch_sizes))) # Get data bs = batch_sizes[idx] bs_u = batch_sizes[idx_u] x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch(bs) x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch(bs_u) # Set it to the varaibles x_l = x_list[idx] y_l = y_list[idx] x_u0 = x0_list[idx_u] x_u1 = x1_list[idx_u] x_l.d, _ , y_l.d= x_l0_data, x_l1_data, y_l_data x_u0.d, x_u1.d= x_u0_data, x_u1_data # Train loss_ce = losses_ce[idx] loss_sr = losses_sr[idx_u] loss_ce.forward(clear_no_need_grad=True) loss_sr.forward(clear_no_need_grad=True) solver.zero_grad() loss_ce.backward(clear_buffer=True) loss_sr.backward(clear_buffer=True) solver.update() # Evaluate if (i+1) % iter_epoch == 0: # approximate epoch # Get data and set it to the varaibles x_data, y_data = data_reader.get_test_batch() # Evaluation loop ve = 0. iter_val = 0 for k in range(0, len(x_data), batch_size_eval): x_eval.d = get_test_data(x_data, k, batch_size_eval) label = get_test_data(y_data, k, batch_size_eval) pred_eval.forward(clear_buffer=True) ve += categorical_error(pred_eval.d, label) iter_val += 1 msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format( epoch, time.time() - st, (1. - ve / iter_val) * 100) print(msg) st = time.time() epoch +=1
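# The evaluation loops above fetch test minibatches with `get_test_data(data, k, bs)`.
# The helper is defined elsewhere in the repo; from its call sites it only needs to
# return the slice starting at index k. A minimal sketch (illustrative; the real
# helper may also handle the final, shorter batch differently):
def get_test_data_sketch(data, k, batch_size):
    """Return the minibatch data[k:k+batch_size]."""
    return data[k:k + batch_size]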
def main(): """ Main script. Steps: * Get and set context. * Load Dataset * Initialize DataIterator. * Create Networks * Net for Labeled Data * Net for Unlabeled Data * Net for Test Data * Create Solver. * Training Loop. * Test * Training * by Labeled Data * Calculate Cross Entropy Loss * by Unlabeled Data * Estimate Adversarial Direction * Calculate LDS Loss """ args = get_args() # Get context. from nnabla.contrib.context import extension_context extension_module = args.context if args.context is None: extension_module = 'cpu' logger.info("Running in %s" % extension_module) ctx = extension_context(extension_module, device_id=args.device_id) nn.set_default_context(ctx) shape_x = (1, 28, 28) n_h = args.n_units n_y = args.n_class # Load MNist Dataset from mnist_data import MnistDataSource with MnistDataSource(train=True) as d: x_t = d.images t_t = d.labels with MnistDataSource(train=False) as d: x_v = d.images t_v = d.labels x_t = np.array(x_t / 256.0).astype(np.float32) x_t, t_t = x_t[:args.n_train], t_t[:args.n_train] x_v, t_v = x_v[:args.n_valid], t_v[:args.n_valid] # Create Semi-supervised Datasets x_l, t_l, x_u, _ = split_dataset(x_t, t_t, args.n_labeled, args.n_class) x_u = np.r_[x_l, x_u] x_v = np.array(x_v / 256.0).astype(np.float32) # Create DataIterators for datasets of labeled, unlabeled and validation di_l = DataIterator(args.batchsize_l, [x_l, t_l]) di_u = DataIterator(args.batchsize_u, [x_u]) di_v = DataIterator(args.batchsize_v, [x_v, t_v]) # Create networks # feed-forward-net building function def forward(x, test=False): return mlp_net(x, n_h, n_y, test) # Net for learning labeled data xl = nn.Variable((args.batchsize_l,) + shape_x, need_grad=False) hl = forward(xl, test=False) tl = nn.Variable((args.batchsize_l, 1), need_grad=False) loss_l = F.mean(F.softmax_cross_entropy(hl, tl)) # Net for learning unlabeled data xu = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False) r = nn.Variable((args.batchsize_u,) + shape_x, need_grad=True) eps = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False) loss_u, yu = vat(xu, r, eps, forward, distance) # Net for evaluating valiation data xv = nn.Variable((args.batchsize_v,) + shape_x, need_grad=False) hv = forward(xv, test=True) tv = nn.Variable((args.batchsize_v, 1), need_grad=False) # Create solver solver = S.Adam(args.learning_rate) solver.set_parameters(nn.get_parameters()) # Monitor trainig and validation stats. import nnabla.monitor as M monitor = M.Monitor(args.model_save_path) monitor_verr = M.MonitorSeries("Test error", monitor, interval=240) monitor_time = M.MonitorTimeElapsed("Elapsed time", monitor, interval=240) # Training Loop. 
t0 = time.time() for i in range(args.max_iter): # Validation Test if i % args.val_interval == 0: n_error = calc_validation_error( di_v, xv, tv, hv, args.val_iter) monitor_verr.add(i, n_error) ################################# ## Training by Labeled Data ##### ################################# # input minibatch of labeled data into variables xl.d, tl.d = di_l.next() # initialize gradients solver.zero_grad() # forward, backward and update loss_l.forward(clear_no_need_grad=True) loss_l.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() ################################# ## Training by Unlabeled Data ### ################################# # input minibatch of unlabeled data into variables xu.d, = di_u.next() ##### Calculate Adversarial Noise ##### # Sample random noise n = np.random.normal(size=xu.shape).astype(np.float32) # Normalize noise vector and input to variable r.d = get_direction(n) # Set xi, the power-method scaling parameter. eps.data.fill(args.xi_for_vat) # Calculate y without noise, only once. yu.forward(clear_buffer=True) # Do power method iteration for k in range(args.n_iter_for_power_method): # Initialize gradient to receive value r.grad.zero() # forward, backward, without update loss_u.forward(clear_no_need_grad=True) loss_u.backward(clear_buffer=True) # Normalize gradinet vector and input to variable r.d = get_direction(r.g) ##### Calculate loss for unlabeled data ##### # Clear remained gradients solver.zero_grad() # Set epsilon, the adversarial noise scaling parameter. eps.data.fill(args.eps_for_vat) # forward, backward and update loss_u.forward(clear_no_need_grad=True) loss_u.backward(clear_buffer=True) solver.weight_decay(args.weight_decay) solver.update() ##### Learning rate update ##### if i % args.iter_per_epoch == 0: solver.set_learning_rate( solver.learning_rate() * args.learning_rate_decay) monitor_time.add(i) # Evaluate the final model by the error rate with validation dataset valid_error = calc_validation_error(di_v, xv, tv, hv, args.val_iter) monitor_verr.add(i, valid_error) monitor_time.add(i) # Save the model. nnp_file = os.path.join( args.model_save_path, 'vat_%06d.nnp' % args.max_iter) runtime_contents = { 'networks': [ {'name': 'Validation', 'batch_size': args.batchsize_v, 'outputs': {'y': hv}, 'names': {'x': xv}}], 'executors': [ {'name': 'Runtime', 'network': 'Validation', 'data': ['x'], 'output': ['y']}]} save.save(nnp_file, runtime_contents) from cpp_forward_check import check_cpp_forward check_cpp_forward(args.model_save_path, [xv.d], [xv], hv, nnp_file)
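# `get_direction` above turns a raw noise (or gradient) tensor into a unit direction
# per sample, which is the normalization step of VAT's power iteration. The helper is
# defined elsewhere in the repo; a minimal numpy sketch of per-sample L2 normalization
# (illustrative only):
import numpy as np

def get_direction_sketch(d):
    """Normalize each sample of a (batch, ...) array to unit L2 norm."""
    flat = d.reshape(d.shape[0], -1)
    norms = np.sqrt((flat * flat).sum(axis=1, keepdims=True)) + 1e-12
    return (flat / norms).reshape(d.shape)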
def main(args): # Settings device_id = args.device_id batch_size = args.batch_size batch_size_eval = args.batch_size_eval n_l_train_data = 4000 n_train_data = 50000 n_cls = 10 learning_rate = 1. * 1e-3 n_epoch = 300 act = F.relu iter_epoch = int(n_train_data / batch_size) n_iter = n_epoch * iter_epoch extension_module = args.context # Model views = [global_view, spatial_view, feature_view] ## supervised batch_size, m, h, w = batch_size, 3, 32, 32 ctx = extension_context(extension_module, device_id=device_id) x_l = nn.Variable((batch_size, m, h, w)) y_l = nn.Variable((batch_size, 1)) feature = cnn_model_003(ctx, x_l) loss_supervised = [] for view in views: pred = view(ctx, feature) loss_ce = ce_loss(ctx, pred, y_l) loss_er = er_loss(ctx, pred) loss_supervised += [loss_ce, loss_er] loss_supervised = reduce(lambda x, y: x+y, loss_supervised) ## cross view loss x_u0 = nn.Variable((batch_size, m, h, w)) x_u1 = nn.Variable((batch_size, m, h, w)) feature_x_u0 = cnn_model_003(ctx, x_u0) feature_x_u1 = cnn_model_003(ctx, x_u1) pred_x_u0 = [] pred_x_u1 = [] loss_er = [] loss_unsupervised = [] for view in views: pred = view(ctx, feature_x_u0) pred_x_u0 += [pred] loss_er +=[er_loss(ctx, pred)] pred = view(ctx, feature_x_u1) pred_x_u1 += [pred] loss_er += [er_loss(ctx, pred)] for pred_a, pred_b in itertools.product(pred_x_u0, pred_x_u1): # multi-view if pred_a == pred_b: continue loss_unsupervised += [sr_loss(ctx, pred_a, pred_b)] loss_unsupervised = reduce(lambda x, y: x+y, loss_unsupervised) \ + reduce(lambda x, y: x+y, loss_er) ## evaluate batch_size_eval, m, h, w = batch_size, 3, 32, 32 x_eval = nn.Variable((batch_size_eval, m, h, w)) feature_eval = cnn_model_003(ctx, x_eval, test=True) pred_eval = [] for view in views: pred_eval += [view(ctx, feature_eval)] # Solver with nn.context_scope(ctx): solver = S.Adam(alpha=learning_rate) solver.set_parameters(nn.get_parameters()) # Dataset ## separate dataset home = os.environ.get("HOME") fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz") separator = Separator(n_l_train_data) separator.separate_then_save(fpath) l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz") u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") # data reader data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path, batch_size=batch_size, n_cls=n_cls, da=True, shape=True) # Training loop print("# Training loop") epoch = 1 st = time.time() acc_prev = 0. ve_best = 1. save_path_prev = "" for i in range(n_iter): # Get data and set it to the varaibles x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch() x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch() x_l.d, _ , y_l.d= x_l0_data, x_l1_data, y_l_data x_u0.d, x_u1.d= x_u0_data, x_u1_data # Train loss_supervised.forward(clear_no_need_grad=True) loss_unsupervised.forward(clear_no_need_grad=True) solver.zero_grad() loss_supervised.backward(clear_buffer=True) loss_unsupervised.backward(clear_buffer=True) solver.update() # Evaluate if int((i + 1) % iter_epoch) == 0: # Get data and set it to the varaibles x_data, y_data = data_reader.get_test_batch() # Evaluation loop ve = [0., 0., 0.] 
iter_val = 0 for k in range(0, len(x_data), batch_size_eval): x_eval.d = get_test_data(x_data, k, batch_size_eval) label = get_test_data(y_data, k, batch_size_eval) feature_eval.forward(clear_buffer=True) for v in range(len(pred_eval)): pred_eval[v].forward() ve[v] += categorical_error(pred_eval[v].d, label) iter_val += 1 for v, e in enumerate(ve): e /= iter_val msg = "View:{},Epoch:{},ElapsedTime:{},Acc:{:02f}".format( v, epoch, time.time() - st, (1. - e) * 100) print(msg) st = time.time() epoch += 1
def main(args): # Settings device_id = args.device_id batch_size = args.batch_size batch_size_eval = args.batch_size_eval n_l_train_data = 4000 n_train_data = 50000 n_cls = 10 learning_rate = 1. * 1e-3 n_epoch = 300 act = F.relu iter_epoch = n_train_data / batch_size n_iter = n_epoch * iter_epoch extension_module = args.context # Model ## supervised batch_size, m, h, w = batch_size, 3, 32, 32 ctx = extension_context(extension_module, device_id=device_id) x_l = nn.Variable((batch_size, m, h, w)) y_l = nn.Variable((batch_size, 1)) pred, log_var = cnn_model_003(ctx, x_l) one = F.constant(1., log_var.shape) loss_ce = ce_loss_with_uncertainty(ctx, pred, y_l, log_var) reg_sigma = sigma_regularization(ctx, log_var, one) loss_supervised = loss_ce + reg_sigma ## stochastic regularization x_u0 = nn.Variable((batch_size, m, h, w)) x_u1 = nn.Variable((batch_size, m, h, w)) pred_x_u0, log_var0 = cnn_model_003(ctx, x_u0) pred_x_u1, log_var1 = cnn_model_003(ctx, x_u1) loss_sr = sr_loss_with_uncertainty(ctx, pred_x_u0, pred_x_u1, log_var0, log_var1) loss_er0 = er_loss(ctx, pred_x_u0) loss_er1 = er_loss(ctx, pred_x_u1) reg_sigma0 = sigma_regularization(ctx, log_var0, one) reg_sigma1 = sigma_regularization(ctx, log_var1, one) loss_unsupervised = loss_sr + loss_er0 + loss_er1 \ + reg_sigma0 + reg_sigma1 ## evaluate batch_size_eval, m, h, w = batch_size, 3, 32, 32 x_eval = nn.Variable((batch_size_eval, m, h, w)) pred_eval, _ = cnn_model_003(ctx, x_eval, test=True) # Solver with nn.context_scope(ctx): solver_l= S.Adam(alpha=learning_rate) solver_l.set_parameters(nn.get_parameters()) solver_u= S.Adam(alpha=learning_rate) solver_u.set_parameters(nn.get_parameters()) # Dataset ## separate dataset home = os.environ.get("HOME") fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz") separator = Separator(n_l_train_data) separator.separate_then_save(fpath) l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz") u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") # data reader data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path, batch_size=batch_size, n_cls=n_cls, da=True, shape=True) # Training loop print("# Training loop") epoch = 1 st = time.time() acc_prev = 0. for i in range(n_iter): # Get data and set it to the varaibles x_l0_data, x_l1_data, y_l_data = data_reader.get_l_train_batch() x_u0_data, x_u1_data, y_u_data = data_reader.get_u_train_batch() x_l.d, _ , y_l.d= x_l0_data, x_l1_data, y_l_data x_u0.d, x_u1.d= x_u0_data, x_u1_data # Train ## for supervised loss loss_supervised.forward(clear_no_need_grad=True) solver_l.zero_grad() loss_supervised.backward(clear_buffer=True) solver_l.update() ## for unsupervised loss loss_unsupervised.forward(clear_no_need_grad=True) solver_u.zero_grad() loss_unsupervised.backward(clear_buffer=True) solver_u.update() # Evaluate if (i+1) % iter_epoch == 0: # Get data and set it to the varaibles x_data, y_data = data_reader.get_test_batch() # Evaluation loop ve = 0. iter_val = 0 for k in range(0, len(x_data), batch_size_eval): x_eval.d = get_test_data(x_data, k, batch_size_eval) label = get_test_data(y_data, k, batch_size_eval) pred_eval.forward(clear_buffer=True) ve += categorical_error(pred_eval.d, label) iter_val += 1 msg = "Epoch:{},ElapsedTime:{},Acc:{:02f}".format( epoch, time.time() - st, (1. - ve / iter_val) * 100) print(msg) st = time.time() epoch +=1
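# `sigma_regularization(ctx, log_var, one)` above regularizes the predicted
# log-variance against the constant `one` tensor built from its own shape. The exact
# penalty is defined elsewhere in the repo; purely as an illustration, one plausible
# form keeps the predicted sigma = exp(log_var) close to 1:
import nnabla as nn
import nnabla.functions as F

def sigma_regularization_sketch(ctx, log_var, one):
    """Penalize deviation of the predicted sigma from 1 (one possible formulation)."""
    with nn.context_scope(ctx):
        sigma = F.exp(log_var)
        reg = F.mean(F.squared_error(sigma, one))
    return reg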
def main(args): # Settings device_id = args.device_id batch_size = 100 batch_size_eval = 100 n_l_train_data = 4000 n_train_data = 50000 n_cls = 10 learning_rate = 1. * 1e-3 n_epoch = 50 act = F.relu iter_epoch = n_train_data / batch_size n_iter = n_epoch * iter_epoch extension_module = args.context n_images = args.n_images fname, _ = os.path.splitext(__file__) dpath = "./{}_images_{}".format(fname, int(time.time())) # Model batch_size, m, h, w = batch_size, 3, 32, 32 ctx = extension_context(extension_module, device_id=device_id) x_u = nn.Variable((batch_size, m, h, w)) pred = cnn_ae_model_001(ctx, x_u) loss_recon = recon_loss(ctx, pred, x_u) ## evaluate batch_size_eval, m, h, w = batch_size, 3, 32, 32 x_eval = nn.Variable((batch_size_eval, m, h, w)) pred_eval = cnn_ae_model_001(ctx, x_eval, test=True) # Solver with nn.context_scope(ctx): solver = S.Adam(alpha=learning_rate) solver.set_parameters(nn.get_parameters()) # Dataset ## separate dataset home = os.environ.get("HOME") fpath = os.path.join(home, "datasets/cifar10/cifar-10.npz") separator = Separator(n_l_train_data) separator.separate_then_save(fpath) l_train_path = os.path.join(home, "datasets/cifar10/l_cifar-10.npz") u_train_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") test_path = os.path.join(home, "datasets/cifar10/cifar-10.npz") # data reader data_reader = Cifar10DataReader(l_train_path, u_train_path, test_path, batch_size=batch_size, n_cls=n_cls, da=True, shape=True) # Training loop print("# Training loop") epoch = 1 st = time.time() acc_prev = 0. for i in range(n_iter): # Get data and set it to the varaibles x_u_data, _, _ = data_reader.get_u_train_batch() x_u.d = x_u_data # Train loss_recon.forward(clear_no_need_grad=True) solver.zero_grad() loss_recon.backward(clear_buffer=True) solver.update() # Evaluate if (i+1) % iter_epoch == 0: # Get data and forward x_data, y_data = data_reader.get_test_batch() pred_eval.forward(clear_buffer=True) images = pred_eval.d # Save n images if not os.path.exists(dpath): os.makedirs(dpath) save_images(dpath, epoch, images[:n_images]) fpath = os.path.join(dpath, "epoch_{:05d}.h5".format(epoch)) nn.save_parameters(fpath) st = time.time() epoch +=1
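# `recon_loss(ctx, pred, x_u)` above is the autoencoder reconstruction objective.
# It is defined elsewhere in the repo; a minimal sketch of the usual mean squared
# reconstruction error (illustrative only):
import nnabla as nn
import nnabla.functions as F

def recon_loss_sketch(ctx, reconstruction, target):
    """Mean squared error between the reconstructed and the original image batch."""
    with nn.context_scope(ctx):
        loss = F.mean(F.squared_error(reconstruction, target))
    return loss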