def test_weight_clipping(w_clip, optimizer):
    """Check that an optimizer built with ``weight_clip_value`` keeps weights clipped.

    The weights are initialised outside ``[-w_clip, w_clip]``; after every
    optimizer step each weight must lie inside that interval, up to a small
    tolerance proportional to ``w_clip``.
    """
    clipping_opt = optimizer(0.1, weight_clip_value=w_clip)

    # Data placeholders
    C = ng.make_axis(20)
    N = ng.make_axis(32, name='N')
    data = ng.placeholder([C, N])
    target = ng.placeholder([N])

    # Parameters updated by the optimizer under test; the initial values are
    # scaled by 10 * w_clip so they start beyond the clip range.
    initial_W = 10 * w_clip * (2 * np.random.rand(C.length) - 1)
    W = ng.variable([C], initial_value=initial_W)

    # Guard against an unlucky draw that would make the test vacuous.
    assert np.max(initial_W) > w_clip
    assert np.min(initial_W) < -w_clip

    # Op graph
    # NOTE(review): the keyword is spelled `out_axis` here, while other call
    # sites in this file use `out_axes`; left unchanged - confirm which is
    # intended before touching it, since it may affect the cost's axes.
    cost = ng.sum(target - ng.dot(W, data), out_axis=())
    updated_weights = ng.sequential([clipping_opt(cost), W])

    tolerance = w_clip * 1e-3
    upper_bound = w_clip + tolerance
    lower_bound = -w_clip - tolerance

    # Build the computation and run the "train" loop
    with ExecutorFactory() as ex:
        train_step = ex.transformer.computation(updated_weights, data, target)
        batches = data_generator(20, C.length, N.length)
        for x, y in batches:
            stepped_W = train_step(x, y)  # weights after one optimizer step
            assert np.max(stepped_W) < upper_bound
            assert np.min(stepped_W) > lower_bound
def test_variable():
    """Round-trip a variable's value through ``Saver`` save / restore.

    A variable is assigned a random value and saved under one transformer.
    Under a second transformer it is overwritten with a different random value
    and then restored; the restored value must equal the saved one.

    The checkpoint file is removed in a ``finally`` clause so that a failure
    anywhere between save and restore does not leave ``test_variable.npz``
    behind in the working directory.
    """
    checkpoint_file = "test_variable.npz"

    input_axes = ng.make_axes([ng.make_axis(10), ng.make_axis(3)])
    var = ng.variable(axes=input_axes)
    assign_val = np.random.rand(10, 3)
    var_assign = ng.AssignOp(tensor=var, val=assign_val)
    var_seq = ng.sequential([var_assign, var])
    var_comp = ng.computation(var_seq, "all")
    results = dict()
    weight_saver = Saver()

    try:
        # Assign, read back, and save the value.
        with closing(ngt.make_transformer()) as transformer:
            var_func = transformer.add_computation(var_comp)
            weight_saver.setup_save(transformer=transformer, computation=var_comp)
            results['saved'] = var_func().copy()
            weight_saver.save(filename="test_variable")

        # Overwrite the variable under a fresh transformer, then restore it.
        reassign_val = np.random.rand(10, 3)
        var_reassign = ng.AssignOp(tensor=var, val=reassign_val)
        var_recomp = ng.computation(var_reassign, "all")
        var_read = ng.computation(var, "all")
        with closing(ngt.make_transformer()) as restore_transformer:
            var_recompfunc = restore_transformer.add_computation(var_recomp)
            weight_saver.setup_restore(transformer=restore_transformer,
                                       computation=var_recomp,
                                       filename="test_variable")
            var_readfunc = restore_transformer.add_computation(var_read)
            var_recompfunc()
            results['reassigned'] = var_readfunc().copy()
            weight_saver.restore()
            results['restored'] = var_readfunc().copy()
    finally:
        # Clean up even when a step above raised; the existence check keeps a
        # failure before the save from being masked by a missing-file error.
        if os.path.exists(checkpoint_file):
            os.remove(checkpoint_file)

    assert np.allclose(results['saved'], assign_val, atol=0)
    assert np.allclose(results['reassigned'], reassign_val, atol=0)
    assert np.allclose(results['saved'], results['restored'], atol=0)
def compare_optimizer_variable_select(opt_ng, opt_ref):
    """Compare an ngraph optimizer restricted to one variable against a reference.

    The cost depends on two weight vectors, but only ``W1`` is handed to
    ``opt_ng`` via ``variables=[W1]``. After each step ``W1`` must match the
    reference update ``opt_ref(x, W1)`` and ``W2`` must match its initial value.
    """
    # Data placeholders
    C = ng.make_axis(20)
    N = ng.make_axis(32, name='N')
    data = ng.placeholder([C, N])
    target = ng.placeholder([N])

    # Two parameter vectors; only the first is selected for optimization.
    ref_W1 = np.random.rand(C.length)
    ref_W2 = np.random.rand(C.length)
    W1 = ng.variable([C], initial_value=ref_W1)
    W2 = ng.variable([C], initial_value=ref_W2)

    # Op graph
    # NOTE(review): the keyword is spelled `out_axis` here, while other call
    # sites in this file use `out_axes`; left unchanged - confirm which is
    # intended before touching it, since it may affect the cost's axes.
    residual = target - ng.dot(W1, data)
    cost = ng.sum(residual - ng.dot(W2, data), out_axis=())
    updated_weights = ng.sequential([opt_ng(cost, variables=[W1]), W1])

    # Build the computation and run the "train" loop
    with ExecutorFactory() as ex:
        train_step = ex.transformer.computation([updated_weights, W2], data, target)
        batches = data_generator(20, C.length, N.length)
        for x, y in batches:
            stepped_W1, stepped_W2 = train_step(x, y)  # ngraph optimizer result
            ref_W1 = opt_ref(x, ref_W1)  # reference optimizer result
            ng.testing.assert_allclose(ref_W1, stepped_W1, rtol=1e-3)
            # ref_W2 is never updated, so this checks W2 was left untouched.
            ng.testing.assert_allclose(ref_W2, stepped_W2, rtol=1e-3)
def test_modify_state():
    """Assign ``x + x`` to a variable inside a sequential and read it back."""
    with ExecutorFactory() as ex:
        N = ng.make_axis(3, name='N')
        initial = np.ones((N.length)) * 4
        x = ng.variable([N], initial_value=initial).named('x')

        # Double the variable in place, then return its value.
        double_then_read = ng.sequential([ng.assign(x, x + x), x])
        run = ex.executor(double_then_read)
        observed = run()

        assert np.allclose(initial + initial, observed)
def __call__(self, *args, **kwargs):
    """Set up the step counter and effective learning rate, then delegate.

    While ``self.ops`` is empty, ``beta_1`` / ``beta_2`` are wrapped as float32
    graph constants, ``self.t`` becomes an op that increments a persistent
    scalar counter and yields it, and ``self.ell`` is built as
    ``lrate * sqrt(1 - beta_2 ** t) / (1 - beta_1 ** t)``.
    All arguments are forwarded unchanged to the parent ``__call__``.
    """
    if len(self.ops) == 0:
        self.beta_1 = ng.constant(self.beta_1, dtype=np.float32)
        self.beta_2 = ng.constant(self.beta_2, dtype=np.float32)

        # Persistent scalar counter, bumped by one each time `self.t` is evaluated.
        step_count = ng.persistent_tensor(axes=(), initial_value=0)
        self.t = ng.sequential([ng.assign(step_count, step_count + 1), step_count])

        scaled_lrate = self.lrate * ng.sqrt(1 - self.beta_2 ** self.t)
        self.ell = scaled_lrate / (1 - self.beta_1 ** self.t)

    return super(Adam, self).__call__(*args, **kwargs)
def variable_update(self, variable, grad, scale_factor, weight_clip_value):
    """Build the update ops for one variable using accumulated squared gradients.

    Arguments:
        variable: The variable to update.
        grad: Gradient of the cost with respect to ``variable``.
        scale_factor: Multiplier applied to the gradient step.
        weight_clip_value: Passed to ``clip_weight_value`` for the new weights.

    Returns:
        An ``ng.sequential`` op that first accumulates ``grad ** 2`` into a
        persistent state tensor and then assigns the clipped new value to
        ``variable``.
    """
    clipped_grad = clip_gradient_value(grad, self.gradient_clip_value)

    # Running sum of squared gradients, starting from zero.
    squared_sum = ng.persistent_tensor(axes=clipped_grad.axes, initial_value=0.)
    accumulate = ng.assign(squared_sum, squared_sum + ng.square(clipped_grad))

    step = (scale_factor * self.lrate * clipped_grad) / (ng.sqrt(squared_sum + self.epsilon))
    apply_step = ng.assign(variable,
                           clip_weight_value(variable - step, weight_clip_value))

    return ng.sequential([accumulate, apply_step])
def test_sequential(N):
    """Check evaluation order of ops placed around an assignment in a sequential.

    The assertions expect ``x0`` (listed before the assignment) to be 0,
    ``x1`` (listed after it) to be 4, and the sequential itself to yield the
    value of its last element, ``x0``.
    """
    x = ng.variable([N], initial_value=0)
    before_assign = x + x
    after_assign = x + x
    ordered = ng.sequential([before_assign, ng.assign(x, 2), after_assign, before_assign])

    with ExecutorFactory() as ex:
        run = ex.executor([before_assign, after_assign, ordered])
        before_val, after_val, ordered_val = run()

        assert before_val == 0
        assert after_val == 4
        assert ordered_val == 0
def test_sequential_side(M):
    """Check side-effect assignments inside a sequential across two executions.

    Two persistent scalars are read via ``ng.value_of`` before being
    reassigned inside the sequential. Each execution must report the values
    held before the assignment, the values after it, and the main result
    ``x * 2``. The second execution starts from the state the first left behind.
    """
    x1_np = 2
    x2_np = 3
    b_np = 1
    x_np = np.array([1, 2, 3], dtype=np.float32)

    x = ng.variable([M], initial_value=x_np)
    x1 = ng.persistent_tensor(axes=(), initial_value=x1_np)
    x2 = ng.persistent_tensor(axes=(), initial_value=x2_np)
    x1_vo = ng.value_of(x1)
    x2_vo = ng.value_of(x2)
    b = ng.persistent_tensor(axes=(), initial_value=b_np)

    y = ng.sequential([
        x1_vo,
        x2_vo,
        ng.assign(x1, ng.sum(x, out_axes=()) + x1 * b + (1 - b)),
        ng.assign(x2, ng.mean(x, out_axes=()) + x2 * b + (1 - b)),
        x * 2
    ])

    with ExecutorFactory() as ex:
        main_effect = ex.executor((y, x1_vo, x2_vo, x1, x2))
        current_values = ex.executor((x1, x2))

        for pass_index in range(2):
            (y_val, x1_init_val, x2_init_val,
             x1_final_val, x2_final_val) = main_effect()

            assert np.allclose(y_val, x_np * 2)

            # Values captured before the assignments ran.
            assert np.allclose(x1_init_val, x1_np)
            assert np.allclose(x2_init_val, x2_np)

            # Mirror the graph's assignments in NumPy; these carry into the next pass.
            x1_np = np.sum(x_np) + x1_np * b_np + (1 - b_np)
            x2_np = np.mean(x_np) + x2_np * b_np + (1 - b_np)
            assert np.allclose(x1_final_val, x1_np)
            assert np.allclose(x2_final_val, x2_np)

            if pass_index == 0:
                # After the first pass only, re-read the state through a
                # separate executor.
                x1_val, x2_val = current_values()
                assert np.allclose(x1_val, x1_np)
                assert np.allclose(x2_val, x2_np)
def variable_update(self, variable, grad, scale_factor, weight_clip_value):
    """Build the update ops for one variable from running gradient averages.

    Arguments:
        variable: The variable to update.
        grad: Gradient of the cost with respect to ``variable``.
        scale_factor: Multiplier applied to the step.
        weight_clip_value: Passed to ``clip_weight_value`` for the new weights.

    Returns:
        An ``ng.sequential`` op that updates the two running averages and then
        assigns the clipped new value to ``variable``.
    """
    # Running averages of the gradient and of its square, both starting at zero.
    grad_avg = ng.persistent_tensor(axes=grad.axes, initial_value=0.)
    grad_sq_avg = ng.persistent_tensor(axes=grad.axes, initial_value=0.)

    update_grad_avg = ng.assign(
        grad_avg, grad_avg * self.beta_1 + (1 - self.beta_1) * grad)
    update_grad_sq_avg = ng.assign(
        grad_sq_avg, grad_sq_avg * self.beta_2 + (1 - self.beta_2) * grad * grad)

    step = (scale_factor * self.ell * grad_avg) / (ng.sqrt(grad_sq_avg) + self.epsilon)
    update_variable = ng.assign(
        variable, clip_weight_value(variable - step, weight_clip_value))

    return ng.sequential([update_grad_avg, update_grad_sq_avg, update_variable])
def __call__(self, in_obj):
    """Multiply ``in_obj`` by the layer weight, recording the weight as a side effect.

    When ``self.initialized`` is false, a weight variable named ``"W"``
    (initial value 2) and a persistent ``side_effect`` tensor on the same axis
    are created. The returned sequential first copies the weight into
    ``side_effect`` and then yields ``weight * in_obj``.
    """
    if not self.initialized:
        w_axis = ng.make_axis()
        self.weight = ng.variable(
            axes=[w_axis],
            initial_value=2,
            metadata={"label": LABELS["weight"]},
            name="W",
        )
        self.side_effect = ng.persistent_tensor(axes=[w_axis], initial_value=0)

    record_weight = ng.assign(self.side_effect, self.weight)
    scaled_input = self.weight * in_obj
    return ng.sequential([record_weight, scaled_input])
def __call__(self, cost_func, variables=None, subgraph=None, warning=False):
    """
    Build the op that applies one optimization step for `cost_func`.

    Arguments:
        cost_func (Op): The cost function to optimize
        variables (list of variables): List of variables to optimize
        subgraph (SubGraph): A subgraph instance containing all variables to optimize
        warning (bool): If True displays warning message if any variables
                        specified do not participate in batch cost computation

    Returns:
        Op: An `ng.sequential` op that runs every variable update and then
        evaluates to the constant 0.

    Raises:
        ValueError: If both `variables` and `subgraph` are given.

    .. Note::
        If subgraph is provided, the variables to optimize will be taken from it.
        Otherwise, they can be provided explicitly by passing a list as `variables`.
        If neither `subgraph` nor `variables` is provided, the variables to optimize will be
        all trainable variables on which `cost_func` depends.
    """
    batch_cost = ng.sum(cost_func, out_axes=())

    # Without a batch axis, the gradients below are left unscaled.
    batch_axis = cost_func.axes.batch_axis()
    batch_size = 1 if batch_axis is None else batch_axis.length

    # determine variables to optimize
    if subgraph is not None:
        if variables is not None:
            raise ValueError("variables and subgraph cannot both be specified.")
        variables = list(subgraph.variables.values())

    if variables is None:
        variables = batch_cost.variables()
    elif warning is True:
        all_variables = batch_cost.variables()
        selected_variables = all_variables & set(variables)
        if len(selected_variables) < len(variables):
            # `Logger.warn` is a deprecated alias; `warning` is the supported name.
            logger.warning("not all selected variables participate in cost computation")

    # gradients, averaged over the batch
    grads = [ng.deriv(batch_cost, v) / batch_size for v in variables]
    scale_factor = clip_gradient_norm(grads, self.gradient_clip_norm)

    # per-variable update ops
    all_updates = [
        self.variable_update(variable, grad, scale_factor, self.weight_clip_value)
        for variable, grad in zip(variables, grads)
    ]
    updates = ng.doall(all_updates)

    return ng.sequential([updates, 0])
def test_scope_ops(input_placeholder):
    """
    Verify that ops created inside ``scope_ops`` are collected into a subgraph
    exposing the expected inputs, variables, outputs and side effects.
    """
    with scope_ops(name="foo") as subgraph:
        weight = ng.variable(ng.make_axis(), initial_value=1, name="W")
        shifted = weight * input_placeholder + 4

        # Two persistent tensors that are only written to, never read.
        effect_a = ng.persistent_tensor(weight.axes, initial_value=0, name="effect1")
        effect_b = ng.persistent_tensor(weight.axes, initial_value=0, name="effect2")
        ng.sequential([
            ng.assign(effect_a, weight),
            ng.assign(effect_b, weight),
            shifted.named("output"),
        ])

    # Exactly one input: the placeholder handed in by the fixture.
    assert len(subgraph.inputs) == 1
    assert input_placeholder.unscoped_name in subgraph.inputs

    # Exactly one variable: the weight.
    assert len(subgraph.variables) == 1
    assert "W" in subgraph.variables

    # Exactly one output: the op named "output".
    assert len(subgraph.outputs) == 1
    assert "output" in subgraph.outputs

    # Both assignments are counted as side effects.
    assert len(subgraph.side_effects) == 2
def variable_update(self, variable, grad, scale_factor, weight_clip_value):
    """Build the update ops for one variable using a decayed squared-gradient state.

    Arguments:
        variable: The variable to update.
        grad: Gradient of the cost with respect to ``variable``.
        scale_factor: Multiplier applied to the gradient term.
        weight_clip_value: Passed to ``clip_weight_value`` for the new weights.

    Returns:
        An ``ng.sequential`` op that updates the squared-gradient state, then
        the velocity, and finally assigns the clipped new value to ``variable``.
    """
    clipped_grad = clip_gradient_value(grad, self.gradient_clip_value)

    # Decayed average of squared gradients (starts at 1) and the velocity
    # (starts at 0, named after the variable).
    mean_square = ng.persistent_tensor(axes=variable.axes, initial_value=1.)
    velocity = ng.persistent_tensor(axes=variable.axes,
                                    initial_value=0.).named(variable.name + '_vel')

    update_mean_square = ng.assign(
        mean_square,
        self.decay_rate * mean_square + (1.0 - self.decay_rate) * ng.square(clipped_grad))

    # Momentum term plus normalised gradient term, then the weight-decay term.
    next_velocity = velocity * self.momentum + (
        self.lrate * scale_factor * clipped_grad / ng.sqrt(mean_square + self.epsilon))
    next_velocity = next_velocity + self.lrate * self.wdecay * variable
    update_velocity = ng.assign(velocity, next_velocity)

    update_variable = ng.assign(
        variable, clip_weight_value(variable - velocity, weight_clip_value))

    return ng.sequential([update_mean_square, update_velocity, update_variable])
def test_sequential_reduce(M):
    """Check a sequential whose elements include reductions of a shared op.

    Every element is compared against a NumPy reference computed from the
    variable's own value, and the sequential must yield its last element.
    """
    x = ng.variable([M], initial_value=1)
    doubled = x + x
    total = ng.sum(doubled, out_axes=())
    total_plus_doubled = ng.sum(doubled, out_axes=()) + doubled
    chained = ng.sequential([doubled, total, total_plus_doubled])

    with ExecutorFactory() as ex:
        run = ex.executor([doubled, total, total_plus_doubled, chained, x])
        doubled_val, total_val, combined_val, chained_val, x_val = run()

        # NumPy reference built from the value the executor reported for x.
        doubled_np = x_val + x_val
        total_np = np.sum(doubled_np)
        combined_np = total_np + doubled_np

        assert np.allclose(doubled_val, doubled_np)
        assert np.allclose(total_val, total_np)
        assert np.allclose(combined_val, combined_np)
        assert np.allclose(chained_val, combined_np)
def variable_update(self, variable, grad, scale_factor, weight_clip_value):
    """Build the momentum update ops for one variable.

    Arguments:
        variable: The variable to update.
        grad: Gradient of the cost with respect to ``variable``.
        scale_factor: Multiplier applied to the clipped gradient.
        weight_clip_value: Passed to ``clip_weight_value`` for the new weights.

    Returns:
        An ``ng.sequential`` op that first updates the velocity and then
        assigns the clipped new value to ``variable``.
    """
    velocity = ng.persistent_tensor(axes=variable.axes,
                                    initial_value=0.).named(variable.name + '_vel')
    clip_grad = clip_gradient_value(grad, self.gradient_clip_value)

    # Negated, learning-rate-scaled step: gradient term plus weight decay.
    step = - self.lrate * (scale_factor * clip_grad + self.wdecay * variable)

    updates = [ng.assign(velocity, velocity * self.momentum_coef + step)]

    if self.nesterov:
        # Apply the momentum coefficient to the velocity once more and add the step.
        delta = (self.momentum_coef * velocity + step)
    else:
        delta = velocity

    updates.append(ng.assign(variable,
                             clip_weight_value(variable + delta, weight_clip_value)))
    return ng.sequential(updates)
def test_logreg():
    """Compare ngraph logistic-regression training against ``NumpyLogreg``.

    For ``max_iter`` gradient-descent steps the loss, gradient and updated
    weights from the ngraph computation must match the NumPy reference.
    """
    # xs: (C, N), y: (N,)
    xs = np.array([[0.52, 0.88, 0.52, 0.74],
                   [1.12, -1.08, 0.06, -2.49],
                   [0.77, 0.15, -1.3, 1.39]])
    ys = np.array([1, 1, 0, 1])
    max_iter = 10
    alpha = 0.1
    thetas = np.array([0., 0., 0.])

    reference = NumpyLogreg(xs, ys, thetas)

    C, N = ng.make_axis(length=3), ng.make_axis(length=4)

    # input tensors
    xs_v = ng.placeholder((C, N))
    ys_v = ng.placeholder([N])
    alpha_v = ng.placeholder(())
    thetas_var = ng.variable([C], initial_value=thetas)

    # define ops
    ys_pred = ng.sigmoid(ng.dot(thetas_var, xs_v))
    log_likelihoods = ng.log(ys_pred) * ys_v + ng.log(1 - ys_pred) * (1 - ys_v)
    loss = -ng.sum(log_likelihoods, reduction_axes=[N])
    grad_comp = ng.deriv(loss, thetas_var)
    weight_update = ng.sequential([
        ng.assign(thetas_var, thetas_var - alpha_v * grad_comp),
        thetas_var,
    ])

    # transformer
    with ExecutorFactory() as ex:
        train_eval_func = ex.executor([grad_comp, loss, weight_update],
                                      xs_v, ys_v, alpha_v)

        # evaluate
        for _ in range(max_iter):
            grad_np, loss_np, thetas_np = reference.optimize(alpha)
            grad_ng, loss_ng, thetas_ng = train_eval_func(xs, ys, alpha)

            comparisons = (
                (loss_np, loss_ng),
                (grad_np, grad_ng),
                (thetas_np, thetas_ng),
            )
            for expected, actual in comparisons:
                ng.testing.assert_allclose(expected, actual,
                                           rtol=1e-05, atol=1e-05,
                                           transformer_overwrite=False)
# Learning-rate schedule passed to the optimizer below.
# gamma is the cube root of 1/250 and 'schedule' has three entries
# NOTE(review): presumably gamma is applied once per entry, giving an overall
# factor of 1/250 after the last one; confirm the unit (epoch vs. iteration)
# against the optimizer.
lr_schedule = {
    'name': 'schedule',
    'base_lr': 0.01,
    'gamma': (1 / 250.)**(1 / 3.),
    'schedule': [22, 44, 65]
}

# NOTE(review): the second positional argument (0.0) is presumably the
# momentum coefficient; confirm against GradientDescentMomentum's signature.
optimizer = GradientDescentMomentum(lr_schedule,
                                    0.0,
                                    wdecay=0.0005,
                                    iteration=inputs['iteration'])

# Forward pass and multi-class cross-entropy loss against one-hot labels.
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob, ng.one_hot(inputs['label'], axis=ax.Y))

# Listing the optimizer step before the mean loss makes a single op that
# carries both; it is exported under the key 'batch_cost'.
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

# closing() calls close() on the transformer once training has finished.
with closing(ngt.make_transformer()) as transformer:
    train_computation = make_bound_computation(transformer, train_outputs, inputs)

    cbs = make_default_callbacks(transformer=transformer,
                                 output_file=args.output_file,
                                 frequency=args.iter_interval,
                                 train_computation=train_computation,
                                 total_iterations=args.num_iterations,
                                 use_progress_bar=args.progress_bar)

    loop_train(train_set, cbs)