def onnx_to_singa(niter, use_cpu=False):
    if use_cpu:
        print("Using CPU")
        dev = device.get_default_device()
    else:
        print("Using GPU")
        dev = device.create_cuda_gpu()
    model = sonnx.load("mlp.onnx")
    backend = sonnx.prepare(model, device=dev)
    sgd = opt.SGD(0.1)
    inputs = Tensor(
        data=data,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="input",
    )
    target = Tensor(
        data=label,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="target",
    )

    for i in range(100):
        y = backend.run([inputs])[0]
        loss = autograd.softmax_cross_entropy(y, target)
        for p, gp in autograd.backward(loss):
            sgd.update(p, gp)

        loss_rate = tensor.to_numpy(loss)[0]
        accuracy_rate = accuracy(tensor.to_numpy(y), label)
        print("Iter {}, accuracy={}, loss={}".format(i, accuracy_rate, loss_rate))
def call(self, step):
    if self.staircase:
        s = step // self.decay_steps
    else:
        s = step / self.decay_steps
    ret = Tensor((1, ), s.device)
    ret.set_value(self.decay_rate)
    return self.init_value * tensor.pow(ret, s)
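# For reference, the schedule above evaluates init_value * decay_rate ** (step / decay_steps),
# with the exponent floored when `staircase` is set. A plain-Python sketch of the same
# arithmetic (the helper name is illustrative, not part of the SINGA API):
def exponential_decay_value(init_value, decay_rate, decay_steps, step, staircase=False):
    exponent = step // decay_steps if staircase else step / decay_steps
    return init_value * (decay_rate ** exponent)

# e.g. init_value=0.1, decay_rate=0.5, decay_steps=100:
#   step 150 -> 0.1 * 0.5 ** 1.5 ~= 0.0354 (smooth), 0.1 * 0.5 ** 1 = 0.05 (staircase)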
def generate_data(self, dev, num=400):
    # sample 2-D points (x, y) and label each one by whether it falls
    # above or below the line y = 5x + 1
    f = lambda x: (5 * x + 1)
    x = np.random.uniform(-1, 1, num)
    y = f(x) + 2 * np.random.randn(len(x))

    self.label = np.asarray([5 * a + 1 > b for (a, b) in zip(x, y)])
    self.data = np.array([[a, b] for (a, b) in zip(x, y)], dtype=np.float32)
    self.label = self.to_categorical(self.label, 2).astype(np.float32)

    self.inputs = Tensor(data=self.data, device=dev)
    self.target = Tensor(data=self.label, device=dev)
class MLP(module.Module):

    def __init__(self, optimizer):
        super(MLP, self).__init__()

        self.w0 = Tensor(shape=(2, 3), requires_grad=True, stores_grad=True)
        self.b0 = Tensor(shape=(3, ), requires_grad=True, stores_grad=True)
        self.w1 = Tensor(shape=(3, 2), requires_grad=True, stores_grad=True)
        self.b1 = Tensor(shape=(2, ), requires_grad=True, stores_grad=True)

        self.w0.gaussian(0.0, 0.1)
        self.b0.set_value(0.0)
        self.w1.gaussian(0.0, 0.1)
        self.b1.set_value(0.0)

        self.optimizer = optimizer

    def forward(self, inputs):
        x = autograd.matmul(inputs, self.w0)
        x = autograd.add_bias(x, self.b0)
        x = autograd.relu(x)
        x = autograd.matmul(x, self.w1)
        x = autograd.add_bias(x, self.b1)
        return x

    def loss(self, out, target):
        return autograd.softmax_cross_entropy(out, target)

    def optim(self, loss):
        return self.optimizer.backward_and_update(loss)
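# A minimal sketch of how this module might be driven, assuming `inputs` and `target`
# are the tensors prepared by generate_data above and opt.SGD is the optimizer used
# elsewhere in these examples; illustrative only, not the complete training script.
model = MLP(opt.SGD(0.05))
for epoch in range(1000):
    out = model.forward(inputs)       # two linear layers with a ReLU in between
    loss = model.loss(out, target)    # softmax cross-entropy against the labels
    model.optim(loss)                 # backward pass and parameter update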
def __init__(self, lr):
    # init lr (could be a constant scalar or a learning rate scheduler)
    if type(lr) == float or type(lr) == int:
        self.lr = Constant(lr)
    elif isinstance(lr, DecayScheduler):
        self.lr = lr
    else:
        raise TypeError("Wrong learning rate type")

    # init step counter
    # TODO change type to int32
    self.step_counter = Tensor((1, ), dtype=tensor.float32)
    self.step_counter.set_value(0)
    self.lr_value = self.lr(self.step_counter)
class MLP(module.Module):

    def __init__(self, data_size=10, perceptron_size=100, num_classes=10):
        super(MLP, self).__init__()
        self.num_classes = num_classes
        self.dimension = 2

        self.w0 = Tensor(shape=(data_size, perceptron_size),
                         requires_grad=True,
                         stores_grad=True)
        self.w0.gaussian(0.0, 0.1)
        self.b0 = Tensor(shape=(perceptron_size, ),
                         requires_grad=True,
                         stores_grad=True)
        self.b0.set_value(0.0)

        self.w1 = Tensor(shape=(perceptron_size, num_classes),
                         requires_grad=True,
                         stores_grad=True)
        self.w1.gaussian(0.0, 0.1)
        self.b1 = Tensor(shape=(num_classes, ),
                         requires_grad=True,
                         stores_grad=True)
        self.b1.set_value(0.0)

    def forward(self, inputs):
        x = autograd.matmul(inputs, self.w0)
        x = autograd.add_bias(x, self.b0)
        x = autograd.relu(x)
        x = autograd.matmul(x, self.w1)
        x = autograd.add_bias(x, self.b1)
        return x

    def loss(self, out, ty):
        return autograd.softmax_cross_entropy(out, ty)

    def optim(self, loss, dist_option, spars):
        if dist_option == 'fp32':
            self.optimizer.backward_and_update(loss)
        elif dist_option == 'fp16':
            self.optimizer.backward_and_update_half(loss)
        elif dist_option == 'partialUpdate':
            self.optimizer.backward_and_partial_update(loss)
        elif dist_option == 'sparseTopK':
            self.optimizer.backward_and_sparse_update(loss,
                                                      topK=True,
                                                      spars=spars)
        elif dist_option == 'sparseThreshold':
            self.optimizer.backward_and_sparse_update(loss,
                                                      topK=False,
                                                      spars=spars)

    def set_optimizer(self, optimizer):
        self.optimizer = optimizer
def __init__(self, optimizer):
    super(MLP, self).__init__()

    self.w0 = Tensor(shape=(2, 3), requires_grad=True, stores_grad=True)
    self.b0 = Tensor(shape=(3, ), requires_grad=True, stores_grad=True)
    self.w1 = Tensor(shape=(3, 2), requires_grad=True, stores_grad=True)
    self.b1 = Tensor(shape=(2, ), requires_grad=True, stores_grad=True)

    self.w0.gaussian(0.0, 0.1)
    self.b0.set_value(0.0)
    self.w1.gaussian(0.0, 0.1)
    self.b1.set_value(0.0)

    self.optimizer = optimizer
def __init__(self, data_size=10, perceptron_size=100, num_classes=10):
    super(MLP, self).__init__()
    self.num_classes = num_classes
    self.dimension = 2

    self.w0 = Tensor(shape=(data_size, perceptron_size),
                     requires_grad=True,
                     stores_grad=True)
    self.w0.gaussian(0.0, 0.1)
    self.b0 = Tensor(shape=(perceptron_size, ),
                     requires_grad=True,
                     stores_grad=True)
    self.b0.set_value(0.0)

    self.w1 = Tensor(shape=(perceptron_size, num_classes),
                     requires_grad=True,
                     stores_grad=True)
    self.w1.gaussian(0.0, 0.1)
    self.b1 = Tensor(shape=(num_classes, ),
                     requires_grad=True,
                     stores_grad=True)
    self.b1.set_value(0.0)
        num_classes: total number of classes.

    Return
        A binary matrix representation of the input.
    """
    y = np.array(y, dtype="int")
    n = y.shape[0]
    categorical = np.zeros((n, num_classes))
    categorical[np.arange(n), y] = 1
    return categorical


label = to_categorical(label, 2).astype(np.float32)
print("train_data_shape:", data.shape)
print("train_label_shape:", label.shape)

inputs = Tensor(data=data)
target = Tensor(data=label)

w0 = Tensor(shape=(2, 3), requires_grad=True, stores_grad=True)
w0.gaussian(0.0, 0.1)
b0 = Tensor(shape=(1, 3), requires_grad=True, stores_grad=True)
b0.set_value(0.0)

w1 = Tensor(shape=(3, 2), requires_grad=True, stores_grad=True)
w1.gaussian(0.0, 0.1)
b1 = Tensor(shape=(1, 2), requires_grad=True, stores_grad=True)
b1.set_value(0.0)

sgd = optimizer.SGD(0.05)

# training process
for i in range(1001):
        A binary matrix representation of the input.
    '''
    y = np.array(y, dtype='int')
    n = y.shape[0]
    categorical = np.zeros((n, num_classes))
    categorical[np.arange(n), y] = 1
    return categorical


label = to_categorical(label, 2).astype(np.float32)
print('train_data_shape:', data.shape)
print('train_label_shape:', label.shape)

inputs = Tensor(data=data)
target = Tensor(data=label)

w0 = Tensor(shape=(2, 3), requires_grad=True, stores_grad=True)
w0.gaussian(0.0, 0.1)
b0 = Tensor(shape=(1, 3), requires_grad=True, stores_grad=True)
b0.set_value(0.0)

w1 = Tensor(shape=(3, 2), requires_grad=True, stores_grad=True)
w1.gaussian(0.0, 0.1)
b1 = Tensor(shape=(1, 2), requires_grad=True, stores_grad=True)
b1.set_value(0.0)

sgd = optimizer.SGD(0.05)

# training process
for i in range(1001):
    x = autograd.matmul(inputs, w0)
    x = autograd.add_bias(x, b0)
    x = autograd.relu(x)
y = np.array(y, dtype="int") n = y.shape[0] categorical = np.zeros((n, num_classes)) categorical[np.arange(n), y] = 1 return categorical label = to_categorical(label, 2).astype(np.float32) print("train_data_shape:", data.shape) print("train_label_shape:", label.shape) precision = singa_dtype[args.precision] np_precision = np_dtype[args.precision] dev = device.create_cuda_gpu() inputs = Tensor(data=data, device=dev) target = Tensor(data=label, device=dev) inputs = inputs.as_type(precision) target = target.as_type(tensor.int32) w0_np = np.random.normal(0, 0.1, (2, 3)).astype(np_precision) w0 = Tensor(data=w0_np, device=dev, dtype=precision, requires_grad=True, stores_grad=True) b0 = Tensor(shape=(3, ), device=dev, dtype=precision, requires_grad=True,
        num_classes: total number of classes.

    Return
        A binary matrix representation of the input.
    '''
    y = np.array(y, dtype='int')
    n = y.shape[0]
    categorical = np.zeros((n, num_classes))
    categorical[np.arange(n), y] = 1
    return categorical


label = to_categorical(label, 2).astype(np.float32)
print('train_data_shape:', data.shape)
print('train_label_shape:', label.shape)

inputs = Tensor(data=data)
target = Tensor(data=label)

linear1 = autograd.Linear(3, 2)
linear2 = autograd.Linear(2, 2)
linear3 = autograd.Linear(2, 2)

sgd = optimizer.SGD(0.00)

# training process
for i in range(1):
    x = linear1(inputs)
    x = autograd.relu(x)
    x1 = linear2(x)
    x2 = linear3(x)
    x3 = autograd.add(x1, x2)
def singa_to_onnx(niter, use_cpu=False):
    if use_cpu:
        print("Using CPU")
        dev = device.get_default_device()
    else:
        print("Using GPU")
        dev = device.create_cuda_gpu()
    inputs = Tensor(
        data=data,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="input",
    )
    target = Tensor(
        data=label,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="target",
    )

    w0 = Tensor(shape=(2, 3), device=dev, requires_grad=True, stores_grad=True)
    w0.gaussian(0.0, 0.1)
    b0 = Tensor(shape=(3,), device=dev, requires_grad=True, stores_grad=True)
    b0.set_value(0.0)

    w1 = Tensor(shape=(3, 2), device=dev, requires_grad=True, stores_grad=True)
    w1.gaussian(0.0, 0.1)
    b1 = Tensor(shape=(2,), device=dev, requires_grad=True, stores_grad=True)
    b1.set_value(0.0)

    sgd = opt.SGD(0.1)
    # training process
    for i in range(100):
        x = autograd.matmul(inputs, w0)
        x = autograd.add_bias(x, b0)
        x = autograd.relu(x)
        x = autograd.matmul(x, w1)
        x = autograd.add_bias(x, b1)
        loss = autograd.softmax_cross_entropy(x, target)
        for p, gp in autograd.backward(loss):
            sgd.update(p, gp)
        print("training loss = ", tensor.to_numpy(loss)[0])
    sonnx.export([inputs], [x], file_path="mlp.onnx")
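# A sketch of the ONNX round trip using the two functions above: train the MLP and
# export it to mlp.onnx, then reload the file through sonnx and continue training.
# It assumes `data`, `label` and the `accuracy` helper are defined by the surrounding
# example script.
if __name__ == "__main__":
    singa_to_onnx(100)
    onnx_to_singa(100)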
def set_states(self, states):
    self.step_counter = Tensor((1, ))
    self.step_counter.set_value(states['step_counter'])
    self.lr_value = self.lr(self.step_counter)
class Optimizer(object):
    """Base optimizer.

    Args:
        config (Dict): specify the default values of configurable variables.
    """

    def __init__(self, lr, dtype=tensor.float32):
        # init lr (could be a constant scalar or a learning rate scheduler)
        if type(lr) == float or type(lr) == int:
            self.lr = Constant(lr)
        elif isinstance(lr, DecayScheduler):
            self.lr = lr
        else:
            raise TypeError("Wrong learning rate type")

        # init step counter
        self.dtype = dtype
        # TODO change type to int32
        self.step_counter = Tensor((1, ), dtype=tensor.float32)
        self.step_counter.set_value(0)
        self.lr_value = self.lr(self.step_counter)

    def get_states(self):
        # skip DecayScheduler as it does not have persistent states
        return {'step_counter': tensor.to_numpy(self.step_counter)[0]}

    def set_states(self, states):
        self.step_counter = Tensor((1, ))
        self.step_counter.set_value(states['step_counter'])
        self.lr_value = self.lr(self.step_counter)

    def __call__(self, loss):
        self.call(loss)
        self.step()

    def call(self, loss):
        for p, g in autograd.backward(loss):
            if p.name is None:
                p.name = id(p)
            self.apply(p.name, p, g)

    def step(self):
        """Increment the step counter and update the lr."""
        self.step_counter.data += 1
        lr_value = self.lr(self.step_counter)
        self.lr_value.copy_from(lr_value)

    def apply(self, param_name, param_value, param_grad):
        """Performs a single optimization step.

        Args:
            param_name(String): the name of the param
            param_value(Tensor): param values to be updated in-place
            grad(Tensor): param gradients; the values may be updated
                in this function; cannot use it anymore
        """
        raise NotImplementedError

    @deprecated(
        reason=
        "Update is deprecated, use apply() to do update, refer to apply for more details."
    )
    def update(self, param, grad):
        """Update the param values with given gradients.

        Args:
            param(Tensor): param values to be updated in-place
            grad(Tensor): param gradients; the values may be updated
                in this function; do not use it anymore
        """
        if param.name is None:
            param.name = id(param)
        self.apply(param.name, param, grad)

    def device_check(self, *inputs):
        flag = inputs[0].device.graph_enabled()
        inputs[0].device.EnableGraph(False)
        x_device = inputs[0].device
        x_dev_id = x_device.id()
        for var in inputs:
            if var.device.id() != x_dev_id:
                var.to_device(x_device)
        inputs[0].device.EnableGraph(flag)

    @deprecated(
        reason=
        "backward_and_update is deprecated, use __call__() to do update, refer to __call__ for more details."
    )
    def backward_and_update(self, loss):
        """Performs backward propagation from the loss and parameter update.

        From the loss, it performs backward propagation to get the gradients
        and does the parameter update.

        Args:
            loss(Tensor): loss is the objective function of the deep learning model
                optimization, e.g. for classification problem it can be the output of
                the softmax_cross_entropy function.
        """
        self.__call__(loss)
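# A toy subclass sketch showing how apply() plugs into this base class: a plain,
# momentum-free SGD step written against the public tensor API only. It is not the
# actual singa.opt.SGD implementation.
class VanillaSGD(Optimizer):

    def __init__(self, lr=0.1):
        super(VanillaSGD, self).__init__(lr)

    def apply(self, param_name, param_value, param_grad):
        # read the (possibly scheduled) learning rate as a Python float
        lr = tensor.to_numpy(self.lr_value)[0]
        # in-place update: w <- w - lr * g
        param_value -= param_grad * lr

# usage: VanillaSGD(0.05)(loss) runs call() over all gradients, then step()
# to advance the step counter and refresh the learning rate.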
def call(self, step: Tensor) -> Tensor:
    # TODO should be an in-place operator
    ret = Tensor((1, ), step.device)
    ret.set_value(self.init_value)
    return ret