def test_Adam_const_lr(self, dev=cpu_dev):
    """Compare one Adam step at lr=0.1 with a hand-computed reference.

    The parameter tensor is filled with 1.0 and the gradient with 0.1, both
    on ``dev``. The reference follows the update rule:

        m := beta_1 * m + (1 - beta_1) * grad
        v := beta_2 * v + (1 - beta_2) * grad * grad
        m_norm = m / (1 - beta_1 ^ step)
        v_norm = v / (1 - beta_2 ^ step)
        param := param - (lr * m_norm) / (sqrt(v_norm) + epsilon)
    """
    # NOTE(review): graph mode is switched off on cpu_dev even when `dev`
    # is a different device -- confirm this is intended.
    cpu_dev.EnableGraph(False)
    adam = opt.Adam(lr=0.1)

    shape = (2, 3)
    param = tensor.Tensor(shape, device=dev).set_value(1.0)
    grad = tensor.Tensor(shape, device=dev).set_value(0.1)

    # First step starting from zero moments. The literals 0.1 and 0.001 are
    # presumably (1 - beta_1) and (1 - beta_2) for the optimizer defaults,
    # and 1e-8 its epsilon -- verify against opt.Adam.
    first_moment = 0.1 * grad
    second_moment = 0.001 * tensor.square(grad)
    first_moment_hat = first_moment / 0.1
    second_moment_hat = second_moment / 0.001
    update = first_moment_hat / (tensor.sqrt(second_moment_hat) + 1e-8)

    # The reference must be built before apply(), because the comparison
    # below expects `param` itself to hold the updated value afterwards.
    expected = param - 0.1 * update

    adam.apply(param.name, param, grad)
    assertTensorEqual(param, expected, decimal=5)
def test_AdaGrad_const_lr(self, dev=cpu_dev):
    """Compare one AdaGrad step at lr=0.1 with a hand-computed reference.

    The parameter and gradient tensors are both filled with 0.1 on ``dev``.
    The reference follows the update rule:

        history = history + param_grad * param_grad
        param_value = param_value - lr * param_grad / sqrt(history + epsilon)
    """
    # NOTE(review): graph mode is switched off on cpu_dev even when `dev`
    # is a different device -- confirm this is intended.
    cpu_dev.EnableGraph(False)
    adagrad = opt.AdaGrad(lr=0.1)

    shape = (2, 3)
    param = tensor.Tensor(shape, device=dev).set_value(0.1)
    grad = tensor.Tensor(shape, device=dev).set_value(0.1)

    # First step: the accumulated history is just grad squared.
    # 1e-8 is presumably the optimizer's default epsilon -- verify.
    squared_grad = tensor.square(grad)
    denominator = tensor.sqrt(squared_grad + 1e-8)
    update = grad / denominator

    # The reference must be built before apply(), because the comparison
    # below expects `param` itself to hold the updated value afterwards.
    expected = param - 0.1 * update

    adagrad.apply(param.name, param, grad)
    assertTensorEqual(param, expected)
def test_RMSProp_const_lr(self, dev=cpu_dev):
    """Compare one RMSProp step at lr=0.1 with a hand-computed reference.

    The parameter and gradient tensors are both filled with 0.1 on ``dev``.
    The reference follows the update rule:

        running_average = running_average * rho
                          + param_grad * param_grad * (1 - rho)
        param_value = param_value
                      - lr * param_grad / sqrt(running_average + epsilon)
    """
    # NOTE(review): graph mode is switched off on cpu_dev even when `dev`
    # is a different device -- confirm this is intended.
    cpu_dev.EnableGraph(False)
    rmsprop = opt.RMSProp(lr=0.1)

    shape = (2, 3)
    param = tensor.Tensor(shape, device=dev).set_value(0.1)
    grad = tensor.Tensor(shape, device=dev).set_value(0.1)

    # First step from a zero running average. The literal 0.1 is presumably
    # (1 - rho) for the optimizer default, and 1e-8 its epsilon -- verify.
    mean_square = 0.1 * tensor.square(grad)
    denominator = tensor.sqrt(mean_square + 1e-8)
    update = grad / denominator

    # The reference must be built before apply(), because the comparison
    # below expects `param` itself to hold the updated value afterwards.
    expected = param - 0.1 * update

    rmsprop.apply(param.name, param, grad)
    assertTensorEqual(param, expected)
def train(data_file, use_gpu, num_epoch=10, batch_size=100):
    """Train a visible/hidden model with positive/negative phase updates.

    The structure (visible bias, hidden bias, one sampling step per batch)
    looks like an RBM trained with one-step contrastive divergence; that
    reading comes from the variable names -- NOTE(review): confirm.

    Args:
        data_file: passed to ``load_train_data`` to obtain the training and
            validation arrays.
        use_gpu: if true, tensors are moved to ``device.create_cuda_gpu()``;
            otherwise to ``device.get_default_device()``.
        num_epoch: number of passes over the training batches.
        batch_size: rows per mini-batch. Trailing rows that do not fill a
            whole batch are not used for training.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Start intialization............')
    lr = 0.1  # Learning rate
    weight_decay = 0.0002
    hdim = 1000
    vdim = 784

    tweight = tensor.Tensor((vdim, hdim))
    tweight.gaussian(0.0, 0.1)
    tvbias = tensor.from_numpy(np.zeros(vdim, dtype=np.float32))
    thbias = tensor.from_numpy(np.zeros(hdim, dtype=np.float32))
    # Only one optimizer is built; an earlier momentum=0.8 instance was
    # overwritten before use and has been dropped.
    opt = optimizer.SGD(momentum=0.5, weight_decay=weight_decay)

    print('Loading data ..................')
    train_x, valid_x = load_train_data(data_file)

    if use_gpu:
        dev = device.create_cuda_gpu()
    else:
        dev = device.get_default_device()

    for t in [tweight, tvbias, thbias]:
        t.to_device(dev)

    # Floor division keeps the batch count an int on Python 3 as well.
    num_train_batch = train_x.shape[0] // batch_size
    print("num_train_batch = %d " % (num_train_batch))
    for epoch in range(num_epoch):
        trainerrorsum = 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            # positive phase
            tdata = tensor.from_numpy(
                train_x[(b * batch_size):((b + 1) * batch_size), :])
            tdata.to_device(dev)
            tposhidprob = tensor.mult(tdata, tweight)
            tposhidprob.add_row(thbias)
            tposhidprob = tensor.sigmoid(tposhidprob)
            tposhidrandom = tensor.Tensor(tposhidprob.shape, dev)
            tposhidrandom.uniform(0.0, 1.0)
            tposhidsample = tensor.gt(tposhidprob, tposhidrandom)

            # negative phase
            tnegdata = tensor.mult(tposhidsample, tweight.T())
            tnegdata.add_row(tvbias)
            tnegdata = tensor.sigmoid(tnegdata)

            tneghidprob = tensor.mult(tnegdata, tweight)
            tneghidprob.add_row(thbias)
            tneghidprob = tensor.sigmoid(tneghidprob)
            error = tensor.sum(tensor.square((tdata - tnegdata)))
            trainerrorsum = error + trainerrorsum

            # Negative-phase statistics minus positive-phase statistics.
            tgweight = (tensor.mult(tnegdata.T(), tneghidprob)
                        - tensor.mult(tdata.T(), tposhidprob))
            tgvbias = tensor.sum(tnegdata, 0) - tensor.sum(tdata, 0)
            tghbias = tensor.sum(tneghidprob, 0) - tensor.sum(tposhidprob, 0)

            opt.apply_with_lr(epoch, lr / batch_size, tgweight, tweight, 'w')
            opt.apply_with_lr(epoch, lr / batch_size, tgvbias, tvbias, 'vb')
            opt.apply_with_lr(epoch, lr / batch_size, tghbias, thbias, 'hb')

        print('training errorsum = %f' % (trainerrorsum))

        # Reconstruction error on the validation set after each epoch.
        tvaliddata = tensor.from_numpy(valid_x)
        tvaliddata.to_device(dev)
        tvalidposhidprob = tensor.mult(tvaliddata, tweight)
        tvalidposhidprob.add_row(thbias)
        tvalidposhidprob = tensor.sigmoid(tvalidposhidprob)
        tvalidposhidrandom = tensor.Tensor(tvalidposhidprob.shape, dev)
        # Draw the noise with the same Tensor.uniform(0.0, 1.0) call as the
        # training phase. initializer.uniform's positional arguments are not
        # (low, high) in every release -- NOTE(review): confirm for the
        # version in use.
        tvalidposhidrandom.uniform(0.0, 1.0)
        tvalidposhidsample = tensor.gt(tvalidposhidprob, tvalidposhidrandom)

        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.T())
        tvalidnegdata.add_row(tvbias)
        tvalidnegdata = tensor.sigmoid(tvalidnegdata)

        validerrorsum = tensor.sum(tensor.square((tvaliddata - tvalidnegdata)))
        print('valid errorsum = %f' % (validerrorsum))
def train(data_file, use_gpu, num_epoch=10, batch_size=100):
    """Train a visible/hidden model with positive/negative phase updates.

    The structure (visible bias, hidden bias, one sampling step per batch)
    looks like an RBM trained with one-step contrastive divergence; that
    reading comes from the variable names -- NOTE(review): confirm.

    After every epoch the average squared reconstruction error is printed
    for the training data and for the validation data.

    Args:
        data_file: passed to ``load_train_data`` to obtain the training and
            validation arrays.
        use_gpu: if true, tensors are moved to ``device.create_cuda_gpu()``;
            otherwise to ``device.get_default_device()``.
        num_epoch: number of passes over the training batches.
        batch_size: rows per mini-batch. Trailing rows that do not fill a
            whole batch are not used for training.
    """
    print('Start intialization............')
    lr = 0.1  # Learning rate
    weight_decay = 0.0002
    hdim = 1000
    vdim = 784

    tweight = tensor.Tensor((vdim, hdim))
    tweight.gaussian(0.0, 0.1)
    tvbias = tensor.from_numpy(np.zeros(vdim, dtype=np.float32))
    thbias = tensor.from_numpy(np.zeros(hdim, dtype=np.float32))
    opt = optimizer.SGD(momentum=0.5, weight_decay=weight_decay)

    print('Loading data ..................')
    train_x, valid_x = load_train_data(data_file)

    if use_gpu:
        dev = device.create_cuda_gpu()
    else:
        dev = device.get_default_device()

    for t in [tweight, tvbias, thbias]:
        t.to_device(dev)

    num_train_batch = train_x.shape[0] // batch_size
    print("num_train_batch = %d " % (num_train_batch))
    for epoch in range(num_epoch):
        trainerrorsum = 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            # positive phase
            tdata = tensor.from_numpy(
                train_x[(b * batch_size):((b + 1) * batch_size), :])
            tdata.to_device(dev)
            tposhidprob = tensor.mult(tdata, tweight)
            tposhidprob = tposhidprob + thbias
            tposhidprob = tensor.sigmoid(tposhidprob)
            tposhidrandom = tensor.Tensor(tposhidprob.shape, dev)
            tposhidrandom.uniform(0.0, 1.0)
            tposhidsample = tensor.gt(tposhidprob, tposhidrandom)

            # negative phase
            tnegdata = tensor.mult(tposhidsample, tweight.T())
            tnegdata = tnegdata + tvbias
            tnegdata = tensor.sigmoid(tnegdata)

            tneghidprob = tensor.mult(tnegdata, tweight)
            tneghidprob = tneghidprob + thbias
            tneghidprob = tensor.sigmoid(tneghidprob)
            error = tensor.sum(tensor.square((tdata - tnegdata)))
            trainerrorsum = error + trainerrorsum

            # Negative-phase statistics minus positive-phase statistics.
            tgweight = (tensor.mult(tnegdata.T(), tneghidprob)
                        - tensor.mult(tdata.T(), tposhidprob))
            tgvbias = tensor.sum(tnegdata, 0) - tensor.sum(tdata, 0)
            tghbias = tensor.sum(tneghidprob, 0) - tensor.sum(tposhidprob, 0)

            opt.apply_with_lr(epoch, lr / batch_size, tgweight, tweight, 'w')
            opt.apply_with_lr(epoch, lr / batch_size, tgvbias, tvbias, 'vb')
            opt.apply_with_lr(epoch, lr / batch_size, tghbias, thbias, 'hb')

        # The average divides by every training row, including any trailing
        # rows that were not part of a full batch.
        print('training erroraverage = %f' %
              (tensor.to_numpy(trainerrorsum) / train_x.shape[0]))

        # Reconstruction error on the validation set after each epoch.
        tvaliddata = tensor.from_numpy(valid_x)
        tvaliddata.to_device(dev)
        tvalidposhidprob = tensor.mult(tvaliddata, tweight)
        tvalidposhidprob = tvalidposhidprob + thbias
        tvalidposhidprob = tensor.sigmoid(tvalidposhidprob)
        tvalidposhidrandom = tensor.Tensor(tvalidposhidprob.shape, dev)
        # Draw the noise with the same Tensor.uniform(0.0, 1.0) call as the
        # training phase. initializer.uniform's positional arguments are not
        # (low, high) in every release -- NOTE(review): confirm for the
        # version in use.
        tvalidposhidrandom.uniform(0.0, 1.0)
        tvalidposhidsample = tensor.gt(tvalidposhidprob, tvalidposhidrandom)

        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.T())
        tvalidnegdata = tvalidnegdata + tvbias
        tvalidnegdata = tensor.sigmoid(tvalidnegdata)

        validerrorsum = tensor.sum(tensor.square((tvaliddata - tvalidnegdata)))
        print('valid erroraverage = %f' %
              (tensor.to_numpy(validerrorsum) / valid_x.shape[0]))