def train(model, samples, label):
    num_layers = 9  # acts/sens are over-allocated; only indices 0..5 are used
    num_samples = samples.shape[-1]
    fc_shape = [512, num_samples]
    acts = [None] * num_layers
    sens = [None] * num_layers
    weightgrad = [None] * len(model.weights)
    biasgrad = [None] * len(model.bias)
    # feed-forward pass
    acts[0] = samples
    acts[1] = ele.relu(model.convs[0].ff(acts[0], model.weights[0], model.bias[0]))
    acts[2] = model.poolings[0].ff(acts[1])
    acts[3] = ele.relu(model.convs[1].ff(acts[2], model.weights[1], model.bias[1]))
    acts[4] = model.poolings[1].ff(acts[3])
    acts[5] = model.weights[2] * acts[4].reshape(fc_shape) + model.bias[2]
    out = conv.softmax(acts[5], conv.soft_op.instance)
    # back-propagate sensitivities
    sens[5] = out - label
    sens[4] = (model.weights[2].trans() * sens[5]).reshape(acts[4].shape)
    sens[3] = ele.relu_back(model.poolings[1].bp(sens[4], acts[4], acts[3]), acts[3])
    sens[2] = model.convs[1].bp(sens[3], model.weights[1])
    sens[1] = ele.relu_back(model.poolings[0].bp(sens[2], acts[2], acts[1]), acts[1])
    # parameter gradients
    weightgrad[2] = sens[5] * acts[4].reshape(fc_shape).trans()
    biasgrad[2] = sens[5].sum(1)
    weightgrad[1] = model.convs[1].weight_grad(sens[3], acts[2])
    biasgrad[1] = model.convs[1].bias_grad(sens[3])
    weightgrad[0] = model.convs[0].weight_grad(sens[1], acts[0])
    biasgrad[0] = model.convs[0].bias_grad(sens[1])
    return (out, weightgrad, biasgrad)
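# A minimal sketch, not from the original source: how the (out, weightgrad,
# biasgrad) triple returned by train() might be consumed by a plain SGD update.
# `model`, `minibatches`, and the learning rate `lr` are assumed names; scaling
# by num_samples mirrors the MLP training loops further below.
def sgd_epoch(model, minibatches, lr=0.01):
    for (samples, label) in minibatches:
        (out, weightgrad, biasgrad) = train(model, samples, label)
        num_samples = samples.shape[-1]
        for i in range(len(model.weights)):
            model.weights[i] -= (lr / num_samples) * weightgrad[i]
            model.bias[i] -= (lr / num_samples) * biasgrad[i]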
def bpprop(model, samples, label):
    num_layers = 6
    num_samples = samples.shape[-1]
    fc_shape = [512, num_samples]
    acts = [None] * num_layers
    errs = [None] * num_layers
    weightgrad = [None] * len(model.weights)
    biasgrad = [None] * len(model.bias)
    # feed-forward pass
    acts[0] = samples
    acts[1] = ele.relu(model.convs[0].ff(acts[0], model.weights[0], model.bias[0]))
    acts[2] = model.poolings[0].ff(acts[1])
    acts[3] = ele.relu(model.convs[1].ff(acts[2], model.weights[1], model.bias[1]))
    acts[4] = model.poolings[1].ff(acts[3])
    acts[5] = model.weights[2] * acts[4].reshape(fc_shape) + model.bias[2]
    out = conv.softmax(acts[5], conv.soft_op.instance)
    # back-propagate errors
    errs[5] = out - label
    errs[4] = (model.weights[2].trans() * errs[5]).reshape(acts[4].shape)
    errs[3] = ele.relu_back(model.poolings[1].bp(errs[4], acts[4], acts[3]), acts[3])
    errs[2] = model.convs[1].bp(errs[3], acts[2], model.weights[1])
    errs[1] = ele.relu_back(model.poolings[0].bp(errs[2], acts[2], acts[1]), acts[1])
    # parameter gradients
    weightgrad[2] = errs[5] * acts[4].reshape(fc_shape).trans()
    biasgrad[2] = errs[5].sum(1)
    weightgrad[1] = model.convs[1].weight_grad(errs[3], acts[2], model.weights[1])
    biasgrad[1] = model.convs[1].bias_grad(errs[3])
    weightgrad[0] = model.convs[0].weight_grad(errs[1], acts[0], model.weights[0])
    biasgrad[0] = model.convs[0].bias_grad(errs[1])
    return (out, weightgrad, biasgrad)
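# Illustrative numpy-only sanity check, not part of the source: with a softmax
# output and cross-entropy loss, the output-layer error signal reduces to
# `out - label`, which is exactly where train() and bpprop() start their
# backward sweeps. A central-difference check confirms the identity.
import numpy as np

def softmax_np(z):
    e = np.exp(z - np.max(z, axis=0, keepdims=True))
    return e / np.sum(e, axis=0, keepdims=True)

def loss_np(z, label):
    return -np.sum(label * np.log(softmax_np(z)))

z = np.random.randn(10, 4)            # 10 classes, 4 samples (columns)
label = np.eye(10)[:, [3, 1, 4, 0]]   # one-hot targets for the 4 samples
eps = 1e-5
num_grad = np.zeros_like(z)
for i in range(z.shape[0]):
    for j in range(z.shape[1]):
        zp = z.copy(); zp[i, j] += eps
        zm = z.copy(); zm[i, j] -= eps
        num_grad[i, j] = (loss_np(zp, label) - loss_np(zm, label)) / (2 * eps)
assert np.allclose(num_grad, softmax_np(z) - label, atol=1e-6)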
def forward(self, from_btm, to_top, phase):
    to_top[self.top_names[0]] = co.softmax(from_btm[self.btm_names[0]], co.soft_op.instance)
    self.ff_y = to_top[self.top_names[0]]
    # turn the label into one-hot matrix form
    nplabel = np.zeros([self.ff_y.shape[1], self.ff_y.shape[0]], dtype=np.float32)
    self.strlabel = from_btm[self.btm_names[1]]
    for i in range(len(self.strlabel)):
        nplabel[i, self.strlabel[i]] = 1
    self.y = owl.from_numpy(nplabel)
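# Standalone numpy illustration, mirroring the loop in forward() above: the
# integer class index of sample i becomes a 1 in row i of the label matrix
# (built samples-by-classes, the transpose of ff_y's classes-by-samples layout).
import numpy as np

strlabel = [2, 0, 1]                                      # class index per sample
nplabel = np.zeros([len(strlabel), 4], dtype=np.float32)  # 3 samples, 4 classes
for i in range(len(strlabel)):
    nplabel[i, strlabel[i]] = 1
# nplabel == [[0, 0, 1, 0],
#             [1, 0, 0, 0],
#             [0, 1, 0, 0]]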
def run(self):
    (train_data, test_data) = mnist_io.load_mb_from_mat(self.data_file, self.mb_size)
    np.set_printoptions(linewidth=200)
    num_test_samples = test_data[0].shape[0]
    (test_samples, test_labels) = map(lambda npdata: owl.from_numpy(npdata), test_data)
    count = 1
    owl.set_device(self.gpu)
    for epoch in range(self.num_epochs):
        print '---Start epoch #%d' % epoch
        # train
        for (mb_samples, mb_labels) in train_data:
            num_samples = mb_samples.shape[0]
            a1 = owl.from_numpy(mb_samples)
            target = owl.from_numpy(mb_labels)
            # ff
            a2 = ele.relu(self.w1 * a1 + self.b1)
            a3 = self.w2 * a2 + self.b2
            # softmax & error
            out = co.softmax(a3)
            s3 = out - target
            # bp
            s2 = self.w2.trans() * s3
            s2 = ele.relu_back(s2, a2)
            # grad
            gw1 = s2 * a1.trans() / num_samples
            gb1 = s2.sum(1) / num_samples
            gw2 = s3 * a2.trans() / num_samples
            gb2 = s3.sum(1) / num_samples
            # update
            self.w1 -= self.eps_w * gw1
            self.w2 -= self.eps_w * gw2
            self.b1 -= self.eps_b * gb1
            self.b2 -= self.eps_b * gb2
            if count % 40 == 0:
                correct = out.argmax(0) - target.argmax(0)
                val = correct.to_numpy()
                print 'Training error:', float(np.count_nonzero(val)) / num_samples
            count = count + 1
        # test
        a1 = test_samples
        a2 = ele.relu(self.w1 * a1 + self.b1)
        a3 = self.w2 * a2 + self.b2
        correct = a3.argmax(0) - test_labels.argmax(0)
        val = correct.to_numpy()
        print 'Testing error:', float(np.count_nonzero(val)) / num_test_samples
    print '---Finish epoch #%d' % epoch
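# The error-rate idiom from run() in isolation (numpy demo, not from the
# source): subtracting predicted class indices from the true ones gives zero
# exactly where the prediction is correct, so count_nonzero counts the
# misclassified samples.
import numpy as np

pred = np.array([1, 0, 3, 2])     # argmax over the network outputs
truth = np.array([1, 2, 3, 3])    # argmax over the one-hot targets
errors = np.count_nonzero(pred - truth)   # 2 wrong predictions
error_rate = float(errors) / len(truth)   # 0.5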
def test(self):
    base = np.asarray([40.0, 20.0, 30.0, 10.0])
    max_val = np.max(base)  # renamed from `max` to avoid shadowing the builtin
    base = np.reshape(base, [1, 1, 1, 4])
    owlarray = owl.from_numpy(base)
    expected = np.exp(base - max_val)
    expected = expected / np.sum(expected)
    actual = conv.softmax(owlarray)
    self.assertTrue(np.allclose(expected, actual.to_numpy()))
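# Why the test shifts by the max before exponentiating (numpy-only aside, not
# from the source): softmax is invariant to adding a constant to every input,
# and the shift keeps exp() from overflowing. exp(100) already exceeds the
# float32 range, while the shifted computation stays finite.
import numpy as np

x = np.asarray([100.0, 20.0], dtype=np.float32)
naive = np.exp(x) / np.sum(np.exp(x))                           # inf/inf -> nan
stable = np.exp(x - np.max(x)) / np.sum(np.exp(x - np.max(x)))  # [1.0, ~0.0]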
def run(self):
    (train_data, test_data) = mnist_io.load_mb_from_mat(self.data_file, self.mb_size)
    np.set_printoptions(linewidth=200)
    num_test_samples = test_data[0].shape[0]
    (test_samples, test_labels) = map(lambda npdata: owl.from_numpy(npdata), test_data)
    count = 1
    owl.set_device(self.gpu)
    for epoch in range(self.num_epochs):
        print '---Start epoch #%d' % epoch
        # train
        for (mb_samples, mb_labels) in train_data:
            num_samples = mb_samples.shape[0]
            a1 = owl.from_numpy(mb_samples)
            target = owl.from_numpy(mb_labels)
            # ff
            a2 = ele.relu(self.w1 * a1 + self.b1)
            a3 = self.w2 * a2 + self.b2
            # softmax & error
            out = co.softmax(a3)
            s3 = out - target
            # bp
            s2 = self.w2.trans() * s3
            s2 = ele.relu_back(s2, a2)
            # grad
            gw1 = s2 * a1.trans() / num_samples
            gb1 = s2.sum(1) / num_samples
            gw2 = s3 * a2.trans() / num_samples
            gb2 = s3.sum(1) / num_samples
            # update
            self.w1 -= self.eps_w * gw1
            self.w2 -= self.eps_w * gw2
            self.b1 -= self.eps_b * gb1
            self.b2 -= self.eps_b * gb2
            if count % 40 == 0:
                correct = out.max_index(0) - target.max_index(0)
                val = correct.to_numpy()
                print 'Training error:', float(np.count_nonzero(val)) / num_samples
            count = count + 1
        # test
        a1 = test_samples
        a2 = ele.relu(self.w1 * a1 + self.b1)
        a3 = self.w2 * a2 + self.b2
        correct = a3.max_index(0) - test_labels.max_index(0)
        val = correct.to_numpy()
        print 'Testing error:', float(np.count_nonzero(val)) / num_test_samples
    print '---Finish epoch #%d' % epoch
def bpprop(model, samples, label):
    num_layers = model.layers
    num_samples = samples.shape[-1]
    fc_shape = [model.convolution_output_size, num_samples]
    acts = [None] * num_layers
    errs = [None] * num_layers
    weightgrad = [None] * len(model.weights)
    biasgrad = [None] * len(model.bias)
    # feed-forward pass
    acts[0] = samples
    acts[1] = ele.relu(model.convs[0].ff(acts[0], model.weights[0], model.bias[0]))
    acts[2] = model.poolings[0].ff(acts[1])
    acts[3] = ele.relu(model.convs[1].ff(acts[2], model.weights[1], model.bias[1]))
    acts[4] = model.poolings[1].ff(acts[3])
    acts[5] = model.weights[2] * acts[4].reshape(fc_shape) + model.bias[2]
    acts[6] = model.weights[3] * acts[5] + model.bias[3]
    out = conv.softmax(acts[6], conv.soft_op.instance)
    # back-propagate errors
    errs[6] = out - label
    errs[5] = (model.weights[3].trans() * errs[6]).reshape(acts[5].shape)
    errs[4] = (model.weights[2].trans() * errs[5]).reshape(acts[4].shape)
    errs[3] = ele.relu_back(model.poolings[1].bp(errs[4], acts[4], acts[3]), acts[3])
    errs[2] = model.convs[1].bp(errs[3], acts[2], model.weights[1])
    errs[1] = ele.relu_back(model.poolings[0].bp(errs[2], acts[2], acts[1]), acts[1])
    # parameter gradients
    weightgrad[3] = errs[6] * acts[5].trans()
    biasgrad[3] = errs[6].sum(1)
    weightgrad[2] = errs[5] * acts[4].reshape(fc_shape).trans()
    biasgrad[2] = errs[5].sum(1)
    weightgrad[1] = model.convs[1].weight_grad(errs[3], acts[2], model.weights[1])
    biasgrad[1] = model.convs[1].bias_grad(errs[3])
    weightgrad[0] = model.convs[0].weight_grad(errs[1], acts[0], model.weights[0])
    biasgrad[0] = model.convs[0].bias_grad(errs[1])
    return (out, weightgrad, biasgrad)
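# Hedged sketch, not from the source: the acts/errs bookkeeping in the bpprop
# variants above follows one fixed pattern, which a generic reverse sweep makes
# explicit. A `layers` list whose elements expose ff/bp/grad methods is a
# hypothetical interface, not part of the library.
def backprop_generic(layers, x, label, loss_grad):
    acts = [x]
    for layer in layers:                  # forward sweep, caching activations
        acts.append(layer.ff(acts[-1]))
    err = loss_grad(acts[-1], label)      # e.g. softmax output minus one-hot
    grads = []
    for layer, act in reversed(list(zip(layers, acts[:-1]))):
        grads.append(layer.grad(err, act))  # parameter gradient at this layer
        err = layer.bp(err, act)            # sensitivity w.r.t. the layer input
    return acts[-1], list(reversed(grads))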
def ff(self, x):
    self.ff_y = co.softmax(x, co.soft_op.instance)
    return self.ff_y
def train_one_mb(self, data, label, dropout_rate):
    num_samples = data.shape[-1]
    num_layers = 12
    acts = [None] * num_layers
    sens = [None] * num_layers
    weightsgrad = [None] * self.num_weights
    biasgrad = [None] * self.num_weights
    # FF
    acts[0] = data
    acts[1] = ele.relu(self.convs[0].ff(acts[0], self.weights[0], self.bias[0]))  # conv1
    acts[2] = self.poolings[0].ff(acts[1])                                        # pool1
    acts[3] = ele.relu(self.convs[1].ff(acts[2], self.weights[1], self.bias[1]))  # conv2
    acts[4] = self.poolings[1].ff(acts[3])                                        # pool2
    acts[5] = ele.relu(self.convs[2].ff(acts[4], self.weights[2], self.bias[2]))  # conv3
    acts[6] = ele.relu(self.convs[3].ff(acts[5], self.weights[3], self.bias[3]))  # conv4
    acts[7] = ele.relu(self.convs[4].ff(acts[6], self.weights[4], self.bias[4]))  # conv5
    acts[8] = self.poolings[2].ff(acts[7])                                        # pool5
    re_acts8 = acts[8].reshape([np.prod(acts[8].shape[0:3]), num_samples])
    acts[9] = ele.relu(self.weights[5] * re_acts8 + self.bias[5])                 # fc6
    mask6 = owl.randb(acts[9].shape, dropout_rate)
    acts[9] = ele.mult(acts[9], mask6)                                            # drop6
    acts[10] = ele.relu(self.weights[6] * acts[9] + self.bias[6])                 # fc7
    mask7 = owl.randb(acts[10].shape, dropout_rate)
    acts[10] = ele.mult(acts[10], mask7)                                          # drop7
    acts[11] = self.weights[7] * acts[10] + self.bias[7]                          # fc8
    out = co.softmax(acts[11], co.soft_op.instance)                               # prob
    sens[11] = out - label
    sens[10] = self.weights[7].trans() * sens[11]                                 # fc8
    sens[10] = ele.mult(sens[10], mask7)                                          # drop7
    sens[10] = ele.relu_back(sens[10], acts[10])                                  # relu7
    sens[9] = self.weights[6].trans() * sens[10]
    sens[9] = ele.mult(sens[9], mask6)                                            # drop6
    sens[9] = ele.relu_back(sens[9], acts[9])                                     # relu6
    sens[8] = (self.weights[5].trans() * sens[9]).reshape(acts[8].shape)          # fc6
    sens[7] = ele.relu_back(self.poolings[2].bp(sens[8], acts[8], acts[7]), acts[7])  # pool5, relu5
    sens[6] = ele.relu_back(self.convs[4].bp(sens[7], self.weights[4]), acts[6])  # conv5, relu4
    sens[5] = ele.relu_back(self.convs[3].bp(sens[6], self.weights[3]), acts[5])  # conv4, relu3
    sens[4] = self.convs[2].bp(sens[5], self.weights[2])                          # conv3
    sens[3] = ele.relu_back(self.poolings[1].bp(sens[4], acts[4], acts[3]), acts[3])  # pool2, relu2
    sens[2] = self.convs[1].bp(sens[3], self.weights[1])                          # conv2
    sens[1] = self.poolings[0].bp(sens[2], acts[2], acts[1])                      # pool1
    sens[1] = ele.relu_back(sens[1], acts[1])                                     # relu1
    weightsgrad[7] = sens[11] * acts[10].trans()
    weightsgrad[6] = sens[10] * acts[9].trans()
    weightsgrad[5] = sens[9] * re_acts8.trans()
    weightsgrad[4] = self.convs[4].weight_grad(sens[7], acts[6])
    weightsgrad[3] = self.convs[3].weight_grad(sens[6], acts[5])
    weightsgrad[2] = self.convs[2].weight_grad(sens[5], acts[4])
    weightsgrad[1] = self.convs[1].weight_grad(sens[3], acts[2])
    weightsgrad[0] = self.convs[0].weight_grad(sens[1], acts[0])
    biasgrad[7] = sens[11].sum(1)
    biasgrad[6] = sens[10].sum(1)
    biasgrad[5] = sens[9].sum(1)
    biasgrad[4] = self.convs[4].bias_grad(sens[7])
    biasgrad[3] = self.convs[3].bias_grad(sens[6])
    biasgrad[2] = self.convs[2].bias_grad(sens[5])
    biasgrad[1] = self.convs[1].bias_grad(sens[3])
    biasgrad[0] = self.convs[0].bias_grad(sens[1])
    return (out, weightsgrad, biasgrad)
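# Numpy-only illustration of the drop6/drop7 masking above, not from the
# source: owl.randb(shape, p) is assumed to draw an independent 0/1 Bernoulli
# mask with probability p of a 1, and the identical mask must be reapplied in
# the backward pass (as done with mask6/mask7) so gradients flow only through
# the units that were kept.
import numpy as np

dropout_rate = 0.5
acts = np.random.randn(4096, 8).astype(np.float32)        # fc6-sized activations
mask = (np.random.rand(*acts.shape) < dropout_rate).astype(np.float32)
acts_dropped = acts * mask        # forward: zero the dropped units
sens = np.random.randn(*acts.shape).astype(np.float32)    # incoming sensitivities
sens_dropped = sens * mask        # backward: same mask, same zeros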
def train_one_mb(self, data, label, dropout_rate):
    num_samples = data.shape[-1]
    num_layers = 12
    acts = [None] * num_layers
    sens = [None] * num_layers
    weightsgrad = [None] * self.num_weights
    biasgrad = [None] * self.num_weights
    # FF
    acts[0] = data
    acts[1] = ele.relu(self.convs[0].ff(acts[0], self.weights[0], self.bias[0]))  # conv1
    acts[2] = self.poolings[0].ff(acts[1])                                        # pool1
    acts[3] = ele.relu(self.convs[1].ff(acts[2], self.weights[1], self.bias[1]))  # conv2
    acts[4] = self.poolings[1].ff(acts[3])                                        # pool2
    acts[5] = ele.relu(self.convs[2].ff(acts[4], self.weights[2], self.bias[2]))  # conv3
    acts[6] = ele.relu(self.convs[3].ff(acts[5], self.weights[3], self.bias[3]))  # conv4
    acts[7] = ele.relu(self.convs[4].ff(acts[6], self.weights[4], self.bias[4]))  # conv5
    acts[8] = self.poolings[2].ff(acts[7])                                        # pool5
    re_acts8 = acts[8].reshape([np.prod(acts[8].shape[0:3]), num_samples])
    acts[9] = ele.relu(self.weights[5] * re_acts8 + self.bias[5])                 # fc6
    mask6 = owl.randb(acts[9].shape, dropout_rate)
    acts[9] = ele.mult(acts[9], mask6)                                            # drop6
    acts[10] = ele.relu(self.weights[6] * acts[9] + self.bias[6])                 # fc7
    mask7 = owl.randb(acts[10].shape, dropout_rate)
    acts[10] = ele.mult(acts[10], mask7)                                          # drop7
    acts[11] = self.weights[7] * acts[10] + self.bias[7]                          # fc8
    out = co.softmax(acts[11], co.soft_op.instance)                               # prob
    sens[11] = out - label
    sens[10] = self.weights[7].trans() * sens[11]                                 # fc8
    sens[10] = ele.mult(sens[10], mask7)                                          # drop7
    sens[10] = ele.relu_back(sens[10], acts[10])                                  # relu7
    sens[9] = self.weights[6].trans() * sens[10]
    sens[9] = ele.mult(sens[9], mask6)                                            # drop6
    sens[9] = ele.relu_back(sens[9], acts[9])                                     # relu6
    sens[8] = (self.weights[5].trans() * sens[9]).reshape(acts[8].shape)          # fc6
    sens[7] = ele.relu_back(self.poolings[2].bp(sens[8], acts[8], acts[7]), acts[7])  # pool5, relu5
    sens[6] = ele.relu_back(self.convs[4].bp(sens[7], acts[6], self.weights[4]), acts[6])  # conv5, relu4
    sens[5] = ele.relu_back(self.convs[3].bp(sens[6], acts[5], self.weights[3]), acts[5])  # conv4, relu3
    sens[4] = self.convs[2].bp(sens[5], acts[4], self.weights[2])                 # conv3
    sens[3] = ele.relu_back(self.poolings[1].bp(sens[4], acts[4], acts[3]), acts[3])  # pool2, relu2
    sens[2] = self.convs[1].bp(sens[3], acts[2], self.weights[1])                 # conv2
    sens[1] = self.poolings[0].bp(sens[2], acts[2], acts[1])                      # pool1
    sens[1] = ele.relu_back(sens[1], acts[1])                                     # relu1
    weightsgrad[7] = sens[11] * acts[10].trans()
    weightsgrad[6] = sens[10] * acts[9].trans()
    weightsgrad[5] = sens[9] * re_acts8.trans()
    weightsgrad[4] = self.convs[4].weight_grad(sens[7], acts[6], self.weights[4])
    weightsgrad[3] = self.convs[3].weight_grad(sens[6], acts[5], self.weights[3])
    weightsgrad[2] = self.convs[2].weight_grad(sens[5], acts[4], self.weights[2])
    weightsgrad[1] = self.convs[1].weight_grad(sens[3], acts[2], self.weights[1])
    weightsgrad[0] = self.convs[0].weight_grad(sens[1], acts[0], self.weights[0])
    biasgrad[7] = sens[11].sum(1)
    biasgrad[6] = sens[10].sum(1)
    biasgrad[5] = sens[9].sum(1)
    biasgrad[4] = self.convs[4].bias_grad(sens[7])
    biasgrad[3] = self.convs[3].bias_grad(sens[6])
    biasgrad[2] = self.convs[2].bias_grad(sens[5])
    biasgrad[1] = self.convs[1].bias_grad(sens[3])
    biasgrad[0] = self.convs[0].bias_grad(sens[1])
    return (out, weightsgrad, biasgrad)
def forward(self, from_btm, to_top, phase):
    to_top[self.top_names[0]] = co.softmax(from_btm[self.btm_names[0]], co.soft_op.instance)
    self.ff_y = to_top[self.top_names[0]]
    self.y = from_btm[self.btm_names[1]]
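# Hypothetical counterpart to forward() above, not from the source: a softmax
# loss layer's backward pass typically emits (prediction - target) toward its
# bottom blob, reusing the cached self.ff_y and self.y. The signature and dict
# names are assumptions modeled on the forward() convention.
def backward(self, from_top, to_btm, phase):
    to_btm[self.btm_names[0]] = self.ff_y - self.y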