def test(self):
     # Expected
     in_channels = 3
     in_dim = 11
     out_channels = 5
     out_dim = (in_dim/2 + 1)
     img = np.arange(0,in_dim*in_dim*in_channels*1, dtype=np.float32)
     img = np.reshape(img,[in_dim,in_dim,in_channels,1])
     filter = np.arange(0,3*3*in_channels*out_channels, dtype=np.float32)
     filter = np.reshape(filter,[3,3,in_channels,out_channels])
     bias = np.zeros([out_channels])
     expected = np.zeros([out_dim,out_dim,out_channels])
     for och in range(out_channels):
         tmp = np.zeros([out_dim,out_dim,1])
         for ich in range(in_channels):
             imgslice = np.reshape(img[:,:,ich,0],[in_dim,in_dim])
             filterslice = np.reshape(filter[:,:,ich,och],[3,3])
             tmp += np.reshape(convolve(imgslice,filterslice,mode='constant',cval = 0.0)[::2,::2] , [out_dim, out_dim, 1])
         expected[:,:,och] = np.squeeze(tmp) + bias[och]
         
     # test
     owlimg = owl.from_numpy(np.transpose(img))
     owlfilter = owl.from_numpy(np.transpose(filter))
     owlbias = owl.from_numpy(bias)
     convolver = owl.conv.Convolver(1,1,2,2)   
     test = convolver.ff(owlimg, owlfilter, owlbias)
     
     print 'Expected\n',expected
     print "Actual\n",test.to_numpy()
     self.assertTrue(np.allclose(expected, test.to_numpy()))
Example #2
    def init_weights_with_filler(self):
        ''' Init weights & bias. The function will be called during weight initialization.

        Currently, four types of initializers are supported: ``"constant", "gaussian", "uniform", "xavier"``.
        '''
        #init weight
        npweights = None
        if self.weight_filler.type == "constant":
            npweights = np.ones(self.wshape, dtype = np.float32) * self.weight_filler.value
        elif self.weight_filler.type == "gaussian":
            npweights = np.random.normal(self.weight_filler.mean, self.weight_filler.std, self.wshape)
        elif self.weight_filler.type == "uniform":
            npweights = np.random.uniform(self.weight_filler.min, self.weight_filler.max, self.wshape)
        elif self.weight_filler.type == "xavier":
            fan_in = np.prod(self.in_shape[:])
            scale = np.sqrt(float(3)/fan_in)
            npweights = np.random.uniform(-scale, scale, self.wshape)
        self.weight = owl.from_numpy(npweights.astype(np.float32)).reshape(self.wshape)
      
        #init bias
        npbias = None
        if self.bias_filler.type == "constant":
            npbias = np.ones(self.bshape, dtype = np.float32) * self.bias_filler.value
        elif self.bias_filler.type == "gaussian":
            npbias = np.random.normal(self.bias_filler.mean, self.bias_filler.std, self.bshape)
        elif self.bias_filler.type == "uniform":
            npbias = np.random.uniform(self.bias_filler.min, self.bias_filler.max, self.bshape)
        elif self.bias_filler.type == "xavier":
            fan_in = np.prod(self.in_shape[:])
            scale = np.sqrt(float(3)/fan_in)
            npbias = np.random.uniform(-scale, scale, self.bshape)
        self.bias = owl.from_numpy(npbias.astype(np.float32)).reshape(self.bshape)
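A note on the "xavier" branch above: drawing from U[-sqrt(3/fan_in), sqrt(3/fan_in)] gives weights with variance 1/fan_in. A minimal NumPy-only sketch of that branch, with hypothetical stand-ins for self.in_shape and self.wshape:

import numpy as np

in_shape = [3, 3, 64]          # hypothetical input shape; fan_in is its product
wshape = [3, 3, 64, 128]       # hypothetical weight shape

fan_in = np.prod(in_shape)
scale = np.sqrt(3.0 / fan_in)  # U[-scale, scale] has variance 1/fan_in
npweights = np.random.uniform(-scale, scale, wshape).astype(np.float32)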
Example #3
def train_network(model, num_epochs=100, minibatch_size=256, lr=0.01, mom=0.75, wd=5e-4):
    # load data
    (train_data, test_data) = mnist_io.load_mb_from_mat('mnist_all.mat', minibatch_size / len(gpu))
    num_test_samples = test_data[0].shape[0]
    test_samples = owl.from_numpy(test_data[0]).reshape([28, 28, 1, num_test_samples])
    test_labels = owl.from_numpy(test_data[1])
    for i in xrange(num_epochs):
        print "---Epoch #", i
        last = time.time()
        count = 0
        weightgrads = [None] * len(gpu)
        biasgrads = [None] * len(gpu)
        for (mb_samples, mb_labels) in train_data:
            count += 1
            current_gpu = count % len(gpu)
            owl.set_device(gpu[current_gpu])
            num_samples = mb_samples.shape[0]
            data = owl.from_numpy(mb_samples).reshape([28, 28, 1, num_samples])
            label = owl.from_numpy(mb_labels)
            out, weightgrads[current_gpu], biasgrads[current_gpu] = bpprop(model, data, label)
            if current_gpu == 0:
                for k in range(len(model.weights)):
                    model.weightdelta[k] = mom * model.weightdelta[k] - lr / num_samples / len(gpu) * multi_gpu_merge(weightgrads, 0, k) - lr * wd * model.weights[k]
                    model.biasdelta[k] = mom * model.biasdelta[k] - lr / num_samples / len(gpu) * multi_gpu_merge(biasgrads, 0, k)
                    model.weights[k] += model.weightdelta[k]
                    model.bias[k] += model.biasdelta[k]
                if count % (len(gpu) * lazy_cycle) == 0:
                    print_training_accuracy(out, label, num_samples, 'Training')
        print '---End of Epoch #', i, 'time:', time.time() - last
        # do test
        out, _, _  = bpprop(model, test_samples, test_labels)
        print_training_accuracy(out, test_labels, num_test_samples, 'Testing')
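The inner update above is plain momentum SGD with L2 weight decay, applied to the gradient merged across GPUs. A minimal NumPy sketch of the same rule for a single weight matrix (shapes and values are hypothetical):

import numpy as np

lr, mom, wd, num_samples, n_gpu = 0.01, 0.75, 5e-4, 256, 2
w = np.random.randn(128, 784).astype(np.float32)
delta = np.zeros_like(w)
merged_grad = np.random.randn(*w.shape).astype(np.float32)  # sum over GPUs

delta = mom * delta - lr / num_samples / n_gpu * merged_grad - lr * wd * w
w += delta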
Example #4
def train_network(model, num_epochs = 100, minibatch_size=256,
        dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    gpu0 = owl.create_gpu_device(0)
    owl.set_device(gpu0)
    num_weights = 8
    count = 0
    last = time.time()

    dp = ImageNetDataProvider(mean_file='/home/yutian/data/config_file/google_model/imagenet_mean.binaryproto',
            train_db='/home/yutian/data/imagenet/ilsvrc12_train_lmdb',
            val_db='/home/yutian/data/imagenet/ilsvrc12_val_lmdb',
            test_db='/home/yutian/data/imagenet/ilsvrc12_test_lmdb')

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for (samples, labels) in dp.get_train_mb(minibatch_size):
            count = count + 1
            num_samples = samples.shape[0]
            data = owl.from_numpy(samples).reshape([227, 227, 3, num_samples])
            target = owl.from_numpy(labels)

            out, weightsgrad, biasgrad = model.train_one_mb(data, target, dropout_rate)
            model.update(weightsgrad, biasgrad, num_samples, mom, eps_w, wd)

            if count % 4 == 0:
                print_training_accuracy(out, target, data.shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()
Example #5
 def init_weights_with_filler(self):
     #init weight
     npweights = None
     if self.weight_filler.type == "constant":
         npweights = np.ones(self.wshape, dtype = np.float32) * self.weight_filler.value
     elif self.weight_filler.type == "gaussian":
         npweights = np.random.normal(self.weight_filler.mean, self.weight_filler.std, self.wshape)
     elif self.weight_filler.type == "uniform":
         npweights = np.random.uniform(self.weight_filler.min, self.weight_filler.max, self.wshape)
     elif self.weight_filler.type == "xavier":
         fan_in = np.prod(self.in_shape[:])
         scale = np.sqrt(float(3)/fan_in)
         npweights = np.random.uniform(-scale, scale, self.wshape)
     self.weight = owl.from_numpy(npweights.astype(np.float32)).reshape(self.wshape)
   
     #init bias
     npbias = None
     if self.bias_filler.type == "constant":
         npbias = np.ones(self.bshape, dtype = np.float32) * self.bias_filler.value
     elif self.bias_filler.type == "gaussian":
         npbias = np.random.normal(self.bias_filler.mean, self.bias_filler.std, self.bshape)
     elif self.bias_filler.type == "uniform":
         npbias = np.random.uniform(self.bias_filler.min, self.bias_filler.max, self.bshape)
     elif self.bias_filler.type == "xavier":
         fan_in = np.prod(self.in_shape[:])
         scale = np.sqrt(float(3)/fan_in)
         npbias = np.random.uniform(-scale, scale, self.bshape)
     self.bias = owl.from_numpy(npbias.astype(np.float32)).reshape(self.bshape)
def train_network(model, num_epochs = 100, minibatch_size=10,
        dropout_rate = 0.5, eps_w = 0.01, mom = 0.9, wd = 0.0005):
    gpu0 = owl.create_gpu_device(0)
    owl.set_device(gpu0)
    num_weights = 8
    count = 0
    last = time.time()
    cropped_size = 224

    dp = ImageNetDataProvider(mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
            train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
            val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
            test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')

    #mark the output layer
    output_layer = 'prob'

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for (samples, labels) in dp.get_train_mb(minibatch_size, cropped_size):
            count = count + 1
            num_samples = samples.shape[0]
            data = owl.from_numpy(samples).reshape([cropped_size, cropped_size, 3, num_samples])
            target = owl.from_numpy(labels)
            model.ff(data, target)
            print_training_accuracy(model.layers[output_layer].get_act(), target, minibatch_size)
            model.bp(data, target)
            exit(0)
 def test(self):
     bottom = np.asarray([2,-1,0,1,2,3], np.float32)
     top = np.asarray([0,0,0,1,2,3], np.float32)
     top_diff = np.asarray([0.1,0.1,0.1,0.1,0.1,0.1], np.float32)
     print top_diff.shape
     expected = np.asarray([0,0,0,0.1,0.1,0.1], np.float32)
     owldiff = owl.from_numpy(top_diff)
     owltop = owl.from_numpy(top)
     test = elewise.relu_back(owldiff,owltop)
     #print 'Expected\n',expected
     #print "Actual\n",test.to_numpy()
     self.assertTrue(np.allclose(expected, test.to_numpy()))
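For reference, elewise.relu_back in this test is expected to behave like the following NumPy sketch: the incoming gradient is passed through only where the forward ReLU output was positive.

import numpy as np

top = np.asarray([0, 0, 0, 1, 2, 3], np.float32)  # forward ReLU output
top_diff = np.full(6, 0.1, dtype=np.float32)      # incoming gradient
expected = top_diff * (top > 0)                   # [0, 0, 0, 0.1, 0.1, 0.1]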
Example #8
def train_network(model, num_epochs = 100, minibatch_size=256,
        dropout_rate = 0.5, eps_w = 0.01, eps_b = 0.01, mom = 0.9, wd = 0.0005):
    gpu = [None] * 2
    gpu[0] = owl.create_gpu_device(0)
    gpu[1] = owl.create_gpu_device(1)
    num_layers = 20
    num_weights = 8
    count = 0
    last = time.time()

    dp = ImageNetDataProvider(mean_file='/home/yutian/data/config_file/google_model/imagenet_mean.binaryproto',
            train_db='/home/yutian/data/imagenet/ilsvrc12_train_lmdb',
            val_db='/home/yutian/data/imagenet/ilsvrc12_val_lmdb',
            test_db='/home/yutian/data/imagenet/ilsvrc12_test_lmdb')

    minibatch_size = minibatch_size / 2

    wgrad = [None] * 2
    bgrad = [None] * 2
    num_samples = 0

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for (samples, labels) in dp.get_train_mb(minibatch_size):
        #for j in range(300):
            count = count + 1
            gpuid = count % 2
            owl.set_device(gpu[gpuid])

            data = owl.from_numpy(samples).reshape([227, 227, 3, samples.shape[0]])
            label = owl.from_numpy(labels)
            #data = owl.randn([227, 227, 3, 128], 0.0, 0.01)
            #label = owl.randn([1000, 128], 0.0, 0.01)
            num_samples += data.shape[-1]
            (out, wgrad[gpuid], bgrad[gpuid]) = model.train_one_mb(data, label, dropout_rate)

            if count % 2 != 0:
                continue

            for k in range(num_weights):
                wgrad[0][k] += wgrad[1][k]
                bgrad[0][k] += bgrad[1][k]

            model.update(wgrad[0], bgrad[0], num_samples, mom, eps_w, wd)

            if count % 8 == 0:
                print_training_accuracy(out, label, data.shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()

            num_samples = 0
            wgrad = [None] * 2
            bgrad = [None] * 2
Example #9
    def run(self):
        (train_data, test_data) = mnist_io.load_mb_from_mat(self.data_file, self.mb_size)
        np.set_printoptions(linewidth=200)
        num_test_samples = test_data[0].shape[0]
        (test_samples, test_labels) = map(lambda npdata : owl.from_numpy(npdata), test_data)
        count = 1
        owl.set_device(self.gpu)
        for epoch in range(self.num_epochs):
            print '---Start epoch #%d' % epoch
            # train
            for (mb_samples, mb_labels) in train_data:
                num_samples = mb_samples.shape[0]

                a1 = owl.from_numpy(mb_samples)
                target = owl.from_numpy(mb_labels)

                # ff
                a2 = ele.relu(self.w1 * a1 + self.b1)
                a3 = self.w2 * a2 + self.b2
                # softmax & error
                out = co.softmax(a3)
                s3 = out - target
                # bp
                s2 = self.w2.trans() * s3
                s2 = ele.relu_back(s2, a2)
                # grad
                gw1 = s2 * a1.trans() / num_samples
                gb1 = s2.sum(1) / num_samples
                gw2 = s3 * a2.trans() / num_samples
                gb2 = s3.sum(1) / num_samples
                # update
                self.w1 -= self.eps_w * gw1
                self.w2 -= self.eps_w * gw2
                self.b1 -= self.eps_b * gb1
                self.b2 -= self.eps_b * gb2

                if (count % 40 == 0):
                    correct = out.max_index(0) - target.max_index(0)
                    val = correct.to_numpy()
                    print 'Training error:', float(np.count_nonzero(val)) / num_samples
                count = count + 1

            # test
            a1 = test_samples
            a2 = ele.relu(self.w1 * a1 + self.b1)
            a3 = self.w2 * a2 + self.b2
            correct = a3.max_index(0) - test_labels.max_index(0)
            val = correct.to_numpy()
            #print val
            print 'Testing error:', float(np.count_nonzero(val)) / num_test_samples
            print '---Finish epoch #%d' % epoch
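The step s3 = out - target above relies on the standard identity that, for a softmax output paired with a cross-entropy loss, the gradient with respect to the pre-softmax activations is softmax(a) - y. A quick NumPy spot-check on a single hypothetical column:

import numpy as np

a = np.array([1.0, 2.0, 0.5])                # pre-softmax activations
y = np.array([0.0, 1.0, 0.0])                # one-hot target
out = np.exp(a - a.max()); out /= out.sum()  # softmax
s = out - y                                  # gradient of -log(out[argmax(y)]) w.r.t. a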
Example #10
    def forward(self, from_btm, to_top, phase):
        ''' Feed-forward of the data unit fetches a batch of fixed batch_size from the data provider.

        .. note::

            Phase indicates whether it's training or testing. Usually, the data augmentation for training involves some randomness, while testing does not.

        '''
        
        if self.generator == None:
            self.generator = self.dp.get_mb(phase)

        while True:
            try:
                (samples, labels) = next(self.generator)
                if len(labels) == 0:
                    (samples, labels) = next(self.generator)
            except StopIteration:
                print 'Have scanned the whole dataset; start from the beginning again'
                self.generator = self.dp.get_mb(phase)
                continue
            break

        to_top[self.top_names[0]] = owl.from_numpy(samples).reshape(
                [self.crop_size, self.crop_size, 3, samples.shape[0]])
        #may have multiple labels
        for i in range (1, len(self.top_names)):
            to_top[self.top_names[i]] = labels[:,i - 1]
Example #11
    def forward(self, from_btm, to_top, phase):
        ''' Feed-forward operation may vary according to phase.

        .. note::

            The LMDB data provider now supports multi-view testing: if phase is "MULTI_VIEW", it will produce 10 consecutive batches of different views of the same original image.
        '''
        if self.generator == None:
            if phase == 'TRAIN' or phase == 'TEST':
                self.generator = self.dp.get_mb(phase)
            #multiview test
            else:
                self.generator = self.dp.get_multiview_mb()
        while True:
            try:
                (samples, labels) = next(self.generator)
                if len(labels) == 0:
                    (samples, labels) = next(self.generator)
            except StopIteration:
                print 'Have scanned the whole dataset; start from the beginning again'
                self.generator = self.dp.get_mb(phase)
                continue
            break
        to_top[self.top_names[0]] = owl.from_numpy(samples).reshape(
                [self.crop_size, self.crop_size, 3, samples.shape[0]])
        for i in range (1, len(self.top_names)):
            to_top[self.top_names[i]] = labels[:,i - 1]
Example #12
    def forward(self, from_btm, to_top, phase):
        ''' Feed-forward operation may vary according to phase.
        '''
        if self.generator == None:
            self.generator = self.dp.get_mb(phase)
        while True:
            try:
                (samples, labels) = next(self.generator)
                if len(labels) == 0:
                    (samples, labels) = next(self.generator)
            except StopIteration:
                print 'Have scanned the whole dataset; start from the beginning again'
                if self.multiview == False:
                    self.generator = self.dp.get_mb(phase)
                #multiview test
                else:
                    self.generator = self.dp.get_multiview_mb()
                continue
            break
        #TODO(Jesse Lovitt): Change this 256 to a division by 256/max-fixed-point-value
        to_top[self.top_names[0]] = owl.from_numpy(samples).reshape(
                [self.crop_size, self.crop_size, 3, samples.shape[0]])
        for i in range (1, len(self.top_names)):
            to_top[self.top_names[i]] = labels[:,i - 1]
        #to_top[self.top_names[0]] = owl.zeros([self.crop_size, self.crop_size, 3, 256])
        #for i in range (1, len(self.top_names)):
            #to_top[self.top_names[i]] = np.ones(256)
        self.out = to_top[self.top_names[0]]
Example #13
def train_network(model,
                  num_epochs=100,
                  minibatch_size=256,
                  lr=0.01,
                  mom=0.75,
                  wd=5e-4):
    # load data
    (train_data,
     test_data) = mnist_io.load_mb_from_mat('mnist_all.mat',
                                            minibatch_size / len(gpu))
    num_test_samples = test_data[0].shape[0]
    test_samples = owl.from_numpy(test_data[0]).reshape(
        [28, 28, 1, num_test_samples])
    test_labels = owl.from_numpy(test_data[1])
    for i in xrange(num_epochs):
        print "---Epoch #", i
        last = time.time()
        count = 0
        weightgrads = [None] * len(gpu)
        biasgrads = [None] * len(gpu)
        for (mb_samples, mb_labels) in train_data:
            count += 1
            current_gpu = count % len(gpu)
            owl.set_device(gpu[current_gpu])
            num_samples = mb_samples.shape[0]
            data = owl.from_numpy(mb_samples).reshape([28, 28, 1, num_samples])
            label = owl.from_numpy(mb_labels)
            out, weightgrads[current_gpu], biasgrads[current_gpu] = bpprop(
                model, data, label)
            if current_gpu == 0:
                for k in range(len(model.weights)):
                    model.weightdelta[k] = mom * model.weightdelta[
                        k] - lr / num_samples / len(gpu) * multi_gpu_merge(
                            weightgrads, 0, k) - lr * wd * model.weights[k]
                    model.biasdelta[
                        k] = mom * model.biasdelta[k] - lr / num_samples / len(
                            gpu) * multi_gpu_merge(biasgrads, 0, k)
                    model.weights[k] += model.weightdelta[k]
                    model.bias[k] += model.biasdelta[k]
                if count % (len(gpu) * lazy_cycle) == 0:
                    print_training_accuracy(out, label, num_samples,
                                            'Training')
        print '---End of Epoch #', i, 'time:', time.time() - last
        # do test
        out, _, _ = bpprop(model, test_samples, test_labels)
        print_training_accuracy(out, test_labels, num_test_samples, 'Testing')
Example #14
    def test(self):
        base = np.arange(-10, 10, dtype=np.float32) / 2
        a = owl.from_numpy(base)
        test = owl.NArray.relu(a)
        expected = base.clip(0,20)
        print 'Expected\n',expected
        print "Actual\n",test.to_numpy()
        self.assertTrue(np.array_equal(expected,test.to_numpy()))
Example #15
  def init_net_from_file(self, owl_net, weightpath, epochidx):
      weightpath = "%ssnapshot%d/" % (weightpath, epochidx)
      for i in range(len(owl_net.units)):
          if isinstance(owl_net.units[i], net.FullyConnection):
              #print owl_net.units[i].name
              layername = owl_net.units[i].name
              layername = layername.replace("/","_")
              weightname = '%s%s_weights.dat' % (weightpath, layername)
              npweight = np.fromfile(weightname, dtype = np.float32)
              length = np.shape(npweight)[0]
              wshape = [owl_net.units[i].inner_product_param.num_output, length / owl_net.units[i].inner_product_param.num_output]
              owl_net.units[i].weight = owl.from_numpy(npweight).reshape(wshape)
              
              weightname = '%s%s_weightdelta.dat' % (weightpath, layername)
              if os.path.isfile(weightname):
                  npweightdelta = np.fromfile(weightname, dtype = np.float32)
                  owl_net.units[i].weightdelta = owl.from_numpy(npweightdelta).reshape(wshape)             
              
              biasname = '%s%s_bias.dat' % (weightpath, layername)
              npbias = np.fromfile(biasname, dtype = np.float32)
              bshape = [owl_net.units[i].inner_product_param.num_output, 1]
              owl_net.units[i].bias = owl.from_numpy(npbias).reshape(bshape)
              
              biasname = '%s%s_biasdelta.dat' % (weightpath, layername)
              if os.path.isfile(biasname):
                  npbiasdelta = np.fromfile(biasname, dtype = np.float32)
                  owl_net.units[i].biasdelta = owl.from_numpy(npbiasdelta).reshape(bshape)
          if isinstance(owl_net.units[i], net.ConvConnection):
              #print owl_net.units[i].name
              layername = owl_net.units[i].name
              layername = layername.replace("/","_")
              
              weightname = '%s%s_weights.dat' % (weightpath, layername)
              npweight = np.fromfile(weightname, dtype = np.float32)
              length = np.shape(npweight)[0]
              conv_params = owl_net.units[i].conv_params
              input_channel = length / conv_params.kernel_size / conv_params.kernel_size / conv_params.num_output
              wshape = [conv_params.kernel_size, conv_params.kernel_size, input_channel, conv_params.num_output]
              owl_net.units[i].weight = owl.from_numpy(npweight).reshape(wshape)
 
              weightname = '%s%s_weightdelta.dat' % (weightpath, layername)
              if os.path.isfile(weightname):
                  npweightdelta = np.fromfile(weightname, dtype = np.float32)
                  owl_net.units[i].weightdelta = owl.from_numpy(npweightdelta).reshape(wshape)              
              
              biasname = '%s%s_bias.dat' % (weightpath, layername)
              npbias = np.fromfile(biasname, dtype = np.float32)
              bshape = [owl_net.units[i].conv_params.num_output]
              owl_net.units[i].bias = owl.from_numpy(npbias).reshape(bshape)
               
              biasname = '%s%s_biasdelta.dat' % (weightpath, layername)
              if os.path.isfile(biasname):
                  npbiasdelta = np.fromfile(biasname, dtype = np.float32)
                  owl_net.units[i].biasdelta = owl.from_numpy(npbiasdelta).reshape(bshape)
Example #16
 def test(self):
     base = np.arange(0, 10, dtype=np.float32)
     base = np.reshape(base, [2, 5])
     expected = np.transpose(base)

     tmp = owl.from_numpy(base)
     test = tmp.trans()
     #print 'Expected\n',expected
     #print "Actual\n",test.to_numpy()
     self.assertTrue(np.allclose(expected,test.to_numpy()))
Example #17
 def forward(self, from_btm, to_top, phase):
     to_top[self.top_names[0]] = co.softmax(from_btm[self.btm_names[0]], co.soft_op.instance)
     self.ff_y = to_top[self.top_names[0]]
     #turn label into matrix form
     nplabel = np.zeros([self.ff_y.shape[1], self.ff_y.shape[0]], dtype=np.float32)
     self.strlabel = from_btm[self.btm_names[1]]
     
     for i in range(len(self.strlabel)):
         nplabel[i, self.strlabel[i]] = 1
     self.y = owl.from_numpy(nplabel)
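The label loop above builds a one-hot matrix one row at a time; the same thing can be done in one vectorized step, as in this small sketch (the integer class ids and shapes are hypothetical):

import numpy as np

strlabel = np.array([2, 0, 1])                            # integer class ids
nplabel = np.zeros([len(strlabel), 4], dtype=np.float32)  # [batch, classes]
nplabel[np.arange(len(strlabel)), strlabel] = 1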
Example #18
 def test(self):
     base = np.arange(-10, 10, dtype=np.float32)
     owlbase = owl.from_numpy(base)
     print fpgatestinit.devices
     fpgatestinit.setfpga()
     test = owl.NArray.relu(owlbase)
     expected = base.clip(0,20)
     print 'Expected\n',expected
     print "Actual\n",test.to_numpy()
     self.assertTrue(np.array_equal(expected,test.to_numpy()))
Example #19
    def forward(self, from_btm, to_top, phase):
        if self.generator == None:
            self.generator = self.dp.get_train_mb(self.mirror, phase)

        while True:
            try:
                (samples, labels) = next(self.generator)
                if len(labels) == 0:
                    (samples, labels) = next(self.generator)
            except StopIteration:
                print 'Have scanned the whole dataset; start from the beginning again'
                self.generator = self.dp.get_train_mb(self.mirror, phase)
                continue
            break

        to_top[self.top_names[0]] = owl.from_numpy(samples).reshape(
            [self.crop_size, self.crop_size, 3, samples.shape[0]])
        to_top[self.top_names[1]] = owl.from_numpy(labels)
 def test(self):
     base = np.asarray([40.0,20.0,30.0,10.0])
     max = np.max(base)
     base = np.reshape(base, [1,1,1,4])
     owlarray = owl.from_numpy(base)
     expected = np.exp(base - max)
     expected = expected / np.sum(expected)
     test = conv.softmax(owlarray)
     #print 'Expected\n',expected
     #print "Actual\n",test.to_numpy()
     self.assertTrue(np.allclose(expected, test.to_numpy()))
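The expected value in the test above is the usual max-shifted softmax; subtracting the maximum before exponentiating keeps exp() from overflowing without changing the result. A standalone NumPy reference:

import numpy as np

def softmax_1d(x):
    e = np.exp(x - np.max(x))  # shift by max for numerical stability
    return e / np.sum(e)

print softmax_1d(np.asarray([40.0, 20.0, 30.0, 10.0]))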
Example #21
def train_network(filename, model, num_epochs=5, minibatch_size=256, lr=0.1, lr_decay= 0.95, mom=0.9, wd=5e-4):
    # load data
    (train_data, test_data) = mnist_io.load_mb_from_mat(filename, minibatch_size / len(devs))
    num_test_samples = test_data[0].shape[0]
    test_samples = owl.from_numpy(test_data[0]).reshape([28, 28, 1, num_test_samples])
    test_labels = owl.from_numpy(test_data[1])
    for i in xrange(num_epochs):
        print "---Epoch #", i
        last = time.time()
        count = 0
        weightgrads = [None] * len(devs)
        biasgrads = [None] * len(devs)
        for (mb_samples, mb_labels) in train_data:
            count += 1
            current_dev = count % len(devs)
            owl.set_device(devs[current_dev])
            num_samples = mb_samples.shape[0]
            data = owl.from_numpy(mb_samples).reshape([28, 28, 1, num_samples])
            label = owl.from_numpy(mb_labels)
            #print "\t[{}]Train Data imported to minerva format".format(count)
            out, weightgrads[current_dev], biasgrads[current_dev] = bpprop(model, data, label)
            #print "\t[{}]Backprop complete".format(count)
#             print "dev {}".format(current_dev)
            if current_dev == 0:
#                 print "pre-merge"
                for k in range(len(model.weights)):
                    model.weightdelta[k] = mom * model.weightdelta[k] - lr / num_samples / len(devs) * multi_dev_merge(weightgrads, 0, k) - lr * wd * model.weights[k]
#                     print "\t weight merge"
                    model.biasdelta[k] = mom * model.biasdelta[k] - lr / num_samples / len(devs) * multi_dev_merge(biasgrads, 0, k)
#                     print "\t bias merge"
                    model.weights[k] += model.weightdelta[k]
                    model.bias[k] += model.biasdelta[k]
#                 print "post-merge"
                if count % (len(devs) * lazy_cycle) == 0:
                    print_training_accuracy(out, label, num_samples, 'Training ' + str(count))
                    owl.print_profiler_result()
        print '---End of Epoch #', i, 'time:', time.time() - last
        lr = lr*lr_decay
        # do test
        out, _, _  = bpprop(model, test_samples, test_labels)
        print_training_accuracy(out, test_labels, num_test_samples, 'Testing')
Example #22
    def forward(self, from_btm, to_top, phase):
        to_top[self.top_names[0]] = co.softmax(from_btm[self.btm_names[0]],
                                               co.soft_op.instance)
        self.ff_y = to_top[self.top_names[0]]
        #turn label into matrix form
        nplabel = np.zeros([self.ff_y.shape[1], self.ff_y.shape[0]],
                           dtype=np.float32)
        self.strlabel = from_btm[self.btm_names[1]]

        for i in range(len(self.strlabel)):
            nplabel[i, self.strlabel[i]] = 1
        self.y = owl.from_numpy(nplabel)
Example #23
    def init_weights_with_filler(self):
        ''' Init weights & bias. The function will be called during weight initialization.

        Currently, four types of initializers are supported: ``"constant", "gaussian", "uniform", "xavier"``.
        '''
        #init weight
        npweights = None
        if self.weight_filler.type == "constant":
            npweights = np.ones(self.wshape,
                                dtype=np.float32) * self.weight_filler.value
        elif self.weight_filler.type == "gaussian":
            npweights = np.random.normal(self.weight_filler.mean,
                                         self.weight_filler.std, self.wshape)
        elif self.weight_filler.type == "uniform":
            npweights = np.random.uniform(self.weight_filler.min,
                                          self.weight_filler.max, self.wshape)
        elif self.weight_filler.type == "xavier":
            fan_in = np.prod(self.in_shape[:])
            scale = np.sqrt(float(3) / fan_in)
            npweights = np.random.uniform(-scale, scale, self.wshape)
        self.weight = owl.from_numpy(npweights.astype(np.float32)).reshape(
            self.wshape)

        #init bias
        npbias = None
        if self.bias_filler.type == "constant":
            npbias = np.ones(self.bshape,
                             dtype=np.float32) * self.bias_filler.value
        elif self.bias_filler.type == "gaussian":
            npbias = np.random.normal(self.bias_filler.mean,
                                      self.bias_filler.std, self.bshape)
        elif self.bias_filler.type == "uniform":
            npbias = np.random.uniform(self.bias_filler.min,
                                       self.bias_filler.max, self.bshape)
        elif self.bias_filler.type == "xavier":
            fan_in = np.prod(self.in_shape[:])
            scale = np.sqrt(float(3) / fan_in)
            npbias = np.random.uniform(-scale, scale, self.bshape)
        self.bias = owl.from_numpy(npbias.astype(np.float32)).reshape(
            self.bshape)
 def test(self):
     # Expected
     img = np.arange(0,32, dtype=np.float32)
     img = np.reshape(img,[1,2,4,4])
     filter = np.arange(0,2*2*2*2, dtype=np.float32)
     filter = np.reshape(filter,[2,2,2,2])
     bias = np.ones([2])
     expected = np.asarray([[[441,497],
                             [665,721]],
                            [[1113,1297],
                             [1849,2033]]])
     
     # test
     owlimg = owl.from_numpy(img)
     owlfilter = owl.from_numpy(filter)
     owlbias = owl.from_numpy(bias)
     convolver = owl.conv.Convolver(0,0,2,2)   
     test = convolver.ff(owlimg, owlfilter, owlbias)
     
     #print 'Expected\n',expected
     #print "Actual\n",test.to_numpy()
     self.assertTrue(np.allclose(expected, test.to_numpy()))
Example #25
def train_network(model,
                  num_epochs=100,
                  minibatch_size=10,
                  dropout_rate=0.5,
                  eps_w=0.01,
                  mom=0.9,
                  wd=0.0005):
    gpu0 = owl.create_gpu_device(0)
    owl.set_device(gpu0)
    num_weights = 8
    count = 0
    last = time.time()
    cropped_size = 224

    dp = ImageNetDataProvider(
        mean_file='/home/minjie/data/imagenet/imagenet_mean.binaryproto',
        train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
        val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
        test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')

    #mark the output layer
    output_layer = 'loss3/loss3'

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for (samples, labels) in dp.get_train_mb(minibatch_size, cropped_size):
            count = count + 1
            num_samples = samples.shape[0]
            data = owl.from_numpy(samples).reshape(
                [cropped_size, cropped_size, 3, num_samples])
            target = owl.from_numpy(labels)
            model.ff(data, target)
            print_training_accuracy(model.layers[output_layer].get_act(),
                                    target, minibatch_size)
            model.bp(data, target)
            model.update(num_samples, eps_w, mom, wd)

            exit(0)
Example #26
def check_weight(owl_net, checklayer):
    h = 1e-2
    threshold = 1e-4

    for iteridx in range(10):
        #disturb the weights
        oriweight = checklayer.weight
        npweight = checklayer.weight.to_numpy()
        weightshape = np.shape(npweight)
        npweight = npweight.reshape(np.prod(weightshape[0:len(weightshape)]))
        print np.shape(npweight)
        position = np.random.randint(0, np.shape(npweight)[0])
        print position
        disturb = np.zeros(np.shape(npweight), dtype=np.float32)
        disturb[position] = h
        oriposval = npweight[position]
        npweight += disturb
        newposval = npweight[position]
        npweight = npweight.reshape(weightshape)
        checklayer.weight = owl.from_numpy(npweight)

        #get disturbed loss
        owl_net.forward('TRAIN')
        all_loss = 0
        for i in xrange(len(losslayer)):
            all_loss += owl_net.units[losslayer[i]].getloss()
        all_loss = all_loss / owl_net.batch_size  #+ 0.5 * owl_net.base_weight_decay * newposval * newposval
        #get origin loss
        checklayer.weight = oriweight
        owl_net.forward('TRAIN')
        ori_all_loss = 0
        for i in xrange(len(losslayer)):
            ori_all_loss += owl_net.units[losslayer[i]].getloss()
        ori_all_loss = ori_all_loss / owl_net.batch_size  #+ 0.5 * owl_net.base_weight_decay * oriposval * oriposval
        owl_net.backward('TRAIN')
        #get analytic gradient
        npgrad = checklayer.weightgrad.to_numpy()
        npgrad = npgrad.reshape(np.prod(weightshape[0:len(weightshape)]))
        analy_grad = npgrad[position] / owl_net.batch_size
        #get numerical gradient

        print all_loss
        print ori_all_loss

        num_grad = (all_loss - ori_all_loss) / h

        diff = np.abs(analy_grad - num_grad)
        info = "analy: %f num: %f ratio: %f" % (analy_grad, num_grad,
                                                analy_grad / num_grad)
        print info
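The check above compares an analytic gradient against a forward finite difference (f(w + h) - f(w)) / h, which has O(h) error. A self-contained sketch of the same idea on a toy loss f(w) = 0.5 * ||w||^2, whose analytic gradient is w itself:

import numpy as np

h = 1e-2
w = np.random.randn(10).astype(np.float32)
position = np.random.randint(0, w.shape[0])

f = lambda v: 0.5 * np.sum(v * v)
analy_grad = w[position]              # exact gradient component
disturbed = w.copy()
disturbed[position] += h
num_grad = (f(disturbed) - f(w)) / h  # forward difference
print 'analy:', analy_grad, 'num:', num_grad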
Example #27
def loadmodel(i, model):
    basedir = './newinitmodel/epoch%d/' % (i)
    print 'load from %s' % (basedir)
    for k in range(model.num_layers-1):
        weightshape = model.weights[k].shape
        filename = '%sweights_%d.dat' % (basedir, k)
        weightarray = np.fromfile(filename, dtype=np.float32)
        model.weights[k] = owl.from_numpy(weightarray).reshape(weightshape)

        weightshape = model.weightsdelta[k].shape
        filename = '%sweightsdelta_%d.dat' % (basedir, k)
        weightarray = np.fromfile(filename, dtype=np.float32)
        model.weightsdelta[k] = owl.from_numpy(weightarray).reshape(weightshape)

        weightshape = model.bias[k].shape
        filename = '%sbias_%d.dat' % (basedir, k)
        weightarray = np.fromfile(filename, dtype=np.float32)
        model.bias[k] = owl.from_numpy(weightarray).reshape(weightshape)

        weightshape = model.biasdelta[k].shape
        filename = '%sbiasdelta_%d.dat' % (basedir, k)
        weightarray = np.fromfile(filename, dtype=np.float32)
        model.biasdelta[k] = owl.from_numpy(weightarray).reshape(weightshape)
Example #28
def train_network(model,
                  num_epochs=100,
                  minibatch_size=256,
                  dropout_rate=0.5,
                  eps_w=0.01,
                  eps_b=0.01,
                  mom=0.9,
                  wd=0.0005):
    gpu0 = owl.create_gpu_device(0)
    owl.set_device(gpu0)
    num_weights = 8
    count = 0
    last = time.time()

    dp = ImageNetDataProvider(
        mean_file=
        '/home/yutian/data/config_file/google_model/imagenet_mean.binaryproto',
        train_db='/home/yutian/data/imagenet/ilsvrc12_train_lmdb',
        val_db='/home/yutian/data/imagenet/ilsvrc12_val_lmdb',
        test_db='/home/yutian/data/imagenet/ilsvrc12_test_lmdb')

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for (samples, labels) in dp.get_train_mb(minibatch_size):
            count = count + 1
            num_samples = samples.shape[0]
            data = owl.from_numpy(samples).reshape([227, 227, 3, num_samples])
            target = owl.from_numpy(labels)

            out, weightsgrad, biasgrad = model.train_one_mb(
                data, target, dropout_rate)
            model.update(weightsgrad, biasgrad, num_samples, mom, eps_w, wd)

            if count % 4 == 0:
                print_training_accuracy(out, target, data.shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()
def check_weight(owl_net, checklayer):
    h = 1e-2
    threshold = 1e-4

    for iteridx in range(10):
        #disturb the weights
        oriweight = checklayer.weight
        npweight = checklayer.weight.to_numpy()
        weightshape = np.shape(npweight)
        npweight = npweight.reshape(np.prod(weightshape[0:len(weightshape)]))
        print np.shape(npweight)
        position = np.random.randint(0, np.shape(npweight)[0]) 
        print position
        disturb = np.zeros(np.shape(npweight), dtype = np.float32)
        disturb[position] = h
        oriposval = npweight[position]
        npweight += disturb
        newposval = npweight[position]
        npweight = npweight.reshape(weightshape)
        checklayer.weight = owl.from_numpy(npweight)

        #get disturbed loss
        owl_net.forward('TRAIN')
        all_loss = 0
        for i in xrange(len(losslayer)):
            all_loss += owl_net.units[losslayer[i]].getloss()
        all_loss = all_loss / owl_net.batch_size #+ 0.5 * owl_net.base_weight_decay * newposval * newposval
        #get origin loss
        checklayer.weight = oriweight
        owl_net.forward('TRAIN')
        ori_all_loss = 0
        for i in xrange(len(losslayer)):
            ori_all_loss += owl_net.units[losslayer[i]].getloss()
        ori_all_loss = ori_all_loss / owl_net.batch_size #+ 0.5 * owl_net.base_weight_decay * oriposval * oriposval
        owl_net.backward('TRAIN')
        #get analytic gradient
        npgrad = checklayer.weightgrad.to_numpy()
        npgrad = npgrad.reshape(np.prod(weightshape[0:len(weightshape)]))
        analy_grad = npgrad[position] / owl_net.batch_size
        #get numerical gradient
        
        print all_loss
        print ori_all_loss
        
        num_grad = (all_loss - ori_all_loss) / h

        diff = np.abs(analy_grad - num_grad)
        info = "analy: %f num: %f ratio: %f" % (analy_grad, num_grad, analy_grad / num_grad)
        print info
Example #30
    def forward(self, from_btm, to_top, phase):
        if self.generator == None:
            self.generator = self.dp.get_mb(phase)

        while True:
            try:
                (samples, labels) = next(self.generator)
                if len(labels) == 0:
                    (samples, labels) = next(self.generator)
            except StopIteration:
                print 'Have scanned the whole dataset; start from the beginning again'
                self.generator = self.dp.get_mb(phase)
                continue
            break

        to_top[self.top_names[0]] = owl.from_numpy(samples).reshape(
                [self.crop_size, self.crop_size, 3, samples.shape[0]])
        #may have multiple labels
        for i in range (1, len(self.top_names)):
            to_top[self.top_names[i]] = labels[:,i - 1]
Example #31
 def forward(self, from_btm, to_top, phase):
     if self.top_k == 1:
         predict = from_btm[self.btm_names[0]].max_index(0)
         ground_truth = owl.from_numpy(from_btm[self.btm_names[1]]).reshape(predict.shape)
         self.batch_size = from_btm[self.btm_names[0]].shape[1]
         correct = (predict - ground_truth).count_zero()
         self.acc = correct * 1.0 / self.batch_size
     elif self.top_k == 5:
         predict = from_btm[self.btm_names[0]].to_numpy()
         top_5 = np.argsort(predict, axis=1)[:,::-1]
         ground_truth = from_btm[self.btm_names[1]]
         self.batch_size = np.shape(ground_truth)[0]
         correct = 0
         for i in range(self.batch_size):
             for t in range(5):
                 if ground_truth[i] == top_5[i,t]:
                     correct += 1
                     break
         self.acc = correct * 1.0 / self.batch_size
     else:
         assert False
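The top-5 branch above loops over the batch; the same check can be vectorized with a single broadcast comparison, as in this sketch (the score matrix and label vector are hypothetical):

import numpy as np

predict = np.random.rand(8, 1000)                 # [batch, classes] scores
ground_truth = np.random.randint(0, 1000, 8)      # integer class ids
top_5 = np.argsort(predict, axis=1)[:, ::-1][:, :5]
correct = np.sum(top_5 == ground_truth[:, None])  # at most one hit per row
acc = correct * 1.0 / len(ground_truth)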
Example #32
 def forward(self, from_btm, to_top, phase):
     if self.top_k == 1:
         predict = from_btm[self.btm_names[0]].max_index(0)
         ground_truth = owl.from_numpy(from_btm[self.btm_names[1]]).reshape(
             predict.shape)
         self.batch_size = from_btm[self.btm_names[0]].shape[1]
         correct = (predict - ground_truth).count_zero()
         self.acc = correct * 1.0 / self.batch_size
     elif self.top_k == 5:
         predict = from_btm[self.btm_names[0]].to_numpy()
         top_5 = np.argsort(predict, axis=1)[:, ::-1]
         ground_truth = from_btm[self.btm_names[1]]
         self.batch_size = np.shape(ground_truth)[0]
         correct = 0
         for i in range(self.batch_size):
             for t in range(5):
                 if ground_truth[i] == top_5[i, t]:
                     correct += 1
                     break
         self.acc = correct * 1.0 / self.batch_size
     else:
         assert False
Example #33
    def forward(self, from_btm, to_top, phase):
        """ Feed-forward operation may vary according to phase. 

        .. note::

            LMDB data provider now support multi-view testing, if multiview == True, it will produce concequtive 10 batches of different views of the same original image     
        """
        if self.generator == None:
            if self.multiview == False:
                self.generator = self.dp.get_mb(phase)
            # multiview test
            else:
                self.generator = self.dp.get_multiview_mb()
        while True:
            try:
                (samples, labels) = next(self.generator)
                if len(labels) == 0:
                    (samples, labels) = next(self.generator)
            except StopIteration:
                print "Have scanned the whole dataset; start from the begginning agin"
                if self.multiview == False:
                    self.generator = self.dp.get_mb(phase)
                # multiview test
                else:
                    self.generator = self.dp.get_multiview_mb()
                continue
            break
        to_top[self.top_names[0]] = owl.from_numpy(samples).reshape(
            [self.crop_size, self.crop_size, 3, samples.shape[0]]
        )
        for i in range(1, len(self.top_names)):
            to_top[self.top_names[i]] = labels[:, i - 1]
        # to_top[self.top_names[0]] = owl.zeros([self.crop_size, self.crop_size, 3, 256])
        # for i in range (1, len(self.top_names)):
        # to_top[self.top_names[i]] = np.ones(256)
        self.out = to_top[self.top_names[0]]
 def test(self):
     # Expected
     cpu=owl.create_cpu_device()
     owl.set_device(cpu)
     img = np.arange(0,32, dtype=np.float32) #/32
     img = np.reshape(img,[1,2,4,4])
     expected = np.asarray([[[5,7],
                             [13,15]],
                            [[21,23],
                             [29,31]]]) #/32.0
     #expected = np.asarray([[[ 110.25,  124.25],
     #                        [ 166.25,  180.25]],
     #                       [[ 278.25,  324.25],
     #                        [ 462.25,  508.25]]])
     
     # test
     owlimg = owl.from_numpy(img)
     pooler = owl.conv.Pooler(2,2,2,2)   
     test = pooler.ff(owlimg)
     
     print 'Expected\n',expected
     print "Actual\n",test.to_numpy()
     print "This test must be run with a fractional bit width of 12"
     self.assertTrue(np.allclose(expected, test.to_numpy(), atol= 1.0/(1<<12)*4))
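The expected array in this test is plain 2x2, stride-2 max pooling. A NumPy reference that computes the same values by splitting each 4x4 channel into 2x2 blocks and taking the block-wise maximum:

import numpy as np

img = np.arange(0, 32, dtype=np.float32).reshape([2, 4, 4])  # 2 channels of 4x4
pooled = img.reshape(2, 2, 2, 2, 2).max(axis=(2, 4))         # max over each 2x2 block
print pooled  # [[[5, 7], [13, 15]], [[21, 23], [29, 31]]]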
Example #35
 def initw(n, d):
     magic_number = 0.3
     npa = (np.random.rand(n, d) * 2 - 1) * magic_number  # U[-0.3, 0.3]
     return owl.from_numpy(npa).trans()
Example #36
def train_network_n(n,
                    model,
                    num_epochs=100,
                    minibatch_size=40,
                    dropout_rate=0.5,
                    eps_w=0.0001,
                    eps_b=0.0002,
                    mom=0.9,
                    wd=0.0005):

    gpus = []
    for i in range(0, n):
        gpus.append(owl.create_gpu_device(i))

    count = 0
    last = time.time()

    dp = ImageNetDataProvider(
        mean_file='./VGGmodel/vgg_mean.binaryproto',
        train_db='/home/minjie/data/imagenet/ilsvrc12_train_lmdb',
        val_db='/home/minjie/data/imagenet/ilsvrc12_val_lmdb',
        test_db='/home/minjie/data/imagenet/ilsvrc12_test_lmdb')

    minibatch_size = minibatch_size / n
    correct = 0

    rerun = False
    startepoch = 0
    curepoch = startepoch

    data = [None] * n
    label = [None] * n
    out = [None] * n
    biasgrad = [None] * n
    weightsgrad = [None] * n

    for i in range(startepoch, num_epochs):
        print "---------------------Epoch %d Index %d" % (curepoch, i)
        sys.stdout.flush()
        batchidx = 0
        count = 0
        loadmodel(i, model)
        for (samples, labels) in dp.get_train_mb(minibatch_size, 224):
            count = count + 1
            data[count - 1] = owl.from_numpy(samples).reshape(
                [224, 224, 3, samples.shape[0]])
            label[count - 1] = owl.from_numpy(labels)
            biasgrad[count - 1] = [None] * (model.num_layers - 1)
            weightsgrad[count - 1] = [None] * (model.num_layers - 1)
            owl.set_device(gpus[count - 1])
            out[count - 1] = train_one_mb(model, data[count - 1],
                                          label[count - 1],
                                          weightsgrad[count - 1],
                                          biasgrad[count - 1])
            out[count - 1].start_eval()
            if count % n > 0:
                continue

            totalweightsgrad = [None] * (model.num_layers - 1)
            totalbiasgrad = [None] * (model.num_layers - 1)
            num_samples = 0
            for gpuidx in range(0, n):
                num_samples += data[gpuidx].shape[-1]
                for k in range(model.num_layers - 1):
                    if model.ff_infos[k]['ff_type'] == 'conv' or model.ff_infos[
                            k]['ff_type'] == 'fully':
                        if gpuidx == 0:
                            totalweightsgrad[k] = weightsgrad[gpuidx][k]
                            totalbiasgrad[k] = biasgrad[gpuidx][k]
                        else:
                            totalweightsgrad[k] += weightsgrad[gpuidx][k]
                            totalbiasgrad[k] += biasgrad[gpuidx][k]

            for k in range(model.num_layers - 1):
                if model.ff_infos[k]['ff_type'] == 'conv' or model.ff_infos[k][
                        'ff_type'] == 'fully':
                    model.weightsdelta[k] = mom * model.weightsdelta[
                        k] - eps_w / num_samples * (
                            totalweightsgrad[k] +
                            wd * num_samples * model.weights[k])
                    model.biasdelta[k] = mom * model.biasdelta[
                        k] - eps_b / num_samples * totalbiasgrad[k]
                    model.weights[k] += model.weightsdelta[k]
                    model.bias[k] += model.biasdelta[k]

            #print num_samples
            if count % n == 0:
                print 'batch %d' % (batchidx)
                batchidx = batchidx + 1
                '''
                #TODO hack
                if batchidx == 2000:
                    savemodel(i+1, model)
                    exit(0)
                '''
                thiscorrect = print_training_accuracy(out[0], label[0],
                                                      data[0].shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()
                count = 0
        savemodel(i + 1, model)
Example #37
    def gradient_checker(s, checklayer_name):
        ''' Check backpropagation on multiple GPUs
        '''
        h = 1e-2
        threshold = 1e-4
        checklayer = s.owl_net.units[s.owl_net.name_to_uid[checklayer_name][0]] 
        
        losslayer = []
        for i in xrange(len(s.owl_net.units)):
            if isinstance(s.owl_net.units[i], net.SoftmaxUnit):
                losslayer.append(i)
       
        last = None
        '''
        wunits = []
        for i in xrange(len(s.owl_net.units)):
            if isinstance(s.owl_net.units[i], net.WeightedComputeUnit):
                wunits.append(i)
        '''
        wunits = s.owl_net.get_weighted_unit_ids()
        accunits = s.owl_net.get_accuracy_units()
        owl.set_device(s.gpu[0])
        
        for iteridx in range(100):
            #disturb the weights
            oriweight = checklayer.weight
            npweight = checklayer.weight.to_numpy()
            weightshape = np.shape(npweight)
            npweight = npweight.reshape(np.prod(weightshape[0:len(weightshape)]))
            position = np.random.randint(0, np.shape(npweight)[0])
            disturb = np.zeros(np.shape(npweight), dtype = np.float32)
            disturb[position] = h
            oriposval = npweight[position]
            npweight += disturb
            newposval = npweight[position]
            npweight = npweight.reshape(weightshape)
            checklayer.weight = owl.from_numpy(npweight)

            all_loss = 0
            # train on multi-gpu

            s.owl_net.forward_check()
            for i in range(len(losslayer)):
                if len(s.owl_net.units[losslayer[i]].loss_weight) == 1:
                    all_loss += (s.owl_net.units[losslayer[i]].getloss() * s.owl_net.units[losslayer[i]].loss_weight[0])
                else:
                    all_loss += s.owl_net.units[losslayer[i]].getloss()

            #get origin loss
            checklayer.weight = oriweight
            ori_all_loss = 0
            # train on multi-gpu
            s.owl_net.forward_check()
            for i in range(len(losslayer)):
                if len(s.owl_net.units[losslayer[i]].loss_weight) == 1:
                    ori_all_loss += (s.owl_net.units[losslayer[i]].getloss() * s.owl_net.units[losslayer[i]].loss_weight[0])
                else:
                    ori_all_loss += s.owl_net.units[losslayer[i]].getloss()

            s.owl_net.backward('TEST')
            #get analytic gradient
            npgrad = checklayer.weightgrad.to_numpy()
            npgrad = npgrad.reshape(np.prod(weightshape[0:len(weightshape)]))
            analy_grad = npgrad[position] /  s.owl_net.units[losslayer[i]].out.shape[1]
           
            num_grad = (all_loss - ori_all_loss) / h
            
            info = "Gradient Check at positon: %d analy: %f num: %f ratio: %f" % (position, analy_grad, num_grad, analy_grad / num_grad)
            print info
Example #38
def LSTM_train(model, sents, words, learning_rate, EPOCH, tanhC_version=1):

    # Constants
    N = model.Layers[1]  # Number of units
    K = model.Layers[2]  # Vocabulary size

    last_time = time.time()
    # For each epoch
    for epoch_id in range(1, EPOCH + 1):
        epoch_ll = 0
        # For each sentence
        for sent_id, sent in enumerate(sents):
            #print sent_id
            #print "sent", sent
            #print "sents", sents
            ##### Initialize activations #####

            Tau = len(sent)
            sent_ll = 0  # Sentence log likelihood

            data = [None] * Tau

            Hout = [None] * Tau
            Hout[0] = owl.zeros([N, 1])

            act_ig = [None] * Tau
            act_fg = [None] * Tau
            act_og = [None] * Tau
            act_ff = [None] * Tau

            C = [None] * Tau
            C[0] = owl.zeros([N, 1])
            dY = [None] * Tau

            dBd = owl.zeros([model.Layers[2], 1])  #dY.sum(0)
            dWd = owl.zeros([model.Layers[2], model.Layers[1]])
            dHout = [None] * Tau  #dY.dot(model.decoder_weights.transpose())
            dEmb = [None] * Tau

            ##### Forward pass #####
            # For each time step

            for t in range(1, Tau):
                # predict the (t+1)'th word from the t'th word
                data[t] = model.emb_weight[sent[t - 1]]
                NVector = np.zeros((K, 1))
                NVector[sent[t]] = 1
                target = owl.from_numpy(NVector).trans()

                act_ig[t] = model.ig_weight_data * data[
                    t] + model.ig_weight_prev * Hout[
                        t - 1] + model.ig_weight_cell * C[
                            t - 1] + model.ig_weight_bias
                act_ig[t] = ele.sigm(act_ig[t])

                act_fg[t] = model.fg_weight_data * data[
                    t] + model.fg_weight_prev * Hout[
                        t - 1] + model.fg_weight_cell * C[
                            t - 1] + model.fg_weight_bias
                act_fg[t] = ele.sigm(act_fg[t])

                act_ff[t] = model.ff_weight_data * data[
                    t] + model.ff_weight_prev * Hout[t -
                                                     1] + model.ff_weight_bias
                act_ff[t] = ele.tanh(act_ff[t])

                C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(
                    act_fg[t], C[t - 1])

                act_og[t] = model.og_weight_data * data[
                    t] + model.og_weight_prev * Hout[
                        t -
                        1] + model.og_weight_cell * C[t] + model.og_weight_bias
                act_og[t] = ele.sigm(act_og[t])

                if tanhC_version:
                    Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
                else:
                    Hout[t] = ele.mult(act_og[t], C[t])

                Y = softmax(model.decoder_weights * Hout[t] +
                            model.decoder_bias)

                # BP to Hout
                dY[t] = Y - target
                dBd += dY[t]
                dWd += dY[t] * Hout[t].trans()
                dHout[t] = model.decoder_weights.trans() * dY[t]

                # evaluation
                output = Y.to_numpy()  # can directly read a single element from Y
                # print output[0, sent[t]]
                sent_ll += math.log(max(output[0, sent[t]], 1e-20), 2)

                #print "Y_0[t]",Y_o[t]
                #print "Y_o[t][sent[t]]",Y_o[t][sent[t]]
                #print np.sum(output.to_numpy())
                # output = Ym[t].trans() * data[t]
                # sent_ll += math.log10( max(np.sum(output.to_numpy()),1e-20) )
            ##### Initialize gradient vectors #####

            weight_update_ig_data = owl.zeros(
                [model.Layers[1], model.Layers[0]])
            weight_update_ig_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ig_cell = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ig_bias = owl.zeros([model.Layers[1], 1])

            weight_update_fg_data = owl.zeros(
                [model.Layers[1], model.Layers[0]])
            weight_update_fg_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_fg_cell = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_fg_bias = owl.zeros([model.Layers[1], 1])

            weight_update_og_data = owl.zeros(
                [model.Layers[1], model.Layers[0]])
            weight_update_og_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_og_cell = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_og_bias = owl.zeros([model.Layers[1], 1])

            weight_update_ff_data = owl.zeros(
                [model.Layers[1], model.Layers[0]])
            weight_update_ff_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ff_bias = owl.zeros([model.Layers[1], 1])

            dC = [None] * Tau

            for t in xrange(Tau):
                dC[t] = owl.zeros(C[t].shape)

            # Calculate the error and add it
            for t in reversed(range(1, Tau)):
                #print "sent",sent
                #print "t",t

                # BP from og controlled gate and og
                if tanhC_version:
                    tanhC = ele.tanh(C[t])
                    dTanhC = ele.mult(dHout[t], act_og[t])
                    sen_og = ele.mult(dHout[t], tanhC)
                    dC[t] += ele.mult((1 - ele.mult(tanhC, tanhC)), dTanhC)
                else:
                    sen_og = ele.mult(C[t], dHout[t])
                    dC[t] += ele.mult(act_og[t], dHout[t])

                # BP from og
                sen_og = ele.mult(ele.mult(act_og[t], (1.0 - act_og[t])),
                                  sen_og)
                dHout[t - 1] = model.og_weight_prev.trans() * sen_og
                dC[t] += model.og_weight_cell.trans() * sen_og
                dEmb[t] = model.og_weight_data.trans() * sen_og

                # BP from fg controlled gate
                sen_fg = ele.mult(C[t - 1], dC[t])
                dC[t - 1] += ele.mult(act_fg[t], dC[t])

                # BP from ig controlled gate
                sen_ig = ele.mult(act_ff[t], dC[t])
                sen_ff = ele.mult(act_ig[t], dC[t])
                sen_ff = ele.mult((1 - ele.mult(act_ff[t], act_ff[t])), sen_ff)
                dEmb[t] += model.ff_weight_data.trans() * sen_ff

                # BP from fg
                sen_fg = ele.mult(ele.mult(act_fg[t], (1.0 - act_fg[t])),
                                  sen_fg)
                dHout[t - 1] += model.fg_weight_prev.trans() * sen_fg
                dC[t - 1] += model.fg_weight_cell.trans() * sen_fg
                dEmb[t] += model.fg_weight_data.trans() * sen_fg

                # BP from ig
                sen_ig = ele.mult(ele.mult(act_ig[t], (1.0 - act_ig[t])),
                                  sen_ig)
                dHout[t - 1] += model.ig_weight_prev.trans() * sen_ig
                dC[t - 1] += model.ig_weight_cell.trans() * sen_ig
                dEmb[t] += model.ig_weight_data.trans() * sen_ig

                # derivatives on weight matrix and bias
                weight_update_ig_data += sen_ig * data[t].trans()
                weight_update_ig_prev += sen_ig * Hout[t - 1].trans()
                weight_update_ig_cell += sen_ig * C[t - 1].trans()
                weight_update_ig_bias += sen_ig

                weight_update_fg_data += sen_fg * data[t].trans()
                weight_update_fg_prev += sen_fg * Hout[t - 1].trans()
                weight_update_fg_cell += sen_fg * C[t - 1].trans()
                weight_update_fg_bias += sen_fg

                weight_update_og_data += sen_og * data[t].trans()
                weight_update_og_prev += sen_og * Hout[t - 1].trans()
                weight_update_og_cell += sen_og * C[t].trans()
                weight_update_og_bias += sen_og

                weight_update_ff_data += sen_ff * data[t].trans()
                weight_update_ff_prev += sen_ff * Hout[t - 1].trans()
                weight_update_ff_bias += sen_ff

            # normalize the gradients
            rate = learning_rate / Tau

            # weight update
            model.ig_weight_prev -= rate * weight_update_ig_prev
            model.ig_weight_data -= rate * weight_update_ig_data
            model.ig_weight_cell -= rate * weight_update_ig_cell
            model.ig_weight_bias -= rate * weight_update_ig_bias

            model.fg_weight_prev -= rate * weight_update_fg_prev
            model.fg_weight_data -= rate * weight_update_fg_data
            model.fg_weight_cell -= rate * weight_update_fg_cell
            model.fg_weight_bias -= rate * weight_update_fg_bias

            model.og_weight_prev -= rate * weight_update_og_prev
            model.og_weight_data -= rate * weight_update_og_data
            model.og_weight_cell -= rate * weight_update_og_cell
            model.og_weight_bias -= rate * weight_update_og_bias

            model.ff_weight_prev -= rate * weight_update_ff_prev
            model.ff_weight_data -= rate * weight_update_ff_data
            model.ff_weight_bias -= rate * weight_update_ff_bias

            model.decoder_weights -= rate * dWd
            model.decoder_bias -= rate * dBd

            for t in range(1, Tau):
                model.emb_weight[sent[t - 1]] -= rate * dEmb[t]

            # Print results
            epoch_ll += sent_ll
            # print(" Sentence %d LL: %f" % (sent_id, sent_ll))

        epoch_ent = epoch_ll * (-1) / words
        epoch_ppl = 2**epoch_ent
        cur_time = time.time()
        print("Epoch %d (alpha=%f) PPL=%f" %
              (epoch_id, learning_rate, epoch_ppl))
        print "  time consumed:", cur_time - last_time
        last_time = cur_time

    return model, learning_rate
Exemple #39
0
def check_weight_2gpu(owl_net, checklayer, gpu):
    h = 1e-2
    threshold = 1e-4
    wunits = get_weights_id(owl_net)
    # collect the loss units; assumed to be the same SoftmaxUnit scan
    # used by gradient_checker later on this page (losslayer was
    # referenced below but never defined in the original)
    losslayer = []
    for i in xrange(len(owl_net.units)):
        if isinstance(owl_net.units[i], net.SoftmaxUnit):
            losslayer.append(i)
    wgrad = []
    bgrad = []

    for iteridx in range(10):
        #disturb the weights
        oriweight = checklayer.weight
        npweight = checklayer.weight.to_numpy()
        weightshape = np.shape(npweight)
        npweight = npweight.reshape(np.prod(weightshape[0:len(weightshape)]))
        print np.shape(npweight)
        position = np.random.randint(0, np.shape(npweight)[0])
        print position
        disturb = np.zeros(np.shape(npweight), dtype=np.float32)
        disturb[position] = h
        oriposval = npweight[position]
        npweight += disturb
        newposval = npweight[position]
        npweight = npweight.reshape(weightshape)
        checklayer.weight = owl.from_numpy(npweight)

        #get disturbed loss
        owl_net.forward('TRAIN')
        all_loss = 0
        for i in xrange(len(losslayer)):
            all_loss += owl_net.units[losslayer[i]].getloss()
        all_loss = all_loss / owl_net.batch_size  #+ 0.5 * owl_net.base_weight_decay * newposval * newposval

        #get origin loss
        checklayer.weight = oriweight
        owl_net.forward('TRAIN')
        ori_all_loss = 0
        for i in xrange(len(losslayer)):
            ori_all_loss += owl_net.units[losslayer[i]].getloss()
        ori_all_loss = ori_all_loss / owl_net.batch_size  #+ 0.5 * owl_net.base_weight_decay * oriposval * oriposval

        #analy_grad
        owl.set_device(gpu[0])
        owl_net.forward('TRAIN')
        owl_net.backward('TRAIN')
        for wid in wunits:
            wgrad.append(owl_net.units[wid].weightgrad)
            bgrad.append(owl_net.units[wid].biasgrad)
        owl.set_device(gpu[1])
        owl_net.forward('TRAIN')
        owl_net.backward('TRAIN')
        for i in range(len(wunits)):
            wid = wunits[i]
            owl_net.units[wid].weightgrad += wgrad[i]
            owl_net.units[wid].biasgrad += bgrad[i]
        wgrad = []
        bgrad = []

        #get analytic gradient
        npgrad = checklayer.weightgrad.to_numpy()
        npgrad = npgrad.reshape(np.prod(weightshape[0:len(weightshape)]))
        analy_grad = npgrad[position] / owl_net.batch_size / len(gpu)

        print all_loss
        print ori_all_loss
        num_grad = (all_loss - ori_all_loss) / h

        diff = np.abs(analy_grad - num_grad)
        info = "analy: %f num: %f ratio: %f" % (analy_grad, num_grad,
                                                analy_grad / num_grad)
        print info
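
A note on the check above: it is a one-sided finite-difference test. One weight is
perturbed by h, the forward pass is re-run, and (L(w + h) - L(w)) / h is compared
against the analytic gradient accumulated across both GPUs; a ratio near 1 means the
backward pass is consistent. A minimal self-contained sketch of the same idea in
plain NumPy (every name below is illustrative, not part of the owl API):

import numpy as np

def finite_diff_check(loss_fn, w, position, h=1e-2):
    # loss_fn maps a flat weight vector to a scalar loss
    w = w.astype(np.float64).copy()
    base = loss_fn(w)
    w[position] += h
    disturbed = loss_fn(w)
    return (disturbed - base) / h

# usage: num_grad = finite_diff_check(loss_fn, w, position)
# then compare analy_grad / num_grad against 1.0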
Exemple #40
0
 def init_net_from_file(self, owl_net, weightpath, snapshotidx):
     '''Load network parameters from a saved snapshot.
     :ivar owl_net: the network to load parameters to
     :ivar str weightpath: the folder storing parameters 
     :ivar int snapshotidx: the index of the snapshot
     '''
     weightpath = "%s/snapshot%d/" % (weightpath, snapshotidx)
     for i in range(len(owl_net.units)):
         if isinstance(owl_net.units[i], net.FullyConnection):
             #print owl_net.units[i].name
             layername = owl_net.units[i].name
             layername = layername.replace("/","_")
             weightname = '%s%s_weights.dat' % (weightpath, layername)
             
             wshape = owl_net.units[i].wshape
             if os.path.isfile(weightname):
                 npweight = np.fromfile(weightname, dtype = np.float32)
                 length = np.shape(npweight)[0]
                 if length == owl_net.units[i].in_shape[0] * owl_net.units[i].out_shape[0]:
                     owl_net.units[i].weight = owl.from_numpy(npweight).reshape(wshape)
                     weightname = '%s%s_weightdelta.dat' % (weightpath, layername)
                     if os.path.isfile(weightname):
                         npweightdelta = np.fromfile(weightname, dtype = np.float32)
                         owl_net.units[i].weightdelta = owl.from_numpy(npweightdelta).reshape(wshape) 
                 else:
                     print "Weight Need Reinit %s" % (owl_net.units[i].name)
             else:
                 print "Weight Need Reinit %s" % (owl_net.units[i].name)
         
             biasname = '%s%s_bias.dat' % (weightpath, layername)
             bshape = owl_net.units[i].bshape
             if os.path.isfile(biasname):
                 npbias = np.fromfile(biasname, dtype = np.float32)
                 length = np.shape(npbias)[0]
                 if length == owl_net.units[i].out_shape[0]:
                     owl_net.units[i].bias = owl.from_numpy(npbias).reshape(bshape)
                     biasname = '%s%s_biasdelta.dat' % (weightpath, layername)
                     if os.path.isfile(biasname):
                          npbiasdelta = np.fromfile(biasname, dtype = np.float32)
                          owl_net.units[i].biasdelta = owl.from_numpy(npbiasdelta).reshape(bshape)
                 else:
                     print "Bias Need Reinit %s" % (owl_net.units[i].name)
             
         if isinstance(owl_net.units[i], net.ConvConnection):
             #print owl_net.units[i].name
             layername = owl_net.units[i].name
             layername = layername.replace("/","_")
             conv_params = owl_net.units[i].conv_params
             
             weightname = '%s%s_weights.dat' % (weightpath, layername)
             wshape = owl_net.units[i].wshape
             if os.path.isfile(weightname):
                 npweight = np.fromfile(weightname, dtype = np.float32)
                 length = np.shape(npweight)[0]
                 if length == owl_net.units[i].in_shape[2] * owl_net.units[i].out_shape[2] * conv_params.kernel_size * conv_params.kernel_size:
                     owl_net.units[i].weight = owl.from_numpy(npweight).reshape(wshape)
                     weightname = '%s%s_weightdelta.dat' % (weightpath, layername)
                     if os.path.isfile(weightname):
                         npweightdelta = np.fromfile(weightname, dtype = np.float32)
                         owl_net.units[i].weightdelta = owl.from_numpy(npweightdelta).reshape(wshape)
                 else:
                     print "Conv Weight Need Reinit %s" % (owl_net.units[i].name)
             else:
                 print "Conv Weight Need Reinit %s" % (owl_net.units[i].name)
           
             biasname = '%s%s_bias.dat' % (weightpath, layername)
             bshape = owl_net.units[i].bshape
             if os.path.isfile(biasname):
                 npbias = np.fromfile(biasname, dtype = np.float32)
                 length = np.shape(npbias)[0]
                 if length == owl_net.units[i].out_shape[2]:
                     owl_net.units[i].bias = owl.from_numpy(npbias).reshape(bshape)
                     biasname = '%s%s_biasdelta.dat' % (weightpath, layername)
                     if os.path.isfile(biasname):
                          npbiasdelta = np.fromfile(biasname, dtype = np.float32)
                          owl_net.units[i].biasdelta = owl.from_numpy(npbiasdelta).reshape(bshape)
                 else:
                     print "Conv Bias Need Reinit %s" % (owl_net.units[i].name)
             else:
                 print "Conv Bias Need Reinit %s" % (owl_net.units[i].name)
Exemple #41
0
def train_network(model,
                  num_epochs=100,
                  minibatch_size=256,
                  dropout_rate=0.5,
                  eps_w=0.01,
                  eps_b=0.01,
                  mom=0.9,
                  wd=0.0005):
    gpu = [None] * 2
    gpu[0] = owl.create_gpu_device(0)
    gpu[1] = owl.create_gpu_device(1)
    num_layers = 20
    num_weights = 8
    count = 0
    last = time.time()

    dp = ImageNetDataProvider(
        mean_file=
        '/home/yutian/data/config_file/google_model/imagenet_mean.binaryproto',
        train_db='/home/yutian/data/imagenet/ilsvrc12_train_lmdb',
        val_db='/home/yutian/data/imagenet/ilsvrc12_val_lmdb',
        test_db='/home/yutian/data/imagenet/ilsvrc12_test_lmdb')

    minibatch_size = minibatch_size / 2

    wgrad = [None] * 2
    bgrad = [None] * 2
    num_samples = 0

    for i in xrange(num_epochs):
        print "---------------------Epoch #", i
        for (samples, labels) in dp.get_train_mb(minibatch_size):
            #for j in range(300):
            count = count + 1
            gpuid = count % 2
            owl.set_device(gpu[gpuid])

            data = owl.from_numpy(samples).reshape(
                [227, 227, 3, samples.shape[0]])
            label = owl.from_numpy(labels)
            #data = owl.randn([227, 227, 3, 128], 0.0, 0.01)
            #label = owl.randn([1000, 128], 0.0, 0.01)
            num_samples += data.shape[-1]
            (out, wgrad[gpuid],
             bgrad[gpuid]) = model.train_one_mb(data, label, dropout_rate)

            if count % 2 != 0:
                continue

            for k in range(num_weights):
                wgrad[0][k] += wgrad[1][k]
                bgrad[0][k] += bgrad[1][k]

            model.update(wgrad[0], bgrad[0], num_samples, mom, eps_w, wd)

            if count % 8 == 0:
                print_training_accuracy(out, label, data.shape[-1])
                print "time: %s" % (time.time() - last)
                last = time.time()

            num_samples = 0
            wgrad = [None] * 2
            bgrad = [None] * 2
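
The training loop above is straightforward data parallelism over two GPUs: each
half-minibatch runs on an alternating device, the two gradient sets are summed once
both halves have finished, and a single update is applied. Under the same
model.train_one_mb / model.update interface it generalizes to N devices roughly as
follows (a sketch under those assumptions, not tested code):

def train_mb_data_parallel(model, sub_batches, gpus, dropout_rate, mom, eps_w, wd):
    # sub_batches: one (data, label) pair per device in gpus
    grads = []
    num_samples = 0
    for dev, (data, label) in zip(gpus, sub_batches):
        owl.set_device(dev)
        out, wg, bg = model.train_one_mb(data, label, dropout_rate)
        grads.append((wg, bg))
        num_samples += data.shape[-1]
    wsum, bsum = grads[0]
    for wg, bg in grads[1:]:
        for k in range(len(wsum)):
            wsum[k] += wg[k]
            bsum[k] += bg[k]
    model.update(wsum, bsum, num_samples, mom, eps_w, wd)
    return out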
Exemple #43
0
def LSTM_test(model, sents, vocab_size, words, tanhC_version=1):

    N = 10
    K = vocab_size

    test_ll = 0
    # For each sentence
    for sent_id, sent in enumerate(sents):
        #print "sent_id",sent_id
        #print "sent", sent
        #print "sents", sents
        ##### Initialize activations #####
        Tau = len(sent)
        sent_ll = 0  # Sentence log likelihood
        batch_size = Tau

        data = [None] * Tau
        prev = [None] * Tau
        embed = np.zeros((K, 1))
        embed[sent[0]] = 1
        data[0] = owl.from_numpy(embed).trans()

        Hout = [None] * Tau
        Hout[0] = owl.zeros([N, 1])

        act_ig = [None] * Tau
        act_fg = [None] * Tau
        act_og = [None] * Tau
        act_ff = [None] * Tau

        C = [None] * Tau
        C[0] = owl.zeros([N, 1])
        Ym = [None] * Tau

        ##### Forward pass #####
        # For each time step
        for t in range(1, Tau):
            prev[t] = Hout[t - 1]
            embed = np.zeros((K, 1))
            embed[sent[t]] = 1
            data[t] = owl.from_numpy(embed).trans()

            act_ig[t] = model.ig_weight_data.trans() * data[
                t - 1] + model.ig_weight_prev.trans(
                ) * prev[t] + model.ig_weight_bias
            act_fg[t] = model.fg_weight_data.trans() * data[
                t - 1] + model.fg_weight_prev.trans(
                ) * prev[t] + model.fg_weight_bias
            act_og[t] = model.og_weight_data.trans() * data[
                t - 1] + model.og_weight_prev.trans(
                ) * prev[t] + model.og_weight_bias
            act_ff[t] = model.ff_weight_data.trans() * data[
                t - 1] + model.ff_weight_prev.trans(
                ) * prev[t] + model.ff_weight_bias

            act_ig[t] = ele.sigm(act_ig[t])
            act_fg[t] = ele.sigm(act_fg[t])
            act_og[t] = ele.sigm(act_og[t])
            act_ff[t] = ele.tanh(act_ff[t])

            C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(
                act_fg[t], C[t - 1])

            if tanhC_version:
                Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
            else:
                Hout[t] = ele.mult(act_og[t], C[t])
            Ym[t] = softmax(model.decoder_weights.trans() * Hout[t] +
                            model.decoder_bias)

            #print "Y_0[t]",Y_o[t]
            #print "Y_o[t][sent[t]]",Y_o[t][sent[t]]
            output = Ym[t].trans() * data[t]
            test_ll += math.log10(max(np.sum(output.to_numpy()), 1e-20))

    print test_ll
    test_ent = test_ll * (-1) / words
    test_ppl = 10**test_ent

    print("Test PPL = %f" % (test_ppl))
Exemple #44
0
def LSTM_train(model,
               sents,
               vocab_size,
               words,
               NUM_EPOCHS=100,
               tanhC_version=1):

    # Constants
    ALPHA = 1  # Learning rate
    N = 10  # Number of units
    learning_rate = 1

    K = vocab_size  # Vocabulary size

    # For each epoch
    last_ll = 1e99
    last_time = time.time()
    for epoch_id in range(1, NUM_EPOCHS + 1):
        epoch_ll = 0
        # For each sentence
        for sent_id, sent in enumerate(sents):
            #print "sent_id",sent_id
            #print "sent", sent
            #print "sents", sents
            ##### Initialize activations #####
            Tau = len(sent)
            sent_ll = 0  # Sentence log likelihood
            batch_size = Tau

            data = [None] * Tau
            prev = [None] * Tau
            embed = np.zeros((K, 1))
            embed[sent[0]] = 1
            data[0] = owl.from_numpy(embed).trans()

            Hout = [None] * Tau
            Hout[0] = owl.zeros([N, 1])

            act_ig = [None] * Tau
            act_fg = [None] * Tau
            act_og = [None] * Tau
            act_ff = [None] * Tau

            C = [None] * Tau
            C[0] = owl.zeros([N, 1])
            Ym = [None] * Tau
            dY = [None] * Tau

            dBd = owl.zeros([model.Layers[2], 1])  #dY.sum(0)
            dWd = owl.zeros([model.Layers[1],
                             model.Layers[2]])  #Hout.transpose().dot(dY)
            dHout = [None] * Tau  #dY.dot(model.decoder_weights.transpose())

            ##### Forward pass #####
            # For each time step
            for t in range(1, Tau):
                prev[t] = Hout[t - 1]
                embed = np.zeros((K, 1))
                embed[sent[t]] = 1
                data[t] = owl.from_numpy(embed).trans()

                act_ig[t] = model.ig_weight_data.trans() * data[
                    t - 1] + model.ig_weight_prev.trans(
                    ) * prev[t] + model.ig_weight_bias
                act_fg[t] = model.fg_weight_data.trans() * data[
                    t - 1] + model.fg_weight_prev.trans(
                    ) * prev[t] + model.fg_weight_bias
                act_og[t] = model.og_weight_data.trans() * data[
                    t - 1] + model.og_weight_prev.trans(
                    ) * prev[t] + model.og_weight_bias
                act_ff[t] = model.ff_weight_data.trans() * data[
                    t - 1] + model.ff_weight_prev.trans(
                    ) * prev[t] + model.ff_weight_bias

                act_ig[t] = ele.sigm(act_ig[t])
                act_fg[t] = ele.sigm(act_fg[t])
                act_og[t] = ele.sigm(act_og[t])
                act_ff[t] = ele.tanh(act_ff[t])

                C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(
                    act_fg[t], C[t - 1])

                if tanhC_version:
                    Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
                else:
                    Hout[t] = ele.mult(act_og[t], C[t])
                Ym[t] = softmax(model.decoder_weights.trans() * Hout[t] +
                                model.decoder_bias)

                dY[t] = data[t] - Ym[t]
                dBd += dY[t] / batch_size
                dWd += Hout[t] * dY[t].trans() / batch_size
                dHout[t] = model.decoder_weights * dY[t]

                #print "Y_0[t]",Y_o[t]
                #print "Y_o[t][sent[t]]",Y_o[t][sent[t]]
                #print np.sum(output.to_numpy())
                # output = Ym[t].trans() * data[t]
                # sent_ll += math.log10( max(np.sum(output.to_numpy()),1e-20) )
            # Accumulate the sentence log-likelihood
            for t in range(1, Tau):
                output = Ym[t].trans() * data[t]
                sent_ll += math.log10(max(np.sum(output.to_numpy()), 1e-20))

            ##### Initialize gradient vectors #####
            sen_ig = [None] * Tau
            sen_fg = [None] * Tau
            sen_og = [None] * Tau
            sen_ff = [None] * Tau

            weight_update_ig_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_ig_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ig_bias = owl.zeros([model.Layers[1], 1])

            weight_update_fg_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_fg_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_fg_bias = owl.zeros([model.Layers[1], 1])

            weight_update_og_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_og_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_og_bias = owl.zeros([model.Layers[1], 1])

            weight_update_ff_data = owl.zeros(
                [model.Layers[0], model.Layers[1]])
            weight_update_ff_prev = owl.zeros(
                [model.Layers[1], model.Layers[1]])
            weight_update_ff_bias = owl.zeros([model.Layers[1], 1])

            dHin = owl.zeros([model.Layers[1], model.Layers[1]])
            dC = [None] * Tau
            for t in xrange(Tau):
                dC[t] = owl.zeros(C[t].shape)

            # Calculate the error and add it
            for t in reversed(range(1, len(sent))):
                #print "sent",sent
                #print "t",t
                if tanhC_version:
                    tanhCt = ele.tanh(C[t])
                    sen_og[t] = ele.mult(tanhCt, dHout[t])
                    dC[t] += ele.mult((1 - ele.mult(tanhCt, tanhCt)),
                                      ele.mult(act_og[t], dHout[t]))
                else:
                    sen_og[t] = ele.mult(C[t], dHout[t])
                    dC[t] += ele.mult(act_og[t], dHout[t])

                sen_fg[t] = owl.zeros([model.Layers[1], 1])
                if t > 0:
                    sen_fg[t] = ele.mult(C[t - 1], dC[t])
                    dC[t - 1] += ele.mult(act_fg[t], dC[t])
                sen_ig[t] = ele.mult(act_ff[t], dC[t])
                sen_ff[t] = ele.mult(act_ig[t], dC[t])

                # backprop activation functions
                sen_ff[t] = ele.mult((1 - ele.mult(act_ff[t], act_ff[t])),
                                     sen_ff[t])
                sen_ig[t] = ele.mult(ele.mult(act_ig[t], (1.0 - act_ig[t])),
                                     sen_ig[t])
                sen_fg[t] = ele.mult(ele.mult(act_fg[t], (1.0 - act_fg[t])),
                                     sen_fg[t])
                sen_og[t] = ele.mult(ele.mult(act_og[t], (1.0 - act_og[t])),
                                     sen_og[t])

                # backprop matrix multiply
                weight_update_ig_data += data[t] * sen_ig[t].trans()
                weight_update_ig_prev += prev[t] * sen_ig[t].trans()
                weight_update_ig_bias += sen_ig[t]  # sen_ig[t].sum(0 or 1)

                weight_update_fg_data += data[t] * sen_fg[t].trans()
                weight_update_fg_prev += prev[t] * sen_fg[t].trans()
                weight_update_fg_bias += sen_fg[t]

                weight_update_og_data += data[t] * sen_og[t].trans()
                weight_update_og_prev += prev[t] * sen_og[t].trans()
                weight_update_og_bias += sen_og[t]

                weight_update_ff_data += data[t] * sen_ff[t].trans()
                weight_update_ff_prev += prev[t] * sen_ff[t].trans()
                weight_update_ff_bias += sen_ff[t]

                if t > 1:
                    dHout[t - 1] += model.ig_weight_prev.trans() * sen_ig[t]
                    dHout[t - 1] += model.fg_weight_prev.trans() * sen_fg[t]
                    dHout[t - 1] += model.og_weight_prev.trans() * sen_og[t]
                    dHout[t - 1] += model.ff_weight_prev.trans() * sen_ff[t]

            # normalize the gradients
            # dWLSTM /= batch_size
            weight_update_ig_prev /= batch_size
            weight_update_ig_data /= batch_size
            weight_update_ig_bias /= batch_size

            weight_update_fg_prev /= batch_size
            weight_update_fg_data /= batch_size
            weight_update_fg_bias /= batch_size

            weight_update_og_prev /= batch_size
            weight_update_og_data /= batch_size
            weight_update_og_bias /= batch_size

            weight_update_ff_prev /= batch_size
            weight_update_ff_data /= batch_size
            weight_update_ff_bias /= batch_size

            # weight update
            model.ig_weight_prev += learning_rate * weight_update_ig_prev
            model.ig_weight_data += learning_rate * weight_update_ig_data
            model.ig_weight_bias += learning_rate * weight_update_ig_bias

            model.fg_weight_prev += learning_rate * weight_update_fg_prev
            model.fg_weight_data += learning_rate * weight_update_fg_data
            model.fg_weight_bias += learning_rate * weight_update_fg_bias

            model.og_weight_prev += learning_rate * weight_update_og_prev
            model.og_weight_data += learning_rate * weight_update_og_data
            model.og_weight_bias += learning_rate * weight_update_og_bias

            model.ff_weight_prev += learning_rate * weight_update_ff_prev
            model.ff_weight_data += learning_rate * weight_update_ff_data
            model.ff_weight_bias += learning_rate * weight_update_ff_bias

            model.decoder_weights += learning_rate * dWd
            model.decoder_bias += learning_rate * dBd

            # Print results
            epoch_ll += sent_ll
            # print(" Sentence %d LL: %f" % (sent_id, sent_ll))
        epoch_ent = epoch_ll * (-1) / words
        epoch_ppl = 10**epoch_ent
        cur_time = time.time()
        print("Epoch %d (alpha=%f) PPL=%f" %
              (epoch_id, learning_rate, epoch_ppl))
        print "  time consumed:", cur_time - last_time
        if last_ll > epoch_ll:
            learning_rate /= 2.0
        last_ll = epoch_ll
        last_time = cur_time
Exemple #45
0
def LSTM_train(model, sents, words, learning_rate, EPOCH, tanhC_version = 1):

	# Constants
	N = model.Layers[1]       # Number of units
	K = model.Layers[2]       # Vocabulary size

	last_time = time.time()
	# For each epoch
	for epoch_id in range(1, EPOCH + 1):
		epoch_ll = 0
		# For each sentence
		for sent_id, sent in enumerate(sents):
			#print sent_id
			#print "sent", sent
			#print "sents", sents
			##### Initialize activations #####

			Tau = len(sent)
			sent_ll = 0 # Sentence log likelihood

			data = [None] * Tau

			Hout = [None] * Tau
			Hout[0] = owl.zeros([N, 1])

			act_ig = [None] * Tau
			act_fg = [None] * Tau
			act_og = [None] * Tau
			act_ff = [None] * Tau

			C = [None] * Tau
			C[0] = owl.zeros([N, 1])
			dY = [None] * Tau

			dBd = owl.zeros([model.Layers[2], 1]) #dY.sum(0)
			dWd = owl.zeros([model.Layers[2], model.Layers[1]]) 
			dHout = [None] * Tau #dY.dot(model.decoder_weights.transpose())
			dEmb = [None] * Tau

			##### Forward pass #####
			# For each time step

			for t in range(1, Tau):
				# predict the (t+1)'th word from the t'th word
				data[t] = model.emb_weight[sent[t - 1]]
				NVector = np.zeros((K, 1))
				NVector[sent[t]] = 1
				target = owl.from_numpy(NVector).trans()

				act_ig[t] = model.ig_weight_data * data[t] + model.ig_weight_prev * Hout[t - 1] + model.ig_weight_cell * C[t - 1] + model.ig_weight_bias
				act_ig[t] = ele.sigm(act_ig[t])

				act_fg[t] = model.fg_weight_data * data[t] + model.fg_weight_prev * Hout[t - 1] + model.fg_weight_cell * C[t - 1] + model.fg_weight_bias
				act_fg[t] = ele.sigm(act_fg[t])

				act_ff[t] = model.ff_weight_data * data[t] + model.ff_weight_prev * Hout[t - 1] + model.ff_weight_bias
				act_ff[t] = ele.tanh(act_ff[t])

				C[t] = ele.mult(act_ig[t], act_ff[t]) + ele.mult(act_fg[t], C[t - 1])

				act_og[t] = model.og_weight_data * data[t] + model.og_weight_prev * Hout[t - 1] + model.og_weight_cell * C[t] + model.og_weight_bias
				act_og[t] = ele.sigm(act_og[t])

				if tanhC_version:
					Hout[t] = ele.mult(act_og[t], ele.tanh(C[t]))
				else:
					Hout[t] = ele.mult(act_og[t], C[t])

				Y = softmax(model.decoder_weights * Hout[t] + model.decoder_bias)

				# BP to Hout
				dY[t] = Y - target
				dBd += dY[t]
				dWd += dY[t] * Hout[t].trans()
				dHout[t] = model.decoder_weights.trans() * dY[t]

				# evaluation
				output = Y.to_numpy()			# Can directly get a single element from Y
				# print output[0, sent[t]]
				sent_ll += math.log(max(output[0, sent[t]],1e-20), 2)

				#print "Y_0[t]",Y_o[t]
				#print "Y_o[t][sent[t]]",Y_o[t][sent[t]]
				#print np.sum(output.to_numpy())
				# output = Ym[t].trans() * data[t]
				# sent_ll += math.log10( max(np.sum(output.to_numpy()),1e-20) )
			##### Initialize gradient vectors #####
				

			weight_update_ig_data = owl.zeros([model.Layers[1], model.Layers[0]])
			weight_update_ig_prev = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_ig_cell = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_ig_bias = owl.zeros([model.Layers[1], 1])

			weight_update_fg_data = owl.zeros([model.Layers[1], model.Layers[0]])
			weight_update_fg_prev = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_fg_cell = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_fg_bias = owl.zeros([model.Layers[1], 1])

			weight_update_og_data = owl.zeros([model.Layers[1], model.Layers[0]])
			weight_update_og_prev = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_og_cell = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_og_bias = owl.zeros([model.Layers[1], 1])

			weight_update_ff_data = owl.zeros([model.Layers[1], model.Layers[0]])
			weight_update_ff_prev = owl.zeros([model.Layers[1], model.Layers[1]])
			weight_update_ff_bias = owl.zeros([model.Layers[1], 1])

			dC = [None] * Tau

			for t in xrange(Tau):
				dC[t] = owl.zeros(C[t].shape)

			# Calculate the error and add it
			for t in reversed(range(1, Tau)):
				#print "sent",sent
				#print "t",t

				# BP from og controlled gate and og
				if tanhC_version:
					tanhC = ele.tanh(C[t])
					dTanhC = ele.mult(dHout[t], act_og[t])
					sen_og = ele.mult(dHout[t], tanhC)
					dC[t] += ele.mult((1 - ele.mult(tanhC, tanhC)), dTanhC)
				else:
					sen_og = ele.mult(C[t], dHout[t])
					dC[t] += ele.mult(act_og[t], dHout[t])

				# BP from og
				sen_og = ele.mult(ele.mult(act_og[t], (1.0 - act_og[t])), sen_og)
				dHout[t - 1] = model.og_weight_prev.trans() * sen_og
				dC[t] += model.og_weight_cell.trans() * sen_og
				dEmb[t] = model.og_weight_data.trans() * sen_og

				# BP from fg controlled gate
				sen_fg = ele.mult(C[t - 1], dC[t])
				dC[t - 1] += ele.mult(act_fg[t], dC[t])
				
				# BP from ig controlled gate
				sen_ig = ele.mult(act_ff[t], dC[t])
				sen_ff = ele.mult(act_ig[t], dC[t])
				sen_ff = ele.mult((1 - ele.mult(act_ff[t], act_ff[t])), sen_ff)
				dEmb[t] += model.ff_weight_data.trans() * sen_ff
				
				# BP from fg
				sen_fg = ele.mult(ele.mult(act_fg[t], (1.0 - act_fg[t])), sen_fg)
				dHout[t - 1] += model.fg_weight_prev.trans() * sen_fg
				dC[t - 1] += model.fg_weight_cell.trans() * sen_fg
				dEmb[t] += model.fg_weight_data.trans() * sen_fg

				# BP from ig
				sen_ig = ele.mult(ele.mult(act_ig[t], (1.0 - act_ig[t])), sen_ig)
				dHout[t - 1] += model.ig_weight_prev.trans() * sen_ig
				dC[t - 1] += model.ig_weight_cell.trans() * sen_ig
				dEmb[t] += model.ig_weight_data.trans() * sen_ig

				# derivatives on weight matrix and bias
				weight_update_ig_data += sen_ig * data[t].trans()
				weight_update_ig_prev += sen_ig * Hout[t - 1].trans()
				weight_update_ig_cell += sen_ig * C[t - 1].trans()
				weight_update_ig_bias += sen_ig

				weight_update_fg_data += sen_fg * data[t].trans()
				weight_update_fg_prev += sen_fg * Hout[t - 1].trans()
				weight_update_fg_cell += sen_fg * C[t - 1].trans()
				weight_update_fg_bias += sen_fg

				weight_update_og_data += sen_og * data[t].trans()
				weight_update_og_prev += sen_og * Hout[t - 1].trans()
				weight_update_og_cell += sen_og * C[t].trans()
				weight_update_og_bias += sen_og

				weight_update_ff_data += sen_ff * data[t].trans()
				weight_update_ff_prev += sen_ff * Hout[t - 1].trans()
				weight_update_ff_bias += sen_ff


			# normalize the gradients
			rate = learning_rate / Tau

			# weight update
			model.ig_weight_prev -= rate * weight_update_ig_prev
			model.ig_weight_data -= rate * weight_update_ig_data
			model.ig_weight_cell -= rate * weight_update_ig_cell
			model.ig_weight_bias -= rate * weight_update_ig_bias

			model.fg_weight_prev -= rate * weight_update_fg_prev
			model.fg_weight_data -= rate * weight_update_fg_data
			model.fg_weight_cell -= rate * weight_update_fg_cell
			model.fg_weight_bias -= rate * weight_update_fg_bias

			model.og_weight_prev -= rate * weight_update_og_prev
			model.og_weight_data -= rate * weight_update_og_data
			model.og_weight_cell -= rate * weight_update_og_cell
			model.og_weight_bias -= rate * weight_update_og_bias

			model.ff_weight_prev -= rate * weight_update_ff_prev
			model.ff_weight_data -= rate * weight_update_ff_data
			model.ff_weight_bias -= rate * weight_update_ff_bias

			model.decoder_weights -= rate * dWd
			model.decoder_bias -= rate * dBd

			for t in range(1, Tau):
				model.emb_weight[sent[t - 1]] -= rate * dEmb[t]

			# Print results
			epoch_ll += sent_ll
			# print(" Sentence %d LL: %f" % (sent_id, sent_ll))

			
		epoch_ent = epoch_ll * (-1) / words
		epoch_ppl = 2 ** epoch_ent
		cur_time = time.time()
		print("Epoch %d (alpha=%f) PPL=%f" % (epoch_id, learning_rate, epoch_ppl))
		print "  time consumed:", cur_time - last_time
		last_time = cur_time

	return model, learning_rate
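
Worth noting when comparing the two LSTM_train variants on this page: the one-hot
decoder version computes dY[t] = data[t] - Ym[t] and adds learning_rate times the
accumulated updates (gradient ascent on the log-likelihood), while this embedding
version computes dY[t] = Y - target and subtracts rate times the updates (gradient
descent on the cross-entropy). The two sign conventions move the weights in the same
direction.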
Exemple #46
0
	def initw(n, d):
		magic_number = 0.3
		npa = (np.random.rand(n, d) * 2 - 1) * magic_number # U[-0.3, 0.3]
		return owl.from_numpy(npa).trans()
Exemple #47
0
 def forward(self, from_btm, to_top, phase):
     predict = from_btm[self.btm_names[0]].argmax(0)
     ground_truth = owl.from_numpy(from_btm[self.btm_names[1]]).reshape(predict.shape)
     self.batch_size = from_btm[self.btm_names[0]].shape[1]
     correct = (predict - ground_truth).count_zero()
     self.acc = correct * 1.0 / self.batch_size
Exemple #48
0
 
 # training parameters
 epsilon = 0.01
 momentum = 0.9
 
 num_epochs = 20
 batch_size = 64
 num_batches = data.shape[1]//batch_size
 
 # model parameters
 num_vis = data.shape[0]
 num_hid = 128
 
 # initialize weights
 np.random.seed(1234)
 weights = owl.from_numpy(0.1 * np.random.randn(num_vis, num_hid)).trans()
 #weights = 0.1 * owl.randn([num_vis, num_hid],0,1)
 bias_v = owl.zeros([1,num_vis])
 bias_h = owl.zeros([1,num_hid])
 
 # initialize weight updates
 d_weights = owl.zeros([num_vis, num_hid])
 d_bias_v = owl.zeros([1,num_vis])
 d_bias_h = owl.zeros([1,num_hid])
 
 start_time = time.time()
 for epoch in range(num_epochs):
     print("Epoch %i" % (epoch + 1))
     err = []
     weights_old = weights
     for batch in range(num_batches):
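
The example is cut off at the top of the batch loop; the body presumably performs one
step of contrastive divergence (CD-1) and accumulates the reconstruction error. A
self-contained NumPy sketch of such a step, under that assumption (shapes and names
below are illustrative and do not come from the original snippet):

import numpy as np

def cd1_step(v0, weights, bias_v, bias_h, epsilon=0.01):
    # v0: (num_vis, batch); weights: (num_vis, num_hid); biases are column vectors
    sigm = lambda x: 1.0 / (1.0 + np.exp(-x))
    h0 = sigm(weights.T.dot(v0) + bias_h)                      # positive phase
    h_sample = (h0 > np.random.rand(*h0.shape)).astype(v0.dtype)
    v1 = sigm(weights.dot(h_sample) + bias_v)                  # reconstruction
    h1 = sigm(weights.T.dot(v1) + bias_h)
    batch = v0.shape[1]
    d_w = (v0.dot(h0.T) - v1.dot(h1.T)) / batch                # CD-1 gradient
    d_bv = (v0 - v1).mean(axis=1, keepdims=True)
    d_bh = (h0 - h1).mean(axis=1, keepdims=True)
    err = np.mean((v0 - v1) ** 2)
    return weights + epsilon * d_w, bias_v + epsilon * d_bv, bias_h + epsilon * d_bh, err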
Exemple #49
0
    def gradient_checker(s, checklayer_name):
        ''' Check backpropagation on multiple GPUs
        '''
        h = 1e-2
        threshold = 1e-4
        checklayer = s.owl_net.units[s.owl_net.name_to_uid[checklayer_name][0]] 
        
        losslayer = []
        for i in xrange(len(s.owl_net.units)):
            if isinstance(s.owl_net.units[i], net.SoftmaxUnit):
                losslayer.append(i)
       
        last = None
        '''
        wunits = []
        for i in xrange(len(s.owl_net.units)):
            if isinstance(s.owl_net.units[i], net.WeightedComputeUnit):
                wunits.append(i)
        '''
        wunits = s.owl_net.get_weighted_unit_ids()
        accunits = s.owl_net.get_accuracy_units()
        owl.set_device(s.gpu[0])
        
        for iteridx in range(100):
            #disturb the weights
            oriweight = checklayer.weight
            npweight = checklayer.weight.to_numpy()
            weightshape = np.shape(npweight)
            npweight = npweight.reshape(np.prod(weightshape[0:len(weightshape)]))
            position = np.random.randint(0, np.shape(npweight)[0])
            disturb = np.zeros(np.shape(npweight), dtype = np.float32)
            disturb[position] = h
            oriposval = npweight[position]
            npweight += disturb
            newposval = npweight[position]
            npweight = npweight.reshape(weightshape)
            checklayer.weight = owl.from_numpy(npweight)

            all_loss = 0
            # train on multi-gpu

            s.owl_net.forward_check()
            for i in range(len(losslayer)):
                if len(s.owl_net.units[losslayer[i]].loss_weight) == 1:
                    all_loss += (s.owl_net.units[losslayer[i]].getloss() * s.owl_net.units[losslayer[i]].loss_weight[0])
                else:
                    all_loss += s.owl_net.units[losslayer[i]].getloss()

            #get origin loss
            checklayer.weight = oriweight
            ori_all_loss = 0
            # train on multi-gpu
            s.owl_net.forward_check()
            for i in range(len(losslayer)):
                if len(s.owl_net.units[losslayer[i]].loss_weight) == 1:
                    ori_all_loss += (s.owl_net.units[losslayer[i]].getloss() * s.owl_net.units[losslayer[i]].loss_weight[0])
                else:
                    ori_all_loss += s.owl_net.units[losslayer[i]].getloss()

            s.owl_net.backward('TEST')
            #get analytic gradient
            npgrad = checklayer.weightgrad.to_numpy()
            npgrad = npgrad.reshape(np.prod(weightshape[0:len(weightshape)]))
            analy_grad = npgrad[position] / s.owl_net.units[losslayer[-1]].out.shape[1]
           
            num_grad = (all_loss - ori_all_loss) / h
            
            info = "Gradient Check at positon: %d analy: %f num: %f ratio: %f" % (position, analy_grad, num_grad, analy_grad / num_grad)
            print info