Example #1
    def weight_update(self):
        if self.next_layer:
            # With data parallelism the effective batch is split across GPUs:
            # batch_size = ((self.out.shape[2]*gpu.gpu_count()) if self.config['parallelism'] == 'data' else self.out.shape[2])
            batch_size = self.out.shape[2]

            if self.has_gradients:
                # Earlier CPU-side variant that zeroed small gradients by magnitude:
                # x = self.w_grad_next.tocpu()
                # x[np.abs(x) < 0.005] = 0
                # del self.w_grad_next
                # self.w_grad_next = gpu.array(x)

                # In-place RMSProp kernel: updates the running average m_next and
                # scales w_grad_next into the final update, which is then
                # subtracted from the weights.
                lib.funcs.inp_RMSProp(self.m_next.pt, self.w_grad_next.pt,
                                      ct.c_float(self.config['momentum']),
                                      ct.c_float(self.config['learning_rate']),
                                      batch_size)
                gpu.subtract(self.w_next, self.w_grad_next, self.w_next)

                # Hard-threshold the weights: build a 0/1 mask of entries > 0.005
                # and multiply it in, zeroing everything at or below the threshold
                # (including all negative weights, unlike the abs-based variant above).
                if self.test_buffer is None:
                    self.test_buffer = gpu.empty_like(self.w_next)
                gpu.fill(self.test_buffer, 0.005)
                gpu.greater(self.w_next, self.test_buffer, self.test_buffer)
                gpu.multiply(self.w_next, self.test_buffer, self.w_next)

            # Apply the gradient only after initializing RMSProp with the
            # first gradient.
            if not self.has_gradients:
                self.has_gradients = True
                # TODO: this should work
                # gpu.div(self.w_grad_next, batch_size, self.m_next)

            if self.config['parallelism'] != 'data':
                self.next_layer.weight_update()
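
The heavy lifting above happens in lib.funcs.inp_RMSProp, a custom CUDA kernel whose internals are not visible in this listing. As a rough CPU sketch, assuming the standard RMSProp rule (running average of squared gradients, update scaled by its root) with self.config['momentum'] acting as the decay rate, the whole step corresponds to something like the following NumPy code. Every name, the epsilon constant, and the exact kernel semantics here are illustrative assumptions, not part of the library:

import numpy as np

def weight_update_sketch(w, grad, m, momentum=0.9, learning_rate=0.001,
                         batch_size=128, threshold=0.005, eps=1e-8):
    """Assumed CPU equivalent of the RMSProp + threshold step above."""
    grad = grad / batch_size                          # average over the batch
    m[:] = momentum * m + (1.0 - momentum) * grad**2  # running avg of squared grads
    w -= learning_rate * grad / (np.sqrt(m) + eps)    # inp_RMSProp + gpu.subtract
    w *= (w > threshold)                              # gpu.fill/greater/multiply:
                                                      # zeroes weights <= 0.005,
                                                      # negatives included
    return w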
Example #2
    def create_weights(self):
        self.log_network()
        if self.next_layer:
            # Weights, biases, RMSProp moving average, and gradient buffers
            # for the connection to the next layer.
            self.w_next = gpu.array(
                create_uniform_rdm_weight(self.unitcount,
                                          self.next_layer.unitcount))
            self.b_next = gpu.zeros((1, self.next_layer.unitcount))
            self.m_next = gpu.zeros(
                (self.unitcount, self.next_layer.unitcount))
            self.w_grad_next = gpu.zeros(
                (self.unitcount, self.next_layer.unitcount))
            self.b_grad_next = gpu.zeros((1, self.next_layer.unitcount))
            # Staging buffer used when synchronizing weights across GPUs.
            self.w_next_sync = gpu.zeros(
                (self.unitcount, self.next_layer.unitcount))
            if self.next_layer.config['compression'] == '1bit':
                # 1-bit gradient compression: sign masks, per-column counts
                # and averages, plus the residual carried between updates.
                self.errors = gpu.zeros_like(self.w_grad_next)
                self.posMask = gpu.zeros_like(self.w_grad_next)
                self.negMask = gpu.zeros_like(self.w_grad_next)
                self.w_grad_with_errors = gpu.zeros_like(self.w_grad_next)
                self.posCount = gpu.zeros((self.w_grad_next.shape_tensor[2], ))
                self.negCount = gpu.zeros((self.w_grad_next.shape_tensor[2], ))
                self.posAvg = gpu.zeros((self.w_grad_next.shape_tensor[2], ))
                self.negAvg = gpu.zeros((self.w_grad_next.shape_tensor[2], ))
            elif self.next_layer.config['compression'] == '8bit':
                # 8-bit compression only needs a buffer for the max values.
                self.max_value_buffer = gpu.empty_like(self.w_grad_next)

            # Recurse so every layer in the chain allocates its weights.
            self.next_layer.create_weights()
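
Example #2 only allocates the compression buffers; the code that fills them during gradient exchange is not part of this listing. Judging by the names (posMask/negMask, posAvg/negAvg, errors), they support 1-bit gradient quantization with error feedback in the style of Seide et al.'s 1-bit SGD. A minimal NumPy sketch of that scheme, with all names and the exact encode/decode rule assumed rather than taken from the library:

import numpy as np

def one_bit_quantize_sketch(grad, errors):
    """Assumed use of the 1-bit buffers above (error-feedback quantization)."""
    g = grad + errors                           # w_grad_with_errors
    pos_mask = g > 0                            # posMask
    neg_mask = ~pos_mask                        # negMask
    pos_count = pos_mask.sum(axis=0)            # posCount (per column)
    neg_count = neg_mask.sum(axis=0)            # negCount (per column)
    # Column-wise means of the positive and negative entries (posAvg/negAvg);
    # the 1-bit code transmits only the sign mask plus these two vectors.
    pos_avg = (g * pos_mask).sum(axis=0) / np.maximum(pos_count, 1)
    neg_avg = (g * neg_mask).sum(axis=0) / np.maximum(neg_count, 1)
    # Decode: each entry becomes its column's positive or negative mean.
    decoded = np.where(pos_mask, pos_avg, neg_avg)
    errors[:] = g - decoded                     # residual fed into the next step
    return decoded

In this reading, errors would start as zeros, matching the gpu.zeros_like allocation above, and accumulate the quantization residual from step to step.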