def weight_update(self):
    """Update the weights feeding the next layer, then continue down the chain.

    Does nothing when this layer has no ``next_layer``.

    On the first call for a layer (``has_gradients`` falsy) no update is
    applied; the flag is only set, so the RMSProp step starts with the
    second call. On later calls the method:

    1. calls ``lib.funcs.inp_RMSProp`` with ``m_next``, ``w_grad_next``, the
       configured ``momentum`` and ``learning_rate``, and the batch size;
    2. subtracts ``w_grad_next`` from ``w_next``;
    3. masks ``w_next`` against a buffer filled with 0.005.

    Unless ``config['parallelism']`` is ``'data'``, the call is then
    forwarded to ``next_layer.weight_update()``.
    """
    if not self.next_layer:
        return

    # A disabled variant multiplied this by gpu.gpu_count() when
    # config['parallelism'] == 'data'; only the per-device size is used now.
    batch_size = self.out.shape[2]

    if self.has_gradients:
        # Presumably an in-place RMSProp that leaves the scaled step in
        # w_grad_next -- TODO confirm against the native library.
        lib.funcs.inp_RMSProp(
            self.m_next.pt,
            self.w_grad_next.pt,
            ct.c_float(self.config['momentum']),
            ct.c_float(self.config['learning_rate']),
            batch_size)
        # The gpu.* calls below look like (a, b, out) -- TODO confirm.
        gpu.subtract(self.w_next, self.w_grad_next, self.w_next)

        # Lazily allocate the scratch buffer used for thresholding.
        # NOTE(review): this relies on the truthiness of the buffer object;
        # `is None` may be the intended check -- confirm how it is initialised.
        if not self.test_buffer:
            self.test_buffer = gpu.empty_like(self.w_next)

        # The buffer is refilled on every call because gpu.greater overwrites
        # it with the comparison result.
        # NOTE(review): a `w_next > 0.005` mask would also zero every negative
        # weight; the disabled CPU variant thresholded abs(gradient) instead.
        # Confirm which is intended.
        gpu.fill(self.test_buffer, 0.005)
        gpu.greater(self.w_next, self.test_buffer, self.test_buffer)
        gpu.multiply(self.w_next, self.test_buffer, self.w_next)
    else:
        # Apply the gradient only after RMSProp has been initialised with the
        # first gradient.
        self.has_gradients = True
        # TODO: this should work
        # gpu.div(self.w_grad_next, batch_size, self.m_next)

    if self.config['parallelism'] != 'data':
        self.next_layer.weight_update()
def weight_update(self):
    """Update the weights feeding the next layer, then continue down the chain.

    Does nothing when this layer has no ``next_layer``.

    On the first call for a layer (``has_gradients`` falsy) no update is
    applied; the flag is only set, so the RMSProp step starts with the
    second call. On later calls the method:

    1. calls ``lib.funcs.inp_RMSProp`` with ``m_next``, ``w_grad_next``, the
       configured ``momentum`` and ``learning_rate``, and the batch size;
    2. subtracts ``w_grad_next`` from ``w_next``;
    3. masks ``w_next`` against a buffer filled with 0.005.

    Unless ``config['parallelism']`` is ``'data'``, the call is then
    forwarded to ``next_layer.weight_update()``.
    """
    if not self.next_layer:
        return

    # A disabled variant multiplied this by gpu.gpu_count() when
    # config['parallelism'] == 'data'; only the per-device size is used now.
    batch_size = self.out.shape[2]

    if self.has_gradients:
        # Presumably an in-place RMSProp that leaves the scaled step in
        # w_grad_next -- TODO confirm against the native library.
        lib.funcs.inp_RMSProp(
            self.m_next.pt,
            self.w_grad_next.pt,
            ct.c_float(self.config['momentum']),
            ct.c_float(self.config['learning_rate']),
            batch_size)
        # The gpu.* calls below look like (a, b, out) -- TODO confirm.
        gpu.subtract(self.w_next, self.w_grad_next, self.w_next)

        # Lazily allocate the scratch buffer used for thresholding.
        # NOTE(review): this relies on the truthiness of the buffer object;
        # `is None` may be the intended check -- confirm how it is initialised.
        if not self.test_buffer:
            self.test_buffer = gpu.empty_like(self.w_next)

        # The buffer is refilled on every call because gpu.greater overwrites
        # it with the comparison result.
        # NOTE(review): a `w_next > 0.005` mask would also zero every negative
        # weight; the disabled CPU variant thresholded abs(gradient) instead.
        # Confirm which is intended.
        gpu.fill(self.test_buffer, 0.005)
        gpu.greater(self.w_next, self.test_buffer, self.test_buffer)
        gpu.multiply(self.w_next, self.test_buffer, self.w_next)
    else:
        # Apply the gradient only after RMSProp has been initialised with the
        # first gradient.
        self.has_gradients = True
        # TODO: this should work
        # gpu.div(self.w_grad_next, batch_size, self.m_next)

    if self.config['parallelism'] != 'data':
        self.next_layer.weight_update()
def create_weights(self):
    """Allocate the buffers linking this layer to the next, then recurse.

    Always calls ``self.log_network()`` first. When the layer has a
    ``next_layer`` the following attributes are (re)created:

    * ``w_next`` from ``create_uniform_rdm_weight(unitcount, next unitcount)``
    * ``m_next``, ``w_grad_next``, ``w_next_sync``: zeros with shape
      ``(unitcount, next_layer.unitcount)``
    * ``b_next``, ``b_grad_next``: zeros with shape
      ``(1, next_layer.unitcount)``

    Extra buffers depend on ``next_layer.config['compression']``:
    ``'1bit'`` adds the error/mask/count/average buffers and ``'8bit'`` adds
    ``max_value_buffer``. Finally ``next_layer.create_weights()`` is called.
    """
    self.log_network()
    if not self.next_layer:
        return

    weight_shape = (self.unitcount, self.next_layer.unitcount)
    bias_shape = (1, self.next_layer.unitcount)

    self.w_next = gpu.array(create_uniform_rdm_weight(*weight_shape))
    self.b_next = gpu.zeros(bias_shape)
    self.m_next = gpu.zeros(weight_shape)
    self.w_grad_next = gpu.zeros(weight_shape)
    self.b_grad_next = gpu.zeros(bias_shape)
    self.w_next_sync = gpu.zeros(weight_shape)

    compression = self.next_layer.config['compression']
    if compression == '1bit':
        self.errors = gpu.zeros_like(self.w_grad_next)
        self.posMask = gpu.zeros_like(self.w_grad_next)
        self.negMask = gpu.zeros_like(self.w_grad_next)
        self.w_grad_with_errors = gpu.zeros_like(self.w_grad_next)

        # One entry per element along index 2 of shape_tensor; the meaning of
        # that axis is not visible from this function.
        count_shape = (self.w_grad_next.shape_tensor[2],)
        self.posCount = gpu.zeros(count_shape)
        self.negCount = gpu.zeros(count_shape)
        self.posAvg = gpu.zeros(count_shape)
        self.negAvg = gpu.zeros(count_shape)
    elif compression == '8bit':
        # Left uninitialised (empty_like), unlike the zero-filled buffers above.
        self.max_value_buffer = gpu.empty_like(self.w_grad_next)

    self.next_layer.create_weights()
def create_weights(self):
    """Allocate the buffers linking this layer to the next, then recurse.

    Always calls ``self.log_network()`` first. When the layer has a
    ``next_layer`` the following attributes are (re)created:

    * ``w_next`` from ``create_uniform_rdm_weight(unitcount, next unitcount)``
    * ``m_next``, ``w_grad_next``, ``w_next_sync``: zeros with shape
      ``(unitcount, next_layer.unitcount)``
    * ``b_next``, ``b_grad_next``: zeros with shape
      ``(1, next_layer.unitcount)``

    Extra buffers depend on ``next_layer.config['compression']``:
    ``'1bit'`` adds the error/mask/count/average buffers and ``'8bit'`` adds
    ``max_value_buffer``. Finally ``next_layer.create_weights()`` is called.
    """
    self.log_network()
    if not self.next_layer:
        return

    weight_shape = (self.unitcount, self.next_layer.unitcount)
    bias_shape = (1, self.next_layer.unitcount)

    self.w_next = gpu.array(create_uniform_rdm_weight(*weight_shape))
    self.b_next = gpu.zeros(bias_shape)
    self.m_next = gpu.zeros(weight_shape)
    self.w_grad_next = gpu.zeros(weight_shape)
    self.b_grad_next = gpu.zeros(bias_shape)
    self.w_next_sync = gpu.zeros(weight_shape)

    compression = self.next_layer.config['compression']
    if compression == '1bit':
        self.errors = gpu.zeros_like(self.w_grad_next)
        self.posMask = gpu.zeros_like(self.w_grad_next)
        self.negMask = gpu.zeros_like(self.w_grad_next)
        self.w_grad_with_errors = gpu.zeros_like(self.w_grad_next)

        # One entry per element along index 2 of shape_tensor; the meaning of
        # that axis is not visible from this function.
        count_shape = (self.w_grad_next.shape_tensor[2],)
        self.posCount = gpu.zeros(count_shape)
        self.negCount = gpu.zeros(count_shape)
        self.posAvg = gpu.zeros(count_shape)
        self.negAvg = gpu.zeros(count_shape)
    elif compression == '8bit':
        # Left uninitialised (empty_like), unlike the zero-filled buffers above.
        self.max_value_buffer = gpu.empty_like(self.w_grad_next)

    self.next_layer.create_weights()