import numpy as np

# Noted: Activation, NetOutput and Timestep are helper classes defined
# elsewhere in this project; Net only relies on the methods called below.


class Net(object):
    # Read, Write (see the property and setter at the bottom of the class).
    activation_method = None
    # Readonly (class-level placeholders; the @property definitions below
    # override them).
    output_value = None  # Output value
    output_partial = None
    previous_output = None
    previous_output_partial = None

    def __init__(self, has_recurrent=False):
        self.weights = []  # <number>
        self.recurrent_weights = []  # <number>
        self.bias = 0.0
        self.delta_value = 0.0  # Current delta value will be the next delta value.
        self.has_recurrent = has_recurrent  # Has recurrent inputs? (hidden nets do, output nets do not)
        # Get/Set of the activation function both live here: net.activation.method.
        # self.activation_method is exposed separately for convenient access.
        self.activation = Activation()
        self.output = NetOutput()
        self.timesteps = []  # <Timestep Object>

    # weights: <number>
    def reset_weights(self, weights=[]):
        if not weights:
            return
        self.weights = np.copy(weights).tolist()

    # recurrent_weights: <number>
    def reset_recurrent_weights(self, recurrent_weights=[]):
        if not recurrent_weights:
            return
        self.recurrent_weights = np.copy(recurrent_weights).tolist()

    def weight_for_index(self, index=0):
        return self.weights[index]

    def recurrent_weight_for_index(self, index=0):
        return self.recurrent_weights[index]

    # The bias is zeroed here as well.
    def remove_all_weights(self):
        del self.weights[:]
        del self.recurrent_weights[:]
        self.bias = 0.0

    # Randomizing weights and bias.
    def randomize_weights(self, random_count=1, min=-0.5, max=0.5):
        del self.weights[:]
        self.bias = 0.0
        random = np.random
        for i in range(0, random_count):
            self.weights.append(random.uniform(min, max))
        self.bias = random.uniform(min, max)

    def randomize_recurrent_weights(self, random_count=1, min=-0.5, max=0.5):
        if self.has_recurrent:
            del self.recurrent_weights[:]
            random = np.random
            for i in range(0, random_count):
                self.recurrent_weights.append(random.uniform(min, max))

    # Net output. (Hidden nets have recurrent inputs, output nets do not)
    def net_output(self, inputs=[], recurrent_outputs=[]):
        # First run the usual feed-forward step into the hidden layer.
        summed_signal = np.dot(inputs, self.weights) + self.bias
        # If there is a recurrent layer, add its contribution as well.
        if len(recurrent_outputs) > 0:
            summed_signal += np.dot(recurrent_outputs, self.recurrent_weights)
        # Neuron output.
        output_value = self.activation.activate(summed_signal)
        self.output.add_sum_input(summed_signal)
        self.output.add_output_value(output_value)
        return output_value

    def clear(self):
        self.output.refresh()

    # For hidden layer nets to calculate their delta weights with the recurrent layer,
    # and for output layer nets to calculate their delta weights without the recurrent layer.
    # layer_outputs: hidden layer outputs or output layer outputs.
    def calculate_delta_weights(self, learning_rate=1.0, layer_outputs=[], recurrent_outputs=[]):
        # Use a Timestep object as the record container for the weight
        # corrections computed at each BP timestep.
        timestep = Timestep()
        # For delta bias.
        timestep.delta_bias = learning_rate * self.delta_value
        # For delta of weights.
        for weight_index, weight in enumerate(self.weights):
            # Calculate the delta of this weight.
            last_layer_output = layer_outputs[weight_index]
            # SGD: new w = old w + (-learning_rate * delta_value * x)
            #   where x is b[t][h] (hidden output), b[t-1][h] (recurrent output)
            #   or x[i] (input feature).
            # Output layer: delta_value = ∂E/∂w[hk] = -error_value * f'(net)
            # Hidden layer: delta_value = ∂E/∂w[ih]
            #   = f'(net[t][h]) * (SUM_k(delta_value[t][k] * w[hk])
            #                      + SUM_h'(delta_value[t+1][h'] * w[h'h]))
            delta_weight = learning_rate * self.delta_value * last_layer_output
            timestep.add_delta_weight(delta_weight)
        # For delta of recurrent weights.
        # (Noted: the output layer has no recurrent weights.)
        for recurrent_index, recurrent_weight in enumerate(self.recurrent_weights):
            last_recurrent_output = recurrent_outputs[recurrent_index]
            recurrent_delta_weight = learning_rate * self.delta_value * last_recurrent_output
            timestep.add_recurrent_delta_weight(recurrent_delta_weight)
        self.timesteps.append(timestep)

    def renew_weights(self, new_weights=[], new_recurrent_weights=[]):
        if not new_weights and not new_recurrent_weights:
            return
        self.remove_all_weights()
        self.reset_weights(new_weights)
        self.reset_recurrent_weights(new_recurrent_weights)

    def renew_bias(self):
        sum_changes = 0.0
        for timestep in self.timesteps:
            sum_changes += timestep.delta_bias
        # new b(j) = old b(j) + [-L * -delta(j)]; the two minus signs cancel.
        self.bias += sum_changes

    # Renew weights and bias.
    def renew(self):
        # Sum up the delta weights that share the same index across all timesteps.
        new_weights = []
        new_recurrent_weights = []
        for weight_index, weight in enumerate(self.weights):
            # For normal weights.
            sum_delta_changes = 0.0
            for timestep in self.timesteps:
                sum_delta_changes += timestep.delta_weight(weight_index)
            new_weight = weight + sum_delta_changes
            new_weights.append(new_weight)
        for recurrent_index, recurrent_weight in enumerate(self.recurrent_weights):
            # For recurrent weights.
            sum_recurrent_changes = 0.0
            for timestep in self.timesteps:
                sum_recurrent_changes += timestep.recurrent_delta_weight(recurrent_index)
            new_recurrent_weight = recurrent_weight + sum_recurrent_changes
            new_recurrent_weights.append(new_recurrent_weight)
        self.renew_weights(new_weights, new_recurrent_weights)
        self.renew_bias()
        del self.timesteps[:]
        self.delta_value = 0.0  # Must be reset to zero because of BPTT.

    '''
    @ Getters, all readonly
    '''
    # The last output value from output_values.
    @property
    def output_value(self):
        return self.output.last_output_value

    # The partial derivative of the last output value.
    @property
    def output_partial(self):
        # For an activation function with a linear output (e.g. SGN, ReLU, etc.),
        # the partial derivative must be taken on the summed input signal;
        # for a nonlinear activation function it is taken on the output value.
        output_value = self.output.last_sum_input if self.activation.is_linear else self.output_value
        return self.activation.partial(output_value)

    # The output of the previous moment, e.g. b[t-1][h].
    @property
    def previous_output(self):
        return self.output.previous_output

    # The partial derivative of the previous moment's output.
    @property
    def previous_output_partial(self):
        output_value = self.output.previous_sum_input if self.activation.is_linear else self.previous_output
        return self.activation.partial(output_value)

    @property
    def activation_method(self):
        return self.activation.method

    '''
    @ Setter
    '''
    @activation_method.setter
    def activation_method(self, method):
        self.activation.method = method
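
# ---------------------------------------------------------------------------
# A minimal usage sketch (not part of the original class): it shows how the
# pieces of Net fit together for one hidden net with a recurrent connection.
# It assumes the Activation, NetOutput and Timestep helpers defined elsewhere
# in this project behave as the calls above expect; the input numbers are
# arbitrary, and delta_value would normally be filled in by the BPTT recursion.
def _demo_net():
    hidden_net = Net(has_recurrent=True)
    hidden_net.randomize_weights(random_count=3)            # 3 input weights + bias
    hidden_net.randomize_recurrent_weights(random_count=1)  # 1 recurrent weight

    # Forward pass over two timesteps; t=0 has no previous hidden output yet.
    out_t0 = hidden_net.net_output(inputs=[0.2, -0.4, 0.7], recurrent_outputs=[])
    out_t1 = hidden_net.net_output(inputs=[0.1, 0.3, -0.2], recurrent_outputs=[out_t0])

    # Backward pass: the trainer sets delta_value per timestep, records the
    # corrections, then applies the accumulated deltas in one renew() call.
    hidden_net.delta_value = 0.05  # hypothetical value for illustration
    hidden_net.calculate_delta_weights(learning_rate=0.1,
                                       layer_outputs=[0.1, 0.3, -0.2],
                                       recurrent_outputs=[out_t0])
    hidden_net.renew()
    return out_t1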
class Neuron:
    # Private usage.
    __iteration_times = 0  # Iteration count
    __iteration_error = 0.0  # Accumulated error of the current iteration

    def __init__(self):
        self.tag = self.__class__.__name__
        self.samples = []  # All training samples (feature vectors)
        self.targets = []  # Target output of each sample
        self.weights = []  # Weights
        self.bias = 0.0  # Bias value (not used by _net_input in this implementation)
        self.learning_rate = 1.0  # Learning rate
        self.max_iteration = 1  # Maximum number of iterations
        self.convergence = 0.001  # Convergence error
        self.activation = Activation()

    # Iteration cost function: after each full iteration, average the cost
    # functions of all training samples (used to decide convergence).
    def _iteration_cost_function(self):
        # 1/2 * (sum of all samples' cost functions /
        #        (number of samples * number of target outputs per sample))
        return 0.5 * (self.__iteration_error / (len(self.samples) * 1))

    # Per-sample cost function: the factor 1/2 is applied once for the whole
    # iteration inside _iteration_cost_function(), so it is not applied here.
    def _cost_function(self, error_value=0.0):
        self.__iteration_error += (error_value ** 2)

    def _net_input(self, features=[]):
        return np.dot(features, self.weights)

    def _net_output(self, net_input=0.0):
        return self.activation.activate(net_input)

    def _start(self, iteration, completion):
        self.__iteration_times += 1
        self.__iteration_error = 0.0  # Reset so the cost reflects only this iteration.
        # Every step is deliberately spelled out here so the flow is easy to follow.
        for index, features in enumerate(self.samples):
            # Forward
            target_value = self.targets[index]
            net_input = self._net_input(features)
            net_output = self._net_output(net_input)
            # Backward
            error_value = target_value - net_output
            derived_activation = self.activation.partial(net_output)
            # Calculate the cost function of this training sample.
            self._cost_function(error_value)
            # Update all weights with the formulas:
            #   delta_value = -(target value - net output) * f'(net)
            #   delta_weight = -learning_rate * delta_value * x1
            #   (Noted: the minus signs of the learning rate and delta_value cancel out here.)
            #   new weight, e.g. new w1 = old w1 + delta_weight_1
            delta_value = error_value * derived_activation
            delta_weights = np.multiply(self.learning_rate * delta_value, features)
            new_weights = np.add(self.weights, delta_weights)
            self.weights = new_weights
        # Finished an iteration; check the stop conditions.
        if (self.__iteration_times >= self.max_iteration) or (
                self._iteration_cost_function() <= self.convergence):
            if completion is not None:
                completion(self.__iteration_times, self.weights)
        else:
            if iteration is not None:
                iteration(self.__iteration_times, self.weights)
            self._start(iteration, completion)

    # One training sample: features -> one target.
    def add_pattern(self, features=[], target=0):
        # Ignore empty feature arrays.
        if not features:
            return
        # samples: [feature arrays], targets: [target values]
        self.samples.append(features)
        self.targets.append(target)

    def initialize_weights(self, weights=[]):
        if not weights:
            return
        self.weights = weights

    # All-zero initial weights.
    def zero_weights(self):
        if not self.samples:
            return
        length = len(self.samples[0])
        for i in range(length):
            self.weights.append(0.0)

    def randomize_weights(self, min=0.0, max=1.0):
        # Floats drawn uniformly from [min, max).
        random = np.random
        input_count = len(self.samples[0])
        weights = []
        for i in range(0, input_count):
            weights.append(random.uniform(min, max))
        self.initialize_weights(weights)

    # iteration and completion are callback functions.
    def training(self, iteration, completion):
        self.__iteration_times = 0
        self.__iteration_error = 0.0
        self._start(iteration, completion)

    def predict(self, features=[]):
        return self._net_output(self._net_input(features))
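
# ---------------------------------------------------------------------------
# A minimal usage sketch for Neuron (not part of the original class). It
# assumes Activation() defaults to a differentiable method such as sigmoid;
# the patterns are separable by a line through the origin, since _net_input
# does not add the bias term. The callback names are illustrative only.
def _demo_neuron():
    neuron = Neuron()
    neuron.add_pattern(features=[1.0, 0.5], target=1)
    neuron.add_pattern(features=[-1.0, -0.5], target=0)
    neuron.add_pattern(features=[0.8, 1.0], target=1)
    neuron.add_pattern(features=[-0.7, -1.0], target=0)
    neuron.learning_rate = 0.5
    neuron.max_iteration = 100
    neuron.convergence = 0.001
    neuron.zero_weights()

    def on_iteration(times, weights):
        print('iteration %d, weights %s' % (times, weights))

    def on_completion(times, weights):
        print('done after %d iterations, weights %s' % (times, weights))
        print('prediction for [0.9, 0.6]: %s' % neuron.predict([0.9, 0.6]))

    neuron.training(on_iteration, on_completion)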