def __init__(self, input_size, hidden_size, output_size, bptt_length):
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.bptt_length = bptt_length
    self.delta = 0.1
    # self.weights_before = np.zeros((self.hidden_size + self.input_size + 1, self.hidden_size))
    # self.weights_after = np.zeros((self.hidden_size + 1, self.output_size))
    self.batch_size = 0
    self.first = True
    self.TBPTT = False
    # one FullyConnected layer per LSTM gate, each fed the stacked [h_{t-1}, x_t] vector
    self.fu_f = FullyConnected.FullyConnected(self.hidden_size + self.input_size, self.hidden_size)
    self.fu_i = FullyConnected.FullyConnected(self.hidden_size + self.input_size, self.hidden_size)
    self.fu_c = FullyConnected.FullyConnected(self.hidden_size + self.input_size, self.hidden_size)
    self.fu_o = FullyConnected.FullyConnected(self.hidden_size + self.input_size, self.hidden_size)
    self.fu_final = FullyConnected.FullyConnected(self.hidden_size, self.output_size)
    self.sig_f = Sigmoid.Sigmoid()
    self.sig_i = Sigmoid.Sigmoid()
    self.tanh_c = TanH.TanH()
    self.sig_o = Sigmoid.Sigmoid()
    self.tanh_final = TanH.TanH()
    self.sig_final = Sigmoid.Sigmoid()
    self.state = (0, 0)
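# The constructor above only wires up the per-gate layers. For reference, a
# minimal sketch of one forward time step built from them; the helper name
# _forward_step and the row-vector calling convention for FullyConnected.forward
# are assumptions for illustration, not part of the original class:
def _forward_step(self, x_t, h_prev, c_prev):
    hx = np.hstack((h_prev, x_t)).reshape(1, -1)               # stacked [h_{t-1}, x_t]
    f = self.sig_f.forward(self.fu_f.forward(hx))              # forget gate f_t
    i = self.sig_i.forward(self.fu_i.forward(hx))              # input gate i_t
    c_tilde = self.tanh_c.forward(self.fu_c.forward(hx))       # candidate cell state C̃_t
    o = self.sig_o.forward(self.fu_o.forward(hx))              # output gate o_t
    c_t = f * c_prev + i * c_tilde                             # C_t = f_t · C_{t-1} + i_t · C̃_t
    h_t = o * self.tanh_final.forward(c_t)                     # h_t = o_t · tanh(C_t)
    y_t = self.sig_final.forward(self.fu_final.forward(h_t))   # y_t = σ(W_y · h_t + b_y)
    return h_t, c_t, y_t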
def __init__(self, input_size, hidden_size, output_size):
    super().__init__()
    self.trainable = True
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.last_hidden_state = np.zeros(self.hidden_size)
    self.stored_hidden_state = np.zeros(self.hidden_size)
    self.hidden_state_all = None
    self.hidden_state_all_with_ones = None
    self._state = False
    self._optimizer = None
    # weights left-multiply the augmented state vector [h, x, 1]
    self._weights = np.random.random((self.hidden_size, self.hidden_size + self.input_size + 1))
    self.weights_yh = np.random.random((self.output_size, self.hidden_size + 1))
    self.bias_y = self.weights_yh[:, -1]
    self.bias_h = self._weights[:, -1]
    self.gradient_by = np.zeros(self.output_size)
    self._gradient_weights = np.zeros_like(self._weights)
    self.gradient_h = None
    self.gradient_w_yh = np.zeros_like(self.weights_yh)
    self._weight_optimizer = None
    self._bias_optimizer = None
    self.input_tensor = None
    self.output_tensor = None
    self.arr_stacked_hx_with1 = None
    self.TanH = TanH.TanH()
    self.Sigmoid = Sigmoid.Sigmoid()
    self.sigmoid_acti = None
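# For orientation, the Elman recurrence these matrices implement, as a hedged
# sketch (the helper name _forward_step is an assumption; the bias sits in the
# last column of each matrix, so the state vector is augmented with a 1):
def _forward_step(self, x_t, h_prev):
    hx1 = np.concatenate((h_prev, x_t, [1.0]))        # augmented [h_{t-1}, x_t, 1]
    h_t = self.TanH.forward(self._weights @ hx1)      # h_t = tanh(W_h · [h_{t-1}, x_t, 1])
    y_t = self.Sigmoid.forward(self.weights_yh @ np.concatenate((h_t, [1.0])))  # y_t = σ(W_y · [h_t, 1])
    return h_t, y_t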
def __init__(self, input_size, hidden_size, output_size):
    '''
    :input_size: denotes the dimension of the input vector.
    :hidden_size: denotes the dimension of the hidden state.
    :output_size: denotes the dimension of the output vector.
    '''
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.hidden_state = np.zeros((self.hidden_size))
    self.cell_state = np.zeros((self.hidden_size))
    # Boolean state flagging whether the RNN regards subsequent sequences
    # as belonging to the same long sequence.
    self._memorize = False
    self._optimizer = None
    self._gradient_weights = 0
    # The weights are defined as the weights involved in calculating the
    # hidden state, as a stacked tensor. E.g. if the hidden state is computed
    # with a single FullyConnected layer that receives a stack of the hidden
    # state and the input tensor, the weights of that particular FullyConnected
    # layer are considered the weights of the whole class.
    self._weights = None
    self.sigmoid1 = Sigmoid.Sigmoid()
    self.sigmoid2 = Sigmoid.Sigmoid()
    self.sigmoid3 = Sigmoid.Sigmoid()
    self.sigmoid4 = Sigmoid.Sigmoid()
    self.tanh1 = TanH.TanH()
    self.tanh2 = TanH.TanH()
    # one stacked layer produces the pre-activations of all four gates at once
    self.fully_middle = FullyConnected.FullyConnected(input_size=input_size + hidden_size, output_size=4 * hidden_size)
    self.fully_out = FullyConnected.FullyConnected(input_size=hidden_size, output_size=output_size)
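# fully_middle emits the pre-activations of all four gates in one
# (batch, 4 * hidden_size) tensor. A hedged sketch of how a forward step could
# split and combine them; the helper name _forward_step and the gate ordering
# f, i, c̃, o are assumptions, not taken from the original code:
def _forward_step(self, x_t, h_prev, c_prev):
    hx = np.concatenate((x_t, h_prev)).reshape(1, -1)
    a_f, a_i, a_c, a_o = np.split(self.fully_middle.forward(hx), 4, axis=1)
    f_t = self.sigmoid1.forward(a_f)       # forget gate
    i_t = self.sigmoid2.forward(a_i)       # input gate
    c_tilde = self.tanh1.forward(a_c)      # candidate cell state
    o_t = self.sigmoid3.forward(a_o)       # output gate
    c_t = f_t * c_prev + i_t * c_tilde     # C_t = f_t · C_{t-1} + i_t · C̃_t
    h_t = o_t * self.tanh2.forward(c_t)    # h_t = o_t · tanh(C_t)
    y_t = self.sigmoid4.forward(self.fully_out.forward(h_t))
    return h_t, c_t, y_t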
def __init__(self, input_size, hidden_size, output_size, bptt_length):
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.bptt_length = bptt_length
    # self.weights_before = np.zeros((self.hidden_size + self.input_size + 1, self.hidden_size))
    # self.weights_after = np.zeros((self.hidden_size + 1, self.output_size))
    self.fu1 = FullyConnected.FullyConnected(self.hidden_size + self.input_size, self.hidden_size)
    self.fu2 = FullyConnected.FullyConnected(self.hidden_size, self.output_size)
    self.tanh = TanH.TanH()
    self.sig = Sigmoid.Sigmoid()
    self.delta = 0.1
    self.optimizer = None
    self.TBPTT = False
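# A minimal sketch of the Elman recurrence these two layers realize (the
# helper name _forward_step is a hypothetical addition for illustration):
def _forward_step(self, x_t, h_prev):
    hx = np.concatenate((h_prev, x_t)).reshape(1, -1)  # stacked [h_{t-1}, x_t]
    h_t = self.tanh.forward(self.fu1.forward(hx))      # h_t = tanh(W_h · [h_{t-1}, x_t] + b_h)
    y_t = self.sig.forward(self.fu2.forward(h_t))      # y_t = σ(W_y · h_t + b_y)
    return h_t, y_t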
def __init__(self, input_size, hidden_size, output_size, bptt_length):
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.bptt_length = bptt_length  # batch size, i.e. the time dimension
    self.hidden_state = np.zeros([self.bptt_length, self.hidden_size])
    self.same_sequence = False  # TODO: probably not needed
    # a_t = b + h_{t-1} · W_h + x_t · W_x = b + a_h + a_x
    self.ax = [np.zeros(self.hidden_size)] * self.bptt_length
    self.ah = [np.zeros(self.hidden_size)] * self.bptt_length
    self.a = [np.zeros(self.hidden_size)] * self.bptt_length
    # parameters for the backward pass
    self.hidden_gradients = np.zeros([self.bptt_length + 1, self.hidden_size])
    # error that should be passed out of the whole RNN
    self.error_xt = np.zeros([self.bptt_length, self.input_size])
    # error from h
    self.error_ht = np.zeros([self.bptt_length, self.hidden_size])
    # initialize the optimizer parameters
    self.has_optimizer = False
    self.optimizer = None
    self.learning_rate = 1
    # initialize the output with zeros
    self.output = np.zeros([self.bptt_length, self.output_size])
    # one layer copy per time step, so every step can cache its own input
    self.list_fully_connected_xhh = []
    self.list_fully_connected_hy = []
    self.list_tanh = []
    for i in np.arange(0, self.bptt_length):
        a = FullyConnected.FullyConnected(self.hidden_size + self.input_size, self.hidden_size)
        self.list_fully_connected_xhh.append(a)
    for i in np.arange(0, self.bptt_length):
        b = FullyConnected.FullyConnected(self.hidden_size, self.output_size)
        self.list_fully_connected_hy.append(b)
    for i in np.arange(0, self.bptt_length):
        c = TanH.TanH()
        self.list_tanh.append(c)
    # initialize the weights and their gradients
    self.hy_weight_gradients = np.zeros([self.bptt_length, self.hidden_size + 1, self.output_size])
    self.xhh_weight_gradients = np.zeros([self.bptt_length, self.hidden_size + 1 + self.input_size, self.hidden_size])
    self.hy_weights = np.random.rand(self.hidden_size + 1, self.output_size)
    self.xhh_weights = np.random.rand(self.hidden_size + 1 + self.input_size, self.hidden_size)
    # tie all per-step copies to the same underlying parameters
    for layer in self.list_fully_connected_xhh:
        layer.set_weights(self.xhh_weights)
    for layer in self.list_fully_connected_hy:
        layer.set_weights(self.hy_weights)
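# A hedged sketch of how time step t could use its dedicated layer copies;
# the helper name _forward_step_t is an assumption, and FullyConnected is
# assumed to cache its input on forward, which is why one copy exists per step:
def _forward_step_t(self, x_t, t):
    h_prev = self.hidden_state[t - 1] if t > 0 else np.zeros(self.hidden_size)
    hx = np.concatenate((h_prev, x_t)).reshape(1, -1)             # stacked [h_{t-1}, x_t]
    self.a[t] = self.list_fully_connected_xhh[t].forward(hx)      # a_t = W · [h_{t-1}, x_t] + b
    self.hidden_state[t] = self.list_tanh[t].forward(self.a[t])   # h_t = tanh(a_t)
    self.output[t] = self.list_fully_connected_hy[t].forward(self.hidden_state[t].reshape(1, -1))
    return self.output[t]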
def forward(self, input_tensor):
    if self.first_update:
        # first call: build the per-time-step activation objects and
        # placeholders for the cached FC-layer inputs
        self.first_update = False
        for t in range(input_tensor.shape[0]):
            # one activation object per time step
            self.tanh_cell.append(TanH.TanH())
            self.sigmoid_update.append(Sigmoid.Sigmoid())
            self.sigmoid_forget.append(Sigmoid.Sigmoid())
            self.sigmoid_output.append(Sigmoid.Sigmoid())
            self.tanh_hidden.append(TanH.TanH())
            self.sigmoid_final.append(Sigmoid.Sigmoid())
            # placeholders for the stored fully connected layer inputs
            self.first_X.append(np.array(0))
            self.final_X.append(np.array(0))
    self.hidden_state = np.zeros((input_tensor.shape[0] + 1, self.hidden_size))
    self.cell_state = np.zeros((input_tensor.shape[0] + 1, self.hidden_size))
    # switch between TBPTT and BPTT
    if self.__memorize:
        self.hidden_state[0] = self.preHidden_state_forward
        self.cell_state[0] = self.preCell_state_forward
    else:
        self.preHidden_state_forward = np.zeros(self.hidden_size)
        self.preCell_state_forward = np.zeros(self.hidden_size)
    # initialize
    output_tensor = np.zeros((input_tensor.shape[0], self.output_size))
    self.output_gate_tensor = np.zeros((input_tensor.shape[0], self.hidden_size))
    self.cell_to_hidden_tensor = np.zeros((input_tensor.shape[0], self.hidden_size))
    self.update_tensor = np.zeros((input_tensor.shape[0], self.hidden_size))
    self.forget_tensor = np.zeros((input_tensor.shape[0], self.hidden_size))
    self.cell_tensor = np.zeros((input_tensor.shape[0], self.hidden_size))
    self.log = np.zeros((input_tensor.shape[0], self.hidden_size))
    for t in range(input_tensor.shape[0]):
        # Notation:
        #   self.hidden_state[t] ---> hidden state at time t-1
        #   self.cell_state[t]   ---> cell state at time t-1
        #   otherwise: a[t]      ---> a at time t
        # concatenate hidden state and input ---> [h_{t-1}, x_t]
        concatenated_tensor = np.hstack((self.hidden_state[t], input_tensor[t]))
        concatenated_tensor = concatenated_tensor.reshape(1, concatenated_tensor.shape[0])
        four_tensors = self.first_layer.forward(concatenated_tensor)
        four_tensors = four_tensors[0]
        self.first_X[t] = self.first_layer.X  # store the input needed for the gradient w.r.t. the weights
        # split the output of the first FC layer into the four gate pre-activations
        forget_tensor = four_tensors[:self.hidden_size]
        update_tensor = four_tensors[self.hidden_size:self.hidden_size * 2]
        cell_tensor = four_tensors[self.hidden_size * 2:self.hidden_size * 3]
        output_gate_tensor = four_tensors[self.hidden_size * 3:]
        # candidate cell state: C̃_t = tanh(W_C · [h_{t-1}, x_t] + b_C)
        cell_tensor = self.tanh_cell[t].forward(cell_tensor)
        self.cell_tensor[t] = cell_tensor
        # update gate: i_t = σ(W_i · [h_{t-1}, x_t] + b_i)
        update_tensor = self.sigmoid_update[t].forward(update_tensor)
        self.update_tensor[t] = update_tensor
        # forget gate: f_t = σ(W_f · [h_{t-1}, x_t] + b_f)
        # forget_tensor = self.forget_gate.forward(concatenated_tensor)
        forget_tensor = self.sigmoid_forget[t].forward(forget_tensor)
        self.forget_tensor[t] = forget_tensor
        # output gate: o_t = σ(W_o · [h_{t-1}, x_t] + b_o)
        output_gate_tensor = self.sigmoid_output[t].forward(output_gate_tensor)
        self.output_gate_tensor[t] = output_gate_tensor
        # C_t = f_t · C_{t-1} + i_t · C̃_t
        self.preCell_state_forward = update_tensor * cell_tensor + forget_tensor * self.cell_state[t]
        self.cell_state[t + 1] = self.preCell_state_forward
        # cell state to hidden state: h_t = o_t · tanh(C_t)
        cell_to_hidden_tensor = self.tanh_hidden[t].forward(self.cell_state[t + 1])
        self.cell_to_hidden_tensor[t] = cell_to_hidden_tensor
        self.preHidden_state_forward = output_gate_tensor * cell_to_hidden_tensor
        self.hidden_state[t + 1] = self.preHidden_state_forward
        # output: y_t = σ(W_y · h_t + b_y)
        hidden_temp = self.hidden_state[t + 1].reshape(1, self.hidden_size)
        output_temp = self.final_layer.forward(hidden_temp)
        output_tensor[t] = output_temp[0]
        self.final_X[t] = self.final_layer.X  # store the input needed for the gradient w.r.t. the weights
        output_tensor[t] = self.sigmoid_final[t].forward(output_tensor[t])
    return output_tensor
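# Hypothetical usage of the forward pass above (the class name LSTM and the
# concrete sizes are illustrative assumptions, not taken from the source):
# lstm = LSTM(input_size=13, hidden_size=7, output_size=5)
# x = np.random.rand(9, 13)   # a sequence of 9 time steps with 13 features each
# y = lstm.forward(x)         # -> array of shape (9, 5), one σ-activated output per step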
def __init__(self, input_size, hidden_size, output_size, bptt_length):
    #region Sizes
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.bptt_length = bptt_length
    #endregion Sizes
    self.error_ht1 = np.zeros([self.bptt_length, self.hidden_size])
    self.error_cell = np.zeros([self.bptt_length, self.hidden_size])
    self.learning_rate = 0.001
    self.hidden_state = np.zeros([self.bptt_length, self.hidden_size])
    self.cell_state = np.zeros([self.bptt_length, self.hidden_size])
    #region Sigmoids
    self.sigmoid_f = []
    self.sigmoid_i = []
    self.sigmoid_o = []
    for i in np.arange(0, self.bptt_length):
        self.sigmoid_f.append(Sigmoid.Sigmoid())
    for i in np.arange(0, self.bptt_length):
        self.sigmoid_i.append(Sigmoid.Sigmoid())
    for i in np.arange(0, self.bptt_length):
        self.sigmoid_o.append(Sigmoid.Sigmoid())
    #endregion
    #region TanH
    self.tanh_o = []
    self.tanh_c = []
    for i in np.arange(0, self.bptt_length):
        self.tanh_o.append(TanH.TanH())
    for i in np.arange(0, self.bptt_length):
        self.tanh_c.append(TanH.TanH())
    #endregion
    #region FullyConnected
    self.list_fully_connected_y = []
    self.list_fully_connected_fico = []
    for i in np.arange(0, self.bptt_length):
        a = FullyConnected.FullyConnected(self.hidden_size + self.input_size, 4 * self.hidden_size)
        self.list_fully_connected_fico.append(a)
    for i in np.arange(0, self.bptt_length):
        a = FullyConnected.FullyConnected(self.hidden_size, output_size)
        self.list_fully_connected_y.append(a)
    self.initialize(Initializers.He(), Initializers.He())
    #endregion FullyConnected
    #region Metavariables
    self.same_sequence = False
    self.has_optimizer = False
    #endregion
    #region Optimizer
    self.optimizer = None
    #endregion Optimizer
    #region Input for Multiplication
    self.f = np.zeros([self.bptt_length, self.hidden_size])
    self.i = np.zeros([self.bptt_length, self.hidden_size])
    self.c_tilde = np.zeros([self.bptt_length, self.hidden_size])
    self.tanhO = np.zeros([self.bptt_length, self.hidden_size])
    self.o = np.zeros([self.bptt_length, self.hidden_size])
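    # The buffers above cache the per-step gate activations for the backward
    # pass. For reference, the element-wise combination they feed (the standard
    # LSTM update, stated here as a comment, not original code):
    #   C_t = f_t · C_{t-1} + i_t · c̃_t
    #   h_t = o_t · tanh(C_t),  with tanhO caching tanh(C_t) for the gradients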