def __init__(self, input_size, hidden_size, output_size, bptt_length):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.bptt_length = bptt_length
        self.delta = 0.1
        #self.weights_before=np.zeros((self.hidden_size+self.input_size+1,self.hidden_size))
        #self.weights_after=np.zeros((self.hidden_size+1,self.output_size))

        self.batch_size = 0

        self.first = True
        self.TBPTT = False

        # one fully connected layer per gate, each acting on [h_{t-1}, x_t]
        self.fu_f = FullyConnected.FullyConnected(
            self.hidden_size + self.input_size, self.hidden_size)  # forget gate
        self.fu_i = FullyConnected.FullyConnected(
            self.hidden_size + self.input_size, self.hidden_size)  # input gate
        self.fu_c = FullyConnected.FullyConnected(
            self.hidden_size + self.input_size, self.hidden_size)  # candidate cell
        self.fu_o = FullyConnected.FullyConnected(
            self.hidden_size + self.input_size, self.hidden_size)  # output gate
        self.fu_final = FullyConnected.FullyConnected(self.hidden_size,
                                                      self.output_size)  # hidden -> output

        self.sig_f = Sigmoid.Sigmoid()
        self.sig_i = Sigmoid.Sigmoid()
        self.tanh_c = TanH.TanH()
        self.sig_o = Sigmoid.Sigmoid()

        self.tanh_final = TanH.TanH()
        self.sig_final = Sigmoid.Sigmoid()

        self.state = (0, 0)  # presumably (hidden state, cell state), reset per sequence
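Taken together, these members spell out the standard LSTM recurrence. Below is a minimal sketch of how a single time step could use them (hypothetical: it assumes FullyConnected.forward accepts a row vector and handles the bias internally, which this excerpt does not show):

import numpy as np

def lstm_step(self, x_t, h_prev, c_prev):
    hx = np.hstack((h_prev, x_t)).reshape(1, -1)              # [h_{t-1}, x_t]
    f = self.sig_f.forward(self.fu_f.forward(hx))             # f_t = sigma(W_f . [h_{t-1}, x_t] + b_f)
    i = self.sig_i.forward(self.fu_i.forward(hx))             # i_t = sigma(W_i . [h_{t-1}, x_t] + b_i)
    c_tilde = self.tanh_c.forward(self.fu_c.forward(hx))      # C~_t = tanh(W_c . [h_{t-1}, x_t] + b_c)
    o = self.sig_o.forward(self.fu_o.forward(hx))             # o_t = sigma(W_o . [h_{t-1}, x_t] + b_o)
    c_t = f * c_prev + i * c_tilde                            # C_t = f_t * C_{t-1} + i_t * C~_t
    h_t = o * self.tanh_final.forward(c_t)                    # h_t = o_t * tanh(C_t)
    y_t = self.sig_final.forward(self.fu_final.forward(h_t))  # y_t = sigma(W_y . h_t + b_y)
    return h_t, c_t, y_t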
Example 2
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.trainable = True
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.last_hidden_state = np.zeros(self.hidden_size)
        self.stored_hidden_state = np.zeros(self.hidden_size)
        self.hidden_state_all = None
        self.hidden_state_all_with_ones = None
        self._state = False
        self._optimizer = None
        # left multiplication: W @ [h, x, 1] (the trailing column holds the bias)
        self._weights = np.random.random(
            (self.hidden_size, self.hidden_size + self.input_size + 1))
        self.weights_yh = np.random.random(
            (self.output_size, self.hidden_size + 1))
        self.bias_y = self.weights_yh[:, -1]
        self.bias_h = self._weights[:, -1]

        self.gradient_by = np.zeros(self.output_size)
        self._gradient_weights = np.zeros_like(self._weights)
        self.gradient_h = None
        self.gradient_w_yh = np.zeros_like(self.weights_yh)

        self._weight_optimizer = None
        self._bias_optimizer = None
        self.input_tensor = None
        self.output_tensor = None
        self.arr_stacked_hx_with1 = None
        self.TanH = TanH.TanH()
        self.Sigmoid = Sigmoid.Sigmoid()
        self.sigmoid_acti = None
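The weight shapes pin down the recurrence: _weights is (hidden, hidden + input + 1) and weights_yh is (output, hidden + 1), with the trailing column as bias. A minimal sketch of the step these shapes imply (hypothetical names; the real forward pass is not shown in this excerpt):

import numpy as np

def elman_step(x_t, h_prev, W_h, W_yh):
    hx1 = np.concatenate((h_prev, x_t, [1.0]))  # stacked [h_{t-1}, x_t, 1]
    h_t = np.tanh(W_h @ hx1)                    # left multiplication, as noted above
    y_t = 1.0 / (1.0 + np.exp(-(W_yh @ np.concatenate((h_t, [1.0])))))
    return h_t, y_t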
Example 3
    def __init__(self, input_size, hidden_size, output_size):
        '''
        :input_size: denotes the dimension of the input vector.
        :hidden_size: denotes the dimension of the hidden state.
        :output_size: denotes the dimension of the output vector.
        '''
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.hidden_state = np.zeros((self.hidden_size))
        self.cell_state = np.zeros((self.hidden_size))

        # Boolean state: whether the RNN regards subsequent sequences
        # as belonging to the same long sequence.
        self._memorize = False

        self._optimizer = None
        self._gradient_weights = 0

        # "Weights" here means the weights involved in computing the hidden
        # state, viewed as one stacked tensor. E.g. if the hidden state is
        # computed by a single Fully Connected layer that receives the hidden
        # state stacked with the input tensor, the weights of that layer are
        # the weights of the whole class.
        self._weights = None

        self.sigmoid1 = Sigmoid.Sigmoid()
        self.sigmoid2 = Sigmoid.Sigmoid()
        self.sigmoid3 = Sigmoid.Sigmoid()
        self.sigmoid4 = Sigmoid.Sigmoid()
        self.tanh1 = TanH.TanH()
        self.tanh2 = TanH.TanH()
        self.fully_middle = FullyConnected.FullyConnected(
            input_size=input_size + hidden_size, output_size=4 * hidden_size)
        self.fully_out = FullyConnected.FullyConnected(input_size=hidden_size,
                                                       output_size=output_size)
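The comment above fixes what the weights property should expose. A minimal sketch of that property (an assumption; it presumes FullyConnected keeps its parameters in a weights attribute):

    @property
    def weights(self):
        # the stacked gate layer's weights are the weights of the whole class
        return self.fully_middle.weights

    @weights.setter
    def weights(self, value):
        self.fully_middle.weights = value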
Example 4
    def __init__(self, input_size, hidden_size, output_size, bptt_length):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.bptt_length = bptt_length
        #self.weights_before=np.zeros((self.hidden_size+self.input_size+1,self.hidden_size))
        #self.weights_after=np.zeros((self.hidden_size+1,self.output_size))
        self.fu1 = FullyConnected.FullyConnected(
            self.hidden_size + self.input_size, self.hidden_size)  # recurrent layer over [h, x]
        self.fu2 = FullyConnected.FullyConnected(self.hidden_size,
                                                 self.output_size)  # hidden -> output
        self.tanh = TanH.TanH()
        self.sig = Sigmoid.Sigmoid()
        self.delta = 0.1
        self.optimizer = None
        self.TBPTT = False
Example 5
import sys,os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from Layers.Add import *
from Layers.Mutiple import *  # sic: the module is named "Mutiple" in this repo
from Layers.Sigmoid import *
from Layers.Affine import *
from Layers.SoftMaxWithLoss import *

# y = sigma((w * x) + b), built from the pieces z = w * x and h = z + b
w = 1
x = 1
b = 1

multiple = Mutiple()
add = Add()
sigmoid = Sigmoid()

z = multiple.forward(w, x)
print("z =", z)
h = add.forward(z, b)
print("h =", h)
y = sigmoid.forward(h)
print("y =", y)

dy_h = sigmoid.backward(1)  # seed the backward pass with an upstream gradient of 1
print("dy/dh =", dy_h)
dy_z = add.backward(dy_h)
print("dy/db =", dy_z[1])
print("dy/dwx =", dy_z[0])

dy_w, dy_x = multiple.backward(dy_z[0])
print("dy/dw =", dy_w)
print("dy/dx =", dy_x)
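For reference, with w = x = b = 1 the forward pass gives z = 1, h = 2, and y = sigmoid(2) ≈ 0.8808, so dy/dh = y(1 − y) ≈ 0.1050. Assuming Add.backward routes the upstream gradient to both addends unchanged and Mutiple.backward applies the product rule, dy/db, dy/dwx, dy/dw, and dy/dx all come out to ≈ 0.1050 here as well (since w = x = 1).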
Example 6
    def forward(self, input_tensor):
        if self.first_update:
            # first call: build the per-time-step activation objects and storage
            self.first_update = False

            for t in range(input_tensor.shape[0]):
                # one activation object per time step
                self.tanh_cell.append(TanH.TanH())
                self.sigmoid_update.append(Sigmoid.Sigmoid())
                self.sigmoid_forget.append(Sigmoid.Sigmoid())
                self.sigmoid_output.append(Sigmoid.Sigmoid())
                self.tanh_hidden.append(TanH.TanH())
                self.sigmoid_final.append(Sigmoid.Sigmoid())

                # placeholders for the stored layer inputs (needed in backward)
                self.first_X.append(np.array(0))
                self.final_X.append(np.array(0))

        self.hidden_state = np.zeros(
            (input_tensor.shape[0] + 1, self.hidden_size))
        self.cell_state = np.zeros(
            (input_tensor.shape[0] + 1, self.hidden_size))

        # carry the state across sequences (TBPTT) or reset it (plain BPTT)
        if self.__memorize:
            self.hidden_state[0] = self.preHidden_state_forward
            self.cell_state[0] = self.preCell_state_forward
        else:
            self.preHidden_state_forward = np.zeros(self.hidden_size)
            self.preCell_state_forward = np.zeros(self.hidden_size)

        # initialize
        output_tensor = np.zeros((input_tensor.shape[0], self.output_size))

        self.output_gate_tensor = np.zeros(
            (input_tensor.shape[0], self.hidden_size))
        self.cell_to_hidden_tensor = np.zeros(
            (input_tensor.shape[0], self.hidden_size))
        self.update_tensor = np.zeros(
            (input_tensor.shape[0], self.hidden_size))
        self.forget_tensor = np.zeros(
            (input_tensor.shape[0], self.hidden_size))
        self.cell_tensor = np.zeros((input_tensor.shape[0], self.hidden_size))
        self.log = np.zeros((input_tensor.shape[0], self.hidden_size))
        for t in range(input_tensor.shape[0]):
            # Notation:
            # self.hidden_state[t] ---> hidden state at time t-1
            # self.cell_state[t]   ---> cell state at time t-1
            # otherwise: a[t]      ---> a at time t

            # concatenate hidden state and input ---> [h_{t-1}, x_t]
            concatenated_tensor = np.hstack(
                (self.hidden_state[t], input_tensor[t]))

            concatenated_tensor = concatenated_tensor.reshape(
                1, concatenated_tensor.shape[0])
            four_tensors = self.first_layer.forward(concatenated_tensor)
            four_tensors = four_tensors[0]
            self.first_X[t] = self.first_layer.X  # store the layer input, needed for the weight gradients

            # split the output of the first FC layer into the four gate pre-activations
            forget_tensor = four_tensors[:self.hidden_size]
            update_tensor = four_tensors[self.hidden_size:self.hidden_size * 2]
            cell_tensor = four_tensors[self.hidden_size * 2:self.hidden_size * 3]
            output_gate_tensor = four_tensors[self.hidden_size * 3:]

            # candidate cell state: C~_t = tanh(W_C · [h_{t-1}, x_t] + b_C)
            cell_tensor = self.tanh_cell[t].forward(cell_tensor)
            self.cell_tensor[t] = cell_tensor

            # update gate: i_t = σ(W_i · [h_{t-1}, x_t] + b_i)
            update_tensor = self.sigmoid_update[t].forward(update_tensor)
            self.update_tensor[t] = update_tensor

            # forget gate: f_t = σ(W_f · [h_{t-1}, x_t] + b_f)
            # forget_tensor = self.forget_gate.forward(concatenated_tensor)
            forget_tensor = self.sigmoid_forget[t].forward(forget_tensor)
            self.forget_tensor[t] = forget_tensor

            # output gate: o_t = σ(W_o · [h_{t-1}, x_t] + b_o)
            output_gate_tensor = self.sigmoid_output[t].forward(
                output_gate_tensor)
            self.output_gate_tensor[t] = output_gate_tensor

            # C_t = f_t · C_{t-1} + i_t · C~_t
            self.preCell_state_forward = (update_tensor * cell_tensor
                                          + forget_tensor * self.cell_state[t])
            self.cell_state[t + 1] = self.preCell_state_forward

            # cell state to hidden state: h_t = o_t · tanh(C_t)
            cell_to_hidden_tensor = self.tanh_hidden[t].forward(
                self.cell_state[t + 1])
            self.cell_to_hidden_tensor[t] = cell_to_hidden_tensor
            self.preHidden_state_forward = output_gate_tensor * cell_to_hidden_tensor
            self.hidden_state[t + 1] = self.preHidden_state_forward

            # output: y_t = σ(W_y · h_t + b_y)
            hidden_temp = self.hidden_state[t + 1].reshape(1, self.hidden_size)
            output_temp = self.final_layer.forward(hidden_temp)
            output_tensor[t] = output_temp[0]

            self.final_X[t] = self.final_layer.X  # store the layer input, needed for the weight gradients
            output_tensor[t] = self.sigmoid_final[t].forward(output_tensor[t])

        return output_tensor
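The slice after the first FC layer is the key trick: one affine map produces all four gate pre-activations at once (forget, update, candidate, output, in that order). A self-contained numpy sketch of a single time step under that layout (the function name and shapes are assumptions, not this framework's API):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x_t, h_prev, c_prev, W, b):
    H = h_prev.shape[0]
    z = np.hstack((h_prev, x_t))           # [h_{t-1}, x_t]
    gates = z @ W + b                      # W: (H + input, 4H), b: (4H,)
    f = sigmoid(gates[:H])                 # forget gate
    i = sigmoid(gates[H:2 * H])            # update (input) gate
    c_tilde = np.tanh(gates[2 * H:3 * H])  # candidate cell state
    o = sigmoid(gates[3 * H:])             # output gate
    c_t = f * c_prev + i * c_tilde         # C_t = f · C_{t-1} + i · C~_t
    h_t = o * np.tanh(c_t)                 # h_t = o · tanh(C_t)
    return h_t, c_t

# toy usage with random parameters
rng = np.random.default_rng(0)
H, I = 3, 2
h, c = np.zeros(H), np.zeros(H)
h, c = lstm_step(rng.normal(size=I), h, c, rng.normal(size=(H + I, 4 * H)), np.zeros(4 * H))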
Example 7
    def __init__(self, input_size, hidden_size, output_size, bptt_length):
        #region Sizes
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.bptt_length = bptt_length
        #endregion Sizes

        self.error_ht1 = np.zeros([self.bptt_length, self.hidden_size])
        self.error_cell = np.zeros([self.bptt_length, self.hidden_size])

        self.learning_rate = 0.001
        self.hidden_state = np.zeros([self.bptt_length, self.hidden_size])
        self.cell_state = np.zeros([self.bptt_length, self.hidden_size])

        #region Sigmoids
        self.sigmoid_f = [Sigmoid.Sigmoid() for _ in range(self.bptt_length)]
        self.sigmoid_i = [Sigmoid.Sigmoid() for _ in range(self.bptt_length)]
        self.sigmoid_o = [Sigmoid.Sigmoid() for _ in range(self.bptt_length)]
        #endregion

        #region TanH
        self.tanh_o = [TanH.TanH() for _ in range(self.bptt_length)]
        self.tanh_c = [TanH.TanH() for _ in range(self.bptt_length)]
        #endregion

        #region FullyConnected
        # per time step: one stacked layer for the f/i/c/o gates, one output layer
        self.list_fully_connected_fico = [
            FullyConnected.FullyConnected(
                self.hidden_size + self.input_size, 4 * self.hidden_size)
            for _ in range(self.bptt_length)
        ]
        self.list_fully_connected_y = [
            FullyConnected.FullyConnected(self.hidden_size, self.output_size)
            for _ in range(self.bptt_length)
        ]

        self.initialize(Initializers.He(), Initializers.He())
        #endregion FullyConnected

        #region Metavariables
        self.same_sequence = False
        self.has_optimizer = False
        #endregion

        #region Optimizer
        self.optimizer = None
        #endregion Optimizer

        #region Input for Multiplication
        self.f = np.zeros([self.bptt_length, self.hidden_size])        # forget gate outputs
        self.i = np.zeros([self.bptt_length, self.hidden_size])        # update gate outputs
        self.c_tilde = np.zeros([self.bptt_length, self.hidden_size])  # candidate cell states
        self.tanhO = np.zeros([self.bptt_length, self.hidden_size])    # tanh(C_t) values
        self.o = np.zeros([self.bptt_length, self.hidden_size])        # output gate outputs