    def calculate(self, input=()):
        # Augment the external input with the bias and the previous output,
        # then run one LSTM-style step for this single cell.
        input = numpy.append(input, self.bias)
        input = numpy.append(input, self.y_prev)

        # Input gate: how much of the new candidate memory is let in.
        self.suma_in = numpy.multiply(input, self.weights[0])
        self.suma_in = numpy.sum(self.suma_in, dtype=numpy.float32)
        self.y_in = utility.sigmoid(self.suma_in)

        # Forget gate: how much of the old memory is kept.
        self.suma_forget = numpy.multiply(input, self.weights[1])
        self.suma_forget = numpy.sum(self.suma_forget, dtype=numpy.float32)
        self.y_forget = utility.sigmoid(self.suma_forget)

        input = input[:-1]  # drop y_prev; the remaining gates ignore it

        # Candidate memory and cell-state update.
        self.suma_mem = numpy.multiply(input, self.weights[2])
        self.suma_mem = numpy.sum(self.suma_mem, dtype=numpy.float32)
        self.mem = self.y_forget * self.mem + self.y_in * utility.tanh(
            self.suma_mem)

        # Output gate, applied to the squashed cell state.
        self.suma_out = numpy.multiply(input, self.weights[3])
        self.suma_out = numpy.sum(self.suma_out, dtype=numpy.float32)
        self.y_out = utility.sigmoid(self.suma_out)

        self.output = utility.tanh(self.mem) * self.y_out

        self.y_prev = self.output  # recurrent state for the next step

        return self.output
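
# A standalone sketch of the single step that calculate() above performs,
# assuming 1-D weight vectors and scalar cell state; every name below is
# illustrative and not part of the class:
#
#   y_in     = sigmoid(w_in     . [x, bias, y_prev])   input gate
#   y_forget = sigmoid(w_forget . [x, bias, y_prev])   forget gate
#   mem      = y_forget * mem + y_in * tanh(w_mem . [x, bias])
#   output   = sigmoid(w_out . [x, bias]) * tanh(mem)  output gate
import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

x = np.array([0.5, -1.0])                 # external inputs
bias, y_prev, mem = 1.0, 0.0, 0.0         # bias input and recurrent state
w_in, w_forget = np.ones(4), np.ones(4)   # these gates see [x, bias, y_prev]
w_mem, w_out = np.ones(3), np.ones(3)     # these gates see [x, bias] only

full = np.append(np.append(x, bias), y_prev)
y_in = _sigmoid(full @ w_in)
y_forget = _sigmoid(full @ w_forget)
mem = y_forget * mem + y_in * np.tanh(full[:-1] @ w_mem)
output = np.tanh(mem) * _sigmoid(full[:-1] @ w_out)
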
    def _cost_fmin(self, theta, X, y):
        # Unregularised logistic-regression cost, averaged over the
        # m training examples (rows of X).
        m = X.shape[0]
        h = sigmoid(X @ theta).reshape(X.shape[0], 1)
        J = 1/m * -((y.T @ np.log(h)) + ((1 - y.T) @ np.log(1 - h)))

        return J
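
# A self-contained sanity check of the cost above (a sketch, not part of the
# class): with a bias column in X and theta = 0, every prediction is 0.5, so
# J should equal log(2) ~= 0.6931 regardless of the labels.
import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

X_demo = np.c_[np.ones((4, 1)), np.arange(8).reshape(4, 2)]
y_demo = np.array([[0.], [1.], [1.], [0.]])
theta_demo = np.zeros(3)

m_demo = X_demo.shape[0]
h_demo = _sigmoid(X_demo @ theta_demo).reshape(m_demo, 1)
J_demo = 1 / m_demo * -((y_demo.T @ np.log(h_demo)) +
                        ((1 - y_demo.T) @ np.log(1 - h_demo)))
# J_demo is a 1x1 array holding ~0.6931
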
    def predict(self, X):
        # Prepend the bias column, then pick the one-vs-all classifier with
        # the highest sigmoid activation for each example.
        X = np.c_[np.ones((X.shape[0], 1)), X]

        predicted = []
        for i in range(X.shape[0]):
            predicted_y = sigmoid(X[i, :] @ self.thetas.T)
            predicted.append(np.argmax(predicted_y))

        return predicted
    def forward_pass(self, input) -> numpy.ndarray:
        # Flatten the input, then append the bias term and every cell's
        # previous output before computing the gate activations.
        input = tf.reshape(input, self.in_shape)
        input = numpy.append(input, [1])  # append bias
        input = numpy.append(input, [self.previous_outputs])

        # Input gates: one scalar activation per cell.
        input_gates = [
            numpy.matmul(input, self.weights[w][0]) for w in range(self.size)
        ]
        input_gates = [numpy.sum(e) for e in input_gates]
        input_gates = [sigmoid(s) for s in input_gates]
        # Forget gates: one scalar activation per cell.
        forget_gates = [
            numpy.matmul(input, self.weights[w][1]) for w in range(self.size)
        ]
        forget_gates = [numpy.sum(e) for e in forget_gates]
        forget_gates = [sigmoid(s) for s in forget_gates]
        # Candidate memories: drop the previous-output term of the weighted
        # input before summing, since it is not relevant in this gate.
        memory_gates = [
            numpy.multiply(input, self.weights[w][2]) for w in range(self.size)
        ]
        memory_gates = [numpy.delete(e, -1) for e in memory_gates]
        memory_gates = [numpy.sum(e) for e in memory_gates]
        memory_gates = [tanh(n) for n in memory_gates]

        self.memories = [
            forget_gates[cell] * self.memories[cell] +
            input_gates[cell] * memory_gates[cell] for cell in range(self.size)
        ]

        # Output gates: the previous-output term is dropped here as well.
        output_gates = [
            numpy.multiply(input, self.weights[w][3]) for w in range(self.size)
        ]
        output_gates = [numpy.delete(e, -1) for e in output_gates]
        output_gates = [numpy.sum(e) for e in output_gates]
        output_gates = [sigmoid(s) for s in output_gates]

        self.outputs = [
            tanh(self.memories[cell]) * output_gates[cell]
            for cell in range(self.size)
        ]
        self.previous_outputs = numpy.asarray(self.outputs,
                                              dtype=numpy.float64)
        return numpy.asarray(self.outputs, dtype=numpy.float64)
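
# Conventions used by forward_pass() above (as read from the code; the full
# class definition is not shown here): self.weights[cell][gate] holds one
# weight vector per cell, with gate index 0 = input gate, 1 = forget gate,
# 2 = candidate memory and 3 = output gate; self.memories and
# self.previous_outputs persist between calls and carry the recurrent state
# from one time step to the next.
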
    def _cost_function(self, theta, X, y, lambda_):
        # Regularised logistic-regression cost and gradient; m is the number
        # of training examples (rows of X).
        m = X.shape[0]
        h = sigmoid(X @ theta).reshape(X.shape[0], 1)
        J = 1/m * -((y.T @ np.log(h)) + ((1 - y.T) @ np.log(1 - h)))
        grad = np.sum((h - y) * X, axis=0) / m

        # L2 penalty in its standard form, leaving the bias parameter
        # theta[0] unpenalised (theta is assumed to be a flat vector).
        J += lambda_ / (2 * m) * np.sum(theta[1:] ** 2)
        grad[1:] += lambda_ / m * theta[1:]

        # J = self._cost_fmin(theta, X, y)
        # grad = self._grad_fmin(theta, X, y)

        return J, grad
    def forward_pass(self, input) -> numpy.ndarray:
        # Flatten the input, append the bias term and compute one sigmoid
        # activation per unit from its weight vector.
        self.output = tf.reshape(input, self.in_shape)
        self.output = numpy.append(self.output, [1])  # append bias
        self.output = [
            numpy.matmul(self.output, self.weights[w])
            for w in range(self.size)
        ]
        self.output = [numpy.sum(e) for e in self.output]
        self.output = [sigmoid(s) for s in self.output]

        return numpy.asarray(self.output)  # return an ndarray, as annotated
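
# The per-unit loop in forward_pass() above reduces to a single matrix-vector
# product when each self.weights[w] is a 1-D vector over the bias-augmented
# input; a standalone sketch with illustrative values:
import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

x_aug = np.append(np.array([0.5, -1.0]), 1.0)   # input plus bias term
W = np.array([[0.1, 0.2, 0.3],                  # one weight row per unit
              [0.4, 0.5, 0.6]])

loop_out = np.array([_sigmoid(np.sum(np.matmul(x_aug, W[w]))) for w in range(2)])
vect_out = _sigmoid(W @ x_aug)                  # same values in one product
assert np.allclose(loop_out, vect_out)
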
    def predict(self, X):
        m, n = X.shape
        X = np.concatenate((np.ones((m, 1)), X), axis=1)  # prepend bias column
        m, n = X.shape
        y_predict = np.zeros((X.shape[0], 1))
        theta = unravel(self.optimal_theta, self.units, self.hidden_layers,
                        n - 1)

        # Forward-propagate each example and report the index of the largest
        # output activation as its predicted class.
        for i in range(m):
            a = np.array(X[i, :].T).reshape(n, 1)
            for j in range(self.hidden_layers + 1):
                z = np.array(theta[j] @ a)
                a = sigmoid(z)
                a = np.concatenate((np.ones((1, 1)), a), axis=0)  # add bias unit
            a_L = a[1:]  # output activations without the bias unit
            y_predict[i] = np.argmax(a_L)

        return y_predict
    def _forward_prop(self, X, y, theta, i):
        # Forward-propagate a single example i, caching the weighted inputs
        # (z_, with the bias-augmented input first) and the bias-augmented
        # activations (a_) for back-propagation.
        m, n = X.shape
        a_ = []
        z_ = []
        a = np.array(X[i, :].T).reshape(n, 1)
        a_.append(a)
        z_.append(np.concatenate((np.ones((1, 1)), a), axis=0))
        for j in range(self.hidden_layers + 1):
            z = np.array(theta[j] @ a)
            a = sigmoid(z)
            a = np.concatenate((np.ones((1, 1)), a), axis=0)  # add bias unit
            a_.append(a)
            z_.append(z)

        a_L = a[1:]  # output-layer activations without the bias unit
        # Per-example cross-entropy contribution, scaled by 1/m.
        J = -((((y[i, :]) @ np.log(a_L)) + ((1 - y[i, :]) @ np.log(1 - a_L))) /
              m)

        return J, np.array(z_, dtype=object), np.array(a_, dtype=object)
    def _grad_fmin(self, theta, X, y):
        # Gradient of _cost_fmin: (1/m) * X^T (h - y), matching the 1/m
        # scaling of the cost.
        m = X.shape[0]
        h = sigmoid(X @ theta).reshape(X.shape[0], 1)
        grad = np.sum((h - y) * X, axis=0) / m

        return grad
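
# A self-contained finite-difference check of the gradient form used above,
# scaled by 1/m to match the cost (a sketch, not part of the class):
import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def _cost(theta, X, y):
    m = X.shape[0]
    h = _sigmoid(X @ theta).reshape(m, 1)
    return (1 / m * -((y.T @ np.log(h)) + ((1 - y.T) @ np.log(1 - h)))).item()

def _grad(theta, X, y):
    m = X.shape[0]
    h = _sigmoid(X @ theta).reshape(m, 1)
    return np.sum((h - y) * X, axis=0) / m

rng = np.random.default_rng(0)
X_chk = np.c_[np.ones((5, 1)), rng.normal(size=(5, 2))]
y_chk = rng.integers(0, 2, size=(5, 1)).astype(float)
theta_chk = rng.normal(size=3)

eps = 1e-6
numeric = np.array([(_cost(theta_chk + eps * e, X_chk, y_chk) -
                     _cost(theta_chk - eps * e, X_chk, y_chk)) / (2 * eps)
                    for e in np.eye(3)])
assert np.allclose(numeric, _grad(theta_chk, X_chk, y_chk), atol=1e-6)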