def find_values(data, weights, output, level):
    # Output layer (level 2): the weights form a single vector, so the
    # result ("wynik" is Polish for "result") is one sigmoid activation.
    if level == 2:
        wynik = sigmoid(sum(weights * data))
        return wynik
    # Hidden layer: one sigmoid activation per row of the weight matrix.
    for i in range(len(weights)):
        output[i] = sigmoid(sum(weights[i] * data))
    return output
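# The snippets in this section call sigmoid() without defining it. Below is a
# minimal sketch of the assumed helper (the standard logistic function,
# elementwise over numpy arrays); the actual project may import it from a
# utility module instead.
import numpy as np

def sigmoid(x):
    # Logistic function: maps any real input to (0, 1), elementwise.
    return 1.0 / (1.0 + np.exp(-x))

# Hypothetical usage of find_values for a single three-input neuron
# (all values made up for illustration):
# data = np.array([0.5, -1.0, 2.0])
# weights = np.array([0.1, 0.4, -0.2])
# find_values(data, weights, None, level=2)  # -> one sigmoid activation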
def lstm_cell_forward(self, xt, a_prev, c_prev):
    '''
    Implement a single forward step of the LSTM cell.

    Arguments:
    xt -- input data at timestep "t", numpy array of shape (n_x, m)
    a_prev -- hidden state at timestep "t-1", numpy array of shape (n_a, m)
    c_prev -- memory state at timestep "t-1", numpy array of shape (n_a, m)

    Uses the following parameters stored on self:
    Wf -- weight matrix of the forget gate, numpy array of shape (n_a, n_a + n_x)
    bf -- bias of the forget gate, numpy array of shape (n_a, 1)
    Wi -- weight matrix of the update gate, numpy array of shape (n_a, n_a + n_x)
    bi -- bias of the update gate, numpy array of shape (n_a, 1)
    Wc -- weight matrix of the first "tanh", numpy array of shape (n_a, n_a + n_x)
    bc -- bias of the first "tanh", numpy array of shape (n_a, 1)
    Wo -- weight matrix of the output gate, numpy array of shape (n_a, n_a + n_x)
    bo -- bias of the output gate, numpy array of shape (n_a, 1)
    Wy -- weight matrix relating the hidden state to the output, numpy array of shape (n_y, n_a)
    by -- bias relating the hidden state to the output, numpy array of shape (n_y, 1)

    Returns:
    a_next -- next hidden state, of shape (n_a, m)
    c_next -- next memory state, of shape (n_a, m)
    yt_pred -- prediction at timestep "t", numpy array of shape (n_y, m)
    cache -- tuple of values needed for the backward pass, contains
             (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt)

    Note: ft/it/ot stand for the forget/update/output gates, cct stands for
    the candidate value (c tilde), c stands for the memory value.
    '''
    # Retrieve dimensions from the shapes of xt and Wy
    n_x, m = xt.shape
    n_y, n_a = self.Wy.shape

    # Concatenate a_prev and xt
    concat = np.zeros((n_a + n_x, m))
    concat[:n_a, :] = a_prev
    concat[n_a:, :] = xt

    # Compute ft, it, cct, c_next, ot, a_next using the formulas from figure (4)
    ft = sigmoid(np.dot(self.Wf, concat) + self.bf)   # forget gate
    it = sigmoid(np.dot(self.Wi, concat) + self.bi)   # update gate
    cct = np.tanh(np.dot(self.Wc, concat) + self.bc)  # candidate value
    c_next = ft * c_prev + it * cct                   # new memory state
    ot = sigmoid(np.dot(self.Wo, concat) + self.bo)   # output gate
    a_next = ot * np.tanh(c_next)                     # new hidden state

    # Compute the prediction of the LSTM cell
    yt_pred = softmax(np.dot(self.Wy, a_next) + self.by)

    # Store values needed for backward propagation in the cache
    cache = (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt)

    return a_next, c_next, yt_pred, cache
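# lstm_cell_forward() also relies on a softmax() helper that is not defined in
# this section. A minimal, numerically stable sketch of the assumed function,
# applied column-wise so each of the m examples gets a probability vector:
def softmax(x):
    # Subtract the per-column max before exponentiating to avoid overflow.
    e = np.exp(x - np.max(x, axis=0, keepdims=True))
    return e / np.sum(e, axis=0, keepdims=True)

# Hypothetical single-step usage, assuming a `cell` object carrying the
# Wf..by attributes described in the docstring (n_x=3, n_a=5, n_y=2, m=10):
# xt = np.random.randn(3, 10)
# a_prev = np.zeros((5, 10))
# c_prev = np.zeros((5, 10))
# a_next, c_next, yt_pred, cache = cell.lstm_cell_forward(xt, a_prev, c_prev)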
def forward(self, inputs):
    '''
    Perform a forward pass of the LSTM over the given input sequence.

    :param inputs: sequence of one-hot vectors, each of shape (input_size, 1).
    :return: final output of shape (n_y, 1).
    '''
    n_y, n_h = self.Wy.shape

    y = np.zeros((n_y, 1))
    h = np.zeros((n_h, 1))
    c = np.zeros((n_h, 1))

    self.last_hs = {0: h}  # save hidden states
    self.last_cs = {0: c}  # save cell states
    self.last_inputs = inputs
    self.caches = {}

    # Perform each step of the LSTM
    for i, x in enumerate(inputs):
        n_x, m = x.shape

        # Concatenate hidden state and input
        concat = np.zeros((n_x + n_h, 1))
        concat[:n_h, :] = h
        concat[n_h:, :] = x

        ft = sigmoid(np.dot(self.Wf, concat) + self.bf)   # forget gate
        it = sigmoid(np.dot(self.Wi, concat) + self.bi)   # update gate
        cct = np.tanh(np.dot(self.Wc, concat) + self.bc)  # candidate value
        c = ft * c + it * cct                             # new cell state
        self.last_cs[i + 1] = c

        ot = sigmoid(np.dot(self.Wo, concat) + self.bo)   # output gate
        h = ot * np.tanh(c)                               # new hidden state
        self.last_hs[i + 1] = h

        # Cache everything needed for the backward pass at this timestep
        cache = (h, c, self.last_hs[i], self.last_cs[i], ft, it, cct, ot, x)
        self.caches[i] = cache

    # Output is computed from the final hidden state only
    y = np.dot(self.Wy, h) + self.by
    return y
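# Hypothetical end-to-end usage of forward(), assuming an `lstm` object whose
# weight shapes match the docstrings above and whose inputs are one-hot
# vectors of length vocab_size (all names and numbers made up):
# vocab_size = 27
# inputs = [np.zeros((vocab_size, 1)) for _ in range(4)]
# for t, idx in enumerate([3, 0, 19, 7]):  # e.g. an encoded 4-character word
#     inputs[t][idx] = 1.0
# y = lstm.forward(inputs)                 # raw scores of shape (n_y, 1)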
def run_model(self):
    if self.silent is False:
        print("Starting energy: {:.3e}".format(self.energy_start))
        print("Starting cluster distance: {:.3e}".format(self.cluster_dist_start))
        print("Running model ...")

    self.relax_structure()
    rolemodel_index = self.choose_rolemodels()
    self.plot_struct_clustering(rolemodel_index)

    for iteration in range(1, self.niter + 1):
        if self.silent is False:
            print("\tIteration {}".format(iteration))

        self.rattle_structure()
        self.minimize_cluster_distance()
        rolemodel_index = self.choose_rolemodels()
        self.plot_struct_clustering(rolemodel_index)
        self.relax_structure()

        # Grade the chosen role models: an energy drop gives a positive grade,
        # an energy increase a negative one, via 2*sigmoid(dE) - 1 in (-1, 1).
        self.grades[rolemodel_index] += 2 * sigmoid(self.energies[-3] - self.energies[-1]) - 1
        self.grade_results = np.append(self.grade_results, self.grades[np.newaxis], axis=0)

        rolemodel_index = self.choose_rolemodels()
        self.plot_struct_clustering(rolemodel_index)

        if self.global_minimum is not None and np.isclose(self.energies[-1], self.global_minimum, atol=self.atol):
            self.global_min_iter = iteration
            break

    if self.global_min_iter == 0:
        self.global_min_iter = 1.1 * self.niter

    if self.silent is False:
        print("Done!")
        if self.global_min_iter is not None:
            if self.global_min_iter < self.niter + 1:
                print("\nThe global minimum was found at iteration number {}!".format(self.global_min_iter))
            else:
                print("\nThe global minimum was not reached...")
        else:
            print("\nThe minimum energy is: {:.3e}".format(self.energy_opt))
            print("A change in energy is found to be: {:.3e}".format(self.energy_opt - self.energy_start))
            print("\nThe cluster distance is: {:.3e}".format(self.cluster_dist_opt))
            print("A change in cluster distance is found to be: {:.3e}".format(
                self.cluster_dist_opt - self.cluster_dist_start
            ))
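# The grade update in run_model() maps an energy difference to a value in
# (-1, 1) via 2*sigmoid(dE) - 1. A small standalone check of that mapping,
# using the sigmoid sketch above (dE values are made up for illustration):
for dE in (-5.0, 0.0, 5.0):
    grade = 2 * sigmoid(dE) - 1
    print("dE = {:+.1f} -> grade = {:+.3f}".format(dE, grade))
# dE = -5.0 -> grade = -0.987 (energy went up: role models are penalised)
# dE = +0.0 -> grade = +0.000 (no change: neutral)
# dE = +5.0 -> grade = +0.987 (energy dropped: role models are rewarded)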
def predict(self, X):
    if self.weights is None:
        raise Exception("Not trained yet!")
    # Logistic-regression style prediction: sigmoid of the affine transform.
    return maths.sigmoid(np.dot(X, self.weights) + self.bias)
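# Hypothetical usage of predict(), assuming a trained `model` with a weight
# vector of shape (n_features,) and a scalar bias, and that `maths` is the
# project module providing sigmoid (names and data made up for illustration):
# X = np.array([[0.2, 1.5, -0.3],
#               [1.1, -0.7, 0.9]])     # two samples, three features
# probs = model.predict(X)             # probabilities in (0, 1), shape (2,)
# labels = (probs >= 0.5).astype(int)  # threshold at 0.5 for class labels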