def grad_loss(self, *args):
    """
    Compute the gradient of the logistic loss function

    Inputs:
    - X: N x D array of data; each row is a data point.
    - y: 1-dimensional array of length N with real values.
    - reg: (float) regularization strength.

    Returns:
    - gradient with respect to self.theta; an array of the same shape as theta
    """
    theta, X, y, reg = args
    m, dim = X.shape
    grad = np.zeros((dim,))
    ##########################################################################
    # Compute the gradient of the loss function for regularized logistic     #
    # regression                                                             #
    # TODO: 1 line of code expected                                          #
    ##########################################################################
    grad = X.T.dot(utils.sigmoid(X.dot(theta)) - y) / m + reg * theta / m
    # The bias term is not regularized
    grad[0] = X[:, 0].dot(utils.sigmoid(X.dot(theta)) - y) / m
    ###########################################################################
    #                           END OF YOUR CODE                              #
    ###########################################################################
    return grad
def logistic_predict(weights, data):
    """
    Compute the probabilities predicted by the logistic classifier.

    Note: N is the number of examples and
          M is the number of features per example.

    Inputs:
        weights:    (M+1) x 1 vector of weights, where the last element
                    corresponds to the bias (intercept).
        data:       N x M data matrix where each row corresponds to one data point.
    Outputs:
        y:          N x 1 vector of probabilities. This is the output of the classifier.
    """
    N, M = data.shape
    # Append a column of ones so the last weight acts as the bias term.
    augdata = np.ones((N, M + 1))
    augdata[:, :-1] = data
    z = np.dot(augdata, weights)   # z is N x 1
    y = sigmoid(z)
    return y
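# Most of the snippets in this listing call a sigmoid helper (`sigmoid` or `utils.sigmoid`)
# that is not shown here. A minimal sketch, assuming an elementwise NumPy implementation
# (not the original helper); inputs are clipped to keep exp() in a safe range, at the cost
# of a tiny approximation for |z| > 30:
import numpy as np

def sigmoid(z):
    """Elementwise logistic function 1 / (1 + exp(-z))."""
    z = np.clip(z, -30.0, 30.0)
    return 1.0 / (1.0 + np.exp(-z))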
def cost(self, theta1, theta2):
    z1 = np.dot(self.train, theta1)
    a2 = utils.sigmoid(z1)
    a2 = np.append(np.ones((a2.shape[0], 1)), a2, 1)
    z2 = np.dot(a2, theta2)
    h = utils.sigmoid(z2)
    return -sum(sum(self.goal * np.log(h) + (1 - self.goal) * np.log(1 - h))) / self.m
def forward(self, x_t, h_tm1, c_tm1):
    i_t = sigmoid(T.dot(x_t, self.W_xi) + T.dot(h_tm1, self.W_hi) + c_tm1 * self.W_ci)
    f_t = sigmoid(T.dot(x_t, self.W_xf) + T.dot(h_tm1, self.W_hf) + c_tm1 * self.W_cf)
    c_t = f_t * c_tm1 + i_t * self.activation(T.dot(x_t, self.W_xc) + T.dot(h_tm1, self.W_hc))
    o_t = sigmoid(T.dot(x_t, self.W_xo) + T.dot(h_tm1, self.W_ho) + c_t * self.W_co)
    h_t = o_t * self.activation(c_t)
    return h_t, c_t
def _step(x_t, ct_1, ht_1, Wi, Wf, Wo, Wc, Whi, Whf, Who, Whc, bi, bf, bo, bc):
    i = sigmoid(T.dot(x_t, Wi) + T.dot(ht_1, Whi) + bi)
    f = sigmoid(T.dot(x_t, Wf) + T.dot(ht_1, Whf) + bf)
    o = sigmoid(T.dot(x_t, Wo) + T.dot(ht_1, Who) + bo)
    c = tanh(T.dot(x_t, Wc) + T.dot(ht_1, Whc) + bc)
    c_new = i * c + f * ct_1
    h_new = o * tanh(c_new)
    return c_new, h_new
def _step(x_t, ct_1, ht_1, W, Wh, b, dim):
    tmp = T.dot(x_t, W) + T.dot(ht_1, Wh) + b
    i = sigmoid(_slice(tmp, 0, dim))
    f = sigmoid(_slice(tmp, 1, dim))
    o = sigmoid(_slice(tmp, 2, dim))
    c = tanh(_slice(tmp, 3, dim))
    c_new = i * c + f * ct_1
    h_new = o * tanh(c_new)
    return c_new, h_new
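# The fused-gate steps above (and their indexed variants below) rely on a `_slice` helper
# that is not shown. A minimal sketch, assuming the four gate pre-activations are stacked
# along the last axis in i, f, o, c order (lstm_numpy further down defines its own 1-d
# version inline):

def _slice(_x, n, dim):
    """Return the n-th block of width `dim` along the last axis of `_x`."""
    if _x.ndim == 1:
        return _x[n * dim:(n + 1) * dim]
    return _x[:, n * dim:(n + 1) * dim]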
def predict(self, newData=None):
    if newData is None:
        newData = self.train
    else:
        newData = np.append(np.ones((newData.shape[0], 1)), newData, 1)
    z = utils.sigmoid(np.dot(newData, self.inputWeight))
    z = np.append(np.ones((z.shape[0], 1)), z, 1)
    digitProb = utils.sigmoid(np.dot(z, self.hiddenWeight))
    return np.argmax(digitProb, 1)
def _step_index(x_t, ct_1, ht_1, Wi, Wf, Wo, Wc, Whi, Whf, Who, Whc, bi, bf, bo, bc):
    # x_t: array of type int32.
    # Use indexing on the Wi, Wf, Wo and Wc matrices instead of computing the product with
    # the one-hot representation of the input, for computational and memory efficiency.
    i = sigmoid(Wi[x_t] + T.dot(ht_1, Whi) + bi)
    f = sigmoid(Wf[x_t] + T.dot(ht_1, Whf) + bf)
    o = sigmoid(Wo[x_t] + T.dot(ht_1, Who) + bo)
    c = tanh(Wc[x_t] + T.dot(ht_1, Whc) + bc)
    c_new = i * c + f * ct_1
    h_new = o * tanh(c_new)
    return c_new, h_new
def _step_index(x_t, ct_1, ht_1, W, Wh, b, dim):
    # x_t: array of type int32.
    # Use indexing on the W matrix instead of computing the dot product with the one-hot
    # representation of the input, for computational and memory efficiency.
    tmp = W[x_t] + T.dot(ht_1, Wh) + b
    i = sigmoid(_slice(tmp, 0, dim))
    f = sigmoid(_slice(tmp, 1, dim))
    o = sigmoid(_slice(tmp, 2, dim))
    c = tanh(_slice(tmp, 3, dim))
    c_new = i * c + f * ct_1
    h_new = o * tanh(c_new)
    return c_new, h_new
def forward(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = identity_function(a3)
    return y
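# A minimal usage sketch for the dictionary-based forward pass above. The layer sizes
# (2-3-2-1) and random initialization are illustrative assumptions, not values from the
# original code; `identity_function` is assumed to be the identity, and `sigmoid` as
# sketched near the top of this listing.

def identity_function(x):
    return x

rng = np.random.default_rng(0)
network = {
    'W1': rng.normal(size=(2, 3)), 'b1': np.zeros(3),
    'W2': rng.normal(size=(3, 2)), 'b2': np.zeros(2),
    'W3': rng.normal(size=(2, 1)), 'b3': np.zeros(1),
}
y = forward(network, np.array([1.0, 0.5]))   # -> array of shape (1,)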
def get_reconstruction_cross_entropy(self):
    pre_sigmoid_activation_h = numpy.dot(self.input, self.W) + self.hbias
    sigmoid_activation_h = sigmoid(pre_sigmoid_activation_h)
    pre_sigmoid_activation_v = numpy.dot(sigmoid_activation_h, self.W.T) + self.vbias
    sigmoid_activation_v = sigmoid(pre_sigmoid_activation_v)
    cross_entropy = -numpy.mean(
        numpy.sum(self.input * numpy.log(sigmoid_activation_v) +
                  (1 - self.input) * numpy.log(1 - sigmoid_activation_v),
                  axis=1))
    return cross_entropy
def _CD1(self, visible_data, weights, visible_bias, hidden_bias):
    N = np.shape(visible_data)[0]

    # Positive phase
    visible_state = visible_data
    if self.visible_type == "SIGMOID":
        visible_state = self._samplebinary(visible_state)
    elif self.visible_type == "LINEAR":
        visible_state = self._add_gaussian_noise(visible_state)

    nw = np.dot(visible_state, weights) + np.tile(hidden_bias, (N, 1))
    if self.hidden_type == "SIGMOID":
        hidden_probability = u.sigmoid(nw)
        hidden_state = self._samplebinary(hidden_probability)
    elif self.hidden_type == "LINEAR":
        hidden_state = self._add_gaussian_noise(nw)

    gradient1 = self._gradient_weights(visible_state, hidden_state, weights)
    visible_biases1 = self._gradient_biases(visible_state, visible_bias)
    hidden_biases1 = self._gradient_biases(hidden_state, hidden_bias)

    # Negative phase
    # Skip sampling as well...
    visible_state = np.dot(hidden_state, weights.T) + np.tile(visible_bias, (N, 1))
    if self.visible_type == "SIGMOID":
        visible_state = u.sigmoid(visible_state)
        # visible_probability = u.sigmoid(visible_state)
        # visible_state = self._samplebinary(visible_probability)  # skip sampling here

    nw = np.dot(visible_state, weights) + np.tile(hidden_bias, (N, 1))
    if self.hidden_type == "SIGMOID":
        hidden_probability = u.sigmoid(nw)
        hidden_state = hidden_probability
    elif self.hidden_type == "LINEAR":
        hidden_state = nw

    gradient2 = self._gradient_weights(visible_state, hidden_state, weights)
    visible_biases2 = self._gradient_biases(visible_state, visible_bias)
    hidden_biases2 = self._gradient_biases(hidden_state, hidden_bias)

    # Gradients
    weights = gradient1 - gradient2
    visible_biases = visible_biases1 - visible_biases2
    hidden_biases = hidden_biases1 - hidden_biases2
    return weights, visible_biases, hidden_biases
def train(set_, dimension, lambda_):
    w0 = 0.
    w = np.zeros(dimension)
    prev_error = 0.
    h = [0.5] * len(set_)
    current_error = calc_error(set_, w, lambda_, h, dimension)
    while abs(current_error - prev_error) > 0.001:
        delta_w0 = 0.
        delta_w = np.zeros(dimension)
        for i in range(len(set_)):
            h = utils.sigmoid(np.dot(set_[i][1], w) + w0)
            y = set_[i][0]
            delta_w0 += float(h) - y
            delta_w = delta_w + (float(h) - y) * set_[i][1]
        n, error, w, w0 = line_search(set_, dimension, lambda_, w, w0,
                                      delta_w, delta_w0, current_error)
        if n == 0:
            break
        prev_error = current_error
        current_error = error
    h = [float(utils.sigmoid(np.dot(tup[1], w) + w0)) for tup in set_]
    return w, w0, calc_error(set_, w, 0, h, dimension)
def minibatch_update(self, x, y, lr, regularization):
    n_sample = x.shape[0]
    info = x
    hidden_cache = []
    for i in xrange(self.n_hidden + 1):
        if i == self.n_hidden:
            probs = softmax(info.dot(self.W[i]) + self.b[i])
        else:
            info = sigmoid(info.dot(self.W[i]) + self.b[i])
            hidden_cache.append(info)
    loss = neg_log_likelihood(probs, y)
    probs[np.arange(n_sample), y] -= 1.0
    errors = probs
    for i in range(self.n_hidden, -1, -1):
        if i >= 1:
            hidden_out = hidden_cache[i - 1]
            grad_hidden_out = errors.dot(self.W[i].T)
            self.W[i] -= (lr * (hidden_out.T).dot(errors) + regularization * self.W[i])
            self.b[i] -= lr * np.sum(errors, axis=0)
            errors = hidden_out * (1 - hidden_out) * grad_hidden_out
        else:
            hidden_out = x
            self.W[i] -= (lr * (hidden_out.T).dot(errors) + regularization * self.W[i])
            self.b[i] -= lr * np.sum(errors, axis=0)
    return loss
def logistic(weights, data, targets, hyperparameters):
    """
    Calculate negative log likelihood and its derivatives with respect to weights.
    Also return the predictions.

    Note: N is the number of examples and
          M is the number of features per example.

    Inputs:
        weights:    (M+1) x 1 vector of weights, where the last element
                    corresponds to bias (intercept).
        data:       N x M data matrix where each row corresponds to one data point.
        targets:    N x 1 vector of binary targets. Values should be either 0 or 1.
        hyperparameters: The hyperparameters dictionary.

    Outputs:
        f:  The sum of the loss over all data points. This is the objective
            that we want to minimize.
        df: (M+1) x 1 vector of derivative of f w.r.t. weights.
        y:  N x 1 vector of probabilities.
    """
    N, M = data.shape
    augdata = np.ones((N, M + 1))
    augdata[:, :-1] = data
    z = np.dot(augdata, weights)   # z is N x 1
    # f is a scalar
    f = float(np.dot(np.transpose(1 - targets), z)[0] + sum(np.log(np.exp(-z) + 1)))
    df = np.dot(np.transpose(augdata),
                np.subtract(sigmoid(z).reshape((N, 1)), targets.reshape((N, 1))))
    y = np.array(logistic_predict(weights, data))
    return f, df, y
def loss(self, *args):
    """
    Compute the logistic loss function

    Inputs:
    - X: N x D array of data; each row is a data point.
    - y: 1-dimensional array of length N with real values.
    - reg: (float) regularization strength.

    Returns:
    - loss as a single float
    """
    theta, X, y, reg = args
    m, dim = X.shape
    J = 0
    ##########################################################################
    # Compute the loss function for regularized logistic regression          #
    # TODO: 1-2 lines of code expected                                       #
    ##########################################################################
    hx = utils.sigmoid(X.dot(theta))
    J = -1 * (np.log(hx).T.dot(y) + np.log(1 - hx).T.dot(1 - y)) / m \
        + reg * theta[1:].T.dot(theta[1:]) / (2 * m)
    ###########################################################################
    #                           END OF YOUR CODE                              #
    ###########################################################################
    return J
def loss(self, *args):
    """
    Compute the logistic loss function

    Inputs:
    - X: N x D array of data; each row is a data point.
    - y: 1-dimensional array of length N with real values.

    Returns:
    - loss as a single float
    """
    theta, X, y = args
    m, dim = X.shape
    J = 0
    ##########################################################################
    # Compute the loss function for unregularized logistic regression        #
    # TODO: 1-2 lines of code expected                                       #
    ##########################################################################
    hx = utils.sigmoid(X.dot(theta))
    J = -1 * (np.log(hx).T.dot(y) + np.log(1 - hx).T.dot(1 - y)) / m
    ###########################################################################
    #                           END OF YOUR CODE                              #
    ###########################################################################
    return J
def propup(self, v):
    # Stacking 2d convolutions here along the depth dimension.
    # https://github.com/lmjohns3/py-rbm/blob/master/lmj/rbm.py seems to use 1-d
    # convolutions, and I'm not sure if that's ok; not going to escape a couple of
    # loops though.
    #
    # Using theano's conventions:
    # h is a 4d matrix (num_examples, num_feature_maps, feature_map_height,
    # feature_map_width); one feature map roughly corresponds to one hidden unit.
    # By the same convention, v is a 4d matrix too: (num_examples, num_images per
    # example (1, or 3 for RGB), image_height, image_width).
    # The same format is used for weights: (number of feature maps for the visible
    # layer (1 or 3), number of feature maps for the hidden layer, filter height,
    # filter width).
    num_examples = v.shape[0]
    activations = np.zeros((
        num_examples,
        self.num_fm,
        self.img_height - self.fm_height + 1,
        self.img_width - self.fm_width + 1
    ))
    for i in xrange(num_examples):
        for j in xrange(self.num_fm):
            activations[i, j, :, :] = convolve2d(v[i, 0, :, :],
                                                 self.w[0, j, ::-1, ::-1],
                                                 mode='valid')
    return sigmoid(activations + self.b_hid[None, :, None, None])
def predict(self, X):
    """
    Use the trained weights of this linear classifier to predict labels for
    data points.

    Inputs:
    - X: m x d array of training data.

    Returns:
    - y_pred: Predicted output for the data in X. y_pred is a 1-dimensional
      array of length m, and each element is a class label from one of the
      set of labels -- the one with the highest probability.
    """
    y_pred = np.zeros(X.shape[0])
    ###########################################################################
    # Compute the predicted outputs for X                                     #
    # TODO: 2 lines of code expected                                          #
    ###########################################################################
    hx = utils.sigmoid(X.dot(self.theta.T))
    y_pred = np.argmax(hx, axis=1)
    ###########################################################################
    #                           END OF YOUR CODE                              #
    ###########################################################################
    return y_pred
def get_prediction(feature_vector, w):
    sum_val = w[0]
    for feature in feature_vector:
        feature_id = int(feature.split(':')[0])
        # The strength of each feature is 1.0, so we skip multiplying by it.
        sum_val += w[feature_id]
    return sigmoid(sum_val)
def _get_p(self, xt, wt=None):
    if wt is None:
        wt = self._get_w(xt)
    wTx = sum(wt)
    # bounded sigmoid
    wTx = max(min(wTx, 20.), -20.)
    return sigmoid(wTx)
def feed_forward(self, train_input):
    self.layers[0].input = train_input
    self.layers[0].output = train_input
    for i in xrange(len(self.layers) - 1):
        self.layers[i + 1].input = (self.weights[i].transpose() *
                                    self.layers[i].output) + self.bias[i]
        self.layers[i + 1].output = sigmoid(self.layers[i + 1].input)
    return self.layers[-1].output
def loss(self, *args):
    """
    Compute the logistic loss function

    Inputs:
    - X: N x D array of data; each row is a data point.
    - y: 1-dimensional array of length N with real values.
    - reg: (float) regularization strength.

    Returns:
    - loss as a single float
    """
    theta, X, y, reg = args
    m, dim = X.shape
    J = 0
    ##########################################################################
    # Compute the loss function for regularized logistic regression          #
    # TODO: 1-2 lines of code expected                                       #
    ##########################################################################
    J = 1. / m * sum([-y[i] * np.log(utils.sigmoid(theta.dot(X[i])))
                      - (1 - y[i]) * np.log(1 - utils.sigmoid(theta.dot(X[i])))
                      for i in xrange(m)])
    J += reg / (2. * m) * sum(theta[1:] ** 2)
    ###########################################################################
    #                           END OF YOUR CODE                              #
    ###########################################################################
    return J
def loss(self, *args):
    """
    Compute the logistic loss function

    Inputs:
    - X: N x D array of data; each row is a data point.
    - y: 1-dimensional array of length N with real values.

    Returns:
    - loss as a single float
    """
    theta, X, y = args
    m, dim = X.shape
    J = 0
    ##########################################################################
    # Compute the loss function for unregularized logistic regression        #
    # TODO: 1-2 lines of code expected                                       #
    ##########################################################################
    J = 1. / m * sum([-y[i] * np.log(utils.sigmoid(theta.dot(X[i])))
                      - (1 - y[i]) * np.log(1 - utils.sigmoid(theta.dot(X[i])))
                      for i in xrange(m)])
    ###########################################################################
    #                           END OF YOUR CODE                              #
    ###########################################################################
    return J
def grad_loss(self, *args):
    """
    Compute the gradient of the logistic loss function

    Inputs:
    - X: N x D array of data; each row is a data point.
    - y: 1-dimensional array of length N with real values.

    Returns:
    - gradient with respect to theta; an array of the same shape as theta
    """
    theta, X, y = args
    m, dim = X.shape
    grad = np.zeros((dim,))
    ##########################################################################
    # Compute the gradient of the loss function for unregularized logistic   #
    # regression                                                             #
    # TODO: 1 line of code expected                                          #
    ##########################################################################
    for j in xrange(dim):
        grad[j] = 1. / m * sum([(utils.sigmoid(theta.dot(X[i])) - y[i]) * X[i][j]
                                for i in xrange(m)])
    ###########################################################################
    #                           END OF YOUR CODE                              #
    ###########################################################################
    return grad
def grad_loss(self, *args):
    """
    Compute the gradient of the logistic loss function

    Inputs:
    - X: N x D array of data; each row is a data point.
    - y: 1-dimensional array of length N with real values.
    - reg: (float) regularization strength.

    Returns:
    - gradient with respect to self.theta; an array of the same shape as theta
    """
    theta, X, y, reg = args
    m, dim = X.shape
    grad = np.zeros((dim,))
    ##########################################################################
    # Compute the gradient of the loss function for regularized logistic     #
    # regression (the bias term theta[0] is not regularized)                 #
    # TODO: 1 line of code expected                                          #
    ##########################################################################
    for j in xrange(dim):
        grad[j] = 1. / m * sum([(utils.sigmoid(theta.dot(X[i])) - y[i]) * X[i][j]
                                for i in xrange(m)])
        grad[j] += 1. * reg / m * theta[j] if j >= 1 else 0
    ###########################################################################
    #                           END OF YOUR CODE                              #
    ###########################################################################
    return grad
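# A quick way to sanity-check the analytic gradients in the loss/grad_loss pairs above is a
# central finite-difference comparison against the corresponding loss. A minimal sketch,
# assuming `clf` is an object exposing the regularized loss/grad_loss signatures shown
# above (the name `check_gradient` and the tolerance are illustrative, not from the source):

def check_gradient(clf, theta, X, y, reg, eps=1e-5):
    analytic = clf.grad_loss(theta, X, y, reg)
    numeric = np.zeros_like(theta)
    for j in range(theta.size):
        t_plus, t_minus = theta.copy(), theta.copy()
        t_plus[j] += eps
        t_minus[j] -= eps
        numeric[j] = (clf.loss(t_plus, X, y, reg) - clf.loss(t_minus, X, y, reg)) / (2 * eps)
    # Relative error should be tiny (e.g. < 1e-6) if the analytic gradient is correct.
    return np.max(np.abs(analytic - numeric) /
                  np.maximum(1e-8, np.abs(analytic) + np.abs(numeric)))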
def logistic(weights, data, targets, hyperparameters):
    """
    Calculate negative log likelihood and its derivatives with respect to weights.
    Also return the predictions.

    Note: N is the number of examples and
          M is the number of features per example.

    Inputs:
        weights:    (M+1) x 1 vector of weights, where the last element
                    corresponds to bias (intercept).
        data:       N x M data matrix where each row corresponds to one data point.
        targets:    N x 1 vector of binary targets. Values should be either 0 or 1.
        hyperparameters: The hyperparameters dictionary.

    Outputs:
        f:  The sum of the loss over all data points. This is the objective
            that we want to minimize.
        df: (M+1) x 1 vector of derivative of f w.r.t. weights.
        y:  N x 1 vector of probabilities.
    """
    t = np.transpose(np.repeat(np.reshape(weights[:-1], (len(weights) - 1, 1)),
                               len(data), axis=1))
    f_e = data * t
    z_sums = np.sum(f_e, axis=1)
    y = sigmoid(z_sums + weights[-1])
    f = np.sum(np.log(1 + np.exp(-z_sums - weights[-1])) +
               (1 - np.transpose(targets)) * (z_sums + weights[-1]))
    df = np.sum(data * np.transpose((-np.exp(-z_sums - weights[-1]) /
                                     (1 + np.exp(-z_sums - weights[-1]))) +
                                    (1 - np.transpose(targets))), axis=0)
    df = np.append(df, np.sum(np.transpose((-np.exp(-z_sums - weights[-1]) /
                                            (1 + np.exp(-z_sums - weights[-1]))) +
                                           (1 - np.transpose(targets))), axis=0))
    df = np.reshape(df, (len(df), 1))
    return f, df, np.reshape(y, (len(y), 1))
def predict(self, X):
    """
    Use the trained weights of this linear classifier to predict labels for
    data points.

    Inputs:
    - X: N x D array of training data. Each row is a D-dimensional point.

    Returns:
    - y_pred: Predicted output for the data in X. y_pred is a 1-dimensional
      array of length N, and each element is a real number.
    """
    y_pred = np.zeros(X.shape[0])
    ###########################################################################
    # Compute the predicted outputs for X                                     #
    # TODO: 1 line of code expected                                           #
    ###########################################################################
    y_pred = np.round(utils.sigmoid(self.theta.T.dot(X.T)))
    ###########################################################################
    #                           END OF YOUR CODE                              #
    ###########################################################################
    return y_pred
def think(self, inputs):
    cur = inputs
    states = [cur]
    for syn in self.synapses:
        cur = utils.sigmoid(np.dot(cur, syn))
        states.append(cur)
    return states
def logistic_predict(weights, data):
    """
    Compute the probabilities predicted by the logistic classifier.

    Note: N is the number of examples and
          M is the number of features per example.

    Inputs:
        weights:    (M+1) x 1 vector of weights, where the last element
                    corresponds to the bias (intercept).
        data:       N x M data matrix where each row corresponds to one data point.
    Outputs:
        y:          N x 1 vector of probabilities. This is the output of the classifier.
    """
    b = weights[-1]
    w = weights[:-1]
    y = []
    for i in xrange(len(data)):
        y.append(sigmoid(np.dot(w.T, data[i]) + b))
    return y
def gradient(self, beta):
    """Summary

    Args:
        beta (TYPE): (D+1)-dim parameter (including bias) parametrising conditional
            probabilities in logistic regression.

    Returns:
        TYPE: (D+1)-dim gradient of the total log-likelihood with respect to beta.
    """
    sigmas = sigmoid(np.dot(self.X_ext, beta.T))
    vect = (self.y - sigmas) * self.X_ext
    return sum(vect)
def policy_network_forward_pass(self, x):
    """Computes the forward pass of the policy network.

    The policy network outputs a probability over possible actions (i.e. P(going left)),
    and uses relu activations in the hidden layer.
    """
    # W1 is a row but x is also a row; we want a column, so transpose.
    h = np.dot(self.model['W1'], x.T)
    h[h < 0] = 0  # relu on hidden state
    p_left = np.dot(self.model['W2'], h)
    p_left = utils.sigmoid(p_left)
    return p_left, h
def predict(X, weights):
    # Used for predicting the output after the weights have been updated, to check results.
    n = len(weights) // 2
    for i in range(0, n - 1):
        Z = np.dot(X, weights["W" + str(i + 1)].T) + weights["b" + str(i + 1)].T
        A = utils.relu(Z)
        X = A
    Z = np.dot(X, weights["W" + str(n)].T) + weights["b" + str(n)].T
    A = utils.sigmoid(Z)
    return A
def excite(self, inputs, bias, filter_sigmoid=False):
    '''Excites this layer with the inputs and the bias and returns the outputs.
    If filter_sigmoid is True, the output from each neuron is passed through the
    sigmoid response curve.'''
    outputs = []
    for n in self:
        res = n.excite(inputs, bias)
        if filter_sigmoid:
            res = sigmoid(res)
        outputs.append(res)
    return outputs
def tensor_factorization_loss(self):
    # Compute and monitor the tensor factorization loss
    # (not directly minimized by the model).
    samples = [
        np.sort(sample_without_replacement(n_population=vsize, n_samples=32))
        for vsize in self.vsizes
    ]
    ijk_ = self.all_tuples_indices[self.all_tuples.isin(it.product(*samples[:-1]))]
    num = self.tensor[ijk_][:, samples[-1]]
    if self.sp:
        num = num.toarray()
    den = self.marginals[0][samples[0]]
    for i in range(1, self.order):
        den = np.tensordot(den, self.marginals[i][samples[i]], axes=0)
    PMI_np = np.log(num.ravel() / den.ravel() + 1e-30)
    batch = cartesian(samples)
    return np.sum(np.abs(
        sigmoid(PMI_np - np.log(self.k_neg))
        - sigmoid(self.model(tf.tuple([batch[:, i] for i in range(self.order)])))))
def _predict(self, Xi):
    """Auxiliary function of predict.

    Arguments:
        Xi {list} -- 1d list object with int or float

    Returns:
        float -- prediction of yi
    """
    z = self._linear(Xi)
    return sigmoid(z)
def mean_pseudo_likelihood(self, x, idx):
    xfe = self.free_energy(x)
    # Flip the idx-th visible unit of each image
    x[:, idx] = 1 - x[:, idx]
    nxfe = self.free_energy(x)
    # Flip back the idx-th visible unit
    x[:, idx] = 1 - x[:, idx]
    # logsig = np.log(sigmoid(nxfe - xfe))
    cost = np.mean(self.nvisible * np.log(sigmoid(nxfe - xfe)))
    return cost
def __predict_nodes(self, nodes_idx: list, relations_idx: list, known_triples: set):
    scores = np.zeros((len(nodes_idx), self._gp.num_vertices))
    doubler_score_node, doubler_score_doc = self._doubler.predict_nodes(
        nodes_idx, relations_idx)
    if self._normalize_score:
        doubler_score_node = utils.sigmoid(doubler_score_node)
        if doubler_score_doc is not None:
            doubler_score_doc = utils.sigmoid(doubler_score_doc)
    scores = np.sum([scores, doubler_score_node], axis=0)
    if doubler_score_doc is not None:
        scores = np.sum([scores, doubler_score_doc], axis=0)

    ranks = []
    num_scores = len(scores)
    for idx, row in tqdm.tqdm(enumerate(scores), total=num_scores, desc='> Compute Ranking'):
        is_head = idx % 2 == 0
        node_idx_given = nodes_idx[idx]
        relation_idx_given = relations_idx[idx]
        node_idx_wanted = nodes_idx[idx + 1] if is_head else nodes_idx[idx - 1]
        threshold_lower = row[node_idx_wanted]
        rank_cleaned = 1
        for node_idx, score in enumerate(row):
            if score <= threshold_lower:
                continue
            elif is_head and (node_idx_given, relation_idx_given, node_idx) not in known_triples:
                rank_cleaned += 1
            elif not is_head and (node_idx, relation_idx_given, node_idx_given) not in known_triples:
                rank_cleaned += 1
        ranks.append(rank_cleaned)
    return ranks, scores
def sample(self, h_prev, c_prev, num_char):
    hs = np.copy(h_prev)
    cs = np.copy(c_prev)
    x = np.zeros((self.len_of_vocab, 1))
    x[np.random.randint(0, self.len_of_vocab), 0] = 1
    idxs = []
    for _ in range(num_char):
        I = np.dot(self.Wi, x) + np.dot(self.Ri, hs) + self.Pi * cs + self.bi
        i_gate = sigmoid(I)
        # Forget gate uses its own bias self.bf (matching forward()), not self.bo
        F = np.dot(self.Wf, x) + np.dot(self.Rf, hs) + self.Pf * cs + self.bf
        f_gate = sigmoid(F)
        Z = np.dot(self.Wz, x) + np.dot(self.Rz, hs) + self.bz
        z = np.tanh(Z)
        cs = i_gate * z + f_gate * cs
        O = np.dot(self.Wo, x) + np.dot(self.Ro, hs) + self.Po * cs + self.bo
        o_gate = sigmoid(O)
        hs = o_gate * np.tanh(cs)
        if self.mode == 'lstm':
            out = np.dot(self.Wout, hs) + self.bout
        if self.mode == 'blstm':
            out = np.dot(self.Wout, hs)
        p = softmax(out)
        idx = np.random.choice(self.len_of_vocab, 1, p=p.ravel())[0]
        x = np.zeros((self.len_of_vocab, 1))
        x[idx, 0] = 1
        idxs.append(idx)
    print(''.join(idx_to_char[c] for c in idxs))
def loadGameData(self, filePath):
    numWeights = len(self.weights)
    ret = []
    with open(filePath, 'r') as f:
        data = f.readlines()
    # cleanup
    data = [i.rstrip(' \n') for i in data]
    data = [i.rstrip('\n') for i in data]
    for i in range(self.numberOfGames):
        newGame = SingleGame()
        counter = 0
        while data[counter] != 'END':
            newGame.movesList.append(data[counter])
            counter += 1
            try:
                featureValues = [float(i) for i in data[counter].split(' ')]
            except:
                print([i for i in data[counter].split(' ')])
                sys.exit(0)
            if counter % 4 == 3:
                # Player 2's move, so the two halves of the features are interchanged
                t = len(featureValues)
                featureValues = featureValues[t // 2:] + featureValues[:t // 2]
            newGame.featureValuesList.append(featureValues)
            newGame.stateValues.append(sigmoid(dotProduct(featureValues, self.weights)))
            if data[counter + 1] == 'END':
                # Victory & loss score
                newGame.stateValues[-1] = 1.0
                newGame.stateValues[-2] = 0.0
            try:
                assert (len(newGame.featureValuesList[-1]) == numWeights)
            except AssertionError:
                print(len(newGame.featureValuesList[-1]))
                print(numWeights)
                sys.exit(0)
            counter += 1
        ret.append(newGame)
    return ret
def _forward_propagate(self, x, l):
    ai, i = np.concatenate(([1], x)), 0
    acts = [ai]
    # Forward propagation
    while i < l:
        ai = sigmoid(np.dot(self.W[i], ai))
        ai = np.concatenate(([1], ai))
        acts.append(ai)
        i = i + 1
    return acts
def sample(self, seed, number_of_characters_to_generate):
    h = self.h
    C = self.C
    x = char_to_one_hot(seed)
    ixes = []
    for t in range(number_of_characters_to_generate):
        x = np.matmul(self.W_1, x) + self.b_1
        f = sigmoid(np.matmul(self.W_f, np.concatenate((h, x))) + self.b_f)
        i = sigmoid(np.matmul(self.W_i, np.concatenate((h, x))) + self.b_i)
        C_hat = np.tanh(np.matmul(self.W_c, np.concatenate((h, x))) + self.b_c)
        C = f * C + i * C_hat
        o = sigmoid(np.matmul(self.W_o, np.concatenate((h, x))) + self.b_o)
        h = o * np.tanh(C)
        y = np.matmul(self.W_2, h) + self.b_2
        p = np.exp(y) / np.sum(np.exp(y))
        ix = np.random.choice(range(alphabet_size), p=p)
        x = np.zeros(alphabet_size)
        x[ix] = 1
        ixes.append(ix)
    return indices_to_string(ixes)
def forward(self, h_prev, c_prev):
    self.hs = {}
    self.cs = {}
    self.i_gate = {}
    self.f_gate = {}
    self.o_gate = {}
    self.z = {}
    self.hs[-1] = np.copy(h_prev)
    self.cs[-1] = np.copy(c_prev)
    self.p = {}
    self.loss = 0
    for t in range(self.time_steps):
        x = np.zeros((self.len_of_vocab, 1))
        x[self.input[t], 0] = 1
        I = np.dot(self.Wi, x) + np.dot(self.Ri, self.hs[t - 1]) + self.Pi * self.cs[t - 1] + self.bi
        self.i_gate[t] = sigmoid(I)
        F = np.dot(self.Wf, x) + np.dot(self.Rf, self.hs[t - 1]) + self.Pf * self.cs[t - 1] + self.bf
        self.f_gate[t] = sigmoid(F)
        Z = np.dot(self.Wz, x) + np.dot(self.Rz, self.hs[t - 1]) + self.bz
        self.z[t] = np.tanh(Z)
        self.cs[t] = self.i_gate[t] * self.z[t] + self.f_gate[t] * self.cs[t - 1]
        O = np.dot(self.Wo, x) + np.dot(self.Ro, self.hs[t - 1]) + self.Po * self.cs[t] + self.bo
        self.o_gate[t] = sigmoid(O)
        self.hs[t] = self.o_gate[t] * np.tanh(self.cs[t])
        if self.mode == 'lstm':
            out = np.dot(self.Wout, self.hs[t]) + self.bout
        self.p[t] = softmax(out)
        self.loss += -np.log(self.p[t][self.output[t], 0])
def forwardProp(self, input_vectors):
    """
    input_vectors can be a tuple, list or ndarray
    """
    for i in np.arange(1, len(self.structure)):
        layer = self.structure[i]
        prev_layer = self.structure[i - 1]
        if prev_layer['bias_node']:
            # change 1. to self.bias?
            input_vectors = np.concatenate(
                (input_vectors, np.ones((input_vectors.shape[0], 1))), axis=1)
        output_vectors = np.dot(layer['weight_matrix'], input_vectors.T).T
        # activation functions
        if layer['dropout_prob'] != 0:
            if layer['activation'] == 'relu':
                layer['output_vectors'] = layer['dropout_vector'] * relu(output_vectors)
            elif layer['activation'] == 'softmax':
                layer['output_vectors'] = layer['dropout_vector'] * softmax(output_vectors.T).T
            elif layer['activation'] == 'sigmoid':
                layer['output_vectors'] = layer['dropout_vector'] * sigmoid(output_vectors)
            else:
                raise Exception('Activation function not recognised')
        else:
            if layer['activation'] == 'relu':
                layer['output_vectors'] = relu(output_vectors)
            elif layer['activation'] == 'softmax':
                layer['output_vectors'] = softmax(output_vectors.T).T
            elif layer['activation'] == 'sigmoid':
                layer['output_vectors'] = sigmoid(output_vectors)
            else:
                raise Exception('Activation function not recognised')
        input_vectors = layer['output_vectors']
def predictions(self, beta, tau=0.5):
    """Summary

    Args:
        beta (TYPE): (D+1)-dim parameter (including bias) parametrising conditional
            probabilities in logistic regression.
        tau (float, optional): threshold for predictions.

    Returns:
        TYPE: 1-D array of predicted labels, assigning 1 if the conditional probability
            of 1 (given by logistic regression) > tau.
    """
    sigmas = sigmoid(np.dot(self.X_ext, beta.T))
    self.y_pred = np.array(sigmas > tau * np.ones((self.N, 1)), dtype=int)
    return self.y_pred
def predict(self, X):
    Z = np.dot(self.w.T, X) + self.b
    A = sigmoid(Z)
    m = X.shape[1]
    Y_pred = np.zeros((1, m))
    for i in range(A.shape[1]):
        if A[0, i] >= 0.5:
            Y_pred[0, i] = 1
        else:
            Y_pred[0, i] = 0
    return Y_pred
def fit(self, x, y, learningRate=0.001, noEpochs=1000):
    self.coef_ = [0.0 for _ in range(1 + len(x[0]))]  # y = w0 + w1 * x1 + w2 * x2 + ...
    for epoch in range(noEpochs):
        for i in range(len(x)):                                  # for each sample from the training data
            ycomputed = sigmoid(self.eval(x[i], self.coef_))     # estimate the output
            crtError = float(ycomputed) - float(y[i])            # compute the error for the current sample
            for j in range(0, len(x[0])):                        # update the coefficients
                self.coef_[j + 1] = self.coef_[j + 1] - learningRate * crtError * float(x[i][j])
            self.coef_[0] = self.coef_[0] - learningRate * crtError * 1
    self.intercept_ = self.coef_[0]
    self.coef_ = self.coef_[1:]
def forward_propagation(X, params):
    """
    Args:
        X      -- Input data of shape (n_x, m); n_x -> no. of input nodes,
                  m -> no. of training examples
        params -- Dictionary with the initialized weights and biases

    Returns:
        A2               -- Sigmoid output of the 2nd (output) layer, of shape (n_y, m)
        neuron_functions -- Dictionary containing the computed linear function Z and
                            activation function A for each neuron
    """
    W1 = params["W1"]
    b1 = params["b1"]
    W2 = params["W2"]
    b2 = params["b2"]

    # Compute the linear function Z and the sigmoid activation A for each layer
    Z1 = np.dot(W1, X) + b1
    A1 = utils.sigmoid(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = utils.sigmoid(Z2)

    neuron_functions = {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
    return A2, neuron_functions
def lstm_numpy(x, W, U, b):
    z = numpy.dot(x, W) + b
    n_hidden = b.shape[0] // 4          # integer division: b stacks the four gates
    h = numpy.zeros((x.shape[0], n_hidden), dtype=x.dtype)
    prev_h = numpy.zeros(n_hidden, dtype=x.dtype)
    prev_c = numpy.zeros(n_hidden, dtype=x.dtype)

    def _slice(_x, n, dim):
        return _x[n * dim:(n + 1) * dim]

    for n in range(len(h)):
        preact = numpy.dot(prev_h, U) + z[n]
        i = utils.sigmoid(_slice(preact, 0, n_hidden))
        f = utils.sigmoid(_slice(preact, 1, n_hidden))
        o = utils.sigmoid(_slice(preact, 2, n_hidden))
        c = utils.tanh(_slice(preact, 3, n_hidden))
        c = f * prev_c + i * c
        h[n] = o * utils.tanh(c)
        prev_c = c
        prev_h = h[n]
    return h
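# A minimal usage sketch for lstm_numpy. The shapes below are assumptions inferred from the
# code, not documented in the source: x is (timesteps, n_in), W is (n_in, 4 * n_hidden),
# U is (n_hidden, 4 * n_hidden), b is (4 * n_hidden,), and the returned h is
# (timesteps, n_hidden). It also assumes utils.sigmoid / utils.tanh are available.
import numpy

n_in, n_hidden, timesteps = 8, 16, 5
rng = numpy.random.default_rng(0)
x = rng.normal(size=(timesteps, n_in))
W = rng.normal(scale=0.1, size=(n_in, 4 * n_hidden))
U = rng.normal(scale=0.1, size=(n_hidden, 4 * n_hidden))
b = numpy.zeros(4 * n_hidden)
h = lstm_numpy(x, W, U, b)   # -> (5, 16) array of hidden states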
def _linear_activation_forward(self, A_prev, W, b, activation):
    """
    Get activation A for a layer with the given parameters.
    Returns cache to use in backward propagation.
    """
    if activation == "sigmoid":
        Z, linear_cache = self._linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        Z, linear_cache = self._linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)
    cache = (linear_cache, activation_cache)
    return A, cache
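# The snippet above assumes activation helpers named sigmoid/relu that return a pair
# (A, cache), where the cache holds the pre-activation Z for the backward pass. A minimal
# sketch of that convention, given distinct names here only to avoid clashing with the
# single-output sigmoid sketched near the top of this listing (an assumption, not the
# original helpers):

def sigmoid_with_cache(Z):
    A = 1.0 / (1.0 + np.exp(-Z))
    return A, Z          # cache Z for the backward pass

def relu_with_cache(Z):
    A = np.maximum(0, Z)
    return A, Z          # cache Z for the backward pass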
def forward_propagation(self, input):
    """
    Executes forward propagation. Notice that the z and a of the first layer (l = 0)
    are equal to the NN's input.

    :param input: input to the network.
    :type input: (num_inputs, 1) numpy matrix.
    :return z: values computed by applying weights and biases at each layer of the NN.
    :rtype z: 3-dimensional list of (num_neurons[l], 1) numpy matrices.
    :return a: activations computed by applying the activation function to z at each layer.
    :rtype a: 3-dimensional list of (num_neurons[l], 1) numpy matrices.
    """
    z = [None] * 3
    a = [None] * 3
    z[0] = input
    a[0] = input
    # Neural network inference
    z[1] = self.weights[1] * a[0] + self.biases[1]
    a[1] = sigmoid(z[1])
    z[2] = self.weights[2] * a[1] + self.biases[2]
    a[2] = sigmoid(z[2])
    return z, a
def _loss_and_gradient_sigmoid(self, X, y):
    """
    Binary classification with a sigmoid output: compute the loss and the gradient
    using gradient descent.

    :param X: NxD array (m_samples, n_features), np.ndarray
    :param y: N
    :return:
    """
    m_samples, n_features = X.shape
    loss = 0.0
    for m in range(m_samples):
        interaction = 0.0
        for i in range(n_features):
            for j in range(i + 1, n_features):
                interaction += np.dot(self.V[i, self.featureToField[j]],
                                      self.V[j, self.featureToField[i]])
        y_pred = self.wo + np.dot(self.W, X[m]) + interaction
        delta = (sigmoid(y[m] * y_pred) - 1) * y[m]
        # Update the parameters
        self.wo = self.wo - self.learning_rate * delta
        for i in range(n_features):
            if X[m, i] != 0:
                self.W[i] = self.W[i] - self.learning_rate * delta * X[m, i]
                for j in range(i + 1, n_features):
                    self.V[i, self.featureToField[j]] -= self.learning_rate * delta * \
                        self.V[j, self.featureToField[i]] * X[m, i] * X[m, j]
                    self.V[j, self.featureToField[i]] -= self.learning_rate * delta * \
                        self.V[i, self.featureToField[j]] * X[m, i] * X[m, j]
        # Accumulate the loss
        loss += -np.log(sigmoid(y[m] * y_pred))
    print(loss / m_samples)
def predict(X, user, parameters, layers_dim):
    A_prev = X
    L = len(layers_dim)
    for l in range(L - 2):
        Z = np.dot(parameters["W" + str(l + 1)], A_prev) + parameters["b" + str(l + 1)]
        A = sigmoid(Z)
        A_prev = A
    ZL = np.dot(parameters["W" + str(L - 1)], A_prev) + parameters["b" + str(L - 1)]
    AL = softmax(ZL)
    print(AL)
    Y = np.argmax(AL)
    return Y
def lrCostFunction(theta, X, y, lambda_):
    m = y.size
    if y.dtype == bool:
        y = y.astype(int)
    J = 0
    n = np.shape(theta)[0]
    theta = np.reshape(theta, (n, 1))
    grad = np.zeros(theta.shape)
    y = np.reshape(y, (np.shape(y)[0], 1))
    J = -(1 / m) * (np.transpose(y).dot(np.log(utils.sigmoid(X.dot(theta))))
                    + np.transpose(1 - y).dot(np.log(1 - utils.sigmoid(X.dot(theta))))) \
        + (lambda_ / (2 * m)) * (np.transpose(theta[1:n]).dot(theta[1:n]))
    for i in range(0, n):
        a = 0
        for j in range(0, m):
            a = a + (utils.sigmoid(np.dot(X[j, :], np.reshape(theta, (n, 1)))) - y[j]) * X[j, i]
        if i == 0:
            grad[i] = a / m
        else:
            grad[i] = a / m + (lambda_ / m) * theta[i]
    grad = np.reshape(grad, (1, n))
    return J, grad
def _negative_phase(self, neg_data):
    """Evaluate the negative phase.

    Args:
        neg_data: shape = [`_batch_size`, `_num_hidden`]
    """
    # Hidden states, using the probability itself. Shape = [_batch_size, _num_hidden]
    neg_h_probs = utils.sigmoid(np.matmul(neg_data, self.weight) +
                                np.tile(self.hidden_bias, (self._batch_size, 1)))
    self._neg_products = np.matmul(neg_data.transpose(), neg_h_probs)
    # Shape = [1, _num_visible] / [1, _num_hidden]
    self._neg_visible_act = np.mean(neg_data, axis=0)
    self._neg_hidden_act = np.mean(neg_h_probs, axis=0)
def get_batch(self, batch_size, delayed_reward, discount_factor):
    memory = zip(reversed(self.memory_sample[-batch_size:]),
                 reversed(self.memory_action[-batch_size:]),
                 reversed(self.memory_value[-batch_size:]),
                 reversed(self.memory_policy[-batch_size:]),
                 reversed(self.memory_reward[-batch_size:]),
                 reversed(self.memory_target_policy[-batch_size:]),
                 reversed(self.memory_target_action[-batch_size:]),
                 reversed(self.memory_value2[-batch_size:]))
    sample_batch = np.zeros((batch_size, self.num_steps, self.num_features))
    x_sample_next = np.zeros((batch_size, self.num_steps, self.num_features))
    y_value = np.zeros((batch_size, self.agent.NUM_ACTIONS))
    y_value2 = np.zeros((batch_size, self.agent.NUM_ACTIONS))
    y_policy = np.full((batch_size, self.agent.NUM_ACTIONS), .5)
    y_target_value = np.zeros((batch_size, self.agent.NUM_ACTIONS))
    y_target_policy = np.full((batch_size, self.agent.NUM_ACTIONS), .5)
    rewards = np.zeros(batch_size)
    action_batch = np.asarray([data[1] for data in memory])
    value_max_next = 0
    value_max_next2 = 0
    target_max_next = 0
    x_sample_next[0] = self.memory_sample[-1]
    reward_next = self.memory_reward[-1]
    for i, (sample, action, value, policy, reward, target_policy, target_action, value2) \
            in enumerate(memory):
        sample_batch[i] = sample
        y_value[i] = value
        y_value2[i] = value2
        y_policy[i] = policy
        q1_vals = self.critic.target_model1_predict(sample)
        q2_vals = self.critic.target_model2.predict(sample)
        y_target_value[i] = np.min(np.vstack([q1_vals.transpose(), q2_vals.transpose()]), axis=0)
        y_target_policy[i] = target_policy
        rewards[i] = (delayed_reward + reward_next - reward * 2) * 100
        y_target_value[i, target_action] = rewards[i] + discount_factor * target_max_next  # q_value
        # y_target_policy[i, target_action] = sigmoid(target_value[target_action])
        y_value[i, action] = value_max_next    # q_value
        y_value2[i, action] = value_max_next2  # q_value
        y_policy[i, action] = sigmoid(y_value[action])
        target_max_next = y_target_value.max()
        value_max_next = value.max()
        value_max_next2 = value2.max()
        reward_next = reward
    return sample_batch, y_policy, y_value, y_value2, y_target_value
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                   output_layer_size, X, y, lambda_=0.0):
    t1 = nn_params[:(input_layer_size + 1) * hidden_layer_size].reshape(
        hidden_layer_size, input_layer_size + 1)
    t2 = nn_params[(input_layer_size + 1) * hidden_layer_size:].reshape(
        output_layer_size, hidden_layer_size + 1)
    m = X.shape[0]
    X = np.concatenate([np.ones((m, 1)), X], axis=1)
    z2 = np.dot(X, t1.T)
    a2 = utils.sigmoid(z2)    # 5000 x 25
    z3 = np.dot(np.concatenate([np.ones((m, 1)), a2], axis=1), t2.T)
    h = utils.sigmoid(z3)
    y_matrix = np.zeros((m, output_layer_size))
    for i in range(m):
        y_matrix[i, y[i]] = 1.

    # Regularized cost function
    J = 1. * (1 / m) * (-y_matrix * np.log(h) - (1 - y_matrix) * np.log(1 - h)).sum(axis=1).sum() \
        + lambda_ * (0.5 / m) * ((t1 ** 2)[:, 1:].sum().sum() + (t2 ** 2)[:, 1:].sum().sum())

    # Back propagation
    delta3 = h - y_matrix     # 5000 x 10; t2: 10 x 26, t1: 25 x 401
    delta2 = np.dot(delta3, t2) * np.concatenate(
        [np.zeros((m, 1)), a2 * (1 - a2)], axis=1)   # 5000 x 26
    grad2 = (1 / m) * np.dot(delta3.T, np.concatenate([np.ones((m, 1)), a2], axis=1)) \
        + (lambda_ / m) * np.concatenate([np.zeros((t2.shape[0], 1)), t2[:, 1:]], axis=1)
    grad1 = (1 / m) * np.dot(delta2.T[1:], X) \
        + (lambda_ / m) * np.concatenate([np.zeros((t1.shape[0], 1)), t1[:, 1:]], axis=1)
    grad = np.concatenate((grad1.ravel(), grad2.ravel()))
    return J, grad
def predictive_novelty(state):
    global Q, prev_state, prev_action, novelty_coeff, novelty_thresh
    '''
    Implements predictive novelty motivation as described by P.-Y. Oudeyer.

    Variables needed: the previous estimate and the current estimate, both pulled from
    the Q values and passed through the sigmoid function.
    '''
    # Find the next best action based on the current state
    best_option = np.argwhere(Q[state[0], state[1], :] == np.amax(Q[state[0], state[1], :]))
    num_options = len(best_option)
    best_option = random.choice(best_option)
    action = best_option[0]

    diff_array = np.zeros([4])
    # Find the new values for all actions in this state.
    for a in range(len(Q[state[0], state[1], :])):
        diff_array[a] = utils.sigmoid(Q[state[0], state[1], a])
        # Accumulate the novelty bonus into the Q value (the original `=+` reads as a typo for `+=`)
        Q[state[0], state[1], a] += (diff_array[a] - novelty_thresh) * novelty_coeff

    # Feed the Q values to the sigmoid function.
    prev_estimate = utils.sigmoid(Q[prev_state[0], prev_state[1], prev_action])
    this_estimate = utils.sigmoid(Q[state[0], state[1], action])
    # delta = this_estimate - prev_estimate
    diff_array = diff_array - 0.5
    delta = diff_array.max()
    return delta
def getAvgLoss(self, W, V, usr2NonzeroCols, usr2negsNonzeroCols, usr2itemsIndx, pooler):
    loss = 0.0
    cnt = 0
    for usrid in usr2itemsIndx:
        try:
            usrloss = 0.0
            usr_rep = pooler.pool_all(usr2itemsIndx[usrid], V)

            # 0. -log( sigmoid( usr_rep * sumedW_y ) )
            y_nonzeroCols = usr2NonzeroCols[usrid]
            sumedW_y = sumOverW(W, y_nonzeroCols)
            usrloss += (-1) * math.log(sigmoid(usr_rep.transpose().dot(sumedW_y)))

            # 1. summation of log( sigmoid( -usr_rep * sumedW_neg ) ) over negatives
            y_negsNonzeroCols = usr2negsNonzeroCols[usrid]
            sumedW_negs = map(
                lambda y_negNonzeroCols: sumOverW(W, y_negNonzeroCols).reshape(
                    self.ITEM_FIELDS_NUM, 1),
                y_negsNonzeroCols)
            usrloss += (-1) * sum(
                map(lambda sumedW_neg: math.log(
                    sigmoid((-1) * usr_rep.transpose().dot(sumedW_neg))),
                    sumedW_negs))

            # 2. l2 norm
            l2norm = np.linalg.norm(W) + sum(map(lambda v: np.linalg.norm(v), V))
            usrloss += l2norm

            loss += usrloss
            cnt += 1
        except:
            loss += 0.0
            cnt += 0
    return loss / cnt
def __init__(self, link='softplus'):
    """
    :param link: link function, either 'exp' or 'softplus'
                 (note that the link is modified with an offset)
    """
    super().__init__(hyp=None)
    if link == 'exp':
        self.link_fn = lambda mu: np.exp(mu - 0.5)
        self.dlink_fn = lambda mu: np.exp(mu - 0.5)
    elif link == 'softplus':
        self.link_fn = lambda mu: softplus(mu - 0.5) + 1e-10
        self.dlink_fn = lambda mu: sigmoid(mu - 0.5)
    else:
        raise NotImplementedError('link function not implemented')
    self.name = 'Heteroscedastic Noise'