def softmax_sanity():
    # Sanity checks for softmax. If these fail, your softmax is definitely wrong.
    # If these pass, it may or may not be correct.
    print("running softmax tests")
    test1 = ll.softmax(np.array([1, 2]))
    test2 = ll.softmax(np.array([1001, 1002]))
    test3 = ll.softmax(np.array([-1001, -1002]))
    print("test1: {}".format(test1))
    assert np.amax(np.fabs(test1 - np.array([0.26894142, 0.73105858]))) <= 1e-6
    print("test2: {}".format(test2))
    assert np.amax(np.fabs(test2 - np.array([0.26894142, 0.73105858]))) <= 1e-6
    print("test3: {}".format(test3))
    assert np.amax(np.fabs(test3 - np.array([0.73105858, 0.26894142]))) <= 1e-6
    print("softmax tests passed")
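# The 1001/1002 and -1001/-1002 cases above only pass for a numerically
# stable softmax. A minimal sketch of the kind of `ll.softmax` these tests
# assume; this is an illustration, not the course-provided implementation:
import numpy as np

def softmax_stable(x):
    # softmax is shift-invariant, so subtracting the max keeps np.exp from
    # overflowing on inputs like 1002 without changing the result
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    return exps / np.sum(exps)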
def forward_propagation_action(x, params):
    # first load the model parameters
    w1, b1, w2, b2 = params
    # compute z1: the weight matrix w1 times the input, plus the bias
    z1 = np.dot(w1, x) + b1
    # pass it through the tanh activation function
    A1 = np.tanh(z1)
    # compute z2
    z2 = np.dot(w2, A1) + b2
    # now use softmax as the output activation function
    A2 = ll.softmax(z2)
    # save all intermediate results as a model
    result_model = {'A0': x, 'z1': z1, 'A1': A1, 'z2': z2, 'A2': A2}
    return result_model
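# A usage sketch for forward_propagation_action with made-up dimensions.
# The np.dot(w1, x) ordering implies column-vector inputs, so every shape
# below is an assumption for illustration; `ll.softmax` must be importable.
import numpy as np

in_dim, hid_dim, out_dim = 4, 5, 3  # hypothetical sizes
rng = np.random.default_rng(0)
demo_params = [rng.normal(size=(hid_dim, in_dim)),   # w1
               rng.normal(size=(hid_dim, 1)),        # b1
               rng.normal(size=(out_dim, hid_dim)),  # w2
               rng.normal(size=(out_dim, 1))]        # b2
demo_x = rng.normal(size=(in_dim, 1))
demo_model = forward_propagation_action(demo_x, demo_params)
print(demo_model['A2'].sum())  # the output probabilities sum to 1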
def classifier_output(x, params):
    # f(x) = softmax(U tanh(Wx + b) + b')
    W, b, U, b_tag = params
    h = np.tanh(np.dot(x, W) + b)
    probs = softmax(np.dot(h, U) + b_tag)
    return probs
def classifier_output(x, params):
    W, b, U, b_tag = params
    eq = np.dot(U, np.tanh(np.dot(W, x) + b)) + b_tag
    return ll.softmax(eq)
def classifier_output(x, params):
    h = x
    for i in range(0, len(params), 2):
        z = np.dot(h, params[i]) + params[i + 1]
        h = np.tanh(z)
    # after the loop, z holds the last layer's linear output; the final
    # tanh result h is intentionally unused, since the output layer feeds
    # its pre-activation straight into the softmax
    probs = ll.softmax(z)
    return probs
from loglinear import softmax


def classifier_output(x, params):
    W, b, U, b_tag = params
    # get_post_activation is an assumed helper computing the hidden
    # layer's tanh activation
    post_activation = get_post_activation(x, params)
    post_second_layer = np.dot(post_activation, U) + b_tag
    probs = softmax(post_second_layer)
    return probs
def classifier_output(x, params):
    W, b, U, b_tag = params
    probs = ll.softmax(np.dot(U, np.tanh(np.dot(W, x) + b)) + b_tag)
    return probs
def classifier_output(x, params):
    W = params[0]
    b = params[1]
    scores = np.dot(x, W) + b
    # the remaining (U, b_tag) pairs are the deeper layers
    for U, b_tag in zip(params[2::2], params[3::2]):
        scores = np.dot(np.tanh(scores), U) + b_tag
    return ll.softmax(scores)
def classifier_output(x, params):
    W, b, U, b_tag = params
    linear = np.dot(x, W) + b
    tan = np.tanh(linear)
    probs = loglinear.softmax(np.dot(tan, U) + b_tag)
    return probs
def classifier_output(x, params):
    out = x
    for W, b in zip(params[::2], params[1::2]):
        hidden = out.dot(W) + b
        out = np.tanh(hidden)
    # softmax is applied to the last layer's pre-activation `hidden`;
    # the final tanh value `out` is intentionally discarded
    probs = softmax(hidden)
    return probs
def classifier_output(x, params):
    out = x
    hidden_layer_inputs = []
    for W, b in zip(params[::2], params[1::2]):
        hidden_layer_inputs.append(out)
        hidden_layer = out.dot(W) + b
        out = np.tanh(hidden_layer)
    # return the probabilities together with each layer's input,
    # which the backward pass needs
    return softmax(hidden_layer), hidden_layer_inputs
def classifier_output(x, params):
    # mat_vec_mul, tanh, and softmax are helpers assumed to be defined
    # elsewhere in this file
    W, b, U, b_tag = params
    z = mat_vec_mul(x, W) + b
    h = tanh(z)
    o = mat_vec_mul(h, U) + b_tag
    probs = softmax(o)
    # also return the intermediate values needed for backpropagation
    return probs, [z, h, o]
def classifier_output(x, params):
    activation_i = x
    num_of_parameters = len(params)
    # loop with a step of 2 to get each layer's W and b in turn
    for index in range(0, num_of_parameters, 2):
        matrix = params[index + 1] + np.dot(activation_i, params[index])
        activation_i = np.tanh(matrix)
    # softmax over the last layer's pre-activation
    probs = log_linear.softmax(matrix)
    return probs
def classifier_output(x, params):
    """
    Return the output layer (class probabilities) of a log-linear
    classifier with the given params on input x.
    """
    W, b = params
    W_t = W.transpose()
    mult = np.dot(W_t, x)
    grades = mult + b
    probs = softmax(grades)
    return probs
def classifier_output(x, params):
    """
    Calculate a multi-layer classifier with one hidden layer.
    :param x: input vector
    :param params: W, b, U, b_tag
    :return: vector of probabilities
    """
    W, b, U, b_tag = params
    hidden = np.tanh(np.dot(x, W) + b)
    mult = np.dot(hidden, U) + b_tag
    probs = ll.softmax(mult)
    return probs
def classifier_output(x, params):
    # column-vector convention: reshape the input to [in_dim, 1]
    x = np.array(x).reshape(-1, 1)
    W, b, U, b_tag = params
    Z_hid = np.dot(W.T, x) + b.reshape(-1, 1)           # [hid_dim, 1]
    V_hid = tanh(Z_hid)                                 # [hid_dim, 1]
    Z_out = np.dot(U.T, V_hid) + b_tag.reshape(-1, 1)   # [out_dim, 1]
    V_out = softmax(Z_out)                              # [out_dim, 1]
    probs = V_out
    return probs
def hidden_layers(x, params):
    aggregation = []
    activation = [x]
    out = x
    for W, b in zip(params[::2], params[1::2]):
        aggr = out.dot(W) + b
        out = np.tanh(aggr)
        aggregation.append(aggr)
        activation.append(out)
    y_pred = softmax(aggr)
    # the last tanh activation feeds nothing (the output layer uses the
    # pre-activation), so drop it from the cache
    del activation[-1]
    return y_pred, aggregation, activation
def classifier_output(x, params):
    index = 0
    p = params_to_couples(params)
    vec = x
    for (param1, param2) in p:
        # the last layer is just Wx + b (no tanh)
        if index + 1 == len(p):
            vec = np.dot(vec, param1) + param2
            break
        # every hidden layer is computed as tanh(Wx + b)
        vec = np.tanh(np.dot(vec, param1) + param2)
        index += 1
    # the final scores go through softmax to produce the predictions
    probs = ll.softmax(vec)
    return probs
def classifier_output(x, params):
    # convenient lists for the W1,...,Wn and b1,...,bn parameters
    param_W = [params[i] for i in range(0, len(params), 2)]
    param_B = [params[i] for i in range(1, len(params), 2)]
    hi = x
    # forward propagation through the hidden layers
    for i in range(len(param_W) - 1):
        hi = np.tanh(np.dot(hi, param_W[i]) + param_B[i])
    # compute the output of the last layer (with softmax)
    probs = ll.softmax(np.dot(hi, param_W[-1]) + param_B[-1])
    return probs
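# The variable-depth variants above all assume params laid out as
# alternating weight matrices and bias vectors [W1, b1, ..., Wn, bn].
# A sketch of a matching initializer; the Xavier-style ranges are an
# assumption, not taken from the source:
import numpy as np

def create_classifier_sketch(dims):
    # dims = [in_dim, hid_dim_1, ..., out_dim]
    params = []
    for in_d, out_d in zip(dims, dims[1:]):
        eps = np.sqrt(6.0 / (in_d + out_d))
        params.append(np.random.uniform(-eps, eps, (in_d, out_d)))
        params.append(np.random.uniform(-eps, eps, out_d))
    return params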
def loss_and_gradients(x, y, params):
    """
    params: a list of the form [W, b, U, b_tag]

    returns:
        loss, [gW, gb, gU, gb_tag]

    loss: scalar
    gW: matrix, gradients of W
    gb: vector, gradients of b
    gU: matrix, gradients of U
    gb_tag: vector, gradients of b_tag
    """
    W, b, U, b_tag = params
    out_dim = U.shape[1]
    # first, run the entire feedforward pass
    shape_b = b.shape
    if len(shape_b) == 1:
        if config.debug:
            print("b should be a 2d array, actual shape: {}".format(shape_b))
        b = np.array(b, np.double, ndmin=2)
    a2 = layer_output(x, [W, b])
    z3 = np.dot(a2, U) + b_tag
    y_hat = ll.softmax(z3)
    y_e = ll.to_one_hot_row(y, out_dim)
    y_diff = y_hat - y_e
    # now backpropagate
    gU = np.dot(a2.transpose(), y_diff)
    gb_tag = y_diff.copy()
    delta2 = np.dot(y_diff, U.T)
    delta2 = delta2 * (1 - a2**2)  # tanh'(z2) = 1 - tanh(z2)^2 = 1 - a2^2
    gW = np.dot(x.transpose(), delta2)
    gb = delta2
    loss = ll.logloss(y_e, y_hat)
    return loss, [gW, gb, gU, gb_tag]
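# Why y_diff = y_hat - y_e above: with cross-entropy loss L = -log(y_hat[y])
# and y_hat = softmax(z3), the two derivatives combine to dL/dz3 =
# y_hat - y_onehot, so no separate softmax Jacobian is needed. The hidden
# layer then receives delta2 = (y_diff . U^T) * tanh'(z2), where
# tanh'(z2) = 1 - tanh(z2)^2 = 1 - a2^2 because a2 = tanh(z2).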
def classifier_output(x, params):
    """
    Calculate all layers with tanh, and the last layer with softmax.
    params: x, params
    returns: vector of probabilities
    """
    W = params[0]
    b = params[1]
    # calculate the first layer
    layer = np.dot(x, W) + b
    # calculate the hidden layers
    for i in range(2, len(params), 2):
        layer = np.dot(np.tanh(layer), params[i]) + params[i + 1]
    # calculate the last layer
    probs = ll.softmax(layer)
    return probs
def classifier_output(x, params):
    current_input = x
    hidden_outputs = [current_input]
    pred_W, pred_b = params[-2:]
    hidden_params = params[:-2]
    for i in range(0, len(hidden_params), 2):
        W = hidden_params[i]
        b = hidden_params[i + 1]
        z = mlp1.mat_vec_mul(current_input, W) + b
        h = mlp1.tanh(z)
        current_input = h
        hidden_outputs.append(z)
        hidden_outputs.append(h)
    logits = mlp1.mat_vec_mul(current_input, pred_W) + pred_b
    probs = loglinear.softmax(logits)
    return probs, hidden_outputs
def classifier_output(x, params):
    # z_layers holds each layer's value before the activation function,
    # h_layers holds it after the activation function
    z_layers = []
    h_layers = []
    h = x
    for index in range(0, len(params), 2):
        w = params[index]
        b = params[index + 1]
        z = np.dot(h, w)
        z = np.add(z, b)
        h = np.tanh(z)
        z_layers.append(z)
        h_layers.append(h)
    # the output layer's values are not needed in the caches
    h_layers.pop()
    z_layers.pop()
    probs = softmax(z)
    return probs, z_layers, h_layers
def classifier_output(x, params):
    # Z caches pre-activations and V caches post-activations for backprop
    global Z, V
    x = np.array(x).reshape(-1, 1)
    Z = [np.zeros(1)]
    V = [x]
    L = int((len(params) / 2) - 1)
    for l in range(L):
        W, b = params[l * 2], params[(l * 2) + 1]
        Z_hid = np.dot(W.T, V[l]) + b.reshape(-1, 1)    # [hid_dim, 1]
        V_hid = tanh(Z_hid)                             # [hid_dim, 1]
        Z.append(Z_hid)
        V.append(V_hid)
    W, b = params[L * 2], params[(L * 2) + 1]
    Z_out = np.dot(W.T, V[L]) + b.reshape(-1, 1)        # [out_dim, 1]
    V_out = softmax(Z_out)                              # [out_dim, 1]
    Z.append(Z_out)
    V.append(V_out)
    return V_out
def classifier_output(x, params):
    p = list(params)
    # softmax(U tanh(Wx + b) + b_tag)
    mlp1_vec = np.dot(p[2], np.tanh(np.dot(p[0], x) + p[1])) + p[3]
    probs = ll.softmax(mlp1_vec)
    return probs
def classifier_output(x, params):
    W, b, U, b_tag = params
    # np.tanh is already element-wise, so np.vectorize is unnecessary
    return ll.softmax(np.dot(np.tanh(np.dot(x, W) + b), U) + b_tag)
def classifier_output(x, params):
    # note: this variant unpacks U first, so its create_classifier must
    # return the parameters in the order U, W, b, b_tag
    U, W, b, b_tag = params
    # compute the softmax with the given parameters
    h_layer_tanh = np.tanh(np.dot(x, W) + b)
    probs = softmax(np.dot(h_layer_tanh, U) + b_tag)
    return probs
def classifier_output(x, params):
    W, b, U, b_tag = params
    x_tag = np.tanh(x.dot(W) + b)
    probs = ll.softmax(x_tag.dot(U) + b_tag)
    return probs
def classifier_output(x, params):
    # params = W, b, U, b_tag
    W, b, U, b_tag = params
    probs = ll.softmax(np.dot(np.tanh(np.dot(x, W) + b), U) + b_tag)
    return probs
    a flat list of 4 elements, W, b, U, b_tag.
    """
    W = init_random_weights((in_dim, hid_dim))
    b = init_random_weights(hid_dim)
    U = init_random_weights((hid_dim, out_dim))
    b_tag = init_random_weights(out_dim)
    params = [W, b, U, b_tag]
    return params


if __name__ == '__main__':
    # Sanity checks for softmax. If these fail, your softmax is definitely wrong.
    # If these pass, it may or may not be correct.
    test1 = loglinear.softmax(np.array([1, 2]))
    print(test1)
    assert np.amax(np.fabs(test1 - np.array([0.26894142, 0.73105858]))) <= 1e-6
    test2 = loglinear.softmax(np.array([1001, 1002]))
    print(test2)
    assert np.amax(np.fabs(test2 - np.array([0.26894142, 0.73105858]))) <= 1e-6
    test3 = loglinear.softmax(np.array([-1001, -1002]))
    print(test3)
    assert np.amax(np.fabs(test3 - np.array([0.73105858, 0.26894142]))) <= 1e-6

    # Sanity checks. If these fail, your gradient calculation is definitely wrong.
    # If they pass, it is likely, but not certainly, correct.
    from grad_check import gradient_check
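    # A hedged sketch of the gradient checks that typically follow this
    # import, assuming gradient_check(f, x) calls f(x) and compares the
    # returned (loss, gradient) pair against finite differences, and that
    # this file defines loss_and_gradients as above:
    W, b, U, b_tag = create_classifier(3, 4, 2)

    def _loss_and_W_grad(W):
        loss, grads = loss_and_gradients(np.array([1.0, 2.0, 3.0]), 0,
                                         [W, b, U, b_tag])
        return loss, grads[0]

    def _loss_and_b_grad(b):
        loss, grads = loss_and_gradients(np.array([1.0, 2.0, 3.0]), 0,
                                         [W, b, U, b_tag])
        return loss, grads[1]

    for _ in range(10):
        W = np.random.randn(3, 4)
        b = np.random.randn(4)
        gradient_check(_loss_and_W_grad, W)
        gradient_check(_loss_and_b_grad, b)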