def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for e_i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = L2I[label]              # convert the label to a number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            params[0] -= learning_rate * grads[0]
            params[1] -= learning_rate * grads[1]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(e_i, train_loss, train_accuracy, dev_accuracy)
    return params

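# Every variant of this training loop reports accuracy through an
# accuracy_on_dataset helper that is referenced but never defined here.
# The following is a minimal sketch of what it presumably looks like --
# an assumption, not the original implementation. It assumes
# ll.predict(x, params) returns the predicted class index, and reuses the
# same feats_to_vec / L2I helpers the loop uses.
def accuracy_on_dataset(dataset, params):
    good = bad = 0.0
    for label, features in dataset:
        x = feats_to_vec(features)  # same feature encoding as in training.
        if ll.predict(x, params) == L2I[label]:
            good += 1
        else:
            bad += 1
    return good / (good + bad)
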
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = features  # numpy vector.
            y = label     # a number.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # SGD update of the parameters.
            W, b = params
            updated_W = W - learning_rate * grads[0]
            updated_b = b - learning_rate * grads[1]
            params = [updated_W, updated_b]
        # report progress
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(i, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = np.divide(features, 2)  # scale the feature vector (values halved).
            y = label                   # the label is already a number.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            params[0] -= learning_rate * grads[0]
            params[1] -= learning_rate * grads[1]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(i, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = features  # the features are already a vector.
            y = label     # the label is already a number.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            gW, gb = grads
            params[0] -= learning_rate * gW
            params[1] -= learning_rate * gb
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(i, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = L2I[label]              # convert the label to a number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            params[0] -= learning_rate * grads[0]
            params[1] -= learning_rate * grads[1]
        # decay the learning rate every 20 iterations.
        if (i + 1) % 20 == 0:
            learning_rate /= 5
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(i, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    _params = params
    lr = learning_rate
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = ut.L2I[label]           # convert the label to a number if needed.
            loss, grads = ll.loss_and_gradients(x, y, _params)
            cum_loss += loss
            W, b = _params
            Wg, bg = grads
            W = W - lr * Wg
            b = b - lr * bg
            _params = [W, b]
        train_loss = cum_loss / len(train_data)
        # evaluate with the updated parameters.
        train_accuracy = accuracy_on_dataset(train_data, _params)
        dev_accuracy = accuracy_on_dataset(dev_data, _params)
        print(i, train_loss, train_accuracy, dev_accuracy)
    return _params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    W, b = params
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = utils.L2I[label]        # convert the label to a number.
            loss, grads = ll.loss_and_gradients(x, y, [W, b])
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            W = W - learning_rate * grads[0]
            b = b - learning_rate * grads[1]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, [W, b])
        dev_accuracy = accuracy_on_dataset(dev_data, [W, b])
        print(i + 1, train_loss, train_accuracy, dev_accuracy)
    return [W, b]

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a numpy array; the label is in column 0, the features follow.
    dev_data: an array in the same layout.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        np.random.shuffle(train_data)  # shuffle the rows in place.
        for l in range(train_data.shape[0]):
            x = train_data[l, 1:]  # the feature vector.
            y = train_data[l, 0]   # the label.
            loss, grads = loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate (gradient descent, so subtract).
            grad_W, grad_b = grads
            params[0] -= learning_rate * grad_W
            params[1] -= learning_rate * grad_b
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(i, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = utils.L2I[label]        # convert the label to a number.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update rule: W = W - eta * gradW, b = b - eta * gradb.
            params[0] = params[0] - learning_rate * grads[0]
            params[1] = params[1] - learning_rate * grads[1]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(i, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = label                   # the label is already a number.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # The gradients are computed in the loglinear module; the SGD step
            # here moves each parameter against its gradient.
            params[0] = params[0] - learning_rate * grads[0]
            params[1] = params[1] - learning_rate * grads[1]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(i, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = L2I[label]              # convert the label to a number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update each parameter against its gradient.
            for j in range(len(params)):
                params[j] = params[j] - learning_rate * grads[j]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print('iteration: {} train_loss: {} train_acc: {} dev_acc: {}'.format(
            i, train_loss, train_accuracy, dev_accuracy))
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = utils.L2I[label]        # convert the label to a number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate (per parameter, since W and b differ in shape).
            for j in range(len(params)):
                params[j] = params[j] - learning_rate * grads[j]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(i, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = utils.L2I[label]        # convert the label to a number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            for j in range(len(params)):
                params[j] = params[j] - learning_rate * grads[j]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(i, train_loss, train_accuracy, dev_accuracy)
    return params

def loss_and_gradients(x, y, params):
    """
    params: a list of the form [W, b, U, b_tag]

    returns:
        loss, [gW, gb, gU, gb_tag]

    loss: scalar
    gW: matrix, gradients of W
    gb: vector, gradients of b
    gU: matrix, gradients of U
    gb_tag: vector, gradients of b_tag
    """
    W, b, U, b_tag = params
    post_activation = get_post_activation(x, params)  # hidden layer, tanh(xW + b).
    probs = classifier_output(x, params)
    loss = -np.log(probs[y])

    # The top of the network is a log-linear classifier over the hidden layer,
    # so its gradients give gU and gb_tag directly.
    import loglinear
    _, g_top = loglinear.loss_and_gradients(post_activation, y, [U, b_tag])
    gU, gb_tag = g_top

    # Backpropagate through the tanh: d tanh(z)/dz = 1 - tanh(z)^2.
    # gb_tag equals the gradient w.r.t. the output logits.
    g_post_tanh = np.dot(gb_tag, U.T)
    g_pre_tanh = (1 - post_activation ** 2) * g_post_tanh
    gb = g_pre_tanh
    gW = np.outer(np.array(x), g_pre_tanh)

    # Optional L2 regularization (disabled):
    # loss += np.sum(W**2) + np.sum(b**2) + np.sum(U**2) + np.sum(b_tag**2)
    # gW += 2 * W; gb += 2 * b; gU += 2 * U; gb_tag += 2 * b_tag
    return loss, [gW, gb, gU, gb_tag]

def loss_and_gradients(x, y, params):
    """
    params: a list of the form [W, b, U, b_tag]

    returns:
        loss, [gW, gb, gU, gb_tag]

    loss: scalar
    gW: matrix, gradients of W
    gb: vector, gradients of b
    gU: matrix, gradients of U
    gb_tag: vector, gradients of b_tag
    """
    W, b, U, b_tag = params
    lo = ll.linear_output(x, (W, b))  # pre-activation: xW + b.
    x_tag = np.tanh(lo)               # hidden layer output.
    # The top layer is a log-linear classifier over x_tag.
    loss, (gU, gb_tag) = ll.loss_and_gradients(x_tag, y, (U, b_tag))
    # Backpropagate through the tanh: d tanh(z)/dz = 1 - tanh(z)^2.
    gb = np.dot(gb_tag, U.T) * (1 - x_tag ** 2)
    gW = np.dot(np.atleast_2d(x).T, np.atleast_2d(gb))
    return loss, [gW, gb, gU, gb_tag]

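# Backpropagation gradients like the two versions above are easy to get
# subtly wrong, so they are worth verifying numerically. Below is a minimal
# central-difference checker -- a sketch, not part of the original code; the
# name numeric_grad_check and the eps/tol values are illustrative assumptions.
import numpy as np

def numeric_grad_check(loss_fn, x, eps=1e-4, tol=1e-5):
    """loss_fn(x) must return (loss, grad) with grad.shape == x.shape."""
    _, grad = loss_fn(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        ix = it.multi_index
        orig = x[ix]
        x[ix] = orig + eps           # f(x + eps)
        loss_plus, _ = loss_fn(x)
        x[ix] = orig - eps           # f(x - eps)
        loss_minus, _ = loss_fn(x)
        x[ix] = orig                 # restore the entry.
        numeric = (loss_plus - loss_minus) / (2 * eps)
        assert abs(numeric - grad[ix]) < tol, (ix, numeric, grad[ix])
        it.iternext()
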
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    prev_dev_accuracy = 0
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for x, y in train_data:
            x = np.array(x, ndmin=2)  # make a row vector.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            params[0] = params[0] - learning_rate * grads[0]
            params[1] = params[1] - learning_rate * grads[1]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        # early stopping: revert to the previous parameters and stop once
        # dev accuracy deteriorates past the minimal iteration count.
        if dev_accuracy < prev_dev_accuracy and i > config.loglin.min_iterations:
            print("early stopping criterion in iteration {} - deteriorating dev accuracy"
                  .format(i))
            params = prev_params
            break
        prev_params = [p.copy() for p in params]
        prev_dev_accuracy = dev_accuracy
        print("iteration {}, train_loss {}, train_accuracy {}, dev_accuracy {}"
              .format(i, train_loss, train_accuracy, dev_accuracy))
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = features
            y = label
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            new_params = []
            for j in range(len(params)):
                new_params.append(params[j] - learning_rate * grads[j])
            params = new_params
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(i, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = L2I[label]              # convert the label to a number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            gW, gb = grads
            params[0] -= learning_rate * gW
            params[1] -= learning_rate * gb
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(i, train_loss, train_accuracy, dev_accuracy)
    return params

def _loss_and_W_grad(W):
    global b
    x = np.array([[1, 2, 3]], np.double)
    loss, grads = ll.loss_and_gradients(x, 0, [W, b])
    return loss, grads[0]

def _loss_and_b_grad(b):
    global W
    x = np.array([[1, 2, 3]], np.double)
    loss, grads = ll.loss_and_gradients(x, 0, [W, b])
    return loss, grads[1]

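# These two helpers already expose the (loss, gradient) interface that the
# numeric_grad_check sketch after the MLP gradients expects. A hedged usage
# example follows; the 3-feature, 4-class shapes are illustrative assumptions,
# not the original setup, and they assume ll.loss_and_gradients accepts a
# row-vector input.
if __name__ == '__main__':
    W = np.random.randn(3, 4)
    b = np.random.randn(4)
    numeric_grad_check(_loss_and_W_grad, W)  # checks grads[0] against finite differences.
    numeric_grad_check(_loss_and_b_grad, b)  # checks grads[1] the same way.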