def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        np.random.shuffle(train_data)
        for y, x in train_data:  # (label, feature) pairs, per the docstring
            x = np.array(x, ndmin=2, dtype=np.double)  # make a row vector
            loss, grads = lp.loss_and_gradients(x, y, params)
            cum_loss += loss
            for i in range(len(params)):
                params[i] = params[i] - learning_rate * grads[i]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print("I {0:}, train_loss {1:}, train_accuracy {2:}, dev_accuracy {3:}"
              .format(I, train_loss, train_accuracy, dev_accuracy))
    return params

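# Every training loop in this collection calls accuracy_on_dataset without
# defining it. A minimal sketch of what such a helper might look like, assuming
# a hypothetical predict(x, params) that returns the argmax class index, and
# the feats_to_vec / L2I conventions most variants below use (variants whose
# data is already numeric would skip the two conversion lines):
def accuracy_on_dataset(dataset, params):
    good = bad = 0.0
    for label, features in dataset:
        x = feats_to_vec(features)   # features -> numpy vector
        y = L2I[label]               # label string -> class index
        if predict(x, params) == y:  # predict(...) is an assumption, not given here
            good += 1
        else:
            bad += 1
    return good / (good + bad)
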
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    W, b, U, b_tag = params
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = ut.L2I[label]           # convert the label to a number.
            loss, grads = model.loss_and_gradients(x, y, [W, b, U, b_tag])
            cum_loss += loss
            gW, gb, gU, gb_tag = grads
            W -= gW * learning_rate
            b -= gb * learning_rate
            U -= gU * learning_rate
            b_tag -= gb_tag * learning_rate
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return [W, b, U, b_tag]

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = ut.L2I[label]           # convert the label to a number.
            loss, grads = mpl1.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update every parameter according to its gradient
            # and the learning rate.
            for i in range(len(params)):
                params[i] -= grads[i] * learning_rate
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params

def train_xor(train_data, num_iterations, learning_rate, params, decay=1e-3):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    decay: amount subtracted from the learning rate after each iteration.
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for y, x in train_data:
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            gW, gb, gU, gb_tag = grads
            W, b, U, b_tag = params
            W -= learning_rate * gW
            b -= learning_rate * gb
            U -= learning_rate * gU
            b_tag -= learning_rate * gb_tag
        # linearly decay the learning rate, but never below 1e-3.
        learning_rate = max(1e-3, learning_rate - decay)
        train_loss = cum_loss / len(train_data)
        print(I, train_loss)
    return params

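# A minimal usage sketch for train_xor. The four (label, feature) pairs are the
# standard XOR truth table; ll.create_classifier is assumed to exist with the
# (in_dim, hidden_dim, out_dim) signature the XOR script later in this
# collection uses, so treat this as a sketch rather than the canonical driver.
xor_data = [(0, np.array([0, 0])),
            (1, np.array([0, 1])),
            (1, np.array([1, 0])),
            (0, np.array([1, 1]))]
params = ll.create_classifier(2, 4, 2)  # in_dim=2, hidden_dim=4, out_dim=2
trained = train_xor(xor_data, num_iterations=1000, learning_rate=0.5, params=params)
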
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for epoch in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec_uni(features)  # convert features to a vector.
            y = L2I[label]                  # convert the label to a number.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            gw, gb, gu, gb_tag = grads
            W, b, U, b_tag = params
            W -= learning_rate * gw
            b -= learning_rate * gb
            U -= learning_rate * gu
            b_tag -= learning_rate * gb_tag
            params = W, b, U, b_tag
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(epoch, train_loss, train_accuracy, dev_accuracy)
    return params

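# feats_to_vec_uni above is assumed to turn a sequence of unigram features into
# a fixed-size count vector. A minimal sketch under that assumption, using a
# feature-to-index dict F2I like the one referenced elsewhere in this collection:
def feats_to_vec_uni(features):
    vec = np.zeros(len(F2I))
    for f in features:
        if f in F2I:          # ignore out-of-vocabulary features
            vec[F2I[f]] += 1  # count occurrences
    return vec
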
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = label                   # the label is already a number here.
            loss, grads = mlp1.loss_and_gradients(x, y, params)
            cum_loss += loss
            # SGD update: the gradients are computed in mlp1; the update
            # itself happens here, one parameter at a time.
            params = [p - learning_rate * g for p, g in zip(params, grads)]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = utils.L2I[label]
            loss, grads = mlp1.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            params = [p - learning_rate * g for p, g in zip(params, grads)]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        for label, features in train_data:
            loss, grads = mlp.loss_and_gradients(features, label, params)
            cum_loss += loss
            params = [p - learning_rate * g for p, g in zip(params, grads)]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    W, b, U, b_tag = params
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = L2I.get(label)          # convert the label to a number.
            loss, [gW, gb, gU, gb_tag] = mlp1.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters in place, according to the gradients
            # and the learning rate.
            W -= learning_rate * gW
            b -= learning_rate * gb
            U -= learning_rate * gU
            b_tag -= learning_rate * gb_tag
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params

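# A note on the update style above: `W -= learning_rate * gW` mutates the numpy
# array in place, so the arrays inside `params` see the update and returning
# `params` is correct. Rebinding with `W = W - ...` would leave `params` stale,
# which is why the rebinding variants in this collection return the fresh list
# instead. A tiny self-contained demonstration of the difference:
import numpy as np
p = [np.ones(2)]
W = p[0]
W -= 0.5       # in place: p[0] becomes [0.5, 0.5] as well
W = W - 0.25   # rebinding: W points at a new array, p[0] is unchanged
assert np.allclose(p[0], 0.5)
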
def train_classifier(train_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for y, x in train_data:
            loss, grads = ml.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            params[0] -= learning_rate * grads[0]
            params[1] -= learning_rate * grads[1]
            params[2] -= learning_rate * grads[2]
            params[3] -= learning_rate * grads[3]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        print(I, train_loss, train_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    U, W, bu, bw = params
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = utils.L2I[label]
            # grads is [gW, gbw, gU, gbu]
            loss, grads = mlp1.loss_and_gradients(x, y, [U, W, bu, bw])
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            W = W - learning_rate * grads[0]
            bw = bw - learning_rate * grads[1]
            U = U - learning_rate * grads[2]
            bu = bu - learning_rate * grads[3]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, [U, W, bu, bw])
        dev_accuracy = accuracy_on_dataset(dev_data, [U, W, bu, bw])
        print(I + 1, train_loss, train_accuracy, dev_accuracy)
    # the updates above rebind the local names, so return the fresh list
    # rather than the stale input `params`.
    return [U, W, bu, bw]

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = features  # already a numpy vector.
            y = label     # already a number.
            loss, grads = mlp1.loss_and_gradients(x, y, params)
            cum_loss += loss
            # SGD update of the parameters.
            U, W, b, b_tag = params
            updated_U = U - learning_rate * grads[0]
            updated_W = W - learning_rate * grads[1]
            updated_b = b - learning_rate * grads[2]
            updated_btag = b_tag - learning_rate * grads[3]
            params = (updated_U, updated_W, updated_b, updated_btag)
        # report progress.
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    max_dev_accuracy = dev_accuracy = 0
    best_params = params
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            # (for the XOR setup, use the raw features instead: x = features)
            y = utils.L2I[label]        # convert the label to a number.
            loss, grads = mlp.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            for i in range(len(params)):
                params[i] -= grads[i] * learning_rate
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
        # keep a copy of the best parameters seen on the dev set.
        if dev_accuracy >= max_dev_accuracy and dev_accuracy > 0.6:
            max_dev_accuracy = dev_accuracy
            best_params = [np.copy(p) for p in params]
    if max_dev_accuracy > 0.6:
        train_accuracy = accuracy_on_dataset(train_data, best_params)
        dev_accuracy = accuracy_on_dataset(dev_data, best_params)
        print("best_params")
        print("train_accuracy, dev_accuracy")
        print(train_accuracy, dev_accuracy)
        return best_params
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params,
                     momentum=0.9):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    momentum: coefficient for the running average of the gradients.
    """
    # running (exponentially averaged) gradients for the momentum update.
    gU = np.zeros(params[0].shape)
    gb2 = np.zeros(params[1].shape)
    gW = np.zeros(params[2].shape)
    gb1 = np.zeros(params[3].shape)
    regularization = 0.01  # L2 weight-decay coefficient for U and W.
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = L2I[label]              # convert the label to a number.
            loss, grads = mlp1.loss_and_gradients(x, y, params)
            cum_loss += loss
            # momentum update: blend the new gradients into the running
            # averages, then step; U and W also receive L2 weight decay.
            gU = momentum * gU + (1. - momentum) * grads[0]
            gb2 = momentum * gb2 + (1. - momentum) * grads[1]
            gW = momentum * gW + (1. - momentum) * grads[2]
            gb1 = momentum * gb1 + (1. - momentum) * grads[3]
            params[0] -= learning_rate * gU + regularization * params[0]
            params[1] -= learning_rate * gb2
            params[2] -= learning_rate * gW + regularization * params[2]
            params[3] -= learning_rate * gb1
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        # stop early once the dev accuracy is high enough.
        if dev_accuracy > 0.89:
            print(dev_accuracy)
            return params
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = L2I[label]              # convert the label to a number.
            loss, grads = mlp1.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            W, b, U, b_tag = params
            W -= learning_rate * grads[0]
            b -= learning_rate * grads[1]
            U -= learning_rate * grads[2]
            b_tag -= learning_rate * grads[3]
            params = [W, b, U, b_tag]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print('iteration:', I, 'train_loss:', train_loss,
              'train_acc:', train_accuracy, 'dev_acc:', dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = L2I[label]              # convert the label to a number.
            loss, grads = mlp1.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients and the
            # learning rate; the biases are kept as column vectors so they
            # match the gradient shapes.
            W, b, U, b_tag = params
            gW, gb, gU, gb_tag = grads
            W_new = W - learning_rate * gW
            b_new = b.reshape(-1, 1) - learning_rate * gb
            U_new = U - learning_rate * gU
            b_tag_new = b_tag.reshape(-1, 1) - learning_rate * gb_tag
            params = [W_new, b_new, U_new, b_tag_new]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    costs = []  # training loss per iteration, for plotting.
    acc = []    # (train_accuracy, dev_accuracy) per iteration, for plotting.
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = ut.L2I[label]           # convert the label to a number.
            loss, grads = mlp1.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            params[0] -= learning_rate * grads[0]
            params[1] -= learning_rate * grads[1]
            params[2] -= learning_rate * grads[2]
            params[3] -= learning_rate * grads[3]
        train_loss = cum_loss / len(train_data)
        costs.append(train_loss)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        acc.append((train_accuracy, dev_accuracy))
        print(I, train_loss, train_accuracy, dev_accuracy)
    # optional: plt.plot(acc); plt.plot(costs)
    return params

def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = utils.L2I[label]        # convert the label to a number.
            loss, grads = mlp1.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update every parameter in place: p = p - learning_rate * grad.
            update_rule_params(grads, learning_rate, params)
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params

def train_classifier(train_data, num_iterations, learning_rate, params):
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for y, x in train_data:
            loss, grads = mlp1.loss_and_gradients(x, y, params)
            cum_loss += loss
            W, b, U, b_tag = params
            gW, gb, gU, gb_tag = grads
            W_new = W - learning_rate * gW
            b_new = b.reshape(-1, 1) - learning_rate * gb
            U_new = U - learning_rate * gU
            b_tag_new = b_tag.reshape(-1, 1) - learning_rate * gb_tag
            params = [W_new, b_new, U_new, b_tag_new]
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        print(I, train_loss, train_accuracy)
        if train_accuracy == 1:
            return params
    return params

# XOR training script: keep iterating until the classifier fits the data.
learning_rate = 0.1
hidden_dim = 10
train_data = [(l, numpy.array(f)) for l, f in xr.data]
dev_data = list(train_data)
in_dim = 2
out_dim = 2
params = m1.create_classifier(in_dim, hidden_dim, out_dim)
num_iterations = 0
acc = 0.0
print("itn train_l train_a dev_a")
while acc < 1.0:
    num_iterations += 1
    cum_loss = 0.0  # total loss in this iteration.
    random.shuffle(train_data)
    for label, features in train_data:
        # features are already numpy vectors and labels already numbers here.
        loss, grads = m1.loss_and_gradients(features, label, params)
        cum_loss += loss
        # update the parameters according to the gradients
        # and the learning rate.
        for i, g in enumerate(grads):
            params[i] -= learning_rate * g
    train_loss = cum_loss / len(train_data)
    train_accuracy = accuracy_on_dataset(train_data, params)
    acc = accuracy_on_dataset(dev_data, params)
    print(num_iterations, train_loss, train_accuracy, acc)

def _loss_and_b_tag_grad(b_tag):
    x = np.array([[1, 2, 3]], np.double)
    loss, grads = mlp1.loss_and_gradients(x, 0, [W, b, U, b_tag])
    return loss, grads[3]

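# _loss_and_b_tag_grad has the (loss, grad) signature a numerical gradient
# checker expects: perturb one component of b_tag at a time and compare the
# analytic gradient against a central-difference estimate. A minimal sketch of
# such a checker, assuming nothing beyond numpy (the course codebase's own
# gradient-check utility may differ in detail):
def numeric_grad(f, x, eps=1e-5):
    """Estimate df/dx by central differences, component by component."""
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + eps
        fx_plus, _ = f(x)
        x[ix] = old - eps
        fx_minus, _ = f(x)
        x[ix] = old  # restore the original value
        grad[ix] = (fx_plus - fx_minus) / (2 * eps)
        it.iternext()
    return grad

# usage: compare the analytic gradient against the numeric estimate, e.g.
#   analytic = _loss_and_b_tag_grad(b_tag)[1]
#   assert np.allclose(numeric_grad(_loss_and_b_tag_grad, b_tag), analytic, atol=1e-4)
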
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params,
                     reg_lambda=None, dropout=False, Xor=False):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    reg_lambda: optional L2 regularization strength.
    dropout: whether to apply dropout to the hidden layer.
    Xor: whether the features are already raw vectors (the XOR setup).
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            if not Xor:
                x = feats_to_vec(features)  # convert features to a vector.
            else:
                x = np.array(features)      # features are already raw values.
            y = label  # the label is already a number here.
            if dropout:
                # random Bernoulli mask for dropout, sized like the hidden layer.
                ber = np.random.choice([0, 1], size=params[1].shape,
                                       p=[1. / 2, 1. / 2])
                loss, grads = ml.loss_and_gradients(x, y, params, reg_lambda, ber)
            else:
                loss, grads = ml.loss_and_gradients(x, y, params, reg_lambda, None)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            params[0] -= learning_rate * grads[0]  # hidden weights
            params[1] -= learning_rate * grads[1]  # hidden bias
            params[2] -= learning_rate * grads[2]  # output weights
            params[3] -= learning_rate * grads[3]  # output bias
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params, Xor)
        if not Xor:
            dev_accuracy = accuracy_on_dataset(dev_data, params)
            print(I, train_loss, train_accuracy, dev_accuracy)
        else:
            print(I, train_loss, train_accuracy)
    return params

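# A hedged sketch of how these training loops are typically driven end to end.
# read_data is a hypothetical loader, and the hidden size and hyperparameter
# values are illustrative only; create_classifier follows the
# (in_dim, hidden_dim, out_dim) signature used by the XOR script above, and
# whether mlp1 exposes it under that name is an assumption.
if __name__ == '__main__':
    train_data = read_data('train')  # hypothetical: list of (label, features) pairs
    dev_data = read_data('dev')
    params = mlp1.create_classifier(len(F2I), 20, len(L2I))
    trained_params = train_classifier(train_data, dev_data,
                                      num_iterations=30, learning_rate=0.01,
                                      params=params)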