def train_classifier(train_data, dev_data, num_iterations, learning_rate, params): """ Create and train a classifier, and return the parameters. train_data: a list of (label, feature) pairs. dev_data : a list of (label, feature) pairs. num_iterations: the maximal number of training iterations. learning_rate: the learning rate to use. params: list of parameters (initial values) """ for I in xrange(num_iterations): cum_loss = 0.0 # total loss in this iteration. random.shuffle(train_data) for label, features in train_data: x = feats_to_vec(features) # convert features to a vector. y = utils.L2I[label] # convert the label to number if needed. loss, grads = mlpn.loss_and_gradients(x, y, params) cum_loss += loss # YOUR CODE HERE # update the parameters according to the gradients # and the learning rate. update(grads, params, learning_rate) train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print(I, train_loss, train_accuracy, dev_accuracy) return params
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params): """ Create and train a classifier, and return the parameters. train_data: a list of (label, feature) pairs. dev_data : a list of (label, feature) pairs. num_iterations: the maximal number of training iterations. learning_rate: the learning rate to use. params: list of parameters (initial values) """ for I in range(num_iterations): cum_loss = 0.0 # total loss in this iteration. random.shuffle(train_data) for label, features in train_data: x = feats_to_vec(features) # convert features to a vector. y = ut.L2I[label] # convert the label to number if needed. loss, grads = mlp_n.loss_and_gradients(x, y, params) cum_loss += loss # YOUR CODE HERE # update the parameters according to the gradients # and the learning rate. # Teata = Theata - learnning rate * gradient. # For every param in the params list reduce the right gradient multiply the lr. for index, _ in enumerate(params): params[index] = params[index] - learning_rate * grads[index] train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print(I, train_loss, train_accuracy, dev_accuracy) return params
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params): """ Create and train a classifier, and return the parameters. train_data: a list of (label, feature) pairs. dev_data : a list of (label, feature) pairs. num_iterations: the maximal number of training iterations. learning_rate: the learning rate to use. params: list of parameters (initial values) """ for I in xrange(num_iterations): cum_loss = 0.0 # total loss in this iteration. random.shuffle(train_data) for label, features in train_data: x = features # numpy vector. y = label # a number. loss, grads = mlpn.loss_and_gradients(x, y, params) cum_loss += loss # SGD update parameters for i in range(0, len(params)): params[i] -= learning_rate * grads[i] # notify progress train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print I, train_loss, train_accuracy, dev_accuracy return params
def train_xor(train_data, num_iterations, learning_rate, params, decay=1e-3):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the initial learning rate to use.
    params: list of parameters (initial values)
    decay: amount subtracted from the learning rate after each iteration.
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for y, x in train_data:
            loss, grads = mlpn.loss_and_gradients(x, y, params)
            cum_loss += loss
            # Update the parameters according to the gradients
            # and the learning rate.
            gW, gb, gU, gb_tag = grads
            W, b, U, b_tag = params
            W -= learning_rate * gW
            b -= learning_rate * gb
            U -= learning_rate * gU
            b_tag -= learning_rate * gb_tag
        # Linearly decay the learning rate, with a floor of 1e-3.
        learning_rate = max(1e-3, learning_rate - decay)
        train_loss = cum_loss / len(train_data)
        print(I, train_loss)
    return params
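# NOTE: a hypothetical invocation on the four XOR examples, stored as
# (label, input) pairs the way the inner loop unpacks them. The 2-4-2
# parameter shapes below (and the assumption that mlpn computes x @ W + b)
# are illustrative guesses, not taken from the original code.
xor_data = [(0, np.array([0.0, 0.0])),
            (1, np.array([0.0, 1.0])),
            (1, np.array([1.0, 0.0])),
            (0, np.array([1.0, 1.0]))]
params = [np.random.randn(2, 4) * 0.1, np.zeros(4),  # W, b
          np.random.randn(4, 2) * 0.1, np.zeros(2)]  # U, b_tag
params = train_xor(xor_data, num_iterations=500, learning_rate=0.5, params=params)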
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params): """ Create and train a classifier, and return the parameters. train_data: a list of (label, feature) pairs. dev_data : a list of (label, feature) pairs. num_iterations: the maximal number of training iterations. learning_rate: the learning rate to use. params: list of parameters (initial values) """ for I in range(num_iterations): cum_loss = 0.0 # total loss in this iteration. random.shuffle(train_data) for label, features in train_data: x = np.copy( feats_to_vec(features)) # convert features to a vector. y = np.copy(L2I[label]) # convert the label to number if needed. loss, grads = mlpn.loss_and_gradients(x, y, params) cum_loss += loss # YOUR CODE HERE # update the parameters according to the gradients # and the learning rate. for i in range(len(params)): params[i] -= learning_rate * grads[i] train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print( 'iteration :' + str(I) + 'train_ loss: ' + str(train_loss) + 'train_acc:' + str(train_accuracy) + ' dev_acc:', str(dev_accuracy)) return params
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params): """ Create and train a classifier, and return the parameters. train_data: a list of (label, feature) pairs. dev_data : a list of (label, feature) pairs. num_iterations: the maximal number of training iterations. learning_rate: the learning rate to use. params: list of parameters (initial values) """ for I in range(num_iterations): cum_loss = 0.0 # total loss in this iteration. random.shuffle(train_data) for label, features in train_data: x = feats_to_vec(features) # convert features to a vector. y = label # convert the label to number if needed. loss, grads = mlpn.loss_and_gradients(x, y, params) cum_loss += loss for i in range(len(params)): params[i][0] -= grads[i][0] * learning_rate params[i][1] -= grads[i][1] * learning_rate train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print(I, train_loss, train_accuracy, dev_accuracy) return params
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params): """ Create and train a classifier, and return the parameters. train_data: a list of (label, feature) pairs. dev_data : a list of (label, feature) pairs. num_iterations: the maximal number of training iterations. learning_rate: the learning rate to use. params: list of parameters (initial values) """ costs = [] acc = [] for I in xrange(num_iterations): cum_loss = 0.0 # total loss in this iteration. random.shuffle(train_data) for label, features in train_data: x = feats_to_vec(features) # convert features to a vector. y = ut.L2I[label] # convert the label to number if needed. loss, grads = mlpn.loss_and_gradients(x, y, params) cum_loss += loss # update the parameters according to the gradients # and the learning rate. for i in range(0,len(params)-1): params[i] -= learning_rate * grads[i] train_loss = cum_loss / len(train_data) costs.append(train_loss) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) acc.append((train_accuracy,dev_accuracy)) print I, train_loss, train_accuracy, dev_accuracy #fig = plt.plot(acc) #fig1 = plt.plot(costs) return params
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params): """ Create and train a classifier, and return the parameters. train_data: a list of (label, feature) pairs. dev_data : a list of (label, feature) pairs. num_iterations: the maximal number of training iterations. learning_rate: the learning rate to use. params: list of parameters (initial values) """ for I in range(num_iterations): cum_loss = 0.0 # total loss in this iteration. random.shuffle(train_data) if I == 4: learning_rate /= 10 for label, features in train_data: x = feats_to_vec(features) # convert features to a vector. y = ut.L2I[label] # convert the label to number if needed. loss, grads = model.loss_and_gradients(x, y, params) cum_loss += loss for i in range(0, len(params), 2): params[i] -= learning_rate * grads[i] b = params[i + 1] params[i + 1] = np.squeeze( (b - learning_rate * grads[i + 1].T).T) train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print(I, train_loss, train_accuracy, dev_accuracy) return params
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params): """ Create and train a classifier, and return the parameters. train_data: a list of (label, feature) pairs. dev_data : a list of (label, feature) pairs. num_iterations: the maximal number of training iterations. learning_rate: the learning rate to use. params: list of parameters (initial values) """ for epoch in range(num_iterations): cum_loss = 0.0 # total loss in this iteration. random.shuffle(train_data) for label, features in train_data: x = feats_to_vec(features) # convert features to a vector. y = L2I[label] # convert the label to number if needed. loss, grads = ll.loss_and_gradients(x, y, params) gw, gb = grads cum_loss += loss W, b = params W -= learning_rate * gw b -= learning_rate * gb params = W, b # update the parameters according to the gradients # and the learning rate. if epoch == 4: learning_rate = 0.001 train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print(epoch, train_loss, train_accuracy, dev_accuracy) return params
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params): """ Create and train a classifier, and return the parameters. train_data: a list of (label, feature) pairs. dev_data : a list of (label, feature) pairs. num_iterations: the maximal number of training iterations. learning_rate: the learning rate to use. params: list of parameters (initial values) """ # U, W, bu, bw = params for I in xrange(num_iterations): cum_loss = 0.0 # total loss in this iteration. random.shuffle(train_data) for label, features in train_data: x = feats_to_vec(features) # convert features to a vector. y = utils.L2I[label] # grads is [gW, gbw, gU, gbu] loss, grads = mlpn.loss_and_gradients(x, y, params) cum_loss += loss i = 0 while i < len(params): params[i][0] -= learning_rate * grads[i][0] params[i][1] -= learning_rate * grads[i][1] i += 1 # W = W - learning_rate * grads[0] # bw = bw - learning_rate * grads[1] # U = U - learning_rate * grads[2] # bu = bu - learning_rate * grads[3] # YOUR CODE HERE # update the parameters according to the gradients # and the learning rate. train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print I + 1, train_loss, train_accuracy, dev_accuracy return params
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params): """ Create and train a classifier, and return the parameters. train_data: a list of (label, feature) pairs. dev_data : a list of (label, feature) pairs. num_iterations: the maximal number of training iterations. learning_rate: the learning rate to use. params: list of parameters (initial values) """ for i in range(num_iterations): cum_loss = 0.0 # total loss in this iteration. random.shuffle(train_data) for label, features in train_data: x = features y = label loss, grads = mlpn.loss_and_gradients(x, y, params) cum_loss += loss # YOUR CODE HERE # update the parameters according to the gradients # and the learning rate. new_params = [] for j in range(len(params)): current_param = params[j] param_grad = grads[j] updated_param = current_param - learning_rate * param_grad new_params.append(updated_param) params = new_params train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print(i, train_loss, train_accuracy, dev_accuracy) return params
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params): """ Create and train a classifier, and return the parameters. train_data: a list of (label, feature) pairs. dev_data : a list of (label, feature) pairs. num_iterations: the maximal number of training iterations. learning_rate: the learning rate to use. params: list of parameters (initial values) """ weighted_grads = [] for p in params: weighted_grads.append(np.zeros(p.shape)) for I in xrange(num_iterations): cum_loss = 0.0 # total loss in this iteration. random.shuffle(train_data) for label, features in train_data: x = feats_to_vec(features) # convert features to a vector. y = L2I[label] # convert the label to number if needed. loss, grads = mlpn.loss_and_gradients(x, y, params) cum_loss += loss for j, grad in enumerate(grads): weighted_grads[j] = momentum * weighted_grads[j] + ( 1 - momentum) * grad params[j] -= learning_rate * weighted_grads[j] train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) if dev_accuracy >= 0.89: print dev_accuracy return params print I, train_loss, train_accuracy, dev_accuracy return params
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params): """ Create and train a classifier, and return the parameters. train_data: a list of (label, feature) pairs. dev_data : a list of (label, feature) pairs. num_iterations: the maximal number of training iterations. learning_rate: the learning rate to use. params: list of parameters (initial values) """ for I in range(num_iterations): cum_loss = 0.0 # total loss in this iteration. random.shuffle(train_data) for label, features in train_data: x = feats_to_vec(features) # convert features to a vector. y = L2I[label] # convert the label to number if needed. loss, grads = mlpn.loss_and_gradients(x,y,params) cum_loss += loss # YOUR CODE HERE # update the parameters according to the gradients # and the learning rate. new_params = [] L = int(len(params) / 2) for i in range(L): W, b = params[i*2], params[(i*2)+1] gW, gb = grads[i*2], grads[(i*2) + 1] new_params.append(W - (learning_rate*gW)) new_params.append(b.reshape(-1,1) - (learning_rate * gb)) params = new_params train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print(I, train_loss, train_accuracy, dev_accuracy) return params
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params): """ Create and train a classifier, and return the parameters. train_data: a list of (label, feature) pairs. dev_data : a list of (label, feature) pairs. num_iterations: the maximal number of training iterations. learning_rate: the learning rate to use. params: list of parameters (initial values) """ prev_dev_accuracy = 0 for I in range(num_iterations): cum_loss = 0.0 # total loss in this iteration. np.random.shuffle(train_data) for x, y in train_data: x = np.array(x, ndmin=2, dtype=np.double) # make row vector loss, grads = pn.loss_and_gradients(x, y, params) cum_loss += loss for i in range(len(params)): params[i] = params[i] - learning_rate * grads[i] train_loss = cum_loss / len(train_data) train_accuracy = accuracy_on_dataset(train_data, params) dev_accuracy = accuracy_on_dataset(dev_data, params) print("I {0:}, train_loss {1:}, train_accuracy {2:0.5f}, dev_accuracy {3:0.5f}"\ .format(I, train_loss, train_accuracy, dev_accuracy)) if dev_accuracy < prev_dev_accuracy: print( "early stopping criterion in iteration {} - detriorating dev accuracy" .format(I)) params = prev_params break prev_params = [p.copy() for p in params] prev_dev_accuracy = dev_accuracy return params
def _loss_and_b3_grad(b3_):
    loss, grads = mlpn.loss_and_gradients(
        x, y, [W1, b1, W2, b2, W3, b3_, W4, b4])
    return loss, grads[5]

def _loss_and_W_grad(W_):
    loss, grads = mlpn.loss_and_gradients(x, y, [W_, b])
    return loss, grads[0]

def _loss_and_b_grad(b_):
    loss, grads = mlpn.loss_and_gradients(x, y, [W, b_])
    return loss, grads[1]

def _loss_and_W1_grad(W1_):
    loss, grads = mlpn.loss_and_gradients(
        x, y, [W1_, b1, W2, b2, W3, b3, W4, b4])
    return loss, grads[0]

def _loss_and_b1_grad(b1_):
    loss, grads = mlpn.loss_and_gradients(
        x, y, [W1, b1_, W2, b2, W3, b3, W4, b4])
    return loss, grads[1]

def _loss_and_b_tag_grad(b_tag):
    x = np.array([1, 2, 3], np.double)
    loss, grads = mlpn.loss_and_gradients(x, 0, [W, b, U, b_tag])
    return loss, grads[3]

def _loss_and_b4_grad(b4_):
    loss, grads = mlpn.loss_and_gradients(
        x, y, [W1, b1, W2, b2, W3, b3, W4, b4_])
    return loss, grads[7]

def _loss_and_W4_grad(W4_):
    loss, grads = mlpn.loss_and_gradients(
        x, y, [W1, b1, W2, b2, W3, b3, W4_, b4])
    return loss, grads[6]

def _loss_and_W2_grad(W2_):
    loss, grads = mlpn.loss_and_gradients(
        x, y, [W1, b1, W2_, b2, W3, b3, W4, b4])
    return loss, grads[2]

def _loss_and_W3_grad(W3_):
    loss, grads = mlpn.loss_and_gradients(
        x, y, [W1, b1, W2, b2, W3_, b3, W4, b4])
    return loss, grads[4]

def _loss_and_b2_grad(b2_):
    loss, grads = mlpn.loss_and_gradients(
        x, y, [W1, b1, W2, b2_, W3, b3, W4, b4])
    return loss, grads[3]
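# NOTE: the closures above are shims for numeric gradient checking: each one
# takes a candidate value for a single parameter and returns the loss together
# with the analytic gradient for that parameter. A minimal checker that could
# consume them is sketched below; the name `gradient_check`, the step size,
# and the tolerance are assumptions, not part of the original snippets.
import numpy as np

def gradient_check(f, x, eps=1e-4, tol=1e-5):
    """Compare f's analytic gradient at x against central differences.

    f: a function (like _loss_and_W_grad) mapping a parameter array to
       (loss, analytic_gradient).
    x: the parameter value to check, as a numpy array (each entry is
       perturbed in place during the sweep, then restored).
    """
    _, analytic = f(x)  # analytic gradient at the unperturbed point.
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        orig = x[ix]
        x[ix] = orig + eps
        loss_plus, _ = f(x)
        x[ix] = orig - eps
        loss_minus, _ = f(x)
        x[ix] = orig  # restore the entry before moving on.
        numeric = (loss_plus - loss_minus) / (2 * eps)
        if abs(numeric - analytic[ix]) > tol:
            print('gradient mismatch at {}: numeric {} vs analytic {}'
                  .format(ix, numeric, analytic[ix]))
            return False
        it.iternext()
    return True

# e.g. gradient_check(_loss_and_b1_grad, b1) sweeps every entry of b1.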