Example 1
def train_classifier(train_data, dev_data, num_iterations, learning_rate,
                     params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for e_i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:

            x = feats_to_vec(features)  # convert features to a vector.
            y = L2I[label]  # convert the label to number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            params[0] -= (learning_rate * grads[0])
            params[1] -= (learning_rate * grads[1])

        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(e_i, train_loss, train_accuracy, dev_accuracy)
    return params
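
All of the examples in this section assume the same scaffolding from the surrounding assignment code: a feats_to_vec function that turns a list of feature strings into a numpy vector, a label-to-index map L2I, and a loss_and_gradients function in the ll module. A minimal sketch of what the first two might look like (the names come from the examples; the bodies, the label set, and the bigram features are hypothetical stand-ins, not the assignment's actual code):

# Hypothetical stand-ins for the helpers the examples assume; the
# label set and bigram features below are illustrative assumptions.
import numpy as np

L2I = {'en': 0, 'fr': 1, 'it': 2}   # label -> index (assumed labels)
F2I = {'th': 0, 'he': 1, 'an': 2}   # feature -> index (assumed features)

def feats_to_vec(features):
    """Map a list of feature strings to a dense count vector."""
    vec = np.zeros(len(F2I))
    for f in features:
        if f in F2I:
            vec[F2I[f]] += 1
    return vec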
Example 2
def train_classifier(train_data, dev_data, num_iterations, learning_rate,
                     params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = features  # numpy vector.
            y = label  # a number.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss

            # SGD update parameters
            W, b = params
            updated_W = W - learning_rate * grads[0]
            updated_b = b - learning_rate * grads[1]
            params = (updated_W, updated_b)

        # notify progress
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params
Example 3
def train_classifier(train_data, dev_data, num_iterations, learning_rate,
                     params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    # costs = []
    # acc = []
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = np.divide(features, 2)  # the features are already a vector; halve the counts.
            y = label  # convert the label to number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            params[0] -= learning_rate * grads[0]
            params[1] -= learning_rate * grads[1]

        train_loss = cum_loss / len(train_data)
        # costs.append(train_loss)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        # acc.append((train_accuracy,dev_accuracy))
        print(I, train_loss, train_accuracy, dev_accuracy)
    # fig = plt.plot(acc)
    # fig1 = plt.plot(costs)
    return params
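
Example 3 leaves its plotting hooks commented out. If the costs and acc lists above were re-enabled, the curves could be drawn along these lines (a sketch; the two-panel layout is an assumption, not part of the original code):

# A sketch of the plotting that Example 3 commented out: loss and
# accuracy curves per iteration, assuming `costs` holds the per-iteration
# train loss and `acc` holds (train_accuracy, dev_accuracy) pairs.
import matplotlib.pyplot as plt

def plot_training_curves(costs, acc):
    train_acc = [a[0] for a in acc]
    dev_acc = [a[1] for a in acc]
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
    ax1.plot(costs)
    ax1.set_xlabel('iteration')
    ax1.set_ylabel('train loss')
    ax2.plot(train_acc, label='train')
    ax2.plot(dev_acc, label='dev')
    ax2.set_xlabel('iteration')
    ax2.set_ylabel('accuracy')
    ax2.legend()
    plt.show()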
Example 4
def train_classifier(train_data, dev_data, num_iterations, learning_rate,
                     params):
    """
    Create and train a classifier, and return the parameters.
    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = features  # convert features to a vector.
            y = label  # convert the label to number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            gW, gb = grads
            params[0] -= gW * learning_rate
            params[1] -= gb * learning_rate

        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params
Example 5
def train_classifier(train_data, dev_data, num_iterations, learning_rate,
                     params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    #global start_count
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = L2I[label]  # convert the label to number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            params[0] -= grads[0] * learning_rate
            params[1] -= grads[1] * learning_rate

        if (I + 1) % 20 == 0:  # decay the learning rate every 20 iterations
            learning_rate /= 5
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        #if (dev_accuracy > 0.88): start_count = True
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params
Example 6
def train_classifier(train_data, dev_data, num_iterations, learning_rate,
                     params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    _params = params
    lr = learning_rate
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = ut.L2I[label]  # convert the label to number if needed.
            loss, grads = ll.loss_and_gradients(x, y, _params)
            cum_loss += loss
            W, b = _params
            Wg, bg = grads
            W -= lr * Wg
            b -= lr * bg
            _params = [W, b]

        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, _params)
        dev_accuracy = accuracy_on_dataset(dev_data, _params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return _params
Example 7
def train_classifier(train_data, dev_data, num_iterations, learning_rate,
                     params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    W, b = params
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = utils.L2I[label]
            loss, grads = ll.loss_and_gradients(x, y, [W, b])
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            W = W - learning_rate * grads[0]
            b = b - learning_rate * grads[1]

        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, [W, b])
        dev_accuracy = accuracy_on_dataset(dev_data, [W, b])
        print(I + 1, train_loss, train_accuracy, dev_accuracy)
    return [W, b]
Example 8
def train_classifier(train_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.
    train_data: a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0 # total loss in this iteration.
        np.random.shuffle(train_data)  # random.shuffle can corrupt 2D numpy arrays
        for row in range(train_data.shape[0]):
            x = train_data[row, 1:]  # the feature vector.
            y = train_data[row, 0]   # the numeric label.
            loss, grads = loss_and_gradients(x,y,params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            grad_W, grad_b = grads
            params[0] = params[0] - learning_rate * grad_W
            params[1] = params[1] - learning_rate * grad_b

        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        #dev_accuracy = accuracy_on_dataset(dev_data, params)
        #print (I, train_loss, train_accuracy, dev_accuracy)
    return params
Example 9
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            # convert features to a vector.
            x = feats_to_vec(features)
            language_label = utils.L2I[label]
            y = language_label  # convert the label to number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            # update rule: b = b - lr * grad_b
            params[1] = params[1] - learning_rate * grads[1]

            # update rule: W = W - lr * grad_W
            params[0] = params[0] - learning_rate * grads[0]

        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params
Example 10
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0 # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features) # convert features to a vector.
            y = label  # convert the label to a number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients and the
            # learning rate; gradient descent subtracts the gradient.
            params = [p - learning_rate * g for p, g in zip(params, grads)]

        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params
Example 11
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
        Create and train a classifier, and return the parameters.
        
        train_data: a list of (label, feature) pairs.
        dev_data  : a list of (label, feature) pairs.
        num_iterations: the maximal number of training iterations.
        learning_rate: the learning rate to use.
        params: list of parameters (initial values)
        """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = L2I[label]  # convert the label to number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            params = [p - learning_rate * g for p, g in zip(params, grads)]
        
        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)

        print('iteration: ' + str(I) +
              ' train_loss: ' + str(train_loss) +
              ' train_acc: ' + str(train_accuracy) +
              ' dev_acc: ' + str(dev_accuracy))
    return params
Example 12
def train_classifier(train_data, dev_data, num_iterations, learning_rate,
                     params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = utils.L2I[label]  # convert the label to number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            params = [p - learning_rate * g for p, g in zip(params, grads)]

        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params
Example 13
def train_classifier(train_data, dev_data, num_iterations, learning_rate,
                     params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            print("label", label)
            print("label index aka y", list(utils.L2I).index(label))
            print("features", features)
            x = feats_to_vec(features)  # convert features to a vector.
            y = list(utils.L2I).index(
                label)  # convert the label to number if needed.
            #i changed the parms to be induvidial
            #pn=ll.create_classifier(len(x), out_dim)
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # YOUR CODE HERE
            print("grad :", grads)

            print("===============")
            print("params :", params)
            print("===============")

            params = np.subtract(params, np.array(grads).dot(learning_rate))
            print("params after change:", params)
            print("===============")
            # update the parameters according to the gradients
            # and the learning rate.

        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params
Example 14
def loss_and_gradients(x, y, params):
    """
    params: a list of the form [W, b, U, b_tag]

    returns:
        loss,[gW, gb, gU, gb_tag]

    loss: scalar
    gW: matrix, gradients of W
    gb: vector, gradients of b
    gU: matrix, gradients of U
    gb_tag: vector, gradients of b_tag
    """
    W, b, U, b_tag = params

    post_activation = get_post_activation(x, params)
    probs = classifier_output(x, params)
    loss = -np.log(probs[y])
    # reuse the log-linear layer's loss/gradients for the top layer (U, b_tag)
    import loglinear
    _, g_activation = loglinear.loss_and_gradients(post_activation, y,
                                                   [U, b_tag])
    gU = g_activation[0]
    gb_tag = g_activation[1]
    g_post_tanh = np.dot(gb_tag, U.T)
    grad_pre_tanh = (np.ones_like(post_activation) -
                     (post_activation**2)) * g_post_tanh

    gb = grad_pre_tanh

    grad_pre_tanh_numpy = grad_pre_tanh[np.newaxis, :]

    x_input = np.array(x)
    x_input = x_input[:, np.newaxis]
    gW = np.dot(x_input, grad_pre_tanh_numpy)
    # loss += (np.sum(W**2) + np.sum(b**2) + np.sum(U**2) + np.sum(b_tag**2))
    # gW += 2 * W
    # gb += 2 * b
    # gU += 2 * U
    # gb_tag += 2 * b_tag

    return loss, [gW, gb, gU, gb_tag]
Example 15
def loss_and_gradients(x, y, params):
    """
    params: a list of the form [W, b, U, b_tag]

    returns:
        loss,[gW, gb, gU, gb_tag]

    loss: scalar
    gW: matrix, gradients of W
    gb: vector, gradients of b
    gU: matrix, gradients of U
    gb_tag: vector, gradients of b_tag
    """

    W, b, U, b_tag = params
    lo = ll.linear_output(x, (W, b))
    x_tag = np.tanh(lo)

    loss, (gU, gb_tag) = ll.loss_and_gradients(x_tag, y, (U, b_tag))
    gb = np.dot(gb_tag, U.T) * (1 - x_tag ** 2)  # tanh'(z) = 1 - tanh(z)^2
    gW = np.dot(np.atleast_2d(x).T, np.atleast_2d(gb))
    return loss, [gW, gb, gU, gb_tag]
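
Examples 14 and 15 both differentiate the same one-hidden-layer architecture, and the gb line in each relies on the identity tanh'(z) = 1 - tanh(z)^2. For reference, a minimal sketch of the forward pass they assume (the assignment's actual classifier_output lives elsewhere; this reconstruction only follows the shapes used above):

# An assumed reconstruction of the MLP forward pass that Examples 14-15
# differentiate: a tanh hidden layer, then a softmax output layer.
import numpy as np

def softmax(z):
    z = z - np.max(z)  # shift for numerical stability
    e = np.exp(z)
    return e / e.sum()

def classifier_output(x, params):
    W, b, U, b_tag = params
    hidden = np.tanh(np.dot(x, W) + b)   # the x_tag / post_activation above
    return softmax(np.dot(hidden, U) + b_tag)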
Example 16
def train_classifier(train_data, dev_data, num_iterations, learning_rate,
                     params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    prev_dev_accuracy = 0
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for x, y in train_data:
            x = np.array(x, ndmin=2)  # make row vector
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            params[0] = params[0] - learning_rate * grads[0]
            params[1] = params[1] - learning_rate * grads[1]
            #for param, grad in zip(params, grads):
            #    param = param - learning_rate * grad

        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        if dev_accuracy < prev_dev_accuracy and I > config.loglin.min_iterations:
            print(
                "early stopping criterion in iteration {} - deteriorating dev accuracy"
                .format(I))
            params = prev_params
            break
        prev_params = [p.copy() for p in params]
        prev_dev_accuracy = dev_accuracy
        print("I {}, train_loss {}, train_accuracy {}, dev_accuracy {}"\
                .format(I, loss, train_accuracy, dev_accuracy))
    return params
Example 17
def train_classifier(train_data, dev_data, num_iterations, learning_rate, params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    for i in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = features
            y = label
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.

            new_params = []
            for j in range(len(params)):
                current_param = params[j]
                param_grad = grads[j]
                updated_param = current_param - learning_rate * param_grad
                new_params.append(updated_param)

            params = new_params

        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(i, train_loss, train_accuracy, dev_accuracy)

    return params
Example 18
def train_classifier(train_data, dev_data, num_iterations, learning_rate,
                     params):
    """
    Create and train a classifier, and return the parameters.

    train_data: a list of (label, feature) pairs.
    dev_data  : a list of (label, feature) pairs.
    num_iterations: the maximal number of training iterations.
    learning_rate: the learning rate to use.
    params: list of parameters (initial values)
    """
    #cumm_grad = [np.zeros(params[0].shape), np.zeros(params[1].shape)]
    for I in range(num_iterations):
        cum_loss = 0.0  # total loss in this iteration.
        random.shuffle(train_data)
        for label, features in train_data:
            x = feats_to_vec(features)  # convert features to a vector.
            y = L2I[label]  # convert the label to number if needed.
            loss, grads = ll.loss_and_gradients(x, y, params)
            cum_loss += loss
            # update the parameters according to the gradients
            # and the learning rate.
            gW, gb = grads
            #cumm_grad[0]+=gW
            #cumm_grad[1]+=gb
            params[0] -= learning_rate * gW
            params[1] -= learning_rate * gb

        #params[0]-=learning_rate*cumm_grad[0]/len(train_data)
        #params[1]-=learning_rate*cumm_grad[1]/len(train_data)

        train_loss = cum_loss / len(train_data)
        train_accuracy = accuracy_on_dataset(train_data, params)
        dev_accuracy = accuracy_on_dataset(dev_data, params)
        print(I, train_loss, train_accuracy, dev_accuracy)
    return params
Example 19
def _loss_and_W_grad(W):
    global b
    x = np.array([[1, 2, 3]], np.double)
    loss, grads = ll.loss_and_gradients(x, 0, [W, b])
    return loss, grads[0]
Example 20
def _loss_and_b_grad(b):
    global W
    x = np.array([[1, 2, 3]], np.double)
    loss, grads = ll.loss_and_gradients(x, 0, [W, b])
    return loss, grads[1]
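
Examples 19 and 20 are the usual wrappers for a numerical gradient check: each one varies a single parameter while holding the other fixed, and returns the loss together with the matching analytic gradient. A sketch of the central-difference checker such wrappers are typically fed to (the checker itself is an assumption written to the standard recipe, not taken from the source):

# A central-difference gradient checker (a sketch, assuming W, b, and ll
# are defined as in Examples 19-20). f(param) must return
# (loss, analytic_gradient_of_param).
import numpy as np

def gradient_check(f, param, eps=1e-6, atol=1e-4):
    _, analytic = f(param)
    it = np.nditer(param, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old = param[ix]
        param[ix] = old + eps          # loss at (param + eps) in coordinate ix
        loss_plus, _ = f(param)
        param[ix] = old - eps          # loss at (param - eps) in coordinate ix
        loss_minus, _ = f(param)
        param[ix] = old                # restore the original value
        numeric = (loss_plus - loss_minus) / (2 * eps)
        assert abs(numeric - analytic[ix]) < atol, \
            'gradient mismatch at %s: %g vs %g' % (ix, numeric, analytic[ix])
        it.iternext()

# usage: gradient_check(_loss_and_W_grad, W)
#        gradient_check(_loss_and_b_grad, b)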