Esempio n. 1
0
def SGD_train(train_labels, train_sentences, max_epoch, validate_labels, validate_sentences, method, lr=0.1):
    """
    Train a weight vector by stochastic gradient updates with early stopping.

    train_labels and train_sentences are parallel lists of (y, x) training
    pairs.  validate_labels and validate_sentences are the corresponding
    lists used to score the weights after every epoch.

    max_epoch - maximum number of passes over the training data.
    method    - forwarded unchanged to sr.general_score.
    lr        - learning rate multiplying each update from compute_gradient.

    Training stops as soon as an epoch fails to strictly improve on the best
    validation score seen so far; that epoch's updates are discarded and the
    weights from before it are returned.

    Returns (weights, score_list, epoch, avg_time_per_epoch):
      weights            - final weight vector of length ff.calcJ().
      score_list         - 0.0 followed by the validation score of each epoch.
      epoch              - zero-based index of the last epoch that was run
                           (0 if no epoch was run at all).
      avg_time_per_epoch - mean wall-clock seconds spent in the training pass
                           of an epoch (validation scoring is not included);
                           0.0 if no epoch was run.
    """
    num_training = len(train_sentences)
    order = np.arange(0, num_training, dtype="int")  # ordering of training ex's

    J = ff.calcJ()
    # the weight vector starts at zero; dw is handed to compute_gradient on
    # every call and rebound to whatever it returns
    weights = np.zeros(J)
    dw = np.zeros(J)

    # best validation score so far; 0.0 is the baseline the first epoch
    # has to beat
    score = 0.0
    score_list = [score]

    total_time = 0.0
    epochs_run = 0
    epoch = 0  # keeps the return value defined when max_epoch <= 0

    for epoch in range(max_epoch):
        start = time.time()
        np.random.shuffle(order)  # visit the training examples in random order
        # snapshot so a non-improving epoch can be rolled back; weights is
        # updated in place below, so this must be a real copy
        old_weights = np.copy(weights)
        for ind_ex in order:
            x = train_sentences[ind_ex]
            y = train_labels[ind_ex]
            dw = compute_gradient(x, y, weights, dw)
            weights += lr * dw
        # elapsed = end - start (the reverse order yields a negative duration)
        total_time += time.time() - start
        epochs_run += 1

        new_score = sr.general_score(weights, validate_labels, validate_sentences, method, 0)
        score_list.append(new_score)
        if new_score > score:
            # the validation score has increased
            score = new_score
        else:
            # validation score did not improve: early stopping dictates we
            # stop training and fall back to the previous epoch's weights
            weights = old_weights
            break

    # average over the number of epochs actually run, not the last epoch index
    if epochs_run:
        avg_time_per_epoch = total_time / epochs_run
    else:
        avg_time_per_epoch = 0.0
    return weights, score_list, epoch, avg_time_per_epoch
Esempio n. 2
0
def collins(train_labels, train_sentences, validation_labels, 
            validation_sentences, pct_train=0.5, Nex=None):
    """
    Runs the Collins perceptron training on the input training data.
    
    labels - All training, validation labels.
    sentences - All training, validation sentences.
    pct_train - Percentage of examples from data set to use as training data.
             The rest are used as validation data.
    """
    
    # get J, the total number of feature functions
    J = ffs.calcJ()
    print 'J = ',J
    
    # now run it
    scores = []
    w0 = np.zeros(J)
    print 'Calculating initial score...'
    scores.append(sr.score_by_word(w0,validation_labels,validation_sentences))
    print 'Done!\n'
    # run until converged, according to score on validation set
    nep = 1
    epoch_time = []
    
    print 'Initiating Collins perceptron training.'
    while True:
        print 'Epoch #',nep,'...'
        t0 = time.time()
        # get the new weights & score
        print 'Training...'
        w1 = collins_epoch(train_labels, train_sentences, w0)
        print 'Done.\n'
        epoch_time.append([time.time() - t0])
        
        t0 = time.time()
        print 'Calculating new score...'
        scores.append(sr.general_score(w1,validation_labels,validation_sentences,'word',0))
        print 'Done.\n'
        epoch_time[nep-1].append(time.time() - t0)
        
        # decide if converged
        if scores[nep] < scores[nep-1]:
            break
        else:
            w0 = w1
        nep += 1
        
    print 'Training complete!\n'
    
    """
    # make a prediction on a dummy sentence
    #dummy = ['FIRSTWORD','I','like','cheese','but','I','also','like','bread','LASTWORD']
    dummy = ['FIRSTWORD','Do','you','like','cheese','LASTWORD']
    g_dummy = sr.g(w,dummy)
    U_dummy = sr.U(g_dummy)
    y_best = sr.bestlabel(U_dummy,g_dummy)
    """
    
    # now return final weights, score time series, and epoch timing
    return w0, scores, epoch_time