Example #1
import random

import numpy as np

def test_softmax_gradient(samples, features, classes, check_count=20):
    dummy_weights = 0.1 * np.random.randn(features, classes) + 1.0
    dummy_features = np.random.randn(samples, features)
    dummy_labels = np.argmax(np.random.randn(samples, classes), axis=1)

    f = lambda w: softmaxRegression(dummy_features,
                                    dummy_labels,
                                    w,
                                    regularization=0.0,
                                    nopredictions=False)[0]
    g = lambda w: softmaxRegression(dummy_features,
                                    dummy_labels,
                                    w,
                                    regularization=0.0,
                                    nopredictions=False)[1]

    W = dummy_weights
    grad_analytic = g(W)

    for i in range(check_count):
        ix = tuple([random.randrange(m) for m in W.shape])
        shift = np.zeros(W.shape)
        shift[ix] = 1e-7
        grad_numerical = (f(W + shift) - f(W - shift)) / (2 * 1e-7)
        assert (abs(grad_numerical - grad_analytic[ix]) /
                (abs(grad_numerical) + abs(grad_analytic[ix])) < 0.0002)
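
Every snippet on this page calls softmaxRegression, but none of them defines it. Below is a minimal sketch consistent with the call sites — cost at index 0, gradient at index 1, argmax predictions at index 2 unless nopredictions=True, and regularization as an L2 penalty. The actual implementations behind these examples may differ in details:

import numpy as np

def softmaxRegression(features, labels, weights, regularization=0.0,
                      nopredictions=False):
    # Scores -> row-wise softmax probabilities (shifted for numerical stability)
    scores = features.dot(weights)
    scores -= np.max(scores, axis=1, keepdims=True)
    probs = np.exp(scores)
    probs /= np.sum(probs, axis=1, keepdims=True)

    n = features.shape[0]
    # Mean cross-entropy loss plus L2 penalty on the weights
    cost = -np.sum(np.log(probs[np.arange(n), labels])) / n
    cost += 0.5 * regularization * np.sum(weights ** 2)

    # Gradient of the regularized loss with respect to the weights
    dscores = probs.copy()
    dscores[np.arange(n), labels] -= 1.0
    grad = features.T.dot(dscores) / n + regularization * weights

    if nopredictions:
        return cost, grad
    return cost, grad, np.argmax(probs, axis=1)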
Example #2
import numpy as np

def test_softmax_regression(samples, features, classes):
    dummy_weights = 0.1 * np.random.randn(features, classes) + 1.0
    dummy_features = np.random.randn(samples, features)
    dummy_labels = np.argmax(np.random.randn(samples, classes), axis=1)
    # Sanity check: with random weights the classifier is near chance, so the
    # cross-entropy loss should be close to -log(1/classes).
    expected = -np.log(1. / classes)
    cost = softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]
    assert np.abs(expected - cost) / expected <= 0.15
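
The 0.15 tolerance rests on a simple fact: an untrained softmax over C classes assigns roughly probability 1/C to the true class, so the expected cross-entropy is -log(1/C) = log(C). A quick look at the constant the loss is compared against:

import numpy as np

# Expected loss under uniform random guessing over C classes
for C in (3, 5, 10):
    print("C=%d: -log(1/C) = %f" % (C, -np.log(1.0 / C)))
# C=3 -> ~1.099, C=5 -> ~1.609, C=10 -> ~2.303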
Example #3
def test_softmax_gradient(samples, features, classes, check_count=20):
    dummy_weights  = 0.1 * np.random.randn(features, classes) + 1.0
    dummy_features = np.random.randn(samples, features)
    dummy_labels   = np.argmax(np.random.randn(samples, classes), axis=1)

    f = lambda w: softmaxRegression(dummy_features, dummy_labels, w, regularization=0.0, nopredictions=False)[0]
    g = lambda w: softmaxRegression(dummy_features, dummy_labels, w, regularization=0.0, nopredictions=False)[1]

    W = dummy_weights
    grad_analytic = g(W)

    for i in range(check_count):
        ix = tuple([random.randrange(m) for m in W.shape])
        shift = np.zeros(W.shape)
        shift[ix] = 1e-7
        grad_numerical = (f(W + shift) - f(W - shift)) / (2 * 1e-7)
        assert (abs(grad_numerical - grad_analytic[ix]) /
                (abs(grad_numerical) + abs(grad_analytic[ix])) < 0.0002)
Example #4
def test_softmax_regression(samples, features, classes):
    dummy_weights = 0.1 * np.random.randn(features, classes) + 1.0
    dummy_features = np.random.randn(samples, features)
    dummy_labels = np.argmax(np.random.randn(samples, classes), axis=1)
    # Sanity check: with random weights the classifier is near chance, so the
    # cross-entropy loss should be close to -log(1/classes).
    expected = -np.log(1. / classes)
    cost = softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]
    assert np.abs(expected - cost) / expected <= 0.15
Example #5
def getSentiment():

    dataSet = StanfordSentiment()
    tokens = dataSet.tokens()

    # Load the word vectors we trained earlier
    _, wordVectors, _ = load_saved_params()
    dimVectors = wordVectors.shape[1]

    # Load the test sentences
    sentences = dataSet.getTestSentences()
    print(sentences)
    nSentences = len(sentences)
    sentenceFeatures = np.zeros((nSentences, dimVectors))
    sentenceLabels = np.zeros((nSentences, ), dtype=np.int32)
    for i in range(nSentences):
        words, sentenceLabels[i] = sentences[i]
        sentenceFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

    # Train softmax regression weights on the sentence features
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)  # one column per sentiment class (assumed 5-way, as in the loops below)
    regularization = 0.00001
    weights = sgd(lambda weights: softmax_wrapper(
        sentenceFeatures, sentenceLabels, weights, regularization),
                  weights,
                  3.0,
                  10,
                  PRINT_EVERY=10)

    # Report softmax probabilities and predicted labels for the test sentences
    prob = softmax(sentenceFeatures.dot(weights))
    _, _, pred = softmaxRegression(sentenceFeatures, sentenceLabels, weights)

    for label in pred:
        print(categorify(label))
    print(prob)
    print(pred)
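
getSentiment also leans on a getSentenceFeature helper that never appears in these snippets. In this assignment it is conventionally the average of the word vectors of the sentence's tokens; here is a sketch under that assumption, where tokens maps a word to its row index in wordVectors:

import numpy as np

def getSentenceFeature(tokens, wordVectors, words):
    # Average the vectors of the sentence's in-vocabulary words
    feature = np.zeros(wordVectors.shape[1])
    count = 0
    for word in words:
        if word in tokens:
            feature += wordVectors[tokens[word]]
            count += 1
    if count > 0:
        feature /= count
    return feature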
Example #6
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print "Training for reg=%f" % regularization

    # We will do batch optimization
    weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels,
                                                  weights, regularization),
                  weights,
                  3.0,
                  10000,
                  PRINT_EVERY=100)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print "Train accuracy (%%): %f" % trainAccuracy

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print "Dev accuracy (%%): %f" % devAccuracy

    # Save the results and weights
    results.append({
        "reg": regularization,
        "weights": weights,
        "train": trainAccuracy,
        "dev": devAccuracy
    })
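
This loop hands softmax_wrapper to sgd and scores predictions with accuracy, but neither helper is shown. Both sketches below are assumptions inferred from the call sites: softmax_wrapper must return the (cost, gradient) pair sgd consumes, and accuracy a percentage:

import numpy as np

def softmax_wrapper(features, labels, weights, regularization=0.0):
    # Adapt softmaxRegression to the (cost, grad) interface sgd expects
    cost, grad, _ = softmaxRegression(features, labels, weights, regularization)
    return cost, grad

def accuracy(labels, predictions):
    # Percentage of correct predictions, matching the "(%)" prints above
    return 100.0 * np.mean(np.asarray(labels) == np.asarray(predictions))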
Example #7
    devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

# Try our regularization parameters
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print "Training for reg=%f" % regularization 

    # We will do batch optimization
    weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels,
                                                  weights, regularization),
                  weights, 3.0, 10000, PRINT_EVERY=100)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print "Train accuracy (%%): %f" % trainAccuracy

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print "Dev accuracy (%%): %f" % devAccuracy

    # Save the results and weights
    results.append({
        "reg" : regularization, 
        "weights" : weights, 
        "train" : trainAccuracy, 
        "dev" : devAccuracy})
Example #9
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print "Training for reg=%f" % regularization

    # We will do batch optimization
    weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels,
                                                  weights, regularization),
                  weights,
                  3.0,
                  10000,
                  PRINT_EVERY=100)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights,
                                   regularization)
    trainAccuracy = accuracy(trainLabels, pred)
    print "Train accuracy (%%): %f" % trainAccuracy

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights,
                                   regularization)
    devAccuracy = accuracy(devLabels, pred)
    print "Dev accuracy (%%): %f" % devAccuracy

    # Save the results and weights
    results.append({
        "reg": regularization,
        "weights": weights,
        "train": trainAccuracy,
        "dev": devAccuracy
    })
Example #10
devFeatures = np.zeros((nDev, dimVectors))
devLabels = np.zeros((nDev,), dtype=np.int32)
for i in range(nDev):
    words, devLabels[i] = devset[i]
    devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

# Try our regularization parameters
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print "Training for reg=%f" % regularization 

    # We will do batch optimization
    weights = sgd(lambda weights: softmaxRegression(trainFeatures, trainLabels,
                                                    weights, regularization,
                                                    nopredictions=True),
                  weights, 0.3, 200000, PRINT_EVERY=20000)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print "Train accuracy (%%): %f" % trainAccuracy

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print "Dev accuracy (%%): %f" % devAccuracy
    print "Prediction VS ||Weights||^2: %f - %f" % \
        (predictionLoss(trainFeatures, trainLabels, weights),
         regularizationLoss(weights, 1.0))

    # Save the results and weights
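
Every training loop on this page assumes an sgd helper called as sgd(f, x0, step, iterations, PRINT_EVERY=...), where f maps the current parameters to a (cost, gradient) pair. A minimal full-batch sketch under that assumption:

def sgd(f, x0, step, iterations, PRINT_EVERY=10):
    # Plain gradient descent; f returns (cost, grad) at the current point
    x = x0
    for it in range(1, iterations + 1):
        cost, grad = f(x)
        x = x - step * grad
        if it % PRINT_EVERY == 0:
            print("iter %d: cost %f" % (it, cost))
    return x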