def getSentiment():

    dataSet = StanfordSentiment()
    tokens = dataSet.tokens()

    # Load the word vectors we trained earlier
    _, wordVectors, _ = load_saved_params()
    dimVectors = wordVectors.shape[1]

    # Load the test sentences
    sentences = dataSet.getTestSentences()
    print sentences
    nSentences = len(sentences)
    sentenceFeatures = np.zeros((nSentences, dimVectors))
    sentenceLabels = np.zeros((nSentences, ), dtype=np.int32)
    for i in xrange(nSentences):
        words, sentenceLabels[i] = sentences[i]
        sentenceFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

    # Train softmax regression weights (one weight column per sentiment class)
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    regularization = 0.00001
    weights = sgd(lambda weights: softmax_wrapper(
        sentenceFeatures, sentenceLabels, weights, regularization),
                  weights,
                  3.0,
                  10,
                  PRINT_EVERY=10)

    # Class probabilities and predicted labels for the test sentences
    prob = softmax(sentenceFeatures.dot(weights))
    _, _, pred = softmaxRegression(sentenceFeatures, sentenceLabels, weights)

    # Map each predicted label to its sentiment category and report the results
    for label in pred:
        print categorify(label)
    print prob
    print pred
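
# For reference: getSentenceFeature used in these examples is typically just the
# average of the word vectors of a sentence's tokens. A minimal sketch, assuming
# `tokens` maps each word to its row index in wordVectors (an illustration, not
# the original implementation):
import numpy as np

def getSentenceFeature(tokens, wordVectors, words):
    """Average the word vectors of the words appearing in the sentence."""
    feature = np.zeros((wordVectors.shape[1],))
    for word in words:
        feature += wordVectors[tokens[word], :]
    if len(words) > 0:
        feature /= len(words)
    return feature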
for i in xrange(nDev):
    words, devLabels[i] = devset[i]
    devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

# Try our regularization parameters
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print "Training for reg=%f" % regularization

    # We will do batch optimization
    weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels,
                                                  weights, regularization),
                  weights,
                  3.0,
                  10000,
                  PRINT_EVERY=100)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print "Train accuracy (%%): %f" % trainAccuracy

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print "Dev accuracy (%%): %f" % devAccuracy

    # Save the results and weights
    results.append({
Example #3
    print "Training with 2 layer neural net!"

    dimensions = [dimVectors, 50, 5]
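    # The flat weight vector below packs the input-to-hidden weights
    # ((dimVectors + 1) * 50 entries, including a bias row) followed by the
    # hidden-to-output weights ((50 + 1) * 5 entries); forward_backward_prop
    # is expected to unpack it using `dimensions`.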
    weights = np.random.randn((dimensions[0] + 1) * dimensions[1] + (
        dimensions[1] + 1) * dimensions[2], )
    tr_N = trainLabels.shape[0]
    tr_labels = np.zeros((tr_N, dimensions[2]))
    for i in xrange(tr_N):
        tr_labels[i, trainLabels[i]] = 1

    dv_N = devLabels.shape[0] 
    dv_labels = np.zeros((dv_N, dimensions[2]))
    for i in xrange(dv_N):
        dv_labels[i, devLabels[i]] = 1

    weights = sgd(lambda weights: neural_wrapper(trainFeatures, tr_labels, weights, dimensions), 
        weights, 3.0, ITER, PRINT_EVERY=100)

    _, _, pred = forward_backward_prop(trainFeatures, tr_labels, weights, dimensions)
    trainAccuracy = accuracy(trainLabels, pred)
    print "Train accuracy (%%): %f" % trainAccuracy

    _, _, pred = forward_backward_prop(devFeatures, dv_labels, weights, dimensions)
    devAccuracy = accuracy(devLabels, pred)
    print "Dev accuracy (%%): %f" % devAccuracy
    
    ###########################################

    # Save the results and weights
    results.append({
        "reg" : regularization, 
        "weights" : weights, 
Example #4
devFeatures = np.zeros((nDev, dimVectors))
devLabels = np.zeros((nDev,), dtype=np.int32)
for i in xrange(nDev):
    words, devLabels[i] = devset[i]
    devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

# Try our regularization parameters
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print "Training for reg=%f" % regularization 

    # We will do batch optimization
    weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels, 
        weights, regularization), weights, 3.0, 10000, PRINT_EVERY=100)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print "Train accuracy (%%): %f" % trainAccuracy

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print "Dev accuracy (%%): %f" % devAccuracy

    # Save the results and weights
    results.append({
        "reg" : regularization, 
        "weights" : weights, 
Example #5
# Context size
C = 5

# Reset the random seed to make sure that everyone gets the same results
random.seed(31415)
np.random.seed(9265)

startTime = time.time()
wordVectors = np.concatenate(
    ((np.random.rand(nWords, dimVectors) - 0.5) / dimVectors,
     np.zeros((nWords, dimVectors))),
    axis=0)
wordVectors = sgd(lambda vec: word2vec_sgd_wrapper(
    skipgram, tokens, vec, dataset, C, negSamplingCostAndGradient),
                  wordVectors,
                  0.3,
                  40000,
                  None,
                  True,
                  PRINT_EVERY=10)
# Note that normalization is not called here. This is not a bug,
# normalizing during training loses the notion of length.
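# If unit-length vectors are needed later (e.g. for cosine similarity or for
# visualization), a row-wise normalization such as normalizeRows(wordVectors)
# can be applied once after training instead.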

print("sanity check: cost at convergence should be around or below 10")
print("training took %d seconds" % (time.time() - startTime))

# concatenate the input and output word vectors
wordVectors = np.concatenate(
    (wordVectors[:nWords, :], wordVectors[nWords:, :]), axis=0)
# wordVectors = wordVectors[:nWords,:] + wordVectors[nWords:,:]

visualizeWords = [
Example #6
def test_sgd_3(quad):
    t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=None)
    assert abs(t3) <= 1e-6
Example #7
def test_sgd_2(quad):
    t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=None)
    assert abs(t2) <= 1e-6
Example #8
def test_sgd_1(quad):
    """ Original normalization test defined in q3_word2vec.py; """

    t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=None)
    assert abs(t1) <= 1e-6
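
# The three tests above take `quad` as a pytest-style fixture supplying the cost
# and gradient of f(x) = x**2, which sgd should drive to roughly zero from any
# starting point. A minimal sketch of such a fixture (an assumption, not the
# original test setup):
import numpy as np
import pytest

@pytest.fixture
def quad():
    # Return cost f(x) = sum(x^2) together with its gradient 2x.
    return lambda x: (np.sum(x ** 2), 2.0 * x)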
Example #9
def your_sanity_checks():
    """
    Use this space to add any additional sanity checks by running:
        python q2_neural.py
    This function will not be called by the autograder, nor will
    your additional tests be graded.
    """
    print("Running your sanity checks...")
    ### YOUR CODE HERE
    PATH = os.getcwd()
    trainFile = os.path.join(PATH, "optdigits_train.txt")
    testFile = os.path.join(PATH, "optdigits_test.txt")

    with open(trainFile, 'r') as train_data:
        data = [line.split(',') for line in train_data.readlines()]
        for i in range(len(data)):
            for j in range(len(data[i])):
                data[i][j] = int(data[i][j])
        trainX = np.asarray(data)[:, :-1]
        indexY = np.transpose(np.asarray(data)[:, -1:])[0]
        shp = np.arange(indexY.shape[0])
        trainY = np.zeros((trainX.shape[0], 10))
        trainY[shp, indexY] = 1
        # print(trainY)

    with open(testFile, 'r') as test_data:
        data = [line.split(',') for line in test_data.readlines()]
        for i in range(len(data)):
            for j in range(len(data[i])):
                data[i][j] = int(data[i][j])
        testX = np.asarray(data)[:, :-1]
        indexY = np.transpose(np.asarray(data)[:, -1:])[0]
        shp = np.arange(indexY.shape[0])
        testY = np.zeros((testX.shape[0], 10))
        testY[shp, indexY] = 1

    dimensions = [64, 128, 10]

    siz = sum((dimensions[i] + 1) * dimensions[i + 1]
              for i in range(len(dimensions) - 1))
    params = np.random.randn(siz)

    trainChart = []
    testChart = []
    rate = 0.001
    for tenEpoc in range(500):
        sgd(forward_backward_prop, [trainX, trainY, params, dimensions],
            rate,
            10,
            PRINT_EVERY=100)
        trainChart.append(forward_test(trainX, trainY, params, dimensions))
        testChart.append(forward_test(testX, testY, params, dimensions))
        print("Train iter\t", 10 * tenEpoc, "\t:", trainChart[tenEpoc])
        print("Test iter\t", 10 * tenEpoc, "\t:", testChart[tenEpoc])
        if testChart[tenEpoc] < 130:
            rate = 0.0001

    fig = plt.figure()

    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.plot(np.arange(len(trainChart)), trainChart, np.arange(len(testChart)),
             testChart)
    plt.legend(['train', 'test'], loc='upper right')
    plt.show()
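
# The train and test label handling above repeats the same one-hot construction;
# a small helper (hypothetical, assuming numpy is imported as np as elsewhere in
# this file) could factor it out:
def one_hot(index_labels, num_classes):
    """Return an (N, num_classes) one-hot matrix for integer class labels."""
    encoded = np.zeros((index_labels.shape[0], num_classes))
    encoded[np.arange(index_labels.shape[0]), index_labels] = 1
    return encoded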
Example #10
for i in xrange(nDev):
    words, devLabels[i] = devset[i]
    devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

# Try our regularization parameters
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print("Training for reg=%f" % regularization)

    # We will do batch optimization
    weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels,
                                                  weights, regularization),
                  weights,
                  args.learning,
                  args.steps,
                  PRINT_EVERY=args.every)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print("Train accuracy (%%): %f" % trainAccuracy)

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print("Dev accuracy (%%): %f" % devAccuracy)

    # Save the results and weights
    results.append({
Example #11
def test_sgd_3(quad):
    t3 = sgd(quad, -1.5, 0.01, 1000, PRINT_EVERY=None)
    assert abs(t3) <= 1e-6
Example #12
def test_sgd_2(quad):
    t2 = sgd(quad, 0.0, 0.01, 1000, PRINT_EVERY=None)
    assert abs(t2) <= 1e-6
Example #13
def test_sgd_1(quad):
    """ Original normalization test defined in q3_word2vec.py; """

    t1 = sgd(quad, 0.5, 0.01, 1000, PRINT_EVERY=None)
    assert abs(t1) <= 1e-6
Example #14
devFeatures = np.zeros((nDev, dimVectors))
devLabels = np.zeros((nDev,), dtype=np.int32)
for i in xrange(nDev):
    words, devLabels[i] = devset[i]
    devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

# Try our regularization parameters
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print "Training for reg=%f" % regularization 

    # We will do batch optimization
    weights = sgd(lambda weights: softmaxRegression(trainFeatures, trainLabels, 
        weights, regularization, nopredictions=True), weights, 0.3, 200000, PRINT_EVERY=20000)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print "Train accuracy (%%): %f" % trainAccuracy

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print "Dev accuracy (%%): %f" % devAccuracy
    print "Prediction VS ||Weights||^2: %f - %f" % \
        (predictionLoss(trainFeatures, trainLabels, weights),
         regularizationLoss(weights, 1.0))

    # Save the results and weights