import random

import numpy as np


def test_softmax_gradient(samples, features, classes, check_count=20):
    dummy_weights = 0.1 * np.random.randn(features, classes) + 1.0
    dummy_features = np.random.randn(samples, features)
    dummy_labels = np.argmax(np.random.randn(samples, classes), axis=1)
    f = lambda w: softmaxRegression(dummy_features, dummy_labels, w,
                                    regularization=0.0, nopredictions=False)[0]
    g = lambda w: softmaxRegression(dummy_features, dummy_labels, w,
                                    regularization=0.0, nopredictions=False)[1]
    W = dummy_weights
    grad_analytic = g(W)
    for i in range(check_count):
        # Perturb one randomly chosen entry of W and compare the centred
        # numerical difference against the analytic gradient.
        ix = tuple([random.randrange(m) for m in W.shape])
        shift = np.zeros(W.shape)
        shift[ix] = 1e-7
        grad_numerical = (f(W + shift) - f(W - shift)) / (2 * 1e-7)
        assert (abs(grad_numerical - grad_analytic[ix]) /
                (abs(grad_numerical) + abs(grad_analytic[ix])) < 0.0002)
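# The loop above is a centred-difference gradient check: for a random entry
# ix of W it compares (f(W + h*e_ix) - f(W - h*e_ix)) / (2h), with h = 1e-7,
# against the analytic gradient under a symmetric relative-error threshold.
# The same idea on a scalar function, as a self-contained illustration
# (hypothetical helper, not part of the assignment code):
def centred_diff(f, x, h=1e-7):
    # Centred differences have O(h^2) truncation error, versus O(h) for the
    # one-sided difference (f(x + h) - f(x)) / h.
    return (f(x + h) - f(x - h)) / (2 * h)

# e.g. centred_diff(np.sin, 0.0) is ~1.0, matching cos(0) up to
# floating-point noise.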
def test_softmax_regression(samples, features, classes):
    dummy_weights = 0.1 * np.random.randn(features, classes) + 1.0
    dummy_features = np.random.randn(samples, features)
    dummy_labels = np.argmax(np.random.randn(samples, classes), axis=1)
    # Rough estimate of the expected loss under random guessing: -np.log(p),
    # where p = 1/classes is the probability of guessing correctly; allow a
    # 15% relative deviation.
    expected = -np.log(1. / classes)
    cost = softmaxRegression(dummy_features, dummy_labels, dummy_weights,
                             0.0)[0]
    assert np.abs(expected - cost) / expected <= 0.15
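# Both tests above assume a softmaxRegression(features, labels, weights,
# regularization=0.0, nopredictions=False) that returns (cost, grad) or
# (cost, grad, pred). A minimal sketch of that interface under the usual
# mean-cross-entropy-plus-L2 formulation (a hypothetical reference, not
# necessarily this repo's exact implementation):
def softmaxRegressionSketch(features, labels, weights, regularization=0.0,
                            nopredictions=False):
    N = features.shape[0]
    scores = features.dot(weights)
    scores -= np.max(scores, axis=1, keepdims=True)  # numerical stability
    probs = np.exp(scores)
    probs /= np.sum(probs, axis=1, keepdims=True)
    cost = -np.sum(np.log(probs[np.arange(N), labels])) / N
    cost += 0.5 * regularization * np.sum(weights ** 2)
    dscores = probs.copy()
    dscores[np.arange(N), labels] -= 1.0  # d(cost)/d(scores) for softmax CE
    grad = features.T.dot(dscores) / N + regularization * weights
    if nopredictions:
        return cost, grad
    return cost, grad, np.argmax(scores, axis=1)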
def getSentiment():
    dataSet = StanfordSentiment()
    tokens = dataSet.tokens()

    # Load the word vectors we trained earlier
    _, wordVectors, _ = load_saved_params()
    dimVectors = wordVectors.shape[1]

    # Load the test sentences and featurize them
    sentences = dataSet.getTestSentences()
    nSentences = len(sentences)
    sentenceFeatures = np.zeros((nSentences, dimVectors))
    sentenceLabels = np.zeros((nSentences,), dtype=np.int32)
    for i in xrange(nSentences):
        words, sentenceLabels[i] = sentences[i]
        sentenceFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

    # Train weights: one column per sentiment class (5 classes), not per
    # sentence
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    regularization = 0.00001
    weights = sgd(lambda weights: softmax_wrapper(sentenceFeatures,
                                                  sentenceLabels, weights,
                                                  regularization),
                  weights, 3.0, 10, PRINT_EVERY=10)

    prob = softmax(sentenceFeatures.dot(weights))
    _, _, pred = softmaxRegression(sentenceFeatures, sentenceLabels, weights)
    # Squash the predicted class indices into (0, 1) polarity scores before
    # mapping them to category names
    pred = 1 / (1 + np.exp(-pred))
    for label in pred:
        print categorify(label)
    print prob
    print pred
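# getSentiment relies on getSentenceFeature to turn a tokenized sentence into
# a fixed-length vector. A minimal sketch, assuming the common choice of
# averaging the word vectors of the sentence's tokens (the repo's helper may
# differ):
def getSentenceFeatureSketch(tokens, wordVectors, words):
    # tokens maps a word to its row index in wordVectors.
    feature = np.zeros((wordVectors.shape[1],))
    for word in words:
        feature += wordVectors[tokens[word], :]
    return feature / len(words)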
    devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

# Try our regularization parameters
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print "Training for reg=%f" % regularization

    # We will do batch optimization
    weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels,
                                                  weights, regularization),
                  weights, 3.0, 10000, PRINT_EVERY=100)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print "Train accuracy (%%): %f" % trainAccuracy

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print "Dev accuracy (%%): %f" % devAccuracy

    # Save the results and weights
    results.append({
        "reg": regularization,
        "weights": weights,
        "train": trainAccuracy,
        "dev": devAccuracy})
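# With every regularization value tried, a natural follow-up is to keep the
# weights with the best dev accuracy (hypothetical snippet; the surrounding
# script may select differently):
best = max(results, key=lambda r: r["dev"])
print "Best reg=%f, dev accuracy (%%): %f" % (best["reg"], best["dev"])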
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print "Training for reg=%f" % regularization

    # We will do batch optimization
    weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels,
                                                  weights, regularization),
                  weights, 3.0, 10000, PRINT_EVERY=100)

    # Test on train set (passing regularization here changes only the
    # reported cost, not the predictions)
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights,
                                   regularization)
    trainAccuracy = accuracy(trainLabels, pred)
    print "Train accuracy (%%): %f" % trainAccuracy

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights,
                                   regularization)
    devAccuracy = accuracy(devLabels, pred)
    print "Dev accuracy (%%): %f" % devAccuracy

    # Save the results and weights
    results.append({
        "reg": regularization,
        "weights": weights,
        "train": trainAccuracy,
        "dev": devAccuracy})
devFeatures = np.zeros((nDev, dimVectors))
devLabels = np.zeros((nDev,), dtype=np.int32)
for i in xrange(nDev):
    words, devLabels[i] = devset[i]
    devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words)

# Try our regularization parameters
results = []
for regularization in REGULARIZATION:
    random.seed(3141)
    np.random.seed(59265)
    weights = np.random.randn(dimVectors, 5)
    print "Training for reg=%f" % regularization

    # We will do batch optimization
    weights = sgd(lambda weights: softmaxRegression(trainFeatures, trainLabels,
                                                    weights, regularization,
                                                    nopredictions=True),
                  weights, 0.3, 200000, PRINT_EVERY=20000)

    # Test on train set
    _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights)
    trainAccuracy = accuracy(trainLabels, pred)
    print "Train accuracy (%%): %f" % trainAccuracy

    # Test on dev set
    _, _, pred = softmaxRegression(devFeatures, devLabels, weights)
    devAccuracy = accuracy(devLabels, pred)
    print "Dev accuracy (%%): %f" % devAccuracy

    print "Prediction VS ||Weights||^2: %f - %f" % \
        (predictionLoss(trainFeatures, trainLabels, weights),
         regularizationLoss(weights, 1.0))

    # Save the results and weights
    results.append({
        "reg": regularization,
        "weights": weights,
        "train": trainAccuracy,
        "dev": devAccuracy})
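# The "Prediction VS ||Weights||^2" print above splits the objective into its
# data term and its L2 penalty. A hedged sketch of regularizationLoss,
# consistent with the 0.5 * reg * ||W||^2 convention used in the
# softmaxRegressionSketch earlier (not necessarily the repo's definition):
def regularizationLossSketch(weights, regularization):
    return 0.5 * regularization * np.sum(weights ** 2)

# predictionLoss would then be the unregularized data cost, e.g.
# softmaxRegressionSketch(features, labels, weights, 0.0)[0].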