def sanity_check(): """ Run python q4_softmaxreg.py. """ random.seed(314159) np.random.seed(265) dataset = StanfordSentiment() tokens = dataset.tokens() nWords = len(tokens) _, wordVectors0, _ = load_saved_params() wordVectors = (wordVectors0[:nWords,:] + wordVectors0[nWords:,:]) dimVectors = wordVectors.shape[1] dummy_weights = 0.1 * np.random.randn(dimVectors, 5) dummy_features = np.zeros((10, dimVectors)) dummy_labels = np.zeros((10,), dtype=np.int32) for i in xrange(10): words, dummy_labels[i] = dataset.getRandomTrainSentence() dummy_features[i, :] = getSentenceFeature(tokens, wordVectors, words) print "==== Gradient check for softmax regression ====" gradcheck_naive(lambda weights: softmaxRegression(dummy_features, dummy_labels, weights, 1.0, nopredictions = True), dummy_weights) print "\n=== Results ===" print softmaxRegression(dummy_features, dummy_labels, dummy_weights, 1.0)
def sanity_check(): """ Set up fake data and parameters for the neural network, and test using gradcheck. """ print "Running sanity check..." N = 20 ############################################################################ # The following dimensions are Dx, H, Dy # ############################################################################ dimensions = [10, 5, 10] data = np.random.randn(N, dimensions[0]) # each row will be a datum labels = np.zeros((N, dimensions[2])) # lables are 1 only where it is True for i in xrange(N): labels[i,random.randint(0,dimensions[2]-1)] = 1 ############################################################################ # All the params are packed here # # I think this a bad way - You need to initialize using normal distribution# # See the number of parameters in the q1.pdf # ############################################################################ params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( dimensions[1] + 1) * dimensions[2], ) gradcheck_naive(lambda params: forward_backward_prop(data, labels, params, dimensions), params)
def test_word2vec():
    # Interface to the dataset for negative sampling
    dataset = type('dummy', (), {})()
    def dummySampleTokenIdx():
        return random.randint(0, 4)
    def getRandomContext(C):
        tokens = ["a", "b", "c", "d", "e"]
        return tokens[random.randint(0,4)], [tokens[random.randint(0,4)] \
            for i in xrange(2*C)]
    dataset.sampleTokenIdx = dummySampleTokenIdx
    dataset.getRandomContext = getRandomContext

    random.seed(31415)
    np.random.seed(9265)
    dummy_vectors = normalizeRows(np.random.randn(10,3))
    dummy_tokens = dict([("a",0), ("b",1), ("c",2), ("d",3), ("e",4)])

    print "==== Gradient check for skip-gram ===="
    gradcheck_naive(lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens,
        vec, dataset, 5), dummy_vectors)
    gradcheck_naive(lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens,
        vec, dataset, 5, negSamplingCostAndGradient), dummy_vectors)

    #print "\n==== Gradient check for CBOW ===="
    #gradcheck_naive(lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec,
    #    dataset, 5), dummy_vectors)
    #gradcheck_naive(lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec,
    #    dataset, 5, negSamplingCostAndGradient), dummy_vectors)

    print "\n=== Results ==="
    print skipgram("c", 3, ["a", "b", "e", "d", "b", "c"], dummy_tokens,
        dummy_vectors[:5,:], dummy_vectors[5:,:], dataset)
    print skipgram("c", 1, ["a", "b"], dummy_tokens, dummy_vectors[:5,:],
        dummy_vectors[5:,:], dataset, negSamplingCostAndGradient)
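# A hedged sketch of the word2vec_sgd_wrapper interface these checks assume:
# average the model's cost/gradient over a small random batch and pack the
# input- and output-vector gradients back into one matrix. The batch size and
# the absence of a cbow denominator are assumptions, not the graded code.
import random
import numpy as np

def word2vec_sgd_wrapper_sketch(model, tokens, wordVectors, dataset, C,
                                costAndGradient, batchsize=50):
    cost = 0.0
    grad = np.zeros(wordVectors.shape)
    N = wordVectors.shape[0]
    inputVectors = wordVectors[:N // 2, :]    # first half: input vectors
    outputVectors = wordVectors[N // 2:, :]   # second half: output vectors
    for _ in range(batchsize):
        C1 = random.randint(1, C)
        centerword, context = dataset.getRandomContext(C1)
        c, gin, gout = model(centerword, C1, context, tokens,
                             inputVectors, outputVectors, dataset,
                             costAndGradient)
        cost += c / batchsize
        grad[:N // 2, :] += gin / batchsize
        grad[N // 2:, :] += gout / batchsize
    return cost, grad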
def test_word2vec():
    # Interface to the dataset for negative sampling
    dataset = type('dummy', (), {})()
    def dummySampleTokenIdx():
        return random.randint(0, 4)
    def getRandomContext(C):
        tokens = ["a", "b", "c", "d", "e"]
        return tokens[random.randint(0,4)], [tokens[random.randint(0,4)] \
            for i in xrange(2*C)]
    def getContexts(C, sz=50):
        contexts = []
        for i in xrange(sz):
            C1 = random.randint(1, C)
            centerword, context = dataset.getRandomContext(C1)
            contexts.append((C1, centerword, context))
        return contexts
    dataset.sampleTokenIdx = dummySampleTokenIdx
    dataset.getRandomContext = getRandomContext

    random.seed(31415)
    np.random.seed(9265)
    dummy_vectors = normalizeRows(np.random.randn(10,3))
    dummy_tokens = dict([("a",0), ("b",1), ("c",2), ("d",3), ("e",4)])

    def getNegSamples(contexts):
        negsamples = []
        for context in contexts:
            samples = []
            for contextWord in context[2]:
                target = dummy_tokens[contextWord]
                indices = [target]
                for i in xrange(10):
                    k = dataset.sampleTokenIdx()
                    while k == target:
                        k = dataset.sampleTokenIdx()
                    indices.append(k)
                samples.append(indices)
            negsamples.append(samples)
        return negsamples
    # negsamples: [samples], samples: [indices],
    # indices: [target, rndSample1, ..., rndSampleK]

    dataset.contexts = getContexts(5)
    print dataset.contexts
    dataset.negsamples = getNegSamples(dataset.contexts)

    print "==== Gradient check for skip-gram ===="
    gradcheck_naive(lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens,
        vec, dataset, 5), dummy_vectors)
    gradcheck_naive(lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens,
        vec, dataset, 5, negSamplingCostAndGradient), dummy_vectors)

    #print "\n==== Gradient check for CBOW ===="
    #gradcheck_naive(lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec,
    #    dataset, 5), dummy_vectors)
    #gradcheck_naive(lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec,
    #    dataset, 5, negSamplingCostAndGradient), dummy_vectors)

    print "\n=== Results ==="
    print skipgram("c", 3, ["a", "b", "e", "d", "b", "c"], dummy_tokens,
        dummy_vectors[:5,:], dummy_vectors[5:,:], dataset,
        dataset.negsamples[0])
    print skipgram("c", 1, ["a", "b"], dummy_tokens, dummy_vectors[:5,:],
        dummy_vectors[5:,:], dataset, dataset.negsamples[0],
        negSamplingCostAndGradient)
def my_test_word2vec():
    # Interface to the dataset for negative sampling
    dataset = type('dummy', (), {})()
    def dummySampleTokenIdx():
        return random.randint(0, 4)
    def getRandomContext(C):
        tokens = ["a", "b", "c", "d", "e"]
        return tokens[random.randint(0,4)], [tokens[random.randint(0,4)] \
            for i in xrange(2*C)]
    dataset.sampleTokenIdx = dummySampleTokenIdx
    dataset.getRandomContext = getRandomContext

    random.seed(31415)
    np.random.seed(9265)
    dummy_vectors = normalizeRows(np.random.randn(10,3))
    dummy_tokens = dict([("a",0), ("b",1), ("c",2), ("d",3), ("e",4)])

    print "==== Gradient check for skip-gram ===="
    # Inlined version of:
    # word2vec_sgd_wrapper(word2vecModel, tokens, wordVectors, dataset, C,
    #                      word2vecCostAndGradient=softmaxCostAndGradient)
    word2vecModel = skipgram
    tokens = dummy_tokens
    wordVectors = dummy_vectors
    C = 5
    word2vecCostAndGradient = softmaxCostAndGradient
    batchsize = 50
    cost = 0.0
    grad = np.zeros(wordVectors.shape)
    N = wordVectors.shape[0]
    inputVectors = wordVectors[:N/2,:]
    outputVectors = wordVectors[N/2:,:]
    for i in xrange(batchsize):
        C1 = random.randint(1, C)
        centerword, context = dataset.getRandomContext(C1)
        denom = 1  # both skipgram and cbow use denom = 1 here

        def fInVec(inVec):
            c, gin, gout = word2vecModel(centerword, C1, context, tokens,
                inVec, outputVectors, dataset, word2vecCostAndGradient)
            return (c, gin)
        def fOutVec(outVec):
            c, gin, gout = word2vecModel(centerword, C1, context, tokens,
                inputVectors, outVec, dataset, word2vecCostAndGradient)
            return (c, gout)

        c, gin, gout = word2vecModel(centerword, C1, context, tokens,
            inputVectors, outputVectors, dataset, word2vecCostAndGradient)
        # print gin
        # cost += c / batchsize / denom
        # grad[:N/2, :] += gin / batchsize / denom
        # grad[N/2:, :] += gout / batchsize / denom

        # Check the input- and output-vector gradients separately, at the
        # actual vectors (not at the returned gradients).
        gradcheck_naive(fInVec, inputVectors)
        gradcheck_naive(fOutVec, outputVectors)
def sanity_check(): """ Run python q4_softmaxreg.py. """ random.seed(314159) np.random.seed(265) dataset = StanfordSentiment() tokens = dataset.tokens() nWords = len(tokens) _, wordVectors0, _ = load_saved_params() N = wordVectors0.shape[0]//2 #assert N == nWords wordVectors = (wordVectors0[:N,:] + wordVectors0[N:,:]) dimVectors = wordVectors.shape[1] dummy_weights = 0.1 * np.random.randn(dimVectors, 5) dummy_features = np.zeros((10, dimVectors)) dummy_labels = np.zeros((10,), dtype=np.int32) for i in range(10): words, dummy_labels[i] = dataset.getRandomTrainSentence() dummy_features[i, :] = getSentenceFeature(tokens, wordVectors, words) print("==== Gradient check for softmax regression ====") gradcheck_naive(lambda weights: softmaxRegression(dummy_features, dummy_labels, weights, 1.0, nopredictions = True), dummy_weights) print("\n=== Results ===") print(softmaxRegression(dummy_features, dummy_labels, dummy_weights, 1.0)) dummy_weights = 0.1 * np.random.randn(40, 10) + 1.0 dummy_features = np.random.randn(2000, 40) dummy_labels = np.argmax(np.random.randn(2000, 10), axis=1) print(-np.log(0.1))#expected correct classification (random) = 1 in 10; #cost then becomes -np.log(0.1) print(softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]) dummy_weights = 0.1 * np.random.randn(40, 80) + 1.0 dummy_features = np.random.randn(2000, 40) dummy_labels = np.argmax(np.random.randn(2000, 80), axis=1) print(-np.log(1./80))#expected correct classification (random) = 1 in 80; #cost then becomes -np.log(1./80) print(softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]) dummy_weights = 0.1 * np.random.randn(40, 1000) + 1.0 dummy_features = np.random.randn(40000, 40) dummy_labels = np.argmax(np.random.randn(40000, 1000), axis=1) print(-np.log(1./1000))#expected correct classification (random) = 1 in 80; #cost then becomes -np.log(1./80) print(softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]) print(np.exp(-softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]))
def your_sanity_checks():
    """
    Use this space to add any additional sanity checks by running:
        python q2_neural.py
    This function will not be called by the autograder, nor will
    your additional tests be graded.
    """
    print "Running your sanity checks..."
    ### YOUR CODE HERE
    input = np.array([[0.5, 0.3, -1.5, 0.0, -0.2]])
    labels = np.array([[0.2, 0.2, 0.2, 0.2, 0.2]])
    dimensions = [5, 3, 5]
    params = np.random.randn((dimensions[0] + 1) * dimensions[1] +
                             (dimensions[1] + 1) * dimensions[2], )
    gradcheck_naive(lambda params: forward_backward_prop(input, labels,
        params, dimensions), params)
def sanity_check(): """ Set up fake data and parameters for the neural network, and test using gradcheck. """ print "Running sanity check..." N = 20 dimensions = [10, 5, 10] data = np.random.randn(N, dimensions[0]) # each row will be a datum labels = np.zeros((N, dimensions[2])) for i in xrange(N): labels[i,random.randint(0,dimensions[2]-1)] = 1 params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (dimensions[1] + 1) * dimensions[2], ) gradcheck_naive(lambda params: forward_backward_prop(data, labels, params,dimensions), params)
def test_word2vec(): """ Interface to the dataset for negative sampling """ dataset = type('dummy', (), {})() def dummySampleTokenIdx(): return random.randint(0, 4) def getRandomContext(C): tokens = ["a", "b", "c", "d", "e"] return tokens[random.randint(0,4)], \ [tokens[random.randint(0,4)] for i in xrange(2*C)] dataset.sampleTokenIdx = dummySampleTokenIdx dataset.getRandomContext = getRandomContext random.seed(31415) np.random.seed(9265) dummy_vectors = normalizeRows(np.random.randn(10, 3)) dummy_tokens = dict([("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)]) print "==== Gradient check for skip-gram ====" gradcheck_naive( lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens, vec, dataset, 5, softmaxCostAndGradient), dummy_vectors) gradcheck_naive( lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens, vec, dataset, 5, negSamplingCostAndGradient), dummy_vectors) print "\n==== Gradient check for CBOW ====" gradcheck_naive( lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec, dataset, 5, softmaxCostAndGradient), dummy_vectors) gradcheck_naive( lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec, dataset, 5, negSamplingCostAndGradient), dummy_vectors) print "\n=== Results ===" print skipgram("c", 3, ["a", "b", "e", "d", "b", "c"], dummy_tokens, dummy_vectors[:5, :], dummy_vectors[5:, :], dataset) print skipgram("c", 1, ["a", "b"], dummy_tokens, dummy_vectors[:5, :], dummy_vectors[5:, :], dataset, negSamplingCostAndGradient) print cbow("a", 2, ["a", "b", "c", "a"], dummy_tokens, dummy_vectors[:5, :], dummy_vectors[5:, :], dataset) print cbow("a", 2, ["a", "b", "a", "c"], dummy_tokens, dummy_vectors[:5, :], dummy_vectors[5:, :], dataset, negSamplingCostAndGradient)
def sanity_check(): """ Set up fake data and parameters for the neural network, and test using gradcheck. """ print "Running sanity check..." N = 20 dimensions = [10, 5, 10] data = np.random.randn(N, dimensions[0]) # each row will be a datum labels = np.zeros((N, dimensions[2])) for i in xrange(N): #for each datapoint labels[i, random.randint( 0, dimensions[2] - 1 )] = 1 #iterates through each label one-hot vector and sets a random location from 1 to 10 as 1, everything else is init as zero (already) params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (dimensions[1] + 1) * dimensions[2], ) def dummy(x): return forward_backward_prop(data, labels, params, dimensions) gradcheck_naive(dummy, params)
def your_sanity_checks():
    """
    Use this space to add any additional sanity checks by running:
        python q2_neural.py
    This function will not be called by the autograder, nor will
    your additional tests be graded.
    """
    print "Running your sanity checks..."
    ### YOUR CODE HERE
    N = 10
    dimensions = [1, 5, 2]
    data = 10 * np.random.randn(N, dimensions[0])  # each row will be a datum
    labels = np.zeros((N, dimensions[2]))
    for i in xrange(N):
        labels[i, random.randint(0, dimensions[2] - 1)] = 1

    params = np.random.randn((dimensions[0] + 1) * dimensions[1] +
                             (dimensions[1] + 1) * dimensions[2], )
    gradcheck_naive(
        lambda params: forward_backward_prop(data, labels, params, dimensions),
        params)
def sanity_check(): """ Set up fake data and parameters for the neural network, and test using gradcheck. """ print "Running sanity check..." N = 20 dimensions = [10, 5, 10] data = np.random.RandomState(42).randn( N, dimensions[0]) # each row will be a datum labels = np.zeros((N, dimensions[2])) for i in xrange(N): labels[i, np.random.RandomState(42).randint(0, dimensions[2] - 1)] = 1 # print(labels) params = np.random.RandomState(42).randn( (dimensions[0] + 1) * dimensions[1] + (dimensions[1] + 1) * dimensions[2], ) gradcheck_naive( lambda func_args: forward_backward_prop(data, labels, func_args, dimensions), params)
def your_sanity_checks():
    """
    Use this space to add any additional sanity checks by running:
        python q2_neural.py
    This function will not be called by the autograder, nor will
    your additional tests be graded.
    """
    print "Running your sanity checks..."
    ### YOUR CODE HERE
    N = 2
    dimensions = [2, 2, 2]
    data = np.zeros((N, dimensions[0]))  # each row will be a datum
    labels = np.zeros((N, dimensions[2]))
    labels[0, 0] = 1
    labels[1, 1] = 1
    params = np.ones((dimensions[0] + 1) * dimensions[1] + (
        dimensions[1] + 1) * dimensions[2], )
    cost_and_grad = lambda x: forward_backward_prop(data, labels, x, dimensions)
    print cost_and_grad(params)
    gradcheck_naive(cost_and_grad, params)
def test_sigmoid():
    """
    Use this space to test your sigmoid implementation by running:
        python q2_sigmoid.py
    This function will not be called by the autograder, nor will
    your tests be graded.
    """
    print("Running your tests...")
    ### YOUR CODE HERE
    # Use floats so gradcheck can perturb x in place without truncation.
    x = np.array([[1., 2.], [-1., -2.]])
    f = sigmoid(x)
    g = sigmoid_grad(f)

    print(f)
    f_ans = np.array([[0.73105858, 0.88079708],
                      [0.26894142, 0.11920292]])
    assert np.allclose(f, f_ans, rtol=1e-05, atol=1e-06)

    print(g)
    g_ans = np.array([[0.19661193, 0.10499359],
                      [0.19661193, 0.10499359]])
    assert np.allclose(g, g_ans, rtol=1e-05, atol=1e-06)

    def ff(x):
        f = sigmoid(x)
        return np.sum(f), sigmoid_grad(f)
    gradcheck_naive(ff, x)
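# The values asserted above are consistent with the standard definitions
# below; note that sigmoid_grad takes the activation f = sigmoid(x), as the
# test's ff() helper does. A reference sketch, not the graded file.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_grad(f):
    # Derivative of the sigmoid, expressed in terms of its output f.
    return f * (1.0 - f)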
def test_word2vec():
    # Interface to the dataset for negative sampling
    dataset = type('dummy', (), {})()
    def dummySampleTokenIdx():
        return random.randint(0, 4)
    def getRandomContext(C):
        tokens = ["a", "b", "c", "d", "e"]
        return tokens[random.randint(0,4)], [tokens[random.randint(0,4)] \
            for i in xrange(2*C)]
    dataset.sampleTokenIdx = dummySampleTokenIdx
    dataset.getRandomContext = getRandomContext

    random.seed(31415)
    np.random.seed(9265)
    dummy_vectors = normalizeRows(np.random.randn(10,3))
    dummy_tokens = dict([("a",0), ("b",1), ("c",2), ("d",3), ("e",4)])

    print "==== Gradient check for skip-gram ===="
    C1 = random.randint(1,5)
    centerword, context = dataset.getRandomContext(C1)

    # Precompute fixed negative samples so the cost function is
    # deterministic across the repeated evaluations made by gradcheck.
    negsamples = []
    for contextWord in context:
        target = dummy_tokens[contextWord]
        indices = [target]
        for i in xrange(10):
            k = dataset.sampleTokenIdx()
            while k == target:
                k = dataset.sampleTokenIdx()
            indices.append(k)
        negsamples.append(indices)
    dataset.negsamples = negsamples

    gradcheck_naive(lambda vec: my_sgd_wrapper(skipgram, dummy_tokens, vec,
        dataset, C1, centerword, context), dummy_vectors)
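# A hedged sketch of negSamplingCostAndGradient with K draws from
# dataset.sampleTokenIdx(), resampling any draw that hits the target. The
# fixed-sample variants above precompute exactly these indices so the cost
# stays deterministic under gradcheck. An illustration, not the graded code.
import numpy as np

def negSamplingCostAndGradient_sketch(predicted, target, outputVectors,
                                      dataset, K=10):
    indices = [target]
    for _ in range(K):
        k = dataset.sampleTokenIdx()
        while k == target:
            k = dataset.sampleTokenIdx()
        indices.append(k)

    sig = lambda z: 1.0 / (1.0 + np.exp(-z))
    grad = np.zeros(outputVectors.shape)

    # Positive term: -log sigmoid(u_target . v)
    s = sig(outputVectors[target].dot(predicted))
    cost = -np.log(s)
    gradPred = (s - 1.0) * outputVectors[target]
    grad[target] += (s - 1.0) * predicted

    # Negative terms: -log sigmoid(-u_k . v); duplicates contribute twice.
    for k in indices[1:]:
        sn = sig(-outputVectors[k].dot(predicted))
        cost -= np.log(sn)
        gradPred += (1.0 - sn) * outputVectors[k]
        grad[k] += (1.0 - sn) * predicted
    return cost, gradPred, grad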
def sanity_check(): """ Set up fake data and parameters for the neural network, and test using gradcheck. """ print("Running sanity check...") N = 20 dimensions = [10, 5, 10] data = np.random.randn(N, dimensions[0]) # each row will be a datum labels = np.zeros((N, dimensions[2])) for i in range(N): labels[i, random.randint(0, dimensions[2] - 1)] = 1 params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (dimensions[1] + 1) * dimensions[2], ) gradcheck_naive( lambda params: forward_backward_prop( data, labels, params, dimensions, activation='sigmoid'), params) gradcheck_naive( lambda params: forward_backward_prop( data, labels, params, dimensions, activation='relu'), params)
def sanity_check():
    random.seed(312159)
    np.random.seed(265)
    dataset = StanfordSentiment()
    tokens = dataset.tokens()
    nWords = len(tokens)

    _, wordVectors0, _ = load_saved_params()
    wordVectors = (wordVectors0[:nWords, :] + wordVectors0[nWords:, :])
    dimVectors = wordVectors.shape[1]

    dummy_weights = 0.1 * np.random.randn(dimVectors, 5)
    dummy_features = np.zeros((10, dimVectors))
    dummy_labels = np.zeros((10, ), dtype=np.int32)
    for i in range(10):
        words, dummy_labels[i] = dataset.getRandomTrainSentence()
        dummy_features[i, :] = getSentenceFeature(tokens, wordVectors, words)

    print("Gradient check for softmax regression")
    gradcheck_naive(
        lambda weights: softmaxRegression(
            dummy_features, dummy_labels, weights, 1.0, nopredictions=True),
        dummy_weights)

    print("\n=== Results ===")
    print(softmaxRegression(dummy_features, dummy_labels, dummy_weights, 1.0))
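# A hedged sketch of the softmaxRegression contract these checks assume:
# features (M, D), integer labels (M,), weights (D, C), L2 regularization,
# returning (cost, gradient) with nopredictions=True and additionally the
# argmax predictions otherwise. The 0.5 * reg * ||W||^2 penalty is an
# assumption for illustration.
import numpy as np

def softmaxRegression_sketch(features, labels, weights, regularization,
                             nopredictions=False):
    M = features.shape[0]
    scores = features.dot(weights)
    scores -= scores.max(axis=1, keepdims=True)   # numerical stability
    probs = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)
    cost = -np.sum(np.log(probs[np.arange(M), labels])) / M
    cost += 0.5 * regularization * np.sum(weights ** 2)

    dscores = probs.copy()
    dscores[np.arange(M), labels] -= 1.0
    grad = features.T.dot(dscores) / M + regularization * weights

    if nopredictions:
        return cost, grad
    return cost, grad, np.argmax(probs, axis=1)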
def sanity_check(): """ Set up fake data and parameters for the neural network, and test using gradcheck. """ print "Running sanity check..." # N: batch size. N = 20 dimensions = [10, 5, 10] # Each row will be a datum. data = np.random.randn(N, dimensions[0]) labels = np.zeros((N, dimensions[2])) # xrange(): range() in python 3. for i in xrange(N): labels[i, random.randint(0,dimensions[2]-1)] = 1 # A size-N training batch share the same DC value b1, b2. params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( dimensions[1] + 1) * dimensions[2], ) # f(params): return cost, grad gradcheck_naive(lambda params: forward_backward_prop(data, labels, params, dimensions), params)
def sanity_check(): """ Set up fake data and parameters for the neural network, and test using gradcheck. """ print("Running sanity check...") N = 300 dimensions = [10, 5, 10] data = np.random.randn(N, dimensions[0]) # each row will be a datum labels = np.zeros((N, dimensions[2])) for i in range(N): labels[i,random.randint(0,dimensions[2]-1)] = 1 params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( dimensions[1] + 1) * dimensions[2], ) #cost, _ = forward_backward_prop(data, labels, params, dimensions) # # expect to get 1 in 10 correct #print(np.exp(-cost)) # #cost is roughly correct gradcheck_naive(lambda params: forward_backward_prop(data, labels, params, dimensions), params)
def sanity_check(): """ Set up fake data and parameters for the neural network, and test using gradcheck. """ print "Running sanity check..." N = 20 dimensions = [10, 5, 10] data = np.random.randn(N, dimensions[0]) # each row will be a datum,生成一个N*dimensions[0]的随机矩阵 labels = np.zeros((N, dimensions[2])) for i in xrange(N): labels[i, random.randint(0,dimensions[2]-1)] = 1 params = np.random.randn((dimensions[0] + 1) * dimensions[1] + ( dimensions[1] + 1) * dimensions[2], ) #params是把所有的参数都放到一个向量里了 print "params.shape", params.shape print "type(params)", type(params) print "params", str(params) gradcheck_naive(lambda params: forward_backward_prop(data, labels, params, dimensions), params)
def sanity_check(): """ Set up fake data and parameters for the neural network, and test using gradcheck. """ print "Running sanity check..." N = 20 dimensions = [10, 5, 10] data = np.random.randn(N, dimensions[0]) # 20*10 labels = np.zeros((N, dimensions[2])) # 20*10 for i in xrange(N): labels[i, random.randint(0, dimensions[2] - 1)] = 1 params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (dimensions[1] + 1) * dimensions[2], ) # 115*1,number of parameters # # forward_backward_prop(data, labels, params,dimensions) # gradcheck_naive( lambda params: forward_backward_prop(data, labels, params, dimensions), params)
def test_data_sizes(self):
    random_data_sizes = np.random.randint(1, 100, 5)
    for i in range(5):
        start = time.time()
        N = random_data_sizes[i]
        print("data size = {}".format(N))
        dimensions = [10, 5, 10]
        data = np.random.randn(N, dimensions[0])
        labels = np.zeros((N, dimensions[2]))
        for j in range(N):  # j, not i: don't shadow the outer loop index
            labels[j, random.randint(0, dimensions[2] - 1)] = 1
        params = np.random.randn((dimensions[0] + 1) * dimensions[1] +
                                 (dimensions[1] + 1) * dimensions[2], )
        function = lambda params: forward_backward_prop(
            data, labels, params, dimensions)
        result = gradcheck_naive(function, params)
        self.assertTrue(np.sum(result) <= len(params) * 1e-5)
        end = time.time()
        print("Test time = {:f}(s)\n".format(end - start))
def test_word2vec():
    # Interface to the dataset for negative sampling
    dataset = type('dummy', (), {})()
    def dummySampleTokenIdx():
        return random.randint(0, 4)
    def getRandomContext(C):
        tokens = ["a", "b", "c", "d", "e"]
        return tokens[random.randint(0,4)], [tokens[random.randint(0,4)] \
            for i in xrange(2*C)]
    dataset.sampleTokenIdx = dummySampleTokenIdx
    dataset.getRandomContext = getRandomContext

    random.seed(31415)
    np.random.seed(9265)
    dummy_vectors = normalizeRows(np.random.randn(10, 3))
    dummy_tokens = dict([("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)])

    print "==== Gradient check for skip-gram ===="
    with Timer('skipgram softmaxCostAndGradient'):
        gradcheck_naive(
            lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens, vec,
                                             dataset, 5), dummy_vectors)
    with Timer('skipgram negSamplingCostAndGradient'):
        gradcheck_naive(
            lambda vec: word2vec_sgd_wrapper(skipgram, dummy_tokens, vec,
                                             dataset, 5,
                                             negSamplingCostAndGradient),
            dummy_vectors)

    print "\n==== Gradient check for CBOW ===="
    with Timer('cbow softmaxCostAndGradient'):
        gradcheck_naive(
            lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec,
                                             dataset, 5), dummy_vectors)
    with Timer('cbow negSamplingCostAndGradient'):
        gradcheck_naive(
            lambda vec: word2vec_sgd_wrapper(cbow, dummy_tokens, vec,
                                             dataset, 5,
                                             negSamplingCostAndGradient),
            dummy_vectors)
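# A hedged sketch of the skipgram signature exercised by these tests: sum the
# chosen cost/gradient over every context word, using the center word's input
# vector as "predicted". The window size C is unused because the context list
# is passed explicitly; costAndGradient is required here rather than
# defaulted. An assumption for illustration, not the graded code.
import numpy as np

def skipgram_sketch(currentWord, C, contextWords, tokens, inputVectors,
                    outputVectors, dataset, costAndGradient):
    cost = 0.0
    gradIn = np.zeros(inputVectors.shape)
    gradOut = np.zeros(outputVectors.shape)
    predicted = inputVectors[tokens[currentWord]]
    for contextWord in contextWords:
        c, gPred, gOut = costAndGradient(predicted, tokens[contextWord],
                                         outputVectors, dataset)
        cost += c
        gradIn[tokens[currentWord]] += gPred  # only the center row is updated
        gradOut += gOut
    return cost, gradIn, gradOut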
def sanity_check(): """ Set up fake data and parameters for the neural network, and test using gradcheck. """ print "Running sanity check..." N = 1 dimensions = [10, 5, 10] data = np.random.randn(N, dimensions[0]) # each row will be a datum labels = np.zeros((N, dimensions[2])) for i in xrange(N): labels[i, random.randint(0, dimensions[2] - 1)] = 1 params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (dimensions[1] + 1) * dimensions[2], ) reldiffs = gradcheck_naive( lambda params: forward_backward_prop(data, labels, params, dimensions), params) Dx, H, Dy = dimensions extract(reldiffs, Dx, H, Dy) return
def affine_check():
    N = 2
    D = 5
    H = 3
    upstream_grads = np.random.randn(N, H)
    x = np.random.randn(N, D)
    W = np.random.randn(D, H)
    b = np.random.randn(H)

    def affine_cost_x_grads(upstream_grads, x, W, b):
        b = b.reshape(1, len(b))
        affine_ = np.dot(x, W) + b
        out = np.sum(np.multiply(upstream_grads, affine_))
        grads = affine_grads(upstream_grads, x, W, b)[0]
        return (out, grads)

    def affine_cost_W_grads(upstream_grads, x, W, b):
        b = b.reshape(1, len(b))
        affine_ = np.dot(x, W) + b
        out = np.sum(np.multiply(upstream_grads, affine_))
        grads = affine_grads(upstream_grads, x, W, b)[1]
        return (out, grads)

    def affine_cost_b_grads(upstream_grads, x, W, b):
        b = b.reshape(1, len(b))
        affine_ = np.dot(x, W) + b
        out = np.sum(np.multiply(upstream_grads, affine_))
        grads = affine_grads(upstream_grads, x, W, b)[2]
        return (out, grads)

    print('running affine grads check')
    print('running affine W grads check')
    gradcheck_naive(lambda W: affine_cost_W_grads(upstream_grads, x, W, b), W)
    print('running affine b grads check')
    gradcheck_naive(lambda b: affine_cost_b_grads(upstream_grads, x, W, b), b)
    print('running affine x grads check')
    gradcheck_naive(lambda x: affine_cost_x_grads(upstream_grads, x, W, b), x)
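# The check above pins down the affine_grads contract: index 0 is the x
# gradient, 1 the W gradient, 2 the b gradient of out = x.dot(W) + b under an
# upstream gradient. A minimal sketch consistent with that usage:
import numpy as np

def affine_grads_sketch(upstream_grads, x, W, b):
    # b is unused: d(out)/db does not depend on b's value.
    dx = upstream_grads.dot(W.T)       # (N, D)
    dW = x.T.dot(upstream_grads)       # (D, H)
    db = upstream_grads.sum(axis=0)    # (H,)
    return dx, dW, db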
def sanity_check(): """ Run python q4_softmaxreg.py. """ random.seed(314159) np.random.seed(265) dataset = StanfordSentiment() tokens = dataset.tokens() nWords = len(tokens) _, wordVectors0, _ = load_saved_params() N = wordVectors0.shape[0] // 2 #assert N == nWords wordVectors = (wordVectors0[:N, :] + wordVectors0[N:, :]) dimVectors = wordVectors.shape[1] dummy_weights = 0.1 * np.random.randn(dimVectors, 5) dummy_features = np.zeros((10, dimVectors)) dummy_labels = np.zeros((10, ), dtype=np.int32) for i in range(10): words, dummy_labels[i] = dataset.getRandomTrainSentence() dummy_features[i, :] = getSentenceFeature(tokens, wordVectors, words) print("==== Gradient check for softmax regression ====") gradcheck_naive( lambda weights: softmaxRegression( dummy_features, dummy_labels, weights, 1.0, nopredictions=True), dummy_weights) print("\n=== Results ===") print(softmaxRegression(dummy_features, dummy_labels, dummy_weights, 1.0)) dummy_weights = 0.1 * np.random.randn(40, 10) + 1.0 dummy_features = np.random.randn(2000, 40) dummy_labels = np.argmax(np.random.randn(2000, 10), axis=1) print(-np.log(0.1)) #expected correct classification (random) = 1 in 10; #cost then becomes -np.log(0.1) print( softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]) dummy_weights = 0.1 * np.random.randn(40, 80) + 1.0 dummy_features = np.random.randn(2000, 40) dummy_labels = np.argmax(np.random.randn(2000, 80), axis=1) print( -np.log(1. / 80)) #expected correct classification (random) = 1 in 80; #cost then becomes -np.log(1./80) print( softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]) dummy_weights = 0.1 * np.random.randn(40, 1000) + 1.0 dummy_features = np.random.randn(40000, 40) dummy_labels = np.argmax(np.random.randn(40000, 1000), axis=1) print(-np.log( 1. / 1000)) #expected correct classification (random) = 1 in 80; #cost then becomes -np.log(1./80) print( softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]) print( np.exp(-softmaxRegression(dummy_features, dummy_labels, dummy_weights, 0.0)[0]))
def test_cbow_negative_sampling(self):
    self.assertEqual(True, gradcheck_naive(
        self.word2vec(cbow, negSamplingCostAndGradient), self.vectors))
def test_word2vec(): """ Interface to the dataset for negative sampling """ """ Create a dummy dataset class using the type function. The first argument is the __name__ attribute of the class, the second parameter is the base class, and the third a dict containing the class attributes. """ dataset = type('dummy', (), {})() def dummySampleTokenIdx(): # return an integer ranging from (0, 4) return random.randint(0, 4) """ This function basically creates the window for testing the word2vec approach. A center word is choosen and then the context words are chosen after that. """ def getRandomContext(C): tokens = ["a", "b", "c", "d", "e"] return tokens[random.randint(0,4)], \ [tokens[random.randint(0,4)] for i in xrange(2*C)] dataset.sampleTokenIdx = dummySampleTokenIdx dataset.getRandomContext = getRandomContext random.seed(31415) np.random.seed(9265) """ In this test, the word vectos will have the dimension 10X3 """ dummy_vectors = normalizeRows(np.random.randn(10,3)) dummy_tokens = dict([("a",0), ("b",1), ("c",2),("d",3),("e",4)]) print "==== Gradient check for skip-gram ====" gradcheck_naive(lambda vec: word2vec_sgd_wrapper( skipgram, dummy_tokens, vec, dataset, 5, softmaxCostAndGradient), dummy_vectors) gradcheck_naive(lambda vec: word2vec_sgd_wrapper( skipgram, dummy_tokens, vec, dataset, 5, negSamplingCostAndGradient), dummy_vectors) print "\n==== Gradient check for CBOW ====" gradcheck_naive(lambda vec: word2vec_sgd_wrapper( cbow, dummy_tokens, vec, dataset, 5, softmaxCostAndGradient), dummy_vectors) gradcheck_naive(lambda vec: word2vec_sgd_wrapper( cbow, dummy_tokens, vec, dataset, 5, negSamplingCostAndGradient), dummy_vectors) print "\n=== Results ===" print skipgram("c", 3, ["a", "b", "e", "d", "b", "c"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset) print skipgram("c", 1, ["a", "b"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset, negSamplingCostAndGradient) print cbow("a", 2, ["a", "b", "c", "a"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset) print cbow("a", 2, ["a", "b", "a", "c"], dummy_tokens, dummy_vectors[:5,:], dummy_vectors[5:,:], dataset, negSamplingCostAndGradient)
def test_softmax_regression(self):
    self.assertEqual(True,
                     gradcheck_naive(self.grad_and_cost(), self.weights))
def test_forward_backward_prop(self):
    self.assertEqual(True, gradcheck_naive(self.cost_and_grad, self.params))
def your_sanity_checks():
    """
    Use this space to add any additional sanity checks by running:
        python q2_neural.py
    This function will not be called by the autograder, nor will
    your additional tests be graded.
    """
    print("Running your sanity checks...")

    N = 2
    dimensions = [1, 1, 2]
    data = 10. * np.random.randn(N, dimensions[0])   # each row will be a datum
    data2 = 10. * np.random.randn(N, dimensions[1])  # each row will be a datum
    data3 = np.random.randn(N, dimensions[2]) / 2. + .5  # each row will be a datum
    labels = np.zeros((N, dimensions[2]))
    for i in range(N):
        labels[i, random.randint(0, dimensions[2] - 1)] = 1

    W2 = np.random.randn(dimensions[1] * dimensions[2])
    b2 = np.random.randn(dimensions[2])
    W = np.random.randn(dimensions[0] * dimensions[1])
    b1 = np.random.randn(dimensions[1])

    print("\nsoftmax_ce_loss\n")
    gradcheck_naive(lambda x: softmax_ce_loss(x, labels), data3)
    print("\nsoftmax_ce\n")
    gradcheck_naive(lambda x: ce_loss(x, labels), data3)
    print("\nsoftmax_ce_loss_w\n")
    gradcheck_naive(
        lambda params: softmax_ce_loss_w(data2, params, labels, dimensions), W2)
    print("\nsoftmax_ce_loss_w_sigmoid\n")
    gradcheck_naive(
        lambda x: softmax_ce_loss_w_sigmoid(x, W2, labels, dimensions), data2)
    print("\nsoftmax_ce_loss_w_w2_sigmoid\n")
    gradcheck_naive(
        lambda W: softmax_ce_loss_w_w2_sigmoid(data, np.concatenate((W, W2)),
                                               labels, dimensions), W)
    return  # NOTE: this early return disables the HDF5 comparison below.

    import h5py
    import glob
    sample_files = glob.glob("C:\\tmp\\sample*.h5")
    for file in sample_files:
        with h5py.File(file) as f:
            data = f["Input"][...]
            labels = f["Target"][...]
            N = data.shape[1]
            dimensions = [2, 3, 3]
            l1Weights = f["L1Weights"][...]
            l1Biases = f["L1Biases"][...]
            l2Weights = f["L2Weights"][...]
            l2Biases = f["L2Biases"][...]
            loss = f["Loss"][...]
            z1 = f["z1"][...]
            l1 = f["l1"][...]
            params = np.hstack([
                l1Weights.T.flatten(), l1Biases.flatten(),
                l2Weights.T.flatten(), l2Biases.flatten()
            ])
            print("gradW2 (wl): {}".format(f["GradientL2Weights"][...]))
            print("loss: {}".format(loss))
            cost, grad = forward_backward_prop(data, labels, params, dimensions)
            print("cost: {}, should: {}".format(cost, np.sum(loss)))
def test_cbow_softmax(self):
    self.assertEqual(True, gradcheck_naive(
        self.word2vec(cbow, softmaxCostAndGradient), self.vectors))