def test_word2vec():
    """ Test the two word2vec implementations, before running on Stanford Sentiment Treebank """
    # Build a tiny fake dataset over a 5-token vocabulary ("a".."e").
    dataset = type('dummy', (), {})()

    def dummySampleTokenIdx():
        # Sample a token index uniformly from the 5-token vocabulary.
        return random.randint(0, 4)

    def getRandomContext(C):
        # Return a random center word and 2*C random context words.
        tokens = ["a", "b", "c", "d", "e"]
        return tokens[random.randint(0, 4)], \
            [tokens[random.randint(0, 4)] for i in range(2 * C)]

    dataset.sampleTokenIdx = dummySampleTokenIdx
    dataset.getRandomContext = getRandomContext

    # Fix the seeds so the gradient checks are reproducible.
    random.seed(31415)
    np.random.seed(9265)
    dummy_vectors = normalizeRows(np.random.randn(10, 3))
    dummy_tokens = dict([("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)])

    print("==== Gradient check for skip-gram with naiveSoftmaxLossAndGradient ====")
    gradcheck_naive(lambda vec: word2vec_sgd_wrapper(
        skipgram, dummy_tokens, vec, dataset, 5, naiveSoftmaxLossAndGradient),
        dummy_vectors, "naiveSoftmaxLossAndGradient Gradient")
    grad_tests_softmax(skipgram, dummy_tokens, dummy_vectors, dataset)

    print("==== Gradient check for skip-gram with negSamplingLossAndGradient ====")
    gradcheck_naive(lambda vec: word2vec_sgd_wrapper(
        skipgram, dummy_tokens, vec, dataset, 5, negSamplingLossAndGradient),
        dummy_vectors, "negSamplingLossAndGradient Gradient")
    grad_tests_negsamp(skipgram, dummy_tokens, dummy_vectors, dataset, negSamplingLossAndGradient)
def test_skipgram():
    """ Test skip-gram with both naiveSoftmaxLossAndGradient and negSamplingLossAndGradient """
    dataset, dummy_vectors, dummy_tokens = getDummyObjects()

    print("==== Gradient check for skip-gram with naiveSoftmaxLossAndGradient ====")
    gradcheck_naive(lambda vec: word2vec_sgd_wrapper(
        skipgram, dummy_tokens, vec, dataset, 5, naiveSoftmaxLossAndGradient),
        dummy_vectors, "naiveSoftmaxLossAndGradient Gradient")
    grad_tests_softmax(skipgram, dummy_tokens, dummy_vectors, dataset)

    print("==== Gradient check for skip-gram with negSamplingLossAndGradient ====")
    gradcheck_naive(lambda vec: word2vec_sgd_wrapper(
        skipgram, dummy_tokens, vec, dataset, 5, negSamplingLossAndGradient),
        dummy_vectors, "negSamplingLossAndGradient Gradient")
    grad_tests_negsamp(skipgram, dummy_tokens, dummy_vectors, dataset, negSamplingLossAndGradient)
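# test_skipgram() calls a getDummyObjects() helper that is not defined in this
# section. A minimal sketch follows, assuming it simply factors out the inline
# dummy-dataset setup from test_word2vec() above; if your copy of the file
# already defines this helper elsewhere, keep that version instead.
def getDummyObjects():
    """ Build the shared dummy dataset, vectors, and token mapping used by the tests """
    dataset = type('dummy', (), {})()

    def dummySampleTokenIdx():
        return random.randint(0, 4)

    def getRandomContext(C):
        tokens = ["a", "b", "c", "d", "e"]
        return tokens[random.randint(0, 4)], \
            [tokens[random.randint(0, 4)] for i in range(2 * C)]

    dataset.sampleTokenIdx = dummySampleTokenIdx
    dataset.getRandomContext = getRandomContext

    random.seed(31415)
    np.random.seed(9265)
    dummy_vectors = normalizeRows(np.random.randn(10, 3))
    dummy_tokens = dict([("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)])
    return dataset, dummy_vectors, dummy_tokens


if __name__ == "__main__":
    # Assumed entry point for running both sanity checks from the command line.
    test_word2vec()
    test_skipgram()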