-
Notifications
You must be signed in to change notification settings - Fork 0
/
checkCode.py
134 lines (105 loc) · 5.52 KB
/
checkCode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/python
import graderUtil
import util
import time
from util import *
# Single grader instance; every part in this file is registered on it.
grader = graderUtil.Grader()
# Object whose functions the tests below call (presumably the student's
# 'submission' module, loaded through the grader -- confirm in graderUtil).
submission = grader.load('submission')
############################################################
# Problem 1: warmup
############################################################
# Registered via addManualPart with an id, a point value and a description
# only -- no test function is attached to these parts.
grader.addManualPart('1a', maxPoints=2, description='simulate SGD')
grader.addManualPart('1b', maxPoints=2, description='create small dataset')
############################################################
# Problem 2: predicting movie ratings
############################################################
# All five parts of problem 2 are likewise registered without a test function.
grader.addManualPart('2a', maxPoints=2, description='loss')
grader.addManualPart('2b', maxPoints=3, description='gradient')
grader.addManualPart('2c', maxPoints=3, description='smallest magnitude')
grader.addManualPart('2d', maxPoints=3, description='largest magnitude')
grader.addManualPart('2e', maxPoints=3, description='linear regression')
############################################################
# Problem 3: sentiment classification
############################################################
### 3a
# Basic sanity check for feature extraction
def test3a0():
    """Check extractWordFeatures on a sentence with one repeated word.

    The sentence 'a b a' is passed to submission.extractWordFeatures and the
    result is handed to grader.requireIsEqual together with the expected
    counts: 'a' twice, 'b' once.
    """
    expected = {"a": 2, "b": 1}
    observed = submission.extractWordFeatures("a b a")
    grader.requireIsEqual(expected, observed)


grader.addBasicPart(
    '3a-0-basic',
    test3a0,
    maxSeconds=1,
    description="basic test")
def test3a1():
    """Run extractWordFeatures on ten seeded random 100-word sentences.

    Words are drawn from a five-word vocabulary after seeding the random
    module with 42, so the sentences are reproducible. The extracted
    features are only computed here; this function does not compare them
    against any reference value.
    """
    random.seed(42)
    vocabulary = ['a', 'aa', 'ab', 'b', 'c']
    for _trial in range(10):
        words = [random.choice(vocabulary) for _ in range(100)]
        sentence = ' '.join(words)
        submission_ans = submission.extractWordFeatures(sentence)


grader.addHiddenPart(
    '3a-1-hidden',
    test3a1,
    maxSeconds=1,
    description="test multiple instances of the same word in a sentence")
### 3b
def test3b0():
    """Train on two tiny labelled sentences and check two learned weights.

    'hello world' is labelled +1 and 'goodnight moon' is labelled -1. After
    calling submission.learnPredictor (numIters=20, eta=0.01) the weight of
    'hello' is passed to grader.requireIsGreaterThan with 0 and the weight of
    'moon' to grader.requireIsLessThan with 0 (presumably: positive and
    negative respectively -- confirm argument order in graderUtil).
    """
    extract = submission.extractWordFeatures
    training = (("hello world", 1), ("goodnight moon", -1))
    held_out = (("hello", 1), ("moon", -1))
    learned = submission.learnPredictor(
        training, held_out, extract, numIters=20, eta=0.01)
    grader.requireIsGreaterThan(0, learned["hello"])
    grader.requireIsLessThan(0, learned["moon"])


grader.addBasicPart(
    '3b-0-basic',
    test3b0,
    maxSeconds=1,
    description="basic sanity check for learning correct weights on two training and testing examples each")
def test3b1():
    """Check learned weights when a word occurs in both classes.

    'hi' appears once in the +1 example ('hi bye') and twice in the -1
    example ('hi hi'). After calling submission.learnPredictor (numIters=20,
    eta=0.01) the weight of 'hi' is passed to grader.requireIsLessThan with 0
    and the weight of 'bye' to grader.requireIsGreaterThan with 0.
    """
    extract = submission.extractWordFeatures
    training = (("hi bye", 1), ("hi hi", -1))
    held_out = (("hi", -1), ("bye", 1))
    learned = submission.learnPredictor(
        training, held_out, extract, numIters=20, eta=0.01)
    grader.requireIsLessThan(0, learned["hi"])
    grader.requireIsGreaterThan(0, learned["bye"])


grader.addBasicPart(
    '3b-1-basic',
    test3b1,
    maxSeconds=1,
    description="test correct overriding of positive weight due to one negative instance with repeated words")
def test3b2():
    """Train the submission's predictor on the users/apps data and check its error.

    Steps:
      1. Read examples with readExamples('users.tsv', 'apps.tsv').
      2. Train with submission.learnPredictor (numIters=20, eta=0.01) using
         submission.extractWordFeatures as the feature extractor.
      3. Pass the weights to outputWeights (target 'weights') and the
         per-example analysis to outputErrorAnalysis (target 'error-analysis').
      4. Evaluate the sign-of-dot-product predictor on both example sets,
         print the two error values, and pass them to
         grader.requireIsLessThan with the bounds 0.04 (train) and 0.30 (dev).
    """
    trainExamples = readExamples('users.tsv', 'apps.tsv')
    # NOTE(review): the dev set is the very same object as the training set,
    # so the "dev" error below is measured on training data -- confirm
    # whether a held-out split was intended.
    devExamples = trainExamples
    featureExtractor = submission.extractWordFeatures
    weights = submission.learnPredictor(
        trainExamples, devExamples, featureExtractor, numIters=20, eta=0.01)
    outputWeights(weights, 'weights')
    outputErrorAnalysis(devExamples, featureExtractor, weights, 'error-analysis')  # Use this to debug

    def predictor(x):
        # Classify by the sign of the linear score; a score of exactly 0 maps to +1.
        return 1 if dotProduct(featureExtractor(x), weights) >= 0 else -1

    trainError = evaluatePredictor(trainExamples, predictor)
    devError = evaluatePredictor(devExamples, predictor)
    # Single parenthesised argument: prints identically on Python 2 and 3.
    print("Official: train error = %s, dev error = %s" % (trainError, devError))
    grader.requireIsLessThan(0.04, trainError)
    grader.requireIsLessThan(0.30, devError)


grader.addBasicPart(
    '3b-2-basic',
    test3b2,
    maxPoints=2,
    maxSeconds=2000,
    description="test classifier on real polarity dev dataset")
### 3c
def test3c0():
    """Check that generated labels agree with the sign of the weighted score.

    Requests 5 examples from submission.generateDataset for a two-word weight
    vector. For every returned example, element 0 is scored with
    util.dotProduct against the weights, and grader.requireIsEqual is given
    'score >= 0' and 'element 1 == 1'.
    """
    weights = {"hello": 1, "world": 1}
    for example in submission.generateDataset(5, weights):
        score_is_nonnegative = util.dotProduct(example[0], weights) >= 0
        label_is_positive = example[1] == 1
        grader.requireIsEqual(score_is_nonnegative, label_is_positive)


grader.addBasicPart(
    '3c-0-basic',
    test3c0,
    maxSeconds=1,
    description="test correct generation of random dataset labels")
def test3c1():
    """Check that no generated example has a zero score under the weights.

    Builds 100 weights of value 1 keyed by str(i + 0.1) for i in 0..99,
    requests 100 examples from submission.generateDataset, and for each one
    gives grader.requireIsEqual the value False together with the result of
    'dotProduct(element 0, weights) == 0'.
    """
    weights = {str(i + 0.1): 1 for i in range(100)}
    for example in submission.generateDataset(100, weights):
        score_is_zero = dotProduct(example[0], weights) == 0
        grader.requireIsEqual(False, score_is_zero)


grader.addBasicPart(
    '3c-1-basic',
    test3c1,
    maxSeconds=1,
    description="test that the randomly generated example actually coincides with the given weights")
### 3d
# Registered with an id, a point value and a description only -- no test
# function is attached to this part.
grader.addManualPart('3d', maxPoints=2, description='error analysis')
### 3e
def test3e0():
    """Check the 3-character features extracted from 'hello world'.

    submission.extractCharacterFeatures(3) returns the extractor under test.
    Its output for the sentence is handed to grader.requireIsEqual together
    with the eight expected trigrams, each with count 1; the expected
    trigrams 'low' and 'owo' span the position of the space.
    """
    extractor = submission.extractCharacterFeatures(3)
    trigrams = ("hel", "ell", "llo", "low", "owo", "wor", "orl", "rld")
    expected = dict.fromkeys(trigrams, 1)
    grader.requireIsEqual(expected, extractor("hello world"))


grader.addBasicPart(
    '3e-0-basic',
    test3e0,
    maxSeconds=1,
    description="test basic character n-gram features")
def test3e1():
    """Run character n-gram extraction (n = 1, 2, 3) on seeded random sentences.

    Ten 100-word sentences are drawn from a five-word vocabulary after seeding
    the random module with 42. For each sentence and each n, a fresh extractor
    is obtained from submission.extractCharacterFeatures(n) and applied. The
    result is only computed here; this function does not compare it against
    any reference value.
    """
    random.seed(42)
    vocabulary = ['a', 'aa', 'ab', 'b', 'c']
    for _trial in range(10):
        words = [random.choice(vocabulary) for _ in range(100)]
        sentence = ' '.join(words)
        for n in range(1, 4):
            extract = submission.extractCharacterFeatures(n)
            submission_ans = extract(sentence)


grader.addHiddenPart(
    '3e-1-hidden',
    test3e1,
    maxSeconds=1,
    description="test feature extraction on repeated character n-grams")
### 3f
# Registered with an id, a point value and a description only -- no test
# function is attached to these parts.
grader.addManualPart('3f', maxPoints=3, description='explain value of n-grams')
grader.addManualPart('4a', maxPoints=2, description='simulating 2-means')
# NOTE(review): a "basic test for k-means" was announced at this point, but no
# such test (and no part '4b') is registered in this file.
grader.addManualPart('4c', maxPoints=5, description='handling same-cluster constraints')

# Grade exactly once. A second grader.grade() call would presumably re-run
# every registered part (including the long-running 3b-2 test) and report the
# results twice.
grader.grade()