def train():
    """Builds the SVM based on training data.

    Loads the 'train' split through ``__init__.load_data``, fits a
    count-vectorizer -> tf-idf -> SGD (hinge loss, L2 penalty) pipeline on
    it, prints the elapsed fitting time, and passes the fitted pipeline
    together with the same training data to ``__init__.evaluate``.

    Returns:
        The fitted ``pipeline.Pipeline``.
    """
    features, labels = __init__.load_data('train')

    vectorizer = text.CountVectorizer(decode_error='ignore',
                                      stop_words='english')
    transformer = text.TfidfTransformer()
    classifier = linear_model.SGDClassifier(loss='hinge', penalty='l2',
                                            alpha=1e-3, tol=1e-3,
                                            random_state=42)

    # Chain the preprocessing steps and the classifier so they are fitted
    # and applied together as a single estimator.
    text_clf = pipeline.Pipeline(
        steps=[('vect', vectorizer),
               ('tfidf', transformer),
               ('clf-sgdc', classifier)])

    start = time.time()
    text_clf.fit(features, labels)
    # Parenthesized single-argument form prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print('Training time:\t%1.4f seconds' % (time.time() - start))

    # Evaluated on the very data the model was just fitted on.
    __init__.evaluate(text_clf, features, labels)
    return text_clf
def train():
    """Builds the random forest based on training data.

    Loads the 'train' split through ``__init__.load_data``, fits a
    count-vectorizer -> tf-idf -> random forest (10 trees) pipeline on it,
    prints the elapsed fitting time, and passes the fitted pipeline
    together with the same training data to ``__init__.evaluate``.

    Returns:
        The fitted ``pipeline.Pipeline``.
    """
    features, labels = __init__.load_data('train')

    vectorizer = text.CountVectorizer(decode_error='ignore',
                                      stop_words='english')
    transformer = text.TfidfTransformer()
    # NOTE(review): no random_state is passed, so repeated runs may build
    # different forests -- confirm whether reproducibility is wanted here.
    classifier = ensemble.RandomForestClassifier(n_estimators=10)

    # Chain the preprocessing steps and the classifier so they are fitted
    # and applied together as a single estimator.
    text_clf = pipeline.Pipeline(
        steps=[('vect', vectorizer),
               ('tfidf', transformer),
               ('clf-rf', classifier)])

    start = time.time()
    text_clf.fit(features, labels)
    # Parenthesized single-argument form prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print('Training time:\t%1.4f seconds' % (time.time() - start))

    # Evaluated on the very data the model was just fitted on.
    __init__.evaluate(text_clf, features, labels)
    return text_clf
def train():
    """Builds the classifier based on training data.

    Loads the 'train' split through ``__init__.load_data``, fits a
    count-vectorizer -> tf-idf -> logistic regression (lbfgs solver)
    pipeline on it, prints the elapsed fitting time, and passes the fitted
    pipeline together with the same training data to ``__init__.evaluate``.

    Returns:
        The fitted ``pipeline.Pipeline``.
    """
    features, labels = __init__.load_data('train')

    vectorizer = text.CountVectorizer(decode_error='ignore',
                                      stop_words='english')
    transformer = text.TfidfTransformer()
    classifier = linear_model.LogisticRegression(solver='lbfgs')

    # Chain the preprocessing steps and the classifier so they are fitted
    # and applied together as a single estimator.
    text_clf = pipeline.Pipeline(
        steps=[('vect', vectorizer),
               ('tfidf', transformer),
               ('clf-lr', classifier)])

    start = time.time()
    text_clf.fit(features, labels)
    # Parenthesized single-argument form prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print('Training time:\t%1.4f seconds' % (time.time() - start))

    # Evaluated on the very data the model was just fitted on.
    __init__.evaluate(text_clf, features, labels)
    return text_clf
def train():
    """Builds the support vector regression model based on training data.

    Loads the 'train' split through ``__init__.load_data``, fits a
    count-vectorizer -> tf-idf -> ``svm.SVR`` (sigmoid kernel) pipeline on
    it, prints the elapsed fitting time, and passes the fitted pipeline
    together with the same training data to ``__init__.evaluate``.

    Returns:
        The fitted ``pipeline.Pipeline``.
    """
    features, labels = __init__.load_data('train')

    vectorizer = text.CountVectorizer(decode_error='ignore',
                                      stop_words='english')
    transformer = text.TfidfTransformer()
    # NOTE(review): SVR is a regressor, yet the step is named 'clf-svr' and
    # the result is treated as a classifier -- confirm that the labels are
    # numeric and that evaluate() accepts continuous predictions.
    classifier = svm.SVR(kernel='sigmoid', gamma='scale')

    # Chain the preprocessing steps and the estimator so they are fitted
    # and applied together as a single estimator.
    text_clf = pipeline.Pipeline(
        steps=[('vect', vectorizer),
               ('tfidf', transformer),
               ('clf-svr', classifier)])

    start = time.time()
    text_clf.fit(features, labels)
    # Parenthesized single-argument form prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print('Training time:\t%1.4f seconds' % (time.time() - start))

    # Evaluated on the very data the model was just fitted on.
    __init__.evaluate(text_clf, features, labels)
    return text_clf
def test(model):
    """Evaluates ``model`` on the 'test' split.

    Args:
        model: Object handed unchanged to ``__init__.evaluate`` as its
            first argument.
    """
    test_features, test_labels = __init__.load_data('test')
    __init__.evaluate(model, test_features, test_labels)
def dev(model):
    """Evaluates ``model`` on the 'dev' split.

    Args:
        model: Object handed unchanged to ``__init__.evaluate`` as its
            first argument.
    """
    dev_features, dev_labels = __init__.load_data('dev')
    __init__.evaluate(model, dev_features, dev_labels)
def test(model):
    """Evaluates the random forest ``model`` on the 'test' split.

    Args:
        model: Object handed unchanged to ``__init__.evaluate`` as its
            first argument.
    """
    held_out_features, held_out_labels = __init__.load_data('test')
    __init__.evaluate(model, held_out_features, held_out_labels)
def dev(model):
    """Evaluates the random forest ``model`` on the 'dev' split.

    Args:
        model: Object handed unchanged to ``__init__.evaluate`` as its
            first argument.
    """
    validation_features, validation_labels = __init__.load_data('dev')
    __init__.evaluate(model, validation_features, validation_labels)
def test_str(self):
    # Evaluating the parsed STR fixture must give a value equal to the
    # parsed fixture itself.
    actual = prose.evaluate(prose.read(STR))
    expected = prose.read(STR)
    self.assertEqual(actual, expected)
def test_var(self):
    # Bind VAR to the evaluated INT fixture, then check that evaluating the
    # parsed VAR gives a value equal to the parsed INT fixture.
    prose.variables[VAR] = prose.evaluate(prose.read(INT))
    try:
        self.assertEqual(prose.evaluate(prose.read(VAR)), prose.read(INT))
    finally:
        # Always unbind VAR, even when the assertion fails, so the binding
        # cannot leak into other tests sharing prose.variables.
        del prose.variables[VAR]
def test_int(self):
    # Evaluating the parsed INT fixture must give a value equal to the
    # parsed fixture itself.
    actual = prose.evaluate(prose.read(INT))
    expected = prose.read(INT)
    self.assertEqual(actual, expected)
def test_empty_list(self):
    # Evaluating the result of reading an empty tuple must give a value
    # equal to that read result.
    actual = prose.evaluate(prose.read(()))
    expected = prose.read(())
    self.assertEqual(actual, expected)