import utils.utils as u import utils.stats as stat import utils.preprocessor_methods as pr import storage.data as d from numpy import * from pylab import * from sklearn.metrics import precision_recall_fscore_support logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s') d.set_file_names() docs_test, y_test, docs_train, y_train, docs_train_subjectivity, y_train_subjectivity, docs_train_polarity, y_train_polarity = d.get_data() """ MaxEnt: { 'vect__ngram_range': (1, 1), 'vect__smooth_idf': True, 'vect__max_df': 0.5, 'vect__sublinear_tf': True, 'vect__preprocessor': <function placeholders at 0x9be31b4>, 'clf__penalty': 'l1', 'clf__C': 1.0, 'vect__use_idf': True } """ c1_vect_options = {
import logging  # needed by logging.basicConfig below; was missing entirely

import matplotlib
# Select the Agg backend before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import utils.utils as u
import utils.stats as s
import storage.data as d
from pylab import arange, savefig

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s')

d.set_file_names()
(
    docs_test,
    y_test,
    docs_train,
    y_train,
    docs_train_subjectivity,
    y_train_subjectivity,
    docs_train_polarity,
    y_train_polarity,
) = d.get_data( )

# labels = ['NB', 'SVM', 'MaxEnt', 'Combined (Foo -> Bar)', 'Combined (Foo -> Bar)', 'Combined (Foo -> Bar)', 'Combined (Foo -> Bar)', 'Combined (Foo -> Bar)', 'Combined (Foo -> Bar)', 'Boosting']
# values = [0.549392, 0.34543453, 0.23432999, 0.654213, 0.3213495, 0.65343, 0.53999211, 0.34543453, 0.23432999, 0.654213]

# Module-level accumulators that test() appends to.
labels = []
values = []


def test(clf):
    """Predict on the test documents with *clf*, score the result and record the label.

    NOTE(review): only the first part of this function is visible in the
    reviewed excerpt; the remainder (presumably what uses ``score`` and
    ``values``) must be checked separately.
    """
    global labels, values
    y_predict = clf.predict(docs_test)
    score = u.score(y_test, y_predict)
    # The classifier's string form serves as its label.
    labels.append(str(clf))
import sys import logging from models import * import storage.data as d import storage.prediction_exporter as pe import utils.stats as s import utils.preprocessor_methods as pr logging.basicConfig(level=logging.DEBUG, format="%(asctime)s %(message)s") docs_test, y_test, docs_train, y_train, docs_train_subjectivity, y_train_subjectivity, docs_train_polarity, y_train_polarity = ( d.get_data() ) pe.set_base_from_dataset(d.get_full_test_set()) vect_options = { "ngram_range": (1, 1), "sublinear_tf": True, "preprocessor": pr.remove_noise, "use_idf": False, "stop_words": None, } default_options = {"C": 1.0} clf = SVM(docs_train, y_train, default_options=default_options, vect_options=vect_options) if len(sys.argv) > 1: