def get_data():
    """Prepare the module-level ``train``/``test`` sets and split them up.

    Side effects:
        Rebinds the module globals ``test`` and ``train`` to the values
        returned by ``u.normalize_test_set_classification_scheme`` and, for
        ``train`` only, additionally by ``u.reduce_dataset(train, 3000)``.
        Calling this function again therefore re-processes the already
        processed globals.

    Returns:
        An 8-tuple, in this order:
        ``(docs_test, y_test, docs_train, y_train,
        docs_train_subjectivity, y_train_subjectivity,
        docs_train_polarity, y_train_polarity)``.
        The first four items are columns sliced out of ``test``/``train``;
        the last four are whatever ``u.generate_two_part_dataset(train)``
        yields (it must produce exactly four items).
    """
    global train, test

    def _document_column(rows):
        # The source TSV files are not uniformly structured: when the first
        # row has more than four fields the document sits in column 4,
        # otherwise in column 3.
        if len(rows[0]) > 4:
            return 4
        return 3

    test = u.normalize_test_set_classification_scheme(test)
    train = u.normalize_test_set_classification_scheme(train)

    # NOTE(review): presumably limits the training set to 3000 entries;
    # confirm against u.reduce_dataset.
    train = u.reduce_dataset(train, 3000)

    test_doc_col = _document_column(test)
    train_doc_col = _document_column(train)

    # The label column is the one immediately before the document column.
    # NOTE(review): the [:, col] indexing assumes 2-D array-like data
    # (e.g. numpy arrays); confirm against the loader.
    docs_test = test[:, test_doc_col]
    y_test = test[:, test_doc_col - 1]
    docs_train = train[:, train_doc_col]
    y_train = train[:, train_doc_col - 1]

    (
        docs_train_subjectivity,
        y_train_subjectivity,
        docs_train_polarity,
        y_train_polarity,
    ) = u.generate_two_part_dataset(train)

    return (
        docs_test,
        y_test,
        docs_train,
        y_train,
        docs_train_subjectivity,
        y_train_subjectivity,
        docs_train_polarity,
        y_train_polarity,
    )
def __init__(self, sub_clf_options, pol_clf_options, train):
    """Build one ``BaseMethod`` per half of the two-part dataset.

    Args:
        sub_clf_options: Mapping expanded as keyword arguments into the
            ``BaseMethod`` constructed for the subjectivity data.
        pol_clf_options: Mapping expanded as keyword arguments into the
            ``BaseMethod`` constructed for the polarity data.
        train: Training data handed to ``utils.generate_two_part_dataset``,
            which must return exactly two items (subjectivity part,
            polarity part).

    Attributes set:
        subjectivity_clf: ``BaseMethod`` built from the subjectivity part.
        polarity_clf: ``BaseMethod`` built from the polarity part.
        best_score: Arithmetic mean of the two classifiers' ``best_score``.
    """
    subjectivity_part, polarity_part = utils.generate_two_part_dataset(train)

    self.subjectivity_clf = BaseMethod(subjectivity_part, **sub_clf_options)
    self.polarity_clf = BaseMethod(polarity_part, **pol_clf_options)

    # Combined score is the plain average of both stages' best scores.
    subjectivity_score = self.subjectivity_clf.best_score
    polarity_score = self.polarity_clf.best_score
    self.best_score = (subjectivity_score + polarity_score) / 2