def create_data():
    """Yield one ``Data`` object per series of training files.

    The series are obtained by passing the result of
    ``_get_training_files()`` to ``_get_series_of_files()``. Every yielded
    object is created as ``Data(False, False)`` and has a
    ``TrainingSet(False, False, *series)`` assigned to its ``training``
    attribute.
    """
    file_groups = _get_series_of_files(_get_training_files())
    for file_group in file_groups:
        bundle = Data(False, False)
        bundle.training = TrainingSet(False, False, *file_group)
        yield bundle
def _run(self):
    """Train and evaluate a system on ``self.training`` and keep its accuracies.

    A fresh ``Data`` object is given ``self.training`` as its training set
    and wrapped in a ``System``. The system then selects its best feature
    set, creates features, trains and evaluates quietly. Afterwards the
    event-event and event-timex evaluation accuracies are copied to
    ``self.result_event_event`` and ``self.result_event_timex``.
    """
    dataset = Data()
    dataset.training = self.training

    pipeline = System(dataset)

    # Pipeline stages, executed in this fixed order.
    pipeline.use_best_feature_set()
    pipeline.create_features()
    pipeline.train()
    pipeline.eval(quiet=True)

    # Publish the two accuracies computed by eval().
    self.result_event_event = pipeline.evaluation_accuracy_event_event
    self.result_event_timex = pipeline.evaluation_accuracy_event_timex
def _run_systems(self):
    """Cross-validate one system per feature-series length and collect accuracies.

    For each ``k`` in ``1..self.max_len`` the event-event and event-timex
    series returned by ``self._feature_series(k, ...)`` are merged with
    duplicates removed and printed. A ``System`` is then built on the
    TimeBank training set with those features, and ``create_features()``
    and ``cross_validation()`` are run on it.

    The event-event accuracy is appended to ``self.accuracies_event_event``
    (and printed) only when ``k`` is 1 or the event-event series for ``k``
    holds different features than the series for ``k - 1``. The same rule
    applies to the event-timex accuracy and ``self.accuracies_event_timex``.
    A blank line is printed after every ``k``.
    """
    def series_changed(length, candidates):
        """Return True if the series of this length differs from the previous one."""
        # Compare as sets: the element order of list(set(...)) is not a
        # reliable basis for deciding whether two series hold the same
        # features.
        current = set(self._feature_series(length, candidates))
        if length <= 1:
            # Nothing to compare against; the first series always counts.
            # NOTE(review): assumes the original ``else`` belonged to the
            # ``if k > 1`` test -- confirm against the unflattened source.
            return True
        previous = set(self._feature_series(length - 1, candidates))
        return current != previous

    for k in range(1, self.max_len + 1):
        # Union of both series, kept as list(set(a + b)) so the resulting
        # feature order handed to System is built the same way as before.
        features = list(set(
            self._feature_series(k, self.features_event_event)
            + self._feature_series(k, self.features_event_timex)
        ))
        print(features)

        data = Data()
        data.training = TrainingSet(False, False, "data/training/TBAQ-cleaned/TimeBank/")
        system = System(data, features)
        system.create_features()
        system.cross_validation()

        if series_changed(k, self.features_event_event):
            self.accuracies_event_event.append(system.crossval_accuracy_event_event)
            print(system.crossval_accuracy_event_event)

        if series_changed(k, self.features_event_timex):
            self.accuracies_event_timex.append(system.crossval_accuracy_event_timex)
            print(system.crossval_accuracy_event_timex)

        # Blank separator line. print("") emits just a newline both as a
        # Python 2 print statement and as the Python 3 print function.
        print("")
# NOTE(review): this line is a fragment. The header of the routine it belongs
# to (and the loop providing ``i``, ``pieces_X``, ``pieces_y``, ``clf`` and
# ``accs``) is not visible here -- presumably it is the tail of ``kfold``,
# which is called at the end of this line; confirm against the full file.
#
# Part 1 (per-fold step): piece ``i`` is held out with ``leave_out`` for both
# X and y, the two X splits are converted with ``transform_to_sparse_matrix``,
# ``clf`` is fitted on the training split, and the ``accuracy_score`` of its
# predictions on the held-out split is appended to ``accs``. The routine
# returns ``np.mean(accs)``.
#
# Part 2 (script-level driver): builds a ``System`` over the TimeBank training
# set with the "lemma" and "token" features, creates features, converts the
# event-event and event-timex X data with ``transform_to_list``, and prints
# the result of ``kfold(..., 5)`` for each of the two (Python 2 print
# statements).
# Leave out i for testing train_X, test_X = leave_out(pieces_X, i) train_y, test_y = leave_out(pieces_y, i) # Transform back to sparse train_X = transform_to_sparse_matrix(train_X) test_X = transform_to_sparse_matrix(test_X) clf.fit(train_X, train_y) predicted = clf.predict(test_X) accs.append(accuracy_score(test_y, predicted)) return np.mean(accs) data = Data() data.training = TrainingSet(False, False, "data/training/TBAQ-cleaned/TimeBank/") system = System(data, ["lemma", "token"]) system.create_features() X_event_event, y_event_event = system.training_event_event X_event_timex, y_event_timex = system.training_event_timex X_event_event = transform_to_list(X_event_event) X_event_timex = transform_to_list(X_event_timex) print kfold(X_event_event, y_event_event, 5) print kfold(X_event_timex, y_event_timex, 5)