def create_data():
    """Lazily yield one ``Data`` object per series of training files.

    The training files are obtained from ``_get_training_files()`` and
    grouped by ``_get_series_of_files()``.  Because this is a generator,
    neither helper runs until the first item is requested.

    Yields:
        A ``Data(False, False)`` instance whose ``training`` attribute is a
        ``TrainingSet(False, False, *series)`` built from one series.
    """
    # NOTE(review): the meaning of the two ``False`` flags is defined by
    # ``Data`` / ``TrainingSet`` elsewhere in the project -- confirm there.
    file_series = _get_series_of_files(_get_training_files())

    for file_group in file_series:
        dataset = Data(False, False)
        # Each series is unpacked into positional arguments.
        dataset.training = TrainingSet(False, False, *file_group)
        yield dataset
    def _run(self):
        """Train and evaluate a system on ``self.training`` and keep the scores.

        A fresh ``Data`` container is populated with this object's training
        set, a ``System`` is built on it using its best feature set, and the
        system is trained and evaluated quietly.  The two evaluation
        accuracies are stored on ``self.result_event_event`` and
        ``self.result_event_timex``.
        """
        dataset = Data()
        dataset.training = self.training

        pipeline = System(dataset)
        pipeline.use_best_feature_set()
        pipeline.create_features()
        pipeline.train()
        # quiet=True is forwarded to System.eval; presumably it suppresses
        # output -- confirm against System.
        pipeline.eval(quiet=True)

        # Copy the accuracies for both relation types onto this object.
        self.result_event_event = pipeline.evaluation_accuracy_event_event
        self.result_event_timex = pipeline.evaluation_accuracy_event_timex
    def _run_systems(self):
        """Cross-validate one system per feature-series length and record accuracies.

        For every ``k`` from 1 to ``self.max_len`` (inclusive) a ``System`` is
        built from the de-duplicated concatenation of the ``k``-th event-event
        and event-timex feature series, using a ``TrainingSet`` created from
        ``data/training/TBAQ-cleaned/TimeBank/``, and is cross-validated.

        For each relation type the cross-validation accuracy is appended to
        ``self.accuracies_event_event`` / ``self.accuracies_event_timex`` and
        printed, but only when the feature set at step ``k`` differs from the
        one at step ``k - 1``.  The first step is always recorded.
        """
        def feature_set_changed(step, feature_names):
            """Return True if ``step`` uses a different feature set than ``step - 1``."""
            if step <= 1:
                # Nothing to compare against: the first step always counts.
                return True
            # Compare as sets, not as lists built from sets: equal sets may
            # iterate in different orders, which would report a false change.
            current = set(self._feature_series(step, feature_names))
            previous = set(self._feature_series(step - 1, feature_names))
            return current != previous

        for k in range(1, self.max_len+1):
            features = list(set(self._feature_series(k, self.features_event_event) + self._feature_series(k, self.features_event_timex)))
            # Single-argument parenthesised print gives the same output on
            # Python 2 (print statement) and Python 3 (print function).
            print(features)

            data = Data()
            data.training = TrainingSet(False, False, "data/training/TBAQ-cleaned/TimeBank/")

            system = System(data, features)
            system.create_features()
            system.cross_validation()

            if feature_set_changed(k, self.features_event_event):
                accuracy = system.crossval_accuracy_event_event
                self.accuracies_event_event.append(accuracy)
                print(accuracy)

            if feature_set_changed(k, self.features_event_timex):
                accuracy = system.crossval_accuracy_event_timex
                self.accuracies_event_timex.append(accuracy)
                print(accuracy)

            # Blank separator line between steps (same output as a bare
            # Python 2 ``print``).
            print("")
        # Leave out i for testing
        train_X, test_X = leave_out(pieces_X, i)
        train_y, test_y = leave_out(pieces_y, i)

        # Transform back to sparse
        train_X = transform_to_sparse_matrix(train_X)
        test_X = transform_to_sparse_matrix(test_X)

        clf.fit(train_X, train_y)
        predicted = clf.predict(test_X)

        accs.append(accuracy_score(test_y, predicted))

    return np.mean(accs)

# Load the training data from the TimeBank directory.
data = Data()
data.training = TrainingSet(False, False, "data/training/TBAQ-cleaned/TimeBank/")

# Build features using only the "lemma" and "token" feature names.
system = System(data, ["lemma", "token"])
system.create_features()

# Event-event relations: unpack the (X, y) pair and convert X to a list.
X_event_event, y_event_event = system.training_event_event
X_event_event = transform_to_list(X_event_event)

# Event-timex relations: same treatment.
X_event_timex, y_event_timex = system.training_event_timex
X_event_timex = transform_to_list(X_event_timex)

# Report the kfold() result for each relation type, passing 5 as the third
# argument (presumably the number of folds -- confirm against kfold).
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(kfold(X_event_event, y_event_event, 5))
print(kfold(X_event_timex, y_event_timex, 5))