def compute_cross_correlation_score(df, clfs, preprocess_scaling=True, nFold=10):
    """
    Score every classifier on every fold of a stratified k-fold split of ``df``.

    The feature matrix is built by passing the ``features`` column of ``df``
    through a ``DictVectorizer``; the labels are read from ``df.expected_class``.

    :param df: data frame with a ``features`` column and an ``expected_class`` column
    :param clfs: classifiers to evaluate; iterated once per fold
    :param preprocess_scaling: forwarded to ``preprocess`` as ``preprocess_scaling``
    :param nFold: number of folds handed to ``StratifiedKFold``
    :return: tuple ``(scores, classification_results)``; both are lists with
        one entry per (fold, classifier) pair, filled fold by fold
    """
    # NOTE(review): this name is defined a second time further down in the
    # file; at import time the later definition replaces this one.

    to_sklearn_features = DataFrameMapper([('features', sklearn.feature_extraction.DictVectorizer())])

    data_X = to_sklearn_features.fit_transform(df)
    data_Y = df.expected_class

    skf = cross_validation.StratifiedKFold(data_Y, n_folds=nFold)
    classification_results = []
    scores = []
    for num, (train_index, test_index) in enumerate(skf):
        X_train, X_test = data_X[train_index], data_X[test_index]
        # The fold indices are positions, not index labels, so select by
        # position. Plain ``data_Y[...]`` performs a label lookup and picks the
        # wrong rows (or raises) when ``df`` does not carry a default 0..n-1 index.
        Y_train, Y_test = data_Y.iloc[train_index], data_Y.iloc[test_index]
        print("Len train{}, Len test{}".format(Y_train.size, Y_test.size))
        cross_valid_data = Cross_validation_split(X_train, X_test, Y_train, Y_test)
        # Correlation-based preprocessing is always disabled on this path.
        cross_valid_data = preprocess(cross_valid_data, preprocess_scaling=preprocess_scaling, preprocess_correlation=False)

        for clf in clfs:
            score, classification = generate_score(clf, cross_valid_data, fold=num)
            scores.append(score)
            classification_results.append(classification)
    return scores, classification_results
def generate_res_as_in_paper(df, list_of_classifiers, preprocess_scaling=True, preprocess_correlation=False):
    """
    Score every classifier on every distinct fold listed in ``df.fold``.

    :param df: object exposing a ``fold`` attribute; also passed to
        ``compute_cross_validation_fold`` together with each fold value
    :param list_of_classifiers: classifiers handed one by one to ``generate_score``
    :param preprocess_scaling: forwarded to ``preprocess``
    :param preprocess_correlation: forwarded to ``preprocess``
    :return: flat list holding the first element of each ``generate_score``
        result, grouped fold by fold
    """
    # NOTE(review): this name is defined again right below in the file; the
    # later definition is the one bound after the module loads.
    collected = []
    # set() keeps each fold value once; folds are visited in the set's own order.
    for fold_label in set(df.fold):
        fold_data = preprocess(
            compute_cross_validation_fold(df, fold_label),
            preprocess_scaling=preprocess_scaling,
            preprocess_correlation=preprocess_correlation)
        collected.extend(
            generate_score(model, fold_data)[0] for model in list_of_classifiers)
    return collected
def generate_res_as_in_paper(df,
                             list_of_classifiers,
                             preprocess_scaling=True,
                             preprocess_correlation=False):
    """
    Evaluate each classifier on each distinct fold found in ``df.fold``.

    :param df: object exposing a ``fold`` attribute; also given to
        ``compute_cross_validation_fold`` along with each fold value
    :param list_of_classifiers: classifiers passed one at a time to ``generate_score``
    :param preprocess_scaling: forwarded to ``preprocess``
    :param preprocess_correlation: forwarded to ``preprocess``
    :return: flat list with the first element of every ``generate_score``
        result, appended fold by fold
    """
    # NOTE(review): an earlier definition with the same name sits just above
    # in the file; this later one is what the module ends up exporting.

    def _prepared_fold(fold_value):
        # Build the split for one fold and run it through preprocessing.
        raw_split = compute_cross_validation_fold(df, fold_value)
        return preprocess(
            raw_split,
            preprocess_scaling=preprocess_scaling,
            preprocess_correlation=preprocess_correlation)

    fold_scores = []
    for fold_value in set(df.fold):
        prepared = _prepared_fold(fold_value)
        for classifier in list_of_classifiers:
            outcome = generate_score(classifier, prepared)
            fold_scores.append(outcome[0])
    return fold_scores
def compute_cross_correlation_score(df,
                                    clfs,
                                    preprocess_scaling=True,
                                    nFold=10):
    """
    Score every classifier on every fold of a stratified k-fold split of ``df``.

    The feature matrix is built by passing the ``features`` column of ``df``
    through a ``DictVectorizer``; the labels are read from ``df.expected_class``.

    :param df: data frame with a ``features`` column and an ``expected_class`` column
    :param clfs: classifiers to evaluate; iterated once per fold
    :param preprocess_scaling: forwarded to ``preprocess`` as ``preprocess_scaling``
    :param nFold: number of folds handed to ``StratifiedKFold``
    :return: tuple ``(scores, classification_results)``; both are lists with
        one entry per (fold, classifier) pair, filled fold by fold
    """
    # NOTE(review): an earlier definition with the same name appears higher up
    # in the file; this later one is what the module ends up exporting.

    to_sklearn_features = DataFrameMapper([
        ('features', sklearn.feature_extraction.DictVectorizer())
    ])

    data_X = to_sklearn_features.fit_transform(df)
    data_Y = df.expected_class

    skf = cross_validation.StratifiedKFold(data_Y, n_folds=nFold)
    classification_results = []
    scores = []
    for num, (train_index, test_index) in enumerate(skf):
        X_train, X_test = data_X[train_index], data_X[test_index]
        # The fold indices are positions, not index labels, so select by
        # position. Plain ``data_Y[...]`` performs a label lookup and picks the
        # wrong rows (or raises) when ``df`` does not carry a default 0..n-1
        # index.
        Y_train = data_Y.iloc[train_index]
        Y_test = data_Y.iloc[test_index]
        print("Len train{}, Len test{}".format(Y_train.size, Y_test.size))
        cross_valid_data = Cross_validation_split(X_train, X_test, Y_train,
                                                  Y_test)
        # Correlation-based preprocessing is always disabled on this path.
        cross_valid_data = preprocess(cross_valid_data,
                                      preprocess_scaling=preprocess_scaling,
                                      preprocess_correlation=False)

        for clf in clfs:
            score, classification = generate_score(clf,
                                                   cross_valid_data,
                                                   fold=num)
            scores.append(score)
            classification_results.append(classification)
    return scores, classification_results