def compute_cross_correlation_score(df, clfs, preprocess_scaling=True, nFold=10):
    """
    Score every classifier on each fold of a stratified k-fold split of ``df``.

    The feature matrix is built by running the ``features`` column of ``df``
    through a ``DictVectorizer`` (via ``DataFrameMapper``); the labels are
    read from ``df.expected_class``. Each train/test split is wrapped in a
    ``Cross_validation_split``, passed through ``preprocess`` (correlation
    preprocessing is always disabled here) and then handed to
    ``generate_score`` once per classifier.

    :param df: data frame providing a ``features`` column (presumably
        dict-valued, since it is fed to ``DictVectorizer`` -- confirm against
        the caller) and an ``expected_class`` column
    :param clfs: classifiers to evaluate; iterated once per fold, so it must
        be re-iterable (e.g. a list)
    :param preprocess_scaling: forwarded to ``preprocess`` as its
        ``preprocess_scaling`` argument
    :param nFold: number of folds requested from ``StratifiedKFold``
    :return: tuple ``(scores, classification_results)`` of two parallel
        lists, ordered fold by fold and, within a fold, in ``clfs`` order;
        each pair of entries is what ``generate_score`` returned for that
        classifier on that fold
    """
    to_sklearn_features = DataFrameMapper([
        ('features', sklearn.feature_extraction.DictVectorizer()),
    ])

    data_X = to_sklearn_features.fit_transform(df)
    data_Y = df.expected_class

    skf = cross_validation.StratifiedKFold(data_Y, n_folds=nFold)

    classification_results = []
    scores = []
    for num, (train_index, test_index) in enumerate(skf):
        X_train, X_test = data_X[train_index], data_X[test_index]
        # The fold indices are positions, not index labels: select the labels
        # with .iloc so that a data frame whose index is not 0..n-1 (e.g. after
        # filtering rows) still lines up with the rows taken from data_X.
        Y_train, Y_test = data_Y.iloc[train_index], data_Y.iloc[test_index]
        print("Len train{}, Len test{}".format(Y_train.size, Y_test.size))

        cross_valid_data = Cross_validation_split(X_train, X_test, Y_train, Y_test)
        cross_valid_data = preprocess(
            cross_valid_data,
            preprocess_scaling=preprocess_scaling,
            preprocess_correlation=False,
        )

        for clf in clfs:
            score, classification = generate_score(clf, cross_valid_data, fold=num)
            scores.append(score)
            classification_results.append(classification)

    return scores, classification_results
def generate_res_as_in_paper(df, list_of_classifiers, preprocess_scaling=True, preprocess_correlation=False):
    """
    Score each classifier on every fold listed in ``df.fold``.

    For each distinct value found in ``df.fold`` a split is built with
    ``compute_cross_validation_fold``, run through ``preprocess`` and then
    evaluated with ``generate_score`` for every classifier; only the first
    element of what ``generate_score`` returns is kept.

    :param df: data frame with a ``fold`` column naming the fold of each row
    :param list_of_classifiers: classifiers to evaluate on every fold
    :param preprocess_scaling: forwarded to ``preprocess``
    :param preprocess_correlation: forwarded to ``preprocess``
    :return: flat list of scores, grouped by fold (in the iteration order of
        ``set(df.fold)``, which is not sorted) and, within a fold, in
        ``list_of_classifiers`` order
    """
    collected = []
    for fold_id in set(df.fold):
        split = preprocess(
            compute_cross_validation_fold(df, fold_id),
            preprocess_scaling=preprocess_scaling,
            preprocess_correlation=preprocess_correlation,
        )
        collected.extend(
            [generate_score(classifier, split)[0] for classifier in list_of_classifiers]
        )
    return collected
def generate_res_as_in_paper(df, list_of_classifiers, preprocess_scaling=True, preprocess_correlation=False):
    """
    Evaluate all classifiers on the folds predefined in ``df.fold``.

    Every distinct value of ``df.fold`` yields one split, produced by
    ``compute_cross_validation_fold`` and then transformed by ``preprocess``.
    Each classifier is scored on that split with ``generate_score``, and the
    first element of its return value is recorded.

    :param df: data frame with a ``fold`` column naming the fold of each row
    :param list_of_classifiers: classifiers to evaluate on every fold
    :param preprocess_scaling: forwarded to ``preprocess``
    :param preprocess_correlation: forwarded to ``preprocess``
    :return: flat list with one score per (fold, classifier) pair; folds come
        in the iteration order of ``set(df.fold)`` (not sorted), classifiers
        in the order given
    """
    fold_scores = []
    for current_fold in set(df.fold):
        raw_split = compute_cross_validation_fold(df, current_fold)
        prepared_split = preprocess(
            raw_split,
            preprocess_scaling=preprocess_scaling,
            preprocess_correlation=preprocess_correlation,
        )
        fold_scores += [
            generate_score(model, prepared_split)[0] for model in list_of_classifiers
        ]
    return fold_scores
def compute_cross_correlation_score(df, clfs, preprocess_scaling=True, nFold=10):
    """
    Score every classifier on each fold of a stratified k-fold split of ``df``.

    The feature matrix is built by running the ``features`` column of ``df``
    through a ``DictVectorizer`` (via ``DataFrameMapper``); the labels are
    read from ``df.expected_class``. Each train/test split is wrapped in a
    ``Cross_validation_split``, passed through ``preprocess`` (correlation
    preprocessing is always disabled here) and then handed to
    ``generate_score`` once per classifier.

    :param df: data frame providing a ``features`` column (presumably
        dict-valued, since it is fed to ``DictVectorizer`` -- confirm against
        the caller) and an ``expected_class`` column
    :param clfs: classifiers to evaluate; iterated once per fold, so it must
        be re-iterable (e.g. a list)
    :param preprocess_scaling: forwarded to ``preprocess`` as its
        ``preprocess_scaling`` argument
    :param nFold: number of folds requested from ``StratifiedKFold``
    :return: tuple ``(scores, classification_results)`` of two parallel
        lists, ordered fold by fold and, within a fold, in ``clfs`` order;
        each pair of entries is what ``generate_score`` returned for that
        classifier on that fold
    """
    to_sklearn_features = DataFrameMapper([
        ('features', sklearn.feature_extraction.DictVectorizer()),
    ])

    data_X = to_sklearn_features.fit_transform(df)
    data_Y = df.expected_class

    skf = cross_validation.StratifiedKFold(data_Y, n_folds=nFold)

    classification_results = []
    scores = []
    for num, (train_index, test_index) in enumerate(skf):
        X_train, X_test = data_X[train_index], data_X[test_index]
        # The fold indices are positions, not index labels: select the labels
        # with .iloc so that a data frame whose index is not 0..n-1 (e.g. after
        # filtering rows) still lines up with the rows taken from data_X.
        Y_train, Y_test = data_Y.iloc[train_index], data_Y.iloc[test_index]
        print("Len train{}, Len test{}".format(Y_train.size, Y_test.size))

        cross_valid_data = Cross_validation_split(X_train, X_test, Y_train, Y_test)
        cross_valid_data = preprocess(
            cross_valid_data,
            preprocess_scaling=preprocess_scaling,
            preprocess_correlation=False,
        )

        for clf in clfs:
            score, classification = generate_score(clf, cross_valid_data, fold=num)
            scores.append(score)
            classification_results.append(classification)

    return scores, classification_results