def train_AdaBoost_classifier(training_data, n_est):
    """Train an AdaBoost classifier and report its 10-fold ROC-AUC.

    Parameters
    ----------
    training_data : numpy.ndarray
        2-D array whose last column is the (binary) label; the input columns
        are selected via the module-level ``selected_features`` index list.
        NOTE(review): assumes entries are castable to float — confirm with
        the producer of this array.
    n_est : int
        Number of boosting estimators for ``AdaBoostClassifier``.

    Returns
    -------
    tuple
        ``(fitted AdaBoostClassifier, mean cross-validated ROC-AUC)``.
    """
    # Extract inputs and labels once instead of repeating the slicing.
    features = training_data[:, selected_features].astype('float')
    labels = training_data[:, -1].astype('float')

    # NOTE(review): the original assigned ``adaboost.classes_ = [0, 1]``
    # before fitting. Trailing-underscore attributes are *fitted* attributes
    # that ``fit`` sets itself, so the assignment was a no-op and was removed.
    adaboost = AdaBoostClassifier(n_estimators=n_est)

    # Cross-validate on a fresh clone internally, then fit on the full data.
    scores = cross_val_score(adaboost, features, labels,
                             cv=10, scoring='roc_auc')
    adaboost = adaboost.fit(features, labels)

    mean_score = np.mean(scores)  # compute once; printed and returned below
    print(f"Scores gotten using AdaBoost classifier (# of estimators={n_est})")
    print(scores)
    print(mean_score)
    return adaboost, mean_score
def run_on_feature_union():
    """Fit a RandomForest on a weighted FeatureUnion of numeric, bag-of-words
    and word2vec features, and print its 10-fold ROC-AUC scores.

    Side effects: loads word embeddings into module state via
    ``load_word_embeddings`` and reads ``data/filtered_features.csv``.
    Prints the per-fold scores and their mean; returns ``None``.
    """
    load_word_embeddings()
    (training_data, _) = read_lines_from_file('data/filtered_features.csv')
    training_data = np.array(training_data)

    # Last column holds the label; extract once for fit and cross-validation.
    # NOTE(review): assumes labels are castable to float — confirm upstream.
    labels = training_data[:, -1].astype('float')

    # NOTE(review): the original also constructed AdaBoost, SVC and
    # ExtraTrees estimators (and pre-assigned ``classes_`` on each), but none
    # of them were ever used — dead code removed. ``classes_`` is a fitted
    # attribute set by ``fit`` itself, so those assignments were no-ops.
    clf = RandomForestClassifier(n_estimators=100, class_weight="balanced")

    # Combine the three extractors; weights bias the union toward the
    # word2vec and bag-of-words representations.
    pipeline = Pipeline([
        ('features', FeatureUnion(
            [('numeric_features', NumericFeaturesExtractor()),
             ('bag_of_words_features', BagOfWordsExtractor()),
             ('w2v_features', Word2VecExtractor())],
            transformer_weights={
                'numeric_features': 0.5,
                'bag_of_words_features': 0.9,
                'w2v_features': 1.0,
            })),
        ('clf', clf)
    ])

    pipeline.fit(training_data, labels)
    # Cross-validation clones the pipeline internally, so the fit above is
    # independent of the scores reported here.
    scores = cross_val_score(pipeline, training_data, labels,
                             cv=10, scoring='roc_auc')
    print(scores)
    print(np.mean(scores))