def fusion_outputs(known_dataset, known_targets, train_index, test_index, fusion_algorithm, ids, algorithm, ind):
    """Fuse the predictions for one fold and score the fused result.

    ``fusion_algorithm`` selects how the fold is combined:

    * ``'maj'``  -- ``combine_predictions_one_fold_using_majority`` followed
      by ``majority_vote``.
    * ``'wmaj'`` -- the same fold helper followed by ``weighted_majority``.
    * ``'svm'``  -- ``svm_fusion``.

    All remaining arguments are forwarded unchanged to those helpers.

    Returns:
        ``(error, f1, misclassified_ids, (hp, hr, hf), (cp, cr, cf))`` where
        ``error`` is the mean squared difference between the fused
        predictions and the fold targets, ``f1`` comes from ``f1_score`` and
        the two triples are exactly what ``measures`` returned.

    Raises:
        NotImplementedError: ``fusion_algorithm`` is ``'nn'``.
        ValueError: ``fusion_algorithm`` is not one of the names above.
    """
    # Fail before doing any work: previously these cases only printed a
    # message and then crashed later on ``[] - []`` with an unrelated TypeError.
    if fusion_algorithm == 'nn':
        raise NotImplementedError("fusion algorithm 'nn' is not done")
    if fusion_algorithm not in ('maj', 'wmaj', 'svm'):
        raise ValueError('Error parsing algorithm: %r' % (fusion_algorithm,))

    if fusion_algorithm == 'svm':
        y_test, _, combined_predictions, misclassified_ids = svm_fusion(
            known_dataset, known_targets, train_index, test_index,
            ids, algorithm, ind)
    else:
        # 'maj' and 'wmaj' share the same per-fold prediction step.
        predictions, y_test, accuracies, misclassified_ids = \
            combine_predictions_one_fold_using_majority(
                known_dataset, known_targets, train_index, test_index,
                ids, algorithm, ind)
        if fusion_algorithm == 'maj':
            combined_predictions = majority_vote(predictions, y_test, accuracies)
        else:
            # The second return value (weights) is not needed here.
            combined_predictions, _ = weighted_majority(predictions, y_test)

    (hp, hr, hf), (cp, cr, cf) = measures(y_test, combined_predictions)
    error = float(sum((combined_predictions - y_test) ** 2)) / len(y_test)
    # Targets first, predictions second -- the order the other scoring
    # functions in this file use when calling f1_score.
    f1 = f1_score(y_test, combined_predictions)
    return error, f1, misclassified_ids, (hp, hr, hf), (cp, cr, cf)
def lr_one_fold_measures_feature_selection(X_train, X_test, y_train, y_test):
    """Fit the ``lr_feature_selection`` model on one fold and score it.

    The model is built from ``(X_train, y_train)`` and asked to predict
    ``X_test``; the predictions are then compared against ``y_test``.

    Returns:
        ``(error_rate, f1, model, (hp, hr, hf), (cp, cr, cf))`` where
        ``error_rate`` is the mean squared difference between predictions and
        targets, ``f1`` is the ``f1_score`` result, ``model`` is the fitted
        object and the two triples are what ``measures`` returned.
    """
    fitted = lr_feature_selection(X_train, y_train)
    predicted = fitted.predict(X_test)

    squared_diff = (predicted - y_test) ** 2
    total_squared = float(sum(squared_diff))
    error_rate = total_squared / len(y_test)

    f1 = f1_score(y_test, predicted)

    first_triple, second_triple = measures(y_test, predicted)
    hp, hr, hf = first_triple
    cp, cr, cf = second_triple

    return error_rate, f1, fitted, (hp, hr, hf), (cp, cr, cf)
def ensemble_one_fold_measures(X_train, X_test, X_train_scaled, X_test_scaled, y_train, y_test, dt, knn, svm):
    """Train three classifiers on one fold and score their per-sample vote.

    ``dt``, ``knn`` and ``svm`` are callables taking ``(X, y)`` and returning
    an object with ``predict``. ``dt`` is given the unscaled features, while
    ``knn`` and ``svm`` are given the scaled ones. For every test sample the
    label chosen by most of the three models becomes the ensemble prediction.

    Returns:
        ``(error_rate, f1, (hp, hr, hf), (cp, cr, cf))`` where ``error_rate``
        is the mean squared difference between ensemble predictions and
        ``y_test``, ``f1`` comes from ``f1_score`` and the two triples are
        what ``measures`` returned.

    Raises:
        ValueError: the ensemble predictions and ``y_test`` differ in length.
    """
    y_pred_dt = dt(X_train, y_train).predict(X_test)
    y_pred_knn = knn(X_train_scaled, y_train).predict(X_test_scaled)
    y_pred_svm = svm(X_train_scaled, y_train).predict(X_test_scaled)

    # Sanity check on the models' outputs before zipping them together.
    assert len(y_pred_dt) == len(y_pred_knn)
    assert len(y_pred_dt) == len(y_pred_svm)

    y_pred = [
        Counter(votes).most_common(1)[0][0]
        for votes in zip(y_pred_dt, y_pred_knn, y_pred_svm)
    ]

    if len(y_pred) != len(y_test):
        raise ValueError('prediction/target length mismatch')
    # y_pred is a plain list: ``y_pred - y_test`` only worked when y_test
    # happened to be an ndarray. Element-wise arithmetic works for both.
    squared_error = sum((p - t) ** 2 for p, t in zip(y_pred, y_test))
    error_rate = float(squared_error) / len(y_test)

    f1 = f1_score(y_test, y_pred)
    (hp, hr, hf), (cp, cr, cf) = measures(y_test, y_pred)
    return error_rate, f1, (hp, hr, hf), (cp, cr, cf)
def fusion(theme, algorithm, training_data, training_targets, testing_data, testing_targets, fusion_algorithm, ind=False):
    """Train one model per theme, fuse their predictions and score the result.

    One model is trained for each of the ``NR_THEMES`` entries of
    ``training_data``. With ``ind`` false every theme uses ``algorithm``;
    with ``ind`` true theme 0/1/2 uses ``svm_selected_net`` /
    ``svm_selected_ill`` / ``svm_selected_ideo`` respectively. The first
    three per-theme prediction vectors are stacked into a float array and
    combined according to ``fusion_algorithm``:

    * ``'maj'``  -- ``majority_vote(predictions, testing_targets, [])``
    * ``'wmaj'`` -- ``weighted_majority_theme(theme, predictions)``
    * ``'svm'``  -- ``svm_vote(predictions, testing_targets)``

    The fused predictions and the targets are printed to stdout.

    Returns:
        ``(error_rate, (hp, hr, hf), (cp, cr, cf))`` where ``error_rate`` is
        the mean squared difference between fused predictions and
        ``testing_targets`` and the triples are what ``measures`` returned.

    Raises:
        ValueError: ``fusion_algorithm`` is not ``'maj'``, ``'wmaj'`` or
            ``'svm'``.
        IndexError: ``ind`` is true and a theme index beyond 2 is requested
            (only three theme-specific trainers exist).

    NOTE(review): a second ``fusion`` with a different signature is defined
    later in this file; if both live at module level the later definition
    replaces this one -- confirm which is intended.
    """
    # Validate first: previously an unknown name printed 'ERROR' and then
    # crashed on the unbound ``combined_predictions`` after all the training.
    if fusion_algorithm not in ('maj', 'wmaj', 'svm'):
        raise ValueError('unknown fusion algorithm: %r' % (fusion_algorithm,))

    models = []
    for i in range(NR_THEMES):
        if ind:
            # Resolved only when needed, as before. Indexing past the third
            # trainer raises instead of silently re-appending the previous
            # theme's model.
            themed_trainers = (svm_selected_net, svm_selected_ill, svm_selected_ideo)
            trainer = themed_trainers[i]
        else:
            trainer = algorithm
        models.append(trainer(training_data[i], training_targets))

    predictions = [models[i].predict(testing_data[i]) for i in range(NR_THEMES)]
    predictions = np.array((predictions[0], predictions[1], predictions[2]), dtype=float)

    if fusion_algorithm == 'maj':
        combined_predictions = majority_vote(predictions, testing_targets, [])
    elif fusion_algorithm == 'wmaj':
        combined_predictions = weighted_majority_theme(theme, predictions)
    else:  # 'svm', guaranteed by the validation above
        combined_predictions = svm_vote(predictions, testing_targets)

    print('PRED ' + str(combined_predictions))
    print('TEST ' + str(testing_targets))

    (hp, hr, hf), (cp, cr, cf) = measures(testing_targets, combined_predictions)
    error_rate = float(sum((combined_predictions - testing_targets) ** 2)) / len(testing_targets)
    return error_rate, (hp, hr, hf), (cp, cr, cf)
def fusion(training_data, training_data_scaled, training_targets, testing_data, testing_data_scaled, testing_targets, fusion_algorithm):
    """Fuse per-theme predictions for three model families, then vote across them.

    For each of the three themes a ``dt`` model is trained on the unscaled
    data, a ``knn`` model on the scaled data, and one of
    ``svm_selected_net`` / ``svm_selected_ill`` / ``svm_selected_ideo`` on
    the scaled data. Within each family the three per-theme prediction
    vectors are fused according to ``fusion_algorithm``:

    * ``'maj'``  -- ``majority_vote(predictions, testing_targets, [])``
    * ``'wmaj'`` -- ``weighted_majority_theme(<family name>, predictions)``
    * ``'svm'``  -- ``svm_vote(predictions)``

    The final prediction for each sample is the label most common among the
    three family-level results. Intermediate and final predictions are
    printed to stdout.

    Returns:
        ``(error_rate, (hp, hr, hf), (cp, cr, cf))`` where ``error_rate`` is
        the mean squared difference between final predictions and
        ``testing_targets`` and the triples are what ``measures`` returned.

    Raises:
        ValueError: ``fusion_algorithm`` is not ``'maj'``, ``'wmaj'`` or
            ``'svm'``, or the final predictions and ``testing_targets``
            differ in length.

    NOTE(review): an earlier ``fusion(theme, algorithm, ...)`` exists in this
    file; if both are at module level this definition replaces it. That
    version also calls ``svm_vote`` with two arguments where this one passes
    one -- confirm which call shape ``svm_vote`` expects.
    """
    # Validate first: previously an unknown name printed 'ERROR' and then
    # crashed on unbound locals after all the training had been done.
    if fusion_algorithm not in ('maj', 'wmaj', 'svm'):
        raise ValueError('unknown fusion algorithm: %r' % (fusion_algorithm,))

    models_dt = [dt(training_data[i], training_targets) for i in range(3)]
    models_knn = [knn(training_data_scaled[i], training_targets) for i in range(3)]
    svm_trainers = (svm_selected_net, svm_selected_ill, svm_selected_ideo)
    models_svm = [
        train(training_data_scaled[i], training_targets)
        for i, train in enumerate(svm_trainers)
    ]

    predictions_dt = []
    predictions_knn = []
    predictions_svm = []
    for i in range(NR_THEMES):
        predictions_dt.append(models_dt[i].predict(testing_data[i]))
        predictions_knn.append(models_knn[i].predict(testing_data_scaled[i]))
        predictions_svm.append(models_svm[i].predict(testing_data_scaled[i]))

    predictions_dt = np.array((predictions_dt[0], predictions_dt[1], predictions_dt[2]), dtype=float)
    predictions_knn = np.array((predictions_knn[0], predictions_knn[1], predictions_knn[2]), dtype=float)
    predictions_svm = np.array((predictions_svm[0], predictions_svm[1], predictions_svm[2]), dtype=float)

    if fusion_algorithm == 'maj':
        combined_predictions_dt = majority_vote(predictions_dt, testing_targets, [])
        combined_predictions_knn = majority_vote(predictions_knn, testing_targets, [])
        combined_predictions_svm = majority_vote(predictions_svm, testing_targets, [])
    elif fusion_algorithm == 'wmaj':
        combined_predictions_dt = weighted_majority_theme('dt', predictions_dt)
        combined_predictions_knn = weighted_majority_theme('knn', predictions_knn)
        combined_predictions_svm = weighted_majority_theme('svm', predictions_svm)
    else:  # 'svm', guaranteed by the validation above
        combined_predictions_dt = svm_vote(predictions_dt)
        combined_predictions_knn = svm_vote(predictions_knn)
        combined_predictions_svm = svm_vote(predictions_svm)

    # Per-sample vote across the three model families.
    combined_predictions = [
        Counter(votes).most_common(1)[0][0]
        for votes in zip(combined_predictions_dt,
                         combined_predictions_knn,
                         combined_predictions_svm)
    ]

    print('predictions DT ' + str(predictions_dt))
    print('combined predictions DT ' + str(combined_predictions_dt))
    print('predictions KNN ' + str(predictions_knn))
    print('combined predictions KNN ' + str(combined_predictions_knn))
    print('predictions SVM ' + str(predictions_svm))
    print('combined predictions SVM ' + str(combined_predictions_svm))
    print('PRED ' + str(combined_predictions))
    print('TEST ' + str(testing_targets))

    (hp, hr, hf), (cp, cr, cf) = measures(testing_targets, combined_predictions)

    if len(combined_predictions) != len(testing_targets):
        raise ValueError('prediction/target length mismatch')
    # combined_predictions is a plain list: ``list - targets`` only worked
    # when the targets were an ndarray. Element-wise arithmetic works for both.
    squared_error = sum(
        (p - t) ** 2 for p, t in zip(combined_predictions, testing_targets))
    error_rate = float(squared_error) / len(testing_targets)
    return error_rate, (hp, hr, hf), (cp, cr, cf)
def fusion_outputs_ensemble(known_dataset, known_targets, known_dataset_scaled, dt, knn, svm, fusion_algorithm, train_index, test_index, ids): misclassified_ids = [] combined_predictions = [] y_test = [] if fusion_algorithm == 'maj': predictions, y_test, accuracies, misclassified_ids = combine_predictions_one_fold_using_majority(known_dataset, known_targets, train_index, test_index, ids, dt, ind=False) combined_predictions_dt = majority_vote(predictions, y_test, accuracies) predictions, y_test, accuracies, misclassified_ids = combine_predictions_one_fold_using_majority(known_dataset_scaled, known_targets, train_index, test_index, ids, knn, ind=False) combined_predictions_knn = majority_vote(predictions, y_test, accuracies) predictions, y_test, accuracies, misclassified_ids = combine_predictions_one_fold_using_majority(known_dataset_scaled, known_targets, train_index, test_index, ids, svm, ind=True) combined_predictions_svm = majority_vote(predictions, y_test, accuracies) combined_predictions = [] assert len(combined_predictions_dt) == len(combined_predictions_knn) assert len(combined_predictions_dt) == len(combined_predictions_svm) for i in range(len(combined_predictions_dt)): data = Counter([combined_predictions_dt[i], combined_predictions_knn[i], combined_predictions_svm[i]]) combined_predictions.append(data.most_common(1)[0][0]) elif fusion_algorithm == 'wmaj': predictions, y_test, accuracies, misclassified_ids = combine_predictions_one_fold_using_majority(known_dataset, known_targets, train_index, test_index, ids, dt, ind=False) combined_predictions_dt, weights = weighted_majority(predictions, y_test) predictions, y_test, accuracies, misclassified_ids = combine_predictions_one_fold_using_majority(known_dataset_scaled, known_targets, train_index, test_index, ids, knn, ind=False) combined_predictions_knn, weights = weighted_majority(predictions, y_test) predictions, y_test, accuracies, misclassified_ids = 
combine_predictions_one_fold_using_majority(known_dataset_scaled, known_targets, train_index, test_index, ids, svm, ind=True) combined_predictions_svm, weights = weighted_majority(predictions, y_test) combined_predictions = [] assert len(combined_predictions_dt) == len(combined_predictions_knn) assert len(combined_predictions_dt) == len(combined_predictions_svm) for i in range(len(combined_predictions_dt)): data = Counter([combined_predictions_dt[i], combined_predictions_knn[i], combined_predictions_svm[i]]) combined_predictions.append(data.most_common(1)[0][0]) elif fusion_algorithm == 'svm': y_test, predictions, combined_predictions_dt, misclassified_ids = svm_fusion(known_dataset, known_targets, train_index, test_index, ids, dt, ind=False) y_test, predictions, combined_predictions_knn, misclassified_ids = svm_fusion(known_dataset_scaled, known_targets, train_index, test_index, ids, knn, ind=False) y_test, predictions, combined_predictions_svm, misclassified_ids = svm_fusion(known_dataset_scaled, known_targets, train_index, test_index, ids, svm, ind=True) combined_predictions = [] assert len(combined_predictions_dt) == len(combined_predictions_knn) assert len(combined_predictions_dt) == len(combined_predictions_svm) for i in range(len(combined_predictions_dt)): data = Counter([combined_predictions_dt[i], combined_predictions_knn[i], combined_predictions_svm[i]]) combined_predictions.append(data.most_common(1)[0][0]) elif fusion_algorithm == 'nn': print 'not done' else: print 'Error parsing algorithm' # print '###############' # print 'Y_PRED %s' % str(predictions) # print 'Y_TEST %s' % str(y_test) # print 'COMBINED %s' % str(combined_predictions) # print '###############' (hp, hr, hf), (cp, cr, cf) = measures(y_test, combined_predictions) error = (float(sum((combined_predictions - y_test)**2)) / len(y_test)) f1 = f1_score(combined_predictions, y_test) return error, f1, misclassified_ids, (hp, hr, hf), (cp, cr, cf)