def combine_predictions_one_fold_using_majority(known_dataset, known_targets,
                                                train_index, test_index, ids,
                                                algorithm, ind):
    """Train one model per theme on a single fold and collect its outputs.

    For every theme index below ``NR_THEMES`` a model is fitted on the
    training rows of that theme's features, scored and used to predict on the
    test rows, and handed to ``add_misclassified_ids``.

    Args:
        known_dataset: Per-theme feature containers; ``known_dataset[i]`` is
            indexed with ``train_index`` and ``test_index``.
        known_targets: Target values, indexed with the same two indices.
        train_index: Index selecting the training rows of this fold.
        test_index: Index selecting the test rows of this fold.
        ids: Forwarded unchanged to ``add_misclassified_ids``.
        algorithm: Callable ``algorithm(X, y)`` returning a fitted model with
            ``score`` and ``predict``; used for every theme when ``ind`` is
            falsy.
        ind: When truthy, themes 0, 1 and 2 are trained with
            ``svm_selected_net``, ``svm_selected_ill`` and
            ``svm_selected_ideo`` respectively instead of ``algorithm``.

    Returns:
        Tuple ``(predictions, y_test, accuracies, misclassified_ids)``:
        ``predictions`` is a float array built from the predictions of the
        first three themes, ``y_test`` the targets of the test rows,
        ``accuracies`` the per-theme ``model.score`` values and
        ``misclassified_ids`` the concatenated results of
        ``add_misclassified_ids``.
    """
    y_train = known_targets[train_index]
    y_test = known_targets[test_index]

    theme_predictions = []
    accuracies = []
    misclassified_ids = []

    for theme_idx in range(NR_THEMES):
        theme_features = known_dataset[theme_idx]
        X_train = theme_features[train_index]
        X_test = theme_features[test_index]

        if not ind:
            model = algorithm(X_train, y_train)
        elif theme_idx < 3:
            trainer = (svm_selected_net,
                       svm_selected_ill,
                       svm_selected_ideo)[theme_idx]
            model = trainer(X_train, y_train)
        # With ``ind`` set and a theme index of 3 or more nothing is assigned
        # here, so ``model`` keeps the value from the previous iteration.

        theme_accuracy = model.score(X_test, y_test)
        theme_predictions.append(model.predict(X_test))
        accuracies.append(theme_accuracy)
        misclassified_ids.extend(
            add_misclassified_ids(model, test_index, theme_features,
                                  known_targets, ids))

    # Exactly the first three themes are stacked, one row per theme.
    stacked = np.array(
        (theme_predictions[0], theme_predictions[1], theme_predictions[2]),
        dtype=float)
    return stacked, y_test, accuracies, misclassified_ids
def svm_fusion(known_dataset, known_targets, train_index, test_index, ids,
               algorithm, ind):
    """Combine per-theme predictions with a second-level model.

    The training rows are split with ``StratifiedKFold(y_train, n_folds=3)``
    and only the first split is used.  For each theme a base model is fitted
    on the inner-training part; its predictions on the inner-test part become
    one input column for the fusion model (``inner_svm``), and its
    predictions on the outer test rows become one input column at prediction
    time.

    Args:
        known_dataset: Per-theme feature containers; ``known_dataset[i]`` is
            indexed with ``train_index`` and ``test_index``.
        known_targets: Target values, indexed with the same two indices.
        train_index: Index selecting the outer training rows.
        test_index: Index selecting the outer test rows.
        ids: Forwarded unchanged to ``add_misclassified_ids``.
        algorithm: Callable ``algorithm(X, y)`` returning a fitted model;
            used for every theme when ``ind`` is falsy.
        ind: When truthy, themes 0, 1 and 2 are trained with
            ``svm_selected_net``, ``svm_selected_ill`` and
            ``svm_selected_ideo`` respectively instead of ``algorithm``.

    Returns:
        Tuple ``(final_y_test, predictions, combined_predictions,
        misclassified_ids)``: the targets of the outer test rows, the list of
        per-theme predictions on those rows, the fusion model's predictions
        as a plain list, and the concatenated results of
        ``add_misclassified_ids``.
    """
    y_train = known_targets[train_index]
    final_y_test = known_targets[test_index]

    misclassified_ids = []
    fusion_inputs = []
    predictions = []
    fusion_Y_train = []

    for inner_train_index, inner_test_index in StratifiedKFold(y_train, n_folds=3):
        for theme_idx in range(NR_THEMES):
            theme_features = known_dataset[theme_idx]
            X_train = theme_features[train_index]
            final_X_test = theme_features[test_index]

            base_X = X_train[inner_train_index]
            base_y = y_train[inner_train_index]
            fusion_X_train = X_train[inner_test_index]
            fusion_Y_train = y_train[inner_test_index]

            if not ind:
                model = algorithm(base_X, base_y)
            elif theme_idx < 3:
                trainer = (svm_selected_net,
                           svm_selected_ill,
                           svm_selected_ideo)[theme_idx]
                model = trainer(base_X, base_y)
            # With ``ind`` set and a theme index of 3 or more nothing is
            # assigned, so ``model`` keeps its previous value.

            fusion_inputs.append(model.predict(fusion_X_train))
            predictions.append(model.predict(final_X_test))
            misclassified_ids.extend(
                add_misclassified_ids(model, test_index, theme_features,
                                      known_targets, ids))

        # Stop after the first inner split; later splits are never visited.
        break

    # One row per sample, one column per theme.
    fusion_model = inner_svm(np.vstack(fusion_inputs).T, fusion_Y_train)
    combined_predictions = fusion_model.predict(np.vstack(predictions).T)
    return final_y_test, predictions, combined_predictions.tolist(), misclassified_ids
def fusion(theme, algorithm, training_data, training_targets, testing_data,
           testing_targets, fusion_algorithm, ind=False):
    """Train one model per theme, fuse their predictions and score the result.

    NOTE(review): a second function named ``fusion`` with a different
    signature is defined later in this file; if both live in the same module
    the later one replaces this one - confirm which is intended.

    Args:
        theme: Forwarded to ``weighted_majority_theme`` when
            ``fusion_algorithm`` is ``"wmaj"``.
        algorithm: Callable ``algorithm(X, y)`` returning a fitted model with
            ``predict``; used for every theme when ``ind`` is falsy.
        training_data: Per-theme training features, indexed by theme.
        training_targets: Training targets shared by all themes.
        testing_data: Per-theme test features, indexed by theme.
        testing_targets: Test targets.
        fusion_algorithm: ``"maj"`` (``majority_vote``), ``"wmaj"``
            (``weighted_majority_theme``) or ``"svm"`` (``svm_vote``).
        ind: When truthy, themes 0, 1 and 2 are trained with
            ``svm_selected_net``, ``svm_selected_ill`` and
            ``svm_selected_ideo`` respectively instead of ``algorithm``.

    Returns:
        Tuple ``(error_rate, (hp, hr, hf), (cp, cr, cf))`` where
        ``error_rate`` is the mean squared difference between the fused
        predictions and ``testing_targets`` and the two triples are whatever
        ``measures(testing_targets, combined_predictions)`` returns.

    Raises:
        ValueError: If ``fusion_algorithm`` is not one of the supported
            names, or if ``ind`` is set while ``NR_THEMES`` exceeds the three
            available selected-SVM trainers.
    """
    # Fail before any training: previously an unknown name only printed
    # 'ERROR' and then crashed on the unbound ``combined_predictions``.
    if fusion_algorithm not in ("maj", "wmaj", "svm"):
        raise ValueError(
            "unknown fusion_algorithm %r (expected 'maj', 'wmaj' or 'svm')"
            % (fusion_algorithm,))

    if ind:
        selected_trainers = (svm_selected_net, svm_selected_ill, svm_selected_ideo)
        # Without this guard a theme index >= 3 would silently reuse the
        # model trained for the previous theme.
        if NR_THEMES > len(selected_trainers):
            raise ValueError(
                "ind=True supports at most %d themes, NR_THEMES is %d"
                % (len(selected_trainers), NR_THEMES))
        models = [selected_trainers[i](training_data[i], training_targets)
                  for i in range(NR_THEMES)]
    else:
        models = [algorithm(training_data[i], training_targets)
                  for i in range(NR_THEMES)]

    # One row per theme; stacks every theme instead of a hard-coded three.
    predictions = np.array(
        [model.predict(testing_data[i]) for i, model in enumerate(models)],
        dtype=float)

    if fusion_algorithm == "maj":
        combined_predictions = majority_vote(predictions, testing_targets, [])
    elif fusion_algorithm == "wmaj":
        combined_predictions = weighted_majority_theme(theme, predictions)
    else:  # "svm" (validated above)
        # NOTE(review): the other ``fusion`` in this file calls ``svm_vote``
        # with a single argument - confirm the expected signature.
        combined_predictions = svm_vote(predictions, testing_targets)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('PRED ' + str(combined_predictions))
    print('TEST ' + str(testing_targets))

    (hp, hr, hf), (cp, cr, cf) = measures(testing_targets, combined_predictions)

    # Coerce both sides so the subtraction also works for plain lists.
    residuals = (np.asarray(combined_predictions, dtype=float)
                 - np.asarray(testing_targets, dtype=float))
    error_rate = float(np.sum(residuals ** 2)) / len(testing_targets)
    return error_rate, (hp, hr, hf), (cp, cr, cf)
def fusion(training_data, training_data_scaled, training_targets,
           testing_data, testing_data_scaled, testing_targets,
           fusion_algorithm):
    """Fuse decision-tree, k-NN and SVM predictions over three themes.

    For each of three themes a ``dt`` model (unscaled features), a ``knn``
    model (scaled features) and a selected SVM (scaled features) are trained.
    Within each model family the three per-theme predictions are fused with
    the chosen ``fusion_algorithm``; the three family-level results are then
    combined per sample by taking the most common value.

    NOTE(review): another function named ``fusion`` with a different
    signature is defined earlier in this file; if both live in the same
    module this definition replaces it - confirm which is intended.

    Args:
        training_data: Per-theme unscaled training features (indices 0-2).
        training_data_scaled: Per-theme scaled training features (indices 0-2).
        training_targets: Training targets shared by all models.
        testing_data: Per-theme unscaled test features (indices 0-2).
        testing_data_scaled: Per-theme scaled test features (indices 0-2).
        testing_targets: Test targets.
        fusion_algorithm: ``"maj"`` (``majority_vote``), ``"wmaj"``
            (``weighted_majority_theme``) or ``"svm"`` (``svm_vote``).

    Returns:
        Tuple ``(error_rate, (hp, hr, hf), (cp, cr, cf))`` where
        ``error_rate`` is the mean squared difference between the combined
        predictions and ``testing_targets`` and the two triples are whatever
        ``measures(testing_targets, combined_predictions)`` returns.

    Raises:
        ValueError: If ``fusion_algorithm`` is not one of the supported names.
    """
    # Fail before any training: previously an unknown name only printed
    # 'ERROR' and then crashed on the unbound ``combined_predictions_dt``.
    if fusion_algorithm not in ("maj", "wmaj", "svm"):
        raise ValueError(
            "unknown fusion_algorithm %r (expected 'maj', 'wmaj' or 'svm')"
            % (fusion_algorithm,))

    # The three themes are fixed by the three selected-SVM trainers.
    svm_trainers = (svm_selected_net, svm_selected_ill, svm_selected_ideo)
    models_dt = [dt(training_data[i], training_targets) for i in range(3)]
    models_knn = [knn(training_data_scaled[i], training_targets) for i in range(3)]
    models_svm = [trainer(training_data_scaled[i], training_targets)
                  for i, trainer in enumerate(svm_trainers)]

    # One row per theme for each model family.
    predictions_dt = np.array(
        [model.predict(testing_data[i]) for i, model in enumerate(models_dt)],
        dtype=float)
    predictions_knn = np.array(
        [model.predict(testing_data_scaled[i]) for i, model in enumerate(models_knn)],
        dtype=float)
    predictions_svm = np.array(
        [model.predict(testing_data_scaled[i]) for i, model in enumerate(models_svm)],
        dtype=float)

    if fusion_algorithm == "maj":
        combined_predictions_dt = majority_vote(predictions_dt, testing_targets, [])
        combined_predictions_knn = majority_vote(predictions_knn, testing_targets, [])
        combined_predictions_svm = majority_vote(predictions_svm, testing_targets, [])
    elif fusion_algorithm == "wmaj":
        combined_predictions_dt = weighted_majority_theme('dt', predictions_dt)
        combined_predictions_knn = weighted_majority_theme('knn', predictions_knn)
        combined_predictions_svm = weighted_majority_theme('svm', predictions_svm)
    else:  # "svm" (validated above)
        # NOTE(review): the other ``fusion`` in this file passes
        # ``testing_targets`` as a second argument to ``svm_vote`` - confirm
        # the expected signature.
        combined_predictions_dt = svm_vote(predictions_dt)
        combined_predictions_knn = svm_vote(predictions_knn)
        combined_predictions_svm = svm_vote(predictions_svm)

    # Per-sample vote across the three model families.
    combined_predictions = [
        Counter(votes).most_common(1)[0][0]
        for votes in zip(combined_predictions_dt,
                         combined_predictions_knn,
                         combined_predictions_svm)
    ]

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('predictions DT ' + str(predictions_dt))
    print('combined predictions DT ' + str(combined_predictions_dt))
    print('predictions KNN ' + str(predictions_knn))
    print('combined predictions KNN ' + str(combined_predictions_knn))
    print('predictions SVM ' + str(predictions_svm))
    print('combined predictions SVM ' + str(combined_predictions_svm))
    print('PRED ' + str(combined_predictions))
    print('TEST ' + str(testing_targets))

    (hp, hr, hf), (cp, cr, cf) = measures(testing_targets, combined_predictions)

    # ``combined_predictions`` is a plain list; coerce both sides so the
    # subtraction does not depend on ``testing_targets`` being an ndarray.
    residuals = (np.asarray(combined_predictions, dtype=float)
                 - np.asarray(testing_targets, dtype=float))
    error_rate = float(np.sum(residuals ** 2)) / len(testing_targets)
    return error_rate, (hp, hr, hf), (cp, cr, cf)