Ejemplo n.º 1
0
def combine_predictions_one_fold_using_majority(known_dataset, known_targets, train_index, test_index, ids, algorithm, ind):
	"""Fit one classifier per theme on a single fold and collect its test predictions.

	For every theme index ``i`` in ``range(NR_THEMES)`` a model is trained on
	``known_dataset[i][train_index]`` and evaluated on
	``known_dataset[i][test_index]``. No voting happens here: the per-theme
	predictions are returned as rows for the caller to combine.

	Parameters:
		known_dataset: per-theme feature collections, indexed first by theme
			and then by the fold index arrays.
		known_targets: labels, indexable by the fold index arrays.
		train_index: indices selecting the training rows of this fold.
		test_index: indices selecting the test rows of this fold.
		ids: forwarded unchanged to ``add_misclassified_ids``.
		algorithm: callable ``(X_train, y_train)`` returning an object with
			``score`` and ``predict``; used for every theme when ``ind`` is false.
		ind: when true, ``algorithm`` is ignored and the theme-specific
			trainers ``svm_selected_net`` / ``svm_selected_ill`` /
			``svm_selected_ideo`` are used for themes 0 / 1 / 2.

	Returns:
		Tuple ``(predictions, y_test, accuracies, misclassified_ids)``:
		``predictions`` is a float array with one row per theme,
		``y_test`` is ``known_targets[test_index]``, ``accuracies`` holds
		``model.score`` per theme, and ``misclassified_ids`` concatenates the
		results of ``add_misclassified_ids`` over all themes.

	Raises:
		IndexError: if ``ind`` is true and ``NR_THEMES`` exceeds the three
			available theme-specific trainers (previously the model of the
			preceding theme was silently reused).
	"""
	y_train, y_test = known_targets[train_index], known_targets[test_index]

	# One trainer per theme. The theme-specific names are only looked up when
	# they are actually requested.
	if ind:
		trainers = (svm_selected_net, svm_selected_ill, svm_selected_ideo)
	else:
		trainers = (algorithm,) * NR_THEMES

	misclassified_ids = []
	predictions = []
	accuracies = []
	for i in range(NR_THEMES):
		theme_data = known_dataset[i]
		X_train, X_test = theme_data[train_index], theme_data[test_index]

		model = trainers[i](X_train, y_train)

		accuracies.append(model.score(X_test, y_test))
		predictions.append(model.predict(X_test))
		misclassified_ids += add_misclassified_ids(model, test_index, theme_data, known_targets, ids)

	# Stack every theme's predictions instead of hard-coding the first three.
	predictions = np.array(predictions, dtype=float)
	return predictions, y_test, accuracies, misclassified_ids
Ejemplo n.º 2
0
def svm_fusion(known_dataset, known_targets, train_index, test_index, ids, algorithm, ind):
	"""Fuse per-theme model outputs with a second-level model for one fold.

	The outer training rows are split with ``StratifiedKFold(..., n_folds=3)``
	and only the first split is used: per-theme models are trained on its
	first part, their predictions on its second part become the training
	input of ``inner_svm``, and their predictions on the outer test rows are
	what the fitted fusion model finally combines.

	Parameters:
		known_dataset: per-theme feature collections, indexed first by theme
			and then by the fold index arrays.
		known_targets: labels, indexable by the fold index arrays.
		train_index: indices selecting the outer training rows.
		test_index: indices selecting the outer test rows.
		ids: forwarded unchanged to ``add_misclassified_ids``.
		algorithm: callable ``(X, y)`` returning an object with ``predict``;
			used for every theme when ``ind`` is false.
		ind: when true, themes 0 / 1 / 2 are trained with
			``svm_selected_net`` / ``svm_selected_ill`` / ``svm_selected_ideo``.

	Returns:
		Tuple ``(final_y_test, predictions, combined, misclassified_ids)``:
		the outer test labels, the list of per-theme predictions on the outer
		test rows, the fusion model's predictions as a plain list, and the
		concatenated results of ``add_misclassified_ids``.
	"""
	outer_y_train = known_targets[train_index]
	final_y_test = known_targets[test_index]

	misclassified_ids = []
	meta_train_columns = []
	predictions = []
	fusion_Y_train = []

	for base_rows, meta_rows in StratifiedKFold(outer_y_train, n_folds=3):
		# Targets do not depend on the theme, so slice them once per split.
		base_targets = outer_y_train[base_rows]
		fusion_Y_train = outer_y_train[meta_rows]

		for i in range(NR_THEMES):
			theme_data = known_dataset[i]
			outer_train_features = theme_data[train_index]
			outer_test_features = theme_data[test_index]
			base_features = outer_train_features[base_rows]
			meta_features = outer_train_features[meta_rows]

			if not ind:
				model = algorithm(base_features, base_targets)
			elif i == 0:
				model = svm_selected_net(base_features, base_targets)
			elif i == 1:
				model = svm_selected_ill(base_features, base_targets)
			elif i == 2:
				model = svm_selected_ideo(base_features, base_targets)

			meta_train_columns.append(model.predict(meta_features))
			predictions.append(model.predict(outer_test_features))
			misclassified_ids.extend(add_misclassified_ids(model, test_index, theme_data, known_targets, ids))

		# Only the first inner split is ever used.
		break

	# One column per theme: rows are samples, columns are per-theme predictions.
	fusion_model = inner_svm(np.vstack(meta_train_columns).T, fusion_Y_train)
	combined_predictions = fusion_model.predict(np.vstack(predictions).T)

	return final_y_test, predictions, combined_predictions.tolist(), misclassified_ids
def fusion(theme, algorithm, training_data, training_targets, testing_data, testing_targets, fusion_algorithm, ind=False):
	"""Train one model per theme, fuse their test predictions and score the result.

	Parameters:
		theme: forwarded to ``weighted_majority_theme`` when
			``fusion_algorithm`` is ``"wmaj"``; unused otherwise.
		algorithm: callable ``(X, y)`` returning an object with ``predict``;
			used for every theme when ``ind`` is false.
		training_data: per-theme training features, indexed by theme.
		training_targets: training labels shared by all themes.
		testing_data: per-theme test features, indexed by theme.
		testing_targets: test labels; must support element-wise subtraction
			from the fused predictions and ``len``.
		fusion_algorithm: ``"maj"`` (``majority_vote``), ``"wmaj"``
			(``weighted_majority_theme``) or ``"svm"`` (``svm_vote``).
		ind: when true, themes 0 / 1 / 2 are trained with
			``svm_selected_net`` / ``svm_selected_ill`` / ``svm_selected_ideo``
			instead of ``algorithm``.

	Returns:
		Tuple ``(error_rate, (hp, hr, hf), (cp, cr, cf))`` where
		``error_rate`` is the mean squared difference between fused
		predictions and ``testing_targets`` and the two triples are whatever
		``measures(testing_targets, combined_predictions)`` returns.

	Raises:
		ValueError: if ``fusion_algorithm`` is not one of the supported names.
	"""
	# Reject an unsupported fusion rule before any model is trained. This case
	# used to print 'ERROR' and then fail with a NameError on the unbound result.
	if fusion_algorithm not in ("maj", "wmaj", "svm"):
		raise ValueError('Unknown fusion algorithm: %r' % (fusion_algorithm,))

	if ind:
		trainers = (svm_selected_net, svm_selected_ill, svm_selected_ideo)
	else:
		trainers = (algorithm,) * NR_THEMES

	models = [trainers[i](training_data[i], training_targets) for i in range(NR_THEMES)]

	# One row of predictions per theme (no longer limited to the first three).
	predictions = np.array(
		[models[i].predict(testing_data[i]) for i in range(NR_THEMES)],
		dtype=float)

	if fusion_algorithm == "maj":
		combined_predictions = majority_vote(predictions, testing_targets, [])
	elif fusion_algorithm == "wmaj":
		combined_predictions = weighted_majority_theme(theme, predictions)
	else:
		# Only "svm" can reach this branch after the validation above.
		combined_predictions = svm_vote(predictions, testing_targets)

	# Parenthesised single-argument form prints identically under Python 2
	# and is also valid Python 3.
	print('PRED ' + str(combined_predictions))
	print('TEST ' + str(testing_targets))

	(hp, hr, hf), (cp, cr, cf) = measures(testing_targets, combined_predictions)
	error_rate = (float(sum((combined_predictions - testing_targets)**2)) / len(testing_targets))
	return error_rate, (hp, hr, hf), (cp, cr, cf)
Ejemplo n.º 4
0
def fusion(training_data, training_data_scaled, training_targets, testing_data, testing_data_scaled, testing_targets, fusion_algorithm):
	"""Two-level fusion: fuse themes per classifier family, then vote across families.

	Three classifier families are trained per theme: ``dt`` on the raw
	features, ``knn`` and the theme-specific ``svm_selected_*`` trainers on
	the scaled features. For each family the three per-theme predictions are
	fused with ``fusion_algorithm``; the three fused results are then combined
	sample by sample by taking the most common label.

	Parameters:
		training_data: per-theme raw training features (themes 0..2).
		training_data_scaled: per-theme scaled training features (themes 0..2).
		training_targets: training labels shared by all themes.
		testing_data: per-theme raw test features.
		testing_data_scaled: per-theme scaled test features.
		testing_targets: test labels; must support element-wise subtraction
			from the final prediction list and ``len``.
		fusion_algorithm: ``"maj"`` (``majority_vote``), ``"wmaj"``
			(``weighted_majority_theme``) or ``"svm"`` (``svm_vote``).

	Returns:
		Tuple ``(error_rate, (hp, hr, hf), (cp, cr, cf))`` where
		``error_rate`` is the mean squared difference between the final
		predictions and ``testing_targets`` and the two triples are whatever
		``measures(testing_targets, combined_predictions)`` returns.

	Raises:
		ValueError: if ``fusion_algorithm`` is not one of the supported names.
	"""
	# Reject an unsupported fusion rule before any model is trained. This case
	# used to print 'ERROR' and then fail with a NameError on the unbound results.
	if fusion_algorithm not in ("maj", "wmaj", "svm"):
		raise ValueError('Unknown fusion algorithm: %r' % (fusion_algorithm,))

	# The SVM family has exactly one dedicated trainer per theme, which fixes
	# the number of themes handled here.
	svm_trainers = (svm_selected_net, svm_selected_ill, svm_selected_ideo)
	theme_indices = range(len(svm_trainers))

	models_dt = [dt(training_data[i], training_targets) for i in theme_indices]
	models_knn = [knn(training_data_scaled[i], training_targets) for i in theme_indices]
	models_svm = [svm_trainers[i](training_data_scaled[i], training_targets) for i in theme_indices]

	# One row of test predictions per theme for each classifier family.
	predictions_dt = np.array(
		[models_dt[i].predict(testing_data[i]) for i in theme_indices], dtype=float)
	predictions_knn = np.array(
		[models_knn[i].predict(testing_data_scaled[i]) for i in theme_indices], dtype=float)
	predictions_svm = np.array(
		[models_svm[i].predict(testing_data_scaled[i]) for i in theme_indices], dtype=float)

	if fusion_algorithm == "maj":
		combined_predictions_dt = majority_vote(predictions_dt, testing_targets, [])
		combined_predictions_knn = majority_vote(predictions_knn, testing_targets, [])
		combined_predictions_svm = majority_vote(predictions_svm, testing_targets, [])
	elif fusion_algorithm == "wmaj":
		combined_predictions_dt = weighted_majority_theme('dt', predictions_dt)
		combined_predictions_knn = weighted_majority_theme('knn', predictions_knn)
		combined_predictions_svm = weighted_majority_theme('svm', predictions_svm)
	else:
		# Only "svm" can reach this branch after the validation above.
		combined_predictions_dt = svm_vote(predictions_dt)
		combined_predictions_knn = svm_vote(predictions_knn)
		combined_predictions_svm = svm_vote(predictions_svm)

	# Second-level vote: most common label among the three families per sample.
	combined_predictions = [
		Counter(votes).most_common(1)[0][0]
		for votes in zip(combined_predictions_dt, combined_predictions_knn, combined_predictions_svm)
	]

	# Parenthesised single-argument form prints identically under Python 2
	# and is also valid Python 3.
	print('predictions DT ' + str(predictions_dt))
	print('combined predictions DT ' + str(combined_predictions_dt))
	print('predictions KNN ' + str(predictions_knn))
	print('combined predictions KNN ' + str(combined_predictions_knn))
	print('predictions SVM ' + str(predictions_svm))
	print('combined predictions SVM ' + str(combined_predictions_svm))

	print('PRED ' + str(combined_predictions))
	print('TEST ' + str(testing_targets))

	(hp, hr, hf), (cp, cr, cf) = measures(testing_targets, combined_predictions)
	# combined_predictions is a plain list; the subtraction relies on
	# testing_targets being array-like with broadcasting support (as before).
	error_rate = (float(sum((combined_predictions - testing_targets)**2)) / len(testing_targets))
	return error_rate, (hp, hr, hf), (cp, cr, cf)