def feature_selection_before(features, targets, dataset, percentage, ids, one_fold_measures, standardize=False, prt=False, file_name=None):
    """Cross-validate on the known samples using a pre-selected feature subset.

    Steps, in order:
      1. ``split_dataset(dataset, targets)`` separates the known part of the
         data (presumably the samples whose target is known -- confirm
         against ``split_dataset``); the third returned element is unused.
      2. ``select_features(percentage)`` supplies the features to keep.
      3. ``SelectedFeatures`` reduces the known dataset to those features.
      4. Optionally the data goes through
         ``StandardizedData.split_and_standardize_dataset``.
      5. ``cv10`` is run on the result and the selected features are printed.

    Parameters:
      features          -- passed to ``SelectedFeatures`` alongside the
                           selected ones (presumably the full feature list).
      targets, dataset  -- passed to ``split_dataset``.
      percentage        -- passed to ``select_features``.
      ids               -- forwarded to ``cv10`` unchanged.
      one_fold_measures -- forwarded to ``cv10`` unchanged.
      standardize       -- when true, standardize before cross-validating.
      prt, file_name    -- forwarded to ``cv10`` unchanged.

    Returns None; the outcome is whatever ``cv10`` reports plus the final
    feature summary written to stdout.
    """
    # Only the known part is used; the third element is deliberately dropped.
    known_dataset, known_targets, _ = split_dataset(dataset, targets)
    known_targets = np.asarray(known_targets)

    # The selection originally came from feature_selection_cv, i.e.
    #   features_cross_validation(known_dataset, known_targets, features)
    # followed by
    #   select_final_features_from_cv(cv_features, percentage)
    # Those results were saved to decrease computation time, so the saved
    # selection is obtained through select_features instead.
    selected_features = select_features(percentage)

    extractor = SelectedFeatures(known_dataset, known_targets, selected_features, features)
    known_dataset = extractor.extract_data_from_selected_features()

    if standardize:
        scaler = StandardizedData(known_targets, known_dataset)
        known_dataset, known_targets = scaler.split_and_standardize_dataset()

    cv10(np.array(known_dataset), known_targets, ids, one_fold_measures, prt, file_name)

    # Parenthesised single-argument form prints the same text on Python 2
    # and is also valid on Python 3.
    print('####### FEATURES ####### %d \n %s' % (len(selected_features), str(selected_features)))
def feature_selection(features, targets, dataset, ids, target, one_fold_measures, standardize=False):
    """Cross-validate on the known samples using the features picked by ``get_best``.

    Steps, in order:
      1. ``split_dataset(dataset, targets)`` separates the known part of the
         data; the third returned element is unused.
      2. The number of features to request is
         ``floor(TOP_FEATURES_PERCENTAGE_THRESHOLD * len(features))``.
      3. ``get_best`` is called with the ``civil_*`` module-level data when
         ``target == 'civil'`` and with the ``highval_*`` data for any other
         value of ``target``.
      4. ``SelectedFeatures`` reduces the known dataset to those features.
      5. Optionally the data goes through
         ``StandardizedData.split_and_standardize_dataset``.
      6. ``cv10`` is run on the result and the chosen features are printed.

    Parameters:
      features          -- its length sizes the selection; it is also passed
                           to ``SelectedFeatures``.
      targets, dataset  -- passed to ``split_dataset``.
      ids               -- forwarded to ``cv10`` unchanged.
      target            -- ``'civil'`` selects the civil data; anything else
                           selects the highval data.
      one_fold_measures -- forwarded to ``cv10`` unchanged.
      standardize       -- when true, standardize before cross-validating.

    Returns None.
    """
    # Only the known part is used; the third element is deliberately dropped.
    known_dataset, known_targets, _ = split_dataset(dataset, targets)
    known_targets = np.asarray(known_targets)

    nr_times = int(math.floor(TOP_FEATURES_PERCENTAGE_THRESHOLD * len(features)))

    # Any target other than 'civil' falls back to the highval data.
    if target == 'civil':
        ssa_features = get_best(civil_all, civil_all_x, civil_all_y, nr_times)
    else:
        ssa_features = get_best(highval_all, highval_all_x, highval_all_y, nr_times)

    extractor = SelectedFeatures(known_dataset, known_targets, ssa_features, features)
    ssa_dataset = extractor.extract_data_from_selected_features()

    if standardize:
        scaler = StandardizedData(known_targets, ssa_dataset)
        ssa_dataset, known_targets = scaler.split_and_standardize_dataset()

    # feature_selection_before converts the extracted data to an array before
    # calling cv10; do the same here so both entry points hand cv10 the same
    # kind of object. np.asarray does not copy when it already is an ndarray.
    cv10(np.asarray(ssa_dataset), known_targets, ids, one_fold_measures)

    # Parenthesised single-argument form prints the same text on Python 2
    # and is also valid on Python 3.
    print('####### FEATURES ####### %d \n %s' % (len(ssa_features), str(ssa_features)))
Example #3
0
"""
Single KNN
"""

print(__doc__)

import sys
sys.path.insert(0, 'utils/')
from load_data import *
from project_data import *
from parse_theme import *
from standardized_data import *
from cv import cv10
from cv import knn_one_fold_measures

import numpy as np

if __name__ == "__main__":
    # Script entry point: read the project data, parse the theme named on the
    # command line, standardize the known samples and run cv10 with the KNN
    # per-fold measures.
    spreadsheet = Spreadsheet(project_data_file)
    data = Data(spreadsheet)
    targets = data.targets
    ids = data.ids

    # Guard only the argument lookup/parsing. Previously the whole pipeline
    # sat inside the try, so an IndexError raised during standardization or
    # cross-validation was misreported as a missing command-line argument.
    try:
        dataset, features = parse_theme(sys.argv[1])
    except IndexError:
        print("Error!! Pass 'all' as argument")
    else:
        std = StandardizedData(targets, dataset)
        known_dataset_scaled, known_targets = std.split_and_standardize_dataset()

        cv10(known_dataset_scaled, known_targets, ids, knn_one_fold_measures)
Example #4
0
"""
Logistic Regression Classification
Single LR
"""

print(__doc__)

import sys
sys.path.insert(0, 'utils/')
from load_data import *
from project_data import *
from parse_theme import *
from split_dataset import *
from cv import cv10
from cv import lr_one_fold_measures

import numpy as np

if __name__ == "__main__":
    # Script entry point: read the project data, parse the theme named on the
    # command line, keep the known samples and run cv10 with the logistic
    # regression per-fold measures.
    spreadsheet = Spreadsheet(project_data_file)
    data = Data(spreadsheet)
    targets = data.targets
    ids = data.ids

    # Guard only the argument lookup/parsing. Previously the whole pipeline
    # sat inside the try, so an IndexError raised while splitting the data or
    # cross-validating was misreported as a missing command-line argument.
    try:
        dataset, features = parse_theme(sys.argv[1])
    except IndexError:
        print("Error!! Pass 'all' as argument")
    else:
        # The third element returned by split_dataset is not used below.
        known_dataset, known_targets, unk = split_dataset(dataset, targets)

        cv10(np.array(known_dataset), np.array(known_targets), ids, lr_one_fold_measures)
Example #5
0
print(__doc__)

import sys
sys.path.insert(0, 'utils/')
from load_data import *
from project_data import *
from parse_theme import *
from standardized_data import *
from cv import cv10
from cv import single_svm_one_fold_measures

if __name__ == "__main__":
    # Script entry point: read the project data, parse the theme named on the
    # command line, standardize the known samples and run cv10 with the
    # single-SVM per-fold measures.
    spreadsheet = Spreadsheet(project_data_file)
    data = Data(spreadsheet)
    targets = data.targets
    ids = data.ids

    # Guard only the argument lookup/parsing. Previously the whole pipeline
    # sat inside the try, so an IndexError raised during standardization or
    # cross-validation was misreported as a missing command-line argument.
    try:
        dataset, features = parse_theme(sys.argv[1])
    except IndexError:
        print("Error!! Pass 'all' as argument")
    else:
        std = StandardizedData(targets, dataset)
        known_dataset_scaled, known_targets = std.split_and_standardize_dataset()

        cv10(known_dataset_scaled, known_targets, ids, single_svm_one_fold_measures)