Esempio n. 1
0
"""
Logistic Regression Classification
Combine LR for themes
Feature selection is applied before
"""

# Echo the module docstring as a start-up banner. This sits outside the
# __main__ guard, so it also runs when the module is imported.
print(__doc__)

import sys
sys.path.insert(0, 'utils/')
sys.path.insert(0, 'feature context/')
from load_data import *
from project_data import *
from fusion import cv10
from fusion import lr_feature_selection
from thematic_data_combined import combine_data_from_feature_selection
from parameters import CV_PERCENTAGE_OCCURENCE_THRESHOLD

if __name__ == "__main__":
	# Load the project spreadsheet and keep the labels and sample ids.
	project = Data(Spreadsheet(project_data_file))
	labels = project.targets
	sample_ids = project.ids

	# Build the combined dataset using the configured occurrence threshold;
	# the call also hands back the targets that go with it.
	features, labels = combine_data_from_feature_selection(
		labels, CV_PERCENTAGE_OCCURENCE_THRESHOLD
	)

	# The fusion strategy is chosen interactively, then passed to cv10
	# together with the logistic-regression classifier.
	fusion_choice = raw_input("Enter algorithm. Choose between maj, wmaj, svm, nn")
	cv10(features, labels, fusion_choice, sample_ids, lr_feature_selection)

	
Esempio n. 2
0
"""
Logistic Regression Classification
Combine LR for themes
"""

# Echo the module docstring as a start-up banner. This sits outside the
# __main__ guard, so it also runs when the module is imported.
print(__doc__)

import sys

sys.path.insert(0, "utils/")
from load_data import *
from project_data import *
from fusion import cv10
from fusion import lr
from thematic_data_combined import *

if __name__ == "__main__":
    # Load the project spreadsheet and keep the labels and sample ids.
    project = Data(Spreadsheet(project_data_file))
    labels = project.targets
    sample_ids = project.ids

    # Split the data by theme; the split returns the dataset together with
    # the targets that go with it.
    thematic_dataset, labels = ThematicDataCombined(labels).thematic_split()

    # The fusion strategy is chosen interactively, then passed to cv10
    # together with the logistic-regression classifier.
    fusion_choice = raw_input("Enter algorithm. Choose between maj, wmaj, svm, nn")
    cv10(thematic_dataset, labels, fusion_choice, sample_ids, lr)
Esempio n. 3
0
Feature selection is applied before
"""

# Echo the module docstring as a start-up banner. This sits outside the
# __main__ guard, so it also runs when the module is imported.
print(__doc__)

import sys
sys.path.insert(0, 'utils/')
sys.path.insert(0, 'feature context/')
from load_data import *
from project_data import *
from fusion import cv10
from svms import svm_selected_for_features_fusion
from standardized_data import *
from thematic_data_combined import combine_data_from_feature_selection
from parameters import CV_PERCENTAGE_OCCURENCE_THRESHOLD	

if __name__ == "__main__":
	# Load the project spreadsheet and keep the labels and sample ids.
	project = Data(Spreadsheet(project_data_file))
	labels = project.targets
	sample_ids = project.ids

	# Build the combined dataset using the configured occurrence threshold;
	# the call also hands back the targets that go with it.
	features, labels = combine_data_from_feature_selection(
		labels, CV_PERCENTAGE_OCCURENCE_THRESHOLD
	)

	# The SVM run works on a standardized copy of the combined dataset.
	scaler = StandardizedData(labels)
	scaled_features = scaler.standardize_dataset(features)

	# The fusion strategy is chosen interactively, then passed to cv10
	# together with the SVM classifier.
	fusion_choice = raw_input("Enter algorithm. Choose between maj, wmaj, svm, nn")
	cv10(
		scaled_features,
		labels,
		fusion_choice,
		sample_ids,
		svm_selected_for_features_fusion,
		ind=True,
	)

	
Esempio n. 4
0
if __name__ == "__main__":
	# Load the project spreadsheet and keep the class labels and sample ids.
	spreadsheet = Spreadsheet(project_data_file)
	data = Data(spreadsheet)
	targets = data.targets
	ids = data.ids

	# The occurrence threshold is entered interactively for this script.
	percentage = float(raw_input("Enter percentage."))
	combined_dataset, targets = combine_data_from_feature_selection(targets, percentage)

	alg = raw_input("Enter algorithm. Choose lr, dt, knn, svm")
	fusion_algorithm = raw_input("Enter algorithm. Choose between maj, wmaj, svm, nn")

	# Resolve the classifier once, before the repeated runs. An if/elif chain
	# (rather than a dict of callables) keeps name lookup lazy, so only the
	# classifier that was actually requested has to be importable.
	dataset = combined_dataset
	extra_args = {}
	if alg == "lr":
		classifier = lr_feature_selection
	elif alg == "dt":
		classifier = dt
	elif alg == "knn":
		classifier = knn
	elif alg == "svm":
		classifier = svm_selected_for_features_fusion
		# Only the SVM path standardizes its input and passes ind=True.
		# The standardization does not depend on the loop, so do it once.
		std = StandardizedData(targets)
		dataset = std.standardize_dataset(combined_dataset)
		extra_args["ind"] = True
	else:
		classifier = None

	if classifier is None:
		# Unknown choice: report it once instead of once per iteration.
		print('ERROR')
	else:
		# Same name as before: "best_<alg>_<percentage><alg>_<fusion>.txt".
		file_name = "best_" + alg + "_" + str(percentage) + alg + "_" + fusion_algorithm + ".txt"
		for _ in range(100):
			cv10(dataset, targets, fusion_algorithm, ids, classifier, prt=True, file_name=file_name, **extra_args)

	
		
"""
Decision Tree Classification
Combine DT for themes
Feature selection is applied before
"""

# Echo the module docstring as a start-up banner. This sits outside the
# __main__ guard, so it also runs when the module is imported.
print(__doc__)

import sys
sys.path.insert(0, 'utils/')
sys.path.insert(0, 'feature context/')
from load_data import *
from project_data import *
from fusion import cv10
from fusion import dt
from thematic_data_combined import combine_data_from_feature_selection
from parameters import CV_PERCENTAGE_OCCURENCE_THRESHOLD

if __name__ == "__main__":
	# Load the project spreadsheet and keep the labels and sample ids.
	project = Data(Spreadsheet(project_data_file))
	labels = project.targets
	sample_ids = project.ids

	# Build the combined dataset using the configured occurrence threshold;
	# the call also hands back the targets that go with it.
	features, labels = combine_data_from_feature_selection(
		labels, CV_PERCENTAGE_OCCURENCE_THRESHOLD
	)

	# The fusion strategy is chosen interactively, then passed to cv10
	# together with the decision-tree classifier.
	fusion_choice = raw_input("Enter algorithm. Choose between maj, wmaj, svm, nn")
	cv10(features, labels, fusion_choice, sample_ids, dt)