def main():
    """Compare KNN cross-validation error on raw, rescaled and normalized data.

    The stock dataset mixes attributes with very different value ranges.
    According to the original author's notes, investment and employee number
    were added to it; both hold large numbers, so without rescaling or
    normalization they dominate the result and drown out more important
    attributes that have smaller values.

    For each of the three variants of the dataset the un-weighted and the
    weighted KNN are cross-validated (``trails=100``) and both totals are
    printed.
    """

    def _report(dataset):
        # Run both evaluations first, then print, un-weighted before weighted.
        plain_error = cross_validation.cross_validate(
            dataset, algr=KNN.get_KNN, trails=100
        )
        weighted_error = cross_validation.cross_validate(
            dataset, algr=KNN.get_weightedKNN, trails=100
        )
        # "%s %s" reproduces the single space the Python 2 print statement
        # inserts between two comma-separated items.
        print("%s %s" % ("cross validation, using un-weighted KNN: ", plain_error))
        print("%s %s" % ("cross validation, using weighted KNN: ", weighted_error))

    raw_stock_data = mock_Chinese_stock_price.get_stockset_various()

    print("before re-scale/normalization")
    _report(raw_stock_data)

    print("after re-scale")
    # NOTE(review): presumably one factor per attribute, with 0 meant to
    # switch the last attribute off -- confirm against rescale().
    scale_factors = [10, 10, 10, 0.00001, 0]
    _report(rescale(raw_stock_data, scale_factors))

    print("after normalization")
    # NOTE(review): only four (min, max) pairs are given here while the scale
    # list above has five entries -- confirm what normalization() expects.
    attribute_ranges = [(1, 10), (1, 20), (1, 50), (10000, 10000000)]
    _report(normalization(raw_stock_data, attribute_ranges))
def main():
    """Print KNN cross-validation totals before and after attribute scaling.

    The attributes of the stock dataset have different data ranges. The
    original author's notes say investment and employee number were added to
    it; both are large numbers that influence the result significantly
    unless the data is rescaled or normalized, so more important attributes
    with smaller values may not influence the result.

    Three stages are evaluated in order: the raw data, the data passed
    through ``rescale``, and the data passed through ``normalization``. Each
    stage cross-validates the un-weighted and the weighted KNN with
    ``trails=100``.
    """
    stock_rows = mock_Chinese_stock_price.get_stockset_various()

    # Each stage pairs its banner with a thunk, so the dataset is only built
    # after the banner has been printed (same ordering as straight-line code).
    stages = (
        ('before re-scale/normalization',
         lambda: stock_rows),
        ('after re-scale',
         lambda: rescale(stock_rows, [10, 10, 10, 0.00001, 0])),
        ('after normalization',
         lambda: normalization(
             stock_rows, [(1, 10), (1, 20), (1, 50), (10000, 10000000)])),
    )

    for banner, build_dataset in stages:
        print(banner)
        dataset = build_dataset()
        unweighted_total = cross_validation.cross_validate(
            dataset, algr=KNN.get_KNN, trails=100)
        weighted_total = cross_validation.cross_validate(
            dataset, algr=KNN.get_weightedKNN, trails=100)
        # '%s %s' yields label + single space + str(value), matching what
        # the Python 2 statement `print label, value` writes.
        print('%s %s' % ('cross validation, using un-weighted KNN: ',
                         unweighted_total))
        print('%s %s' % ('cross validation, using weighted KNN: ',
                         weighted_total))
def main():
    """Run the annealing and genetic optimizers over the KNN cost function.

    A cost function is built with ``generatecostf`` from the stock dataset,
    using ``KNN.get_KNN`` and ``trails=10``. It is then handed, together with
    a five-entry domain of ``(0, 10)`` ranges, first to ``annealing_opt`` and
    then to ``genetic_optimization``. Each result is printed next to the
    attribute order given in the message:
    ``[rating, age, duration, investment, employee_number]``.

    NOTE(review): the results are presumably per-attribute scale factors --
    confirm against the optimizer implementations.
    """
    # One (lower, upper) search range per attribute listed in the messages.
    domain = [(0, 10)] * 5
    data = mock_Chinese_stock_price.get_stockset_various()
    costf = generatecostf(data, algr=KNN.get_KNN, trails=10)

    annealing_optimized_result = annealing_opt(domain, costf)
    # '%s %s' reproduces the single space the Python 2 print statement puts
    # between two comma-separated items; message text is kept unchanged.
    print('%s %s' % (
        'using annealing optimizaton: '
        '[rating, age, duration, investment, employee_number]',
        annealing_optimized_result,
    ))

    # Fix: the result used to be stored as `genetic_optimized_resule`, so the
    # print below raised NameError on `genetic_optimized_result`.
    genetic_optimized_result = genetic_optimization(domain, costf)
    print('%s %s' % (
        'using genetic optimizaton: '
        '[rating, age, duration, investment, employee_number]',
        genetic_optimized_result,
    ))