def main():
    # when the attributes have different data range
    heterogeneous_data = mock_Chinese_stock_price.get_stockset_various()

    # in this dataset, I have added investment, and employee number,
    # they all have large numbers and will influence the results significantly without normalization,
    # then those more important attributes with smaller values may not influence the result and the final result cannot be accurate
    print "before re-scale/normalization"
    cv_total_error_unweighted = cross_validation.cross_validate(heterogeneous_data, algr=KNN.get_KNN, trails=100)
    cv_total_error_weighted = cross_validation.cross_validate(heterogeneous_data, algr=KNN.get_weightedKNN, trails=100)
    print "cross validation, using un-weighted KNN: ", cv_total_error_unweighted
    print "cross validation, using weighted KNN: ", cv_total_error_weighted

    print "after re-scale"
    scale = [10, 10, 10, 0.00001, 0]
    scaled_data = rescale(heterogeneous_data, scale)
    scaled_cv_total_error_unweighted = cross_validation.cross_validate(scaled_data, algr=KNN.get_KNN, trails=100)
    scaled_cv_total_error_weighted = cross_validation.cross_validate(scaled_data, algr=KNN.get_weightedKNN, trails=100)
    print "cross validation, using un-weighted KNN: ", scaled_cv_total_error_unweighted
    print "cross validation, using weighted KNN: ", scaled_cv_total_error_weighted

    print "after normalization"
    min_max = [(1, 10), (1, 20), (1, 50), (10000, 10000000)]
    normalized_data = normalization(heterogeneous_data, min_max)
    normalized_cv_total_error_unweighted = cross_validation.cross_validate(
        normalized_data, algr=KNN.get_KNN, trails=100
    )
    normalized_cv_total_error_weighted = cross_validation.cross_validate(
        normalized_data, algr=KNN.get_weightedKNN, trails=100
    )
    print "cross validation, using un-weighted KNN: ", normalized_cv_total_error_unweighted
    print "cross validation, using weighted KNN: ", normalized_cv_total_error_weighted
def main():
    # when the attributes have different data range
    heterogeneous_data = mock_Chinese_stock_price.get_stockset_various()
    
    # in this dataset, I have added investment, and employee number, 
    # they all have large numbers and will influence the results significantly without normalization, 
    # then those more important attributes with smaller values may not influence the result and the final result cannot be accurate
    print 'before re-scale/normalization'
    cv_total_error_unweighted = cross_validation.cross_validate(heterogeneous_data, algr = KNN.get_KNN, trails=100)
    cv_total_error_weighted = cross_validation.cross_validate(heterogeneous_data, algr = KNN.get_weightedKNN, trails=100)
    print 'cross validation, using un-weighted KNN: ', cv_total_error_unweighted
    print 'cross validation, using weighted KNN: ', cv_total_error_weighted
    
    print 'after re-scale'
    scale = [10, 10, 10, 0.00001, 0]
    scaled_data = rescale(heterogeneous_data, scale)
    scaled_cv_total_error_unweighted = cross_validation.cross_validate(scaled_data, algr = KNN.get_KNN, trails=100)
    scaled_cv_total_error_weighted = cross_validation.cross_validate(scaled_data, algr = KNN.get_weightedKNN, trails=100)
    print 'cross validation, using un-weighted KNN: ', scaled_cv_total_error_unweighted
    print 'cross validation, using weighted KNN: ', scaled_cv_total_error_weighted
    
    print 'after normalization'
    min_max = [(1,10), (1,20), (1,50), (10000, 10000000)]
    normalized_data = normalization(heterogeneous_data, min_max)
    normalized_cv_total_error_unweighted = cross_validation.cross_validate(normalized_data, algr = KNN.get_KNN, trails=100)
    normalized_cv_total_error_weighted = cross_validation.cross_validate(normalized_data, algr = KNN.get_weightedKNN, trails=100)
    print 'cross validation, using un-weighted KNN: ', normalized_cv_total_error_unweighted
    print 'cross validation, using weighted KNN: ', normalized_cv_total_error_weighted
def main():
    domain = [(0,10)]*5
    data= mock_Chinese_stock_price.get_stockset_various()
    costf = generatecostf(data, algr=KNN.get_KNN, trails=10)
    annealing_optimized_result = annealing_opt(domain, costf)
    print 'using annealing optimizaton: [rating, age, duration, investment, employee_number]', annealing_optimized_result
    
    genetic_optimized_resule = genetic_optimization(domain, costf)
    print 'using genetic optimizaton: [rating, age, duration, investment, employee_number]', genetic_optimized_result