def runRFTest(fileParam, itnParam, cv_param):
    """Compare a default random forest with a hand-tuned one over repeated runs.

    The data set is loaded once through ``getData(fileParam)``; element 0 of
    the result is used as training data and element 1 as test data.  On each
    of ``itnParam`` iterations both forests are passed to
    ``perform_cross_validation`` and element ``[1]`` of its result is recorded
    (stored here as the MAE -- confirm against that helper), together with the
    wall-clock seconds the run took.  The two samples are finally compared
    with ``a12_utility.doSlowA12`` (presumably the Vargha-Delaney A12 effect
    size -- confirm in ``a12_utility``) and both results are printed.

    Args:
        fileParam: forwarded unchanged to ``getData``.
        itnParam: number of repetitions of the default/tuned pair.
        cv_param: forwarded unchanged to ``perform_cross_validation``.

    Returns:
        None.  The two comparison results are written to stdout.
    """
    testAndTrainData = getData(fileParam)
    trainingData = testAndTrainData[0]
    testData = testAndTrainData[1]

    def _timed_mae(build_model):
        # The clock starts before the estimator is built, so construction
        # cost is part of the reported time (same as the measurement always
        # was for this experiment).
        started = time.time()
        mae = perform_cross_validation(
            build_model(), trainingData, testData, cv_param)[1]
        return mae, time.time() - started

    def _default_forest():
        return RandomForestClassifier()

    def _tuned_forest():
        return RandomForestClassifier(
            n_estimators=10,
            criterion='entropy',
            max_features='sqrt',
            max_depth=15,
            max_leaf_nodes=75,
            bootstrap=False,
            # The tuned combination listed min_samples_split=1.  A node needs
            # at least two samples to be split, and current scikit-learn
            # rejects the integer 1 with a ValueError, so the smallest valid
            # value is used.  NOTE(review): older releases are believed to
            # have raised 1 to 2 internally -- confirm for the pinned version.
            min_samples_split=2,
            oob_score=False,
            min_weight_fraction_leaf=0.3,
            n_jobs=-1,
            warm_start=False)

    mae_list_1, time_list_1 = [], []   # default configuration
    mae_list_2, time_list_2 = [], []   # tuned configuration
    for _ in xrange(itnParam):
        mae, elapsed = _timed_mae(_default_forest)
        mae_list_1.append(mae)
        time_list_1.append(elapsed)

        mae, elapsed = _timed_mae(_tuned_forest)
        mae_list_2.append(mae)
        time_list_2.append(elapsed)

    # Argument order follows the question printed next to each result:
    # default-vs-tuned for the error, tuned-vs-default for the time.
    mae_a12_ = a12_utility.doSlowA12(mae_list_1, mae_list_2)
    time_a12_ = a12_utility.doSlowA12(time_list_2, time_list_1)
    print("MAE comaprison: is default worse than 'best combo' ?" + " " + str(mae_a12_))
    print("time comaprison: is 'best' combo slower than default ?" + " " + str(time_a12_))
def runCARTTest(fileParam, itnParam, cv_param):
    """Compare a default decision tree with a hand-tuned one over repeated runs.

    Loads the data once via ``getData(fileParam)`` (element 0 is used for
    training, element 1 for testing).  Each of the ``itnParam`` iterations
    cross-validates a default ``DecisionTreeClassifier`` and then a configured
    one, keeping element ``[1]`` of ``perform_cross_validation``'s result
    (stored as the MAE -- confirm against that helper) and the elapsed
    wall-clock seconds.  Both samples are then handed to
    ``a12_utility.doSlowA12`` and the two results are printed.

    Args:
        fileParam: forwarded unchanged to ``getData``.
        itnParam: number of repetitions.
        cv_param: forwarded unchanged to ``perform_cross_validation``.

    Returns:
        None.  Results are written to stdout.
    """
    loaded = getData(fileParam)
    train_part = loaded[0]
    test_part = loaded[1]

    default_maes, default_secs = [], []
    tuned_maes, tuned_secs = [], []

    # criterion=entropy splitter=random max_features=10 max_depth=25
    # min_samples_split=4 min_samples_leaf=2 max_leaf_nodes=10000
    tuned_kwargs = dict(
        criterion='entropy',
        splitter='random',
        max_features=10,
        max_depth=25,
        min_samples_split=4,
        min_samples_leaf=2,
        max_leaf_nodes=10000,
    )

    # (constructor keywords, error sink, time sink) -- default first, then
    # tuned, which is the order the two runs happen in on every iteration.
    plan = (
        ({}, default_maes, default_secs),
        (tuned_kwargs, tuned_maes, tuned_secs),
    )

    for _ in xrange(itnParam):
        for kwargs, mae_sink, time_sink in plan:
            began = time.time()  # estimator construction is timed as well
            tree = DecisionTreeClassifier(**kwargs)
            mae = perform_cross_validation(
                tree, train_part, test_part, cv_param)[1]
            spent = time.time() - began
            mae_sink.append(mae)
            time_sink.append(spent)

    mae_a12_ = a12_utility.doSlowA12(default_maes, tuned_maes)
    time_a12_ = a12_utility.doSlowA12(tuned_secs, default_secs)
    print("MAE comaprison: is default worse than 'best combo' ?" + " " + str(mae_a12_))
    print("time comaprison: is 'best' combo slower than default ?" + " " + str(time_a12_))
def stat_a12_test_(valueListParam):
    """Print the A12 comparison of every sample against every other sample.

    For each entry of ``valueListParam`` a ``---`` separator is printed,
    followed by one ``----->`` line per *other* entry holding the result of
    ``a12_utility.doSlowA12(entry, other)``.

    An entry is excluded from its own comparisons by position, not by value.
    Filtering with ``!=`` would also drop any other classifier whose sample
    happens to be equal, and is ambiguous for array-like samples.

    Args:
        valueListParam: sequence of samples, one per classifier; each sample
            is passed as-is to ``a12_utility.doSlowA12``.

    Returns:
        None.  Results are written to stdout.
    """
    for comparer_pos, comparer in enumerate(valueListParam):
        print("---")
        for comparee_pos, comparee_item in enumerate(valueListParam):
            if comparee_pos == comparer_pos:
                continue  # never compare a sample with itself
            a12_results = a12_utility.doSlowA12(comparer, comparee_item)
            print("----->" + " " + str(a12_results))
def runRFTest(fileParam, itnParam, cv_param):
    """Run default vs. tuned random-forest cross-validation and print A12 results.

    ``getData(fileParam)`` is called once; its element 0 is the training data
    and element 1 the test data.  For ``itnParam`` iterations a default
    ``RandomForestClassifier`` and a configured one are each passed to
    ``perform_cross_validation``; element ``[1]`` of the result (kept as the
    MAE -- confirm against that helper) and the elapsed wall-clock seconds are
    collected per configuration.  ``a12_utility.doSlowA12`` is then applied to
    the two error samples and to the two timing samples, and both results are
    printed.

    Args:
        fileParam: forwarded unchanged to ``getData``.
        itnParam: number of repetitions of the default/tuned pair.
        cv_param: forwarded unchanged to ``perform_cross_validation``.

    Returns:
        None.  Results are written to stdout.
    """
    testAndTrainData = getData(fileParam)
    trainingData = testAndTrainData[0]
    testData = testAndTrainData[1]

    mae_list_1, time_list_1 = [], []   # default configuration
    mae_list_2, time_list_2 = [], []   # tuned configuration

    tuned_settings = dict(
        n_estimators=10,
        criterion='entropy',
        max_features='sqrt',
        max_depth=15,
        max_leaf_nodes=75,
        bootstrap=False,
        # Recorded "best" value was 1, which current scikit-learn refuses
        # (an integer min_samples_split must be at least 2).  Two is the
        # smallest value that can actually split a node.  NOTE(review):
        # older releases are believed to have clamped 1 to 2 -- confirm for
        # the version this experiment is pinned to.
        min_samples_split=2,
        oob_score=False,
        min_weight_fraction_leaf=0.3,
        n_jobs=-1,
        warm_start=False,
    )

    # Default first, tuned second -- the order of the two runs per iteration.
    schedule = (
        ({}, mae_list_1, time_list_1),
        (tuned_settings, mae_list_2, time_list_2),
    )

    for _ in xrange(itnParam):
        for settings, mae_sink, time_sink in schedule:
            t_start = time.time()  # construction is part of the timed span
            forest = RandomForestClassifier(**settings)
            mae = perform_cross_validation(
                forest, trainingData, testData, cv_param)[1]
            t_spent = time.time() - t_start
            mae_sink.append(mae)
            time_sink.append(t_spent)

    # Argument order matches the question printed with each result.
    mae_a12_ = a12_utility.doSlowA12(mae_list_1, mae_list_2)
    time_a12_ = a12_utility.doSlowA12(time_list_2, time_list_1)
    print("MAE comaprison: is default worse than 'best combo' ?" + " " + str(mae_a12_))
    print("time comaprison: is 'best' combo slower than default ?" + " " + str(time_a12_))
def runsvmTest(fileParam, itnParam, cv_param):
    """Compare a default RBF SVC with a hand-tuned RBF SVC over repeated runs.

    Data comes from a single ``getData(fileParam)`` call (element 0 for
    training, element 1 for testing).  Every iteration cross-validates
    ``svm.SVC(kernel='rbf')`` and then the configured variant through
    ``perform_cross_validation``, recording element ``[1]`` of the result
    (stored as the MAE -- confirm against that helper) and the elapsed
    wall-clock seconds.  The collected samples are compared with
    ``a12_utility.doSlowA12`` and the results printed.

    Args:
        fileParam: forwarded unchanged to ``getData``.
        itnParam: number of repetitions.
        cv_param: forwarded unchanged to ``perform_cross_validation``.

    Returns:
        None.  Results are written to stdout.
    """
    dataset = getData(fileParam)
    fit_rows = dataset[0]
    eval_rows = dataset[1]

    def _measure(make_estimator):
        # Start timing before the estimator exists so that building it is
        # included in the measured span.
        tick = time.time()
        estimator = make_estimator()
        score = perform_cross_validation(
            estimator, fit_rows, eval_rows, cv_param)[1]
        tock = time.time()
        return score, tock - tick

    def _plain_svc():
        return svm.SVC(kernel='rbf')

    def _tuned_svc():
        # C=10 shrinking=False tol=1e-05 decision_function_shape=ovr
        return svm.SVC(kernel='rbf', C=10, shrinking=False, tol=1e-05,
                       decision_function_shape='ovr')

    plain_maes, plain_times = [], []
    tuned_maes, tuned_times = [], []
    for _ in xrange(itnParam):
        score, seconds = _measure(_plain_svc)
        plain_maes.append(score)
        plain_times.append(seconds)

        score, seconds = _measure(_tuned_svc)
        tuned_maes.append(score)
        tuned_times.append(seconds)

    mae_a12_ = a12_utility.doSlowA12(plain_maes, tuned_maes)
    time_a12_ = a12_utility.doSlowA12(tuned_times, plain_times)
    print("MAE comaprison: is default worse than 'best combo' ?" + " " + str(mae_a12_))
    print("time comaprison: is 'best' combo slower than default ?" + " " + str(time_a12_))
def runknnTest(fileParam, itnParam, cv_param):
    """Compare a default k-NN classifier with a hand-tuned one over repeated runs.

    ``getData(fileParam)`` is read once (element 0: training data, element 1:
    test data).  For ``itnParam`` iterations a default
    ``KNeighborsClassifier`` and a configured one are cross-validated via
    ``perform_cross_validation``; element ``[1]`` of each result (stored as
    the MAE -- confirm against that helper) and the run's wall-clock seconds
    are collected.  ``a12_utility.doSlowA12`` is applied to the error samples
    and to the timing samples, and both results are printed.

    Args:
        fileParam: forwarded unchanged to ``getData``.
        itnParam: number of repetitions.
        cv_param: forwarded unchanged to ``perform_cross_validation``.

    Returns:
        None.  Results are written to stdout.
    """
    bundle = getData(fileParam)
    train_set = bundle[0]
    test_set = bundle[1]

    # n_neighbors=10 weights=distance metric=minkowski p=3 algorithm=brute
    tuned_options = {
        'n_neighbors': 10,
        'weights': 'distance',
        'metric': 'minkowski',
        'p': 3,
        'algorithm': 'brute',
    }

    results = {
        'default': {'mae': [], 'secs': []},
        'tuned': {'mae': [], 'secs': []},
    }
    # Explicit order: the default model always runs before the tuned one.
    ordered_runs = (('default', {}), ('tuned', tuned_options))

    for _ in xrange(itnParam):
        for label, options in ordered_runs:
            t_begin = time.time()  # includes estimator construction
            knn = KNeighborsClassifier(**options)
            mae = perform_cross_validation(
                knn, train_set, test_set, cv_param)[1]
            t_end = time.time()
            results[label]['mae'].append(mae)
            results[label]['secs'].append(t_end - t_begin)

    mae_a12_ = a12_utility.doSlowA12(
        results['default']['mae'], results['tuned']['mae'])
    time_a12_ = a12_utility.doSlowA12(
        results['tuned']['secs'], results['default']['secs'])
    print("MAE comaprison: is default worse than 'best combo' ?" + " " + str(mae_a12_))
    print("time comaprison: is 'best' combo slower than default ?" + " " + str(time_a12_))
def runRFTest(orig_fileParam, synth_fileParam, itnParam, cv_param):
    """Compare a 50-tree random forest on original data vs. synthetic data.

    Both data sets are loaded up front with ``getData`` (original first, then
    synthetic); element 0 of each result is used as training data and element
    1 as test data.  On each of ``itnParam`` iterations a fresh
    ``RandomForestClassifier(n_estimators=50)`` is cross-validated on the
    original data and another on the synthetic data.  Element ``[1]`` of
    ``perform_cross_validation``'s result (stored as the MAE -- confirm
    against that helper) and the elapsed wall-clock seconds are collected for
    each.  The samples are compared with ``a12_utility.doSlowA12`` and the
    results printed.

    Args:
        orig_fileParam: forwarded unchanged to ``getData`` for the original data.
        synth_fileParam: forwarded unchanged to ``getData`` for the synthetic data.
        itnParam: number of repetitions.
        cv_param: forwarded unchanged to ``perform_cross_validation``.

    Returns:
        None.  Results are written to stdout.
    """
    orig_bundle = getData(orig_fileParam)
    orig_train, orig_test = orig_bundle[0], orig_bundle[1]

    synth_bundle = getData(synth_fileParam)
    synth_train, synth_test = synth_bundle[0], synth_bundle[1]

    def _timed_forest_mae(train_rows, test_rows):
        # Timing begins before the forest is constructed.
        start = time.time()
        forest = RandomForestClassifier(n_estimators=50)
        mae = perform_cross_validation(
            forest, train_rows, test_rows, cv_param)[1]
        stop = time.time()
        return mae, stop - start

    orig_maes, orig_secs = [], []
    synth_maes, synth_secs = [], []
    for _ in xrange(itnParam):
        mae, secs = _timed_forest_mae(orig_train, orig_test)
        orig_maes.append(mae)
        orig_secs.append(secs)

        mae, secs = _timed_forest_mae(synth_train, synth_test)
        synth_maes.append(mae)
        synth_secs.append(secs)

    mae_a12_ = a12_utility.doSlowA12(orig_maes, synth_maes)
    time_a12_ = a12_utility.doSlowA12(synth_secs, orig_secs)
    print("MAE comparison: is original worse than synthetic ?" + " " + str(mae_a12_))
    print("time comparison: is synthetic slower than original ?" + " " + str(time_a12_))
def stat_a12_test_(valueListParam):
    """Print pairwise A12 results for each sample against all the others.

    Output is one ``---`` line per entry of ``valueListParam``, followed by a
    ``----->`` line for each remaining entry carrying
    ``a12_utility.doSlowA12(entry, other)``.

    The entry itself is skipped by index.  Skipping by ``!=`` would also
    silently leave out any other classifier with an equal sample, and breaks
    for array-like samples whose comparison is element-wise.

    Args:
        valueListParam: sequence of samples, one per classifier; each sample
            is passed as-is to ``a12_utility.doSlowA12``.

    Returns:
        None.  Results are written to stdout.
    """
    total = len(valueListParam)
    for own_index in xrange(total):
        comparer = valueListParam[own_index]
        # Everything except the entry at own_index, in original order.
        comparees = [valueListParam[other_index]
                     for other_index in xrange(total)
                     if other_index != own_index]
        print("---")
        for comparee_item in comparees:
            a12_results = a12_utility.doSlowA12(comparer, comparee_item)
            print("----->" + " " + str(a12_results))
def runCARTTest(fileParam, itnParam, cv_param):
    """Run default vs. tuned decision-tree cross-validation and print A12 results.

    A single ``getData(fileParam)`` call supplies the training data (element
    0) and test data (element 1).  Each iteration times and cross-validates a
    default ``DecisionTreeClassifier`` followed by a configured one, keeping
    element ``[1]`` of ``perform_cross_validation``'s result (stored as the
    MAE -- confirm against that helper) and the elapsed seconds.  After
    ``itnParam`` iterations the samples are compared with
    ``a12_utility.doSlowA12`` and the two results are printed.

    Args:
        fileParam: forwarded unchanged to ``getData``.
        itnParam: number of repetitions.
        cv_param: forwarded unchanged to ``perform_cross_validation``.

    Returns:
        None.  Results are written to stdout.
    """
    data_pair = getData(fileParam)
    train_rows = data_pair[0]
    test_rows = data_pair[1]

    def _clock(factory):
        # Construction happens inside the timed window.
        t_open = time.time()
        mae = perform_cross_validation(
            factory(), train_rows, test_rows, cv_param)[1]
        t_close = time.time()
        return mae, t_close - t_open

    def _stock_tree():
        return DecisionTreeClassifier()

    def _tuned_tree():
        # criterion=entropy splitter=random max_features=10 max_depth=25
        # min_samples_split=4 min_samples_leaf=2 max_leaf_nodes=10000
        return DecisionTreeClassifier(
            criterion='entropy',
            splitter='random',
            max_features=10,
            max_depth=25,
            min_samples_split=4,
            min_samples_leaf=2,
            max_leaf_nodes=10000)

    stock_maes, stock_times = [], []
    tuned_maes, tuned_times = [], []
    for _ in xrange(itnParam):
        mae, took = _clock(_stock_tree)
        stock_maes.append(mae)
        stock_times.append(took)

        mae, took = _clock(_tuned_tree)
        tuned_maes.append(mae)
        tuned_times.append(took)

    mae_a12_ = a12_utility.doSlowA12(stock_maes, tuned_maes)
    time_a12_ = a12_utility.doSlowA12(tuned_times, stock_times)
    print("MAE comaprison: is default worse than 'best combo' ?" + " " + str(mae_a12_))
    print("time comaprison: is 'best' combo slower than default ?" + " " + str(time_a12_))
def runsvmTest(fileParam, itnParam, cv_param):
    """Run default vs. tuned RBF-SVC cross-validation and print A12 results.

    Training data (element 0) and test data (element 1) come from one
    ``getData(fileParam)`` call.  On every iteration ``svm.SVC(kernel='rbf')``
    and then the configured SVC are timed through
    ``perform_cross_validation``; element ``[1]`` of its result (stored as the
    MAE -- confirm against that helper) and the elapsed seconds are appended
    to per-configuration lists.  Finally ``a12_utility.doSlowA12`` compares
    the lists and both results are printed.

    Args:
        fileParam: forwarded unchanged to ``getData``.
        itnParam: number of repetitions.
        cv_param: forwarded unchanged to ``perform_cross_validation``.

    Returns:
        None.  Results are written to stdout.
    """
    loaded = getData(fileParam)
    train_part = loaded[0]
    test_part = loaded[1]

    base_maes, base_secs = [], []
    best_maes, best_secs = [], []

    # C=10 shrinking=False tol=1e-05 decision_function_shape=ovr
    best_kwargs = dict(kernel='rbf', C=10, shrinking=False, tol=1e-05,
                       decision_function_shape='ovr')

    # Default configuration first, tuned second, on every iteration.
    lineup = (
        (dict(kernel='rbf'), base_maes, base_secs),
        (best_kwargs, best_maes, best_secs),
    )

    for _ in xrange(itnParam):
        for kwargs, mae_sink, secs_sink in lineup:
            t_from = time.time()  # estimator construction is timed too
            classifier = svm.SVC(**kwargs)
            mae = perform_cross_validation(
                classifier, train_part, test_part, cv_param)[1]
            duration = time.time() - t_from
            mae_sink.append(mae)
            secs_sink.append(duration)

    mae_a12_ = a12_utility.doSlowA12(base_maes, best_maes)
    time_a12_ = a12_utility.doSlowA12(best_secs, base_secs)
    print("MAE comaprison: is default worse than 'best combo' ?" + " " + str(mae_a12_))
    print("time comaprison: is 'best' combo slower than default ?" + " " + str(time_a12_))
def runknnTest(fileParam, itnParam, cv_param):
    """Run default vs. tuned k-NN cross-validation and print A12 results.

    One ``getData(fileParam)`` call provides training data (element 0) and
    test data (element 1).  Every iteration times a default
    ``KNeighborsClassifier`` and then a configured one through
    ``perform_cross_validation``, collecting element ``[1]`` of the result
    (stored as the MAE -- confirm against that helper) and the elapsed
    wall-clock seconds.  ``a12_utility.doSlowA12`` is then run on the error
    lists and on the timing lists, and both results are printed.

    Args:
        fileParam: forwarded unchanged to ``getData``.
        itnParam: number of repetitions.
        cv_param: forwarded unchanged to ``perform_cross_validation``.

    Returns:
        None.  Results are written to stdout.
    """
    dataset = getData(fileParam)
    fit_rows = dataset[0]
    eval_rows = dataset[1]

    def _run_once(make_knn):
        # The timer covers estimator construction plus cross-validation.
        began = time.time()
        model = make_knn()
        mae = perform_cross_validation(
            model, fit_rows, eval_rows, cv_param)[1]
        return mae, time.time() - began

    def _default_knn():
        return KNeighborsClassifier()

    def _tuned_knn():
        # n_neighbors=10 weights=distance metric=minkowski p=3 algorithm=brute
        return KNeighborsClassifier(
            n_neighbors=10,
            weights='distance',
            metric='minkowski',
            p=3,
            algorithm='brute')

    default_maes, default_secs = [], []
    tuned_maes, tuned_secs = [], []
    for _ in xrange(itnParam):
        mae, secs = _run_once(_default_knn)
        default_maes.append(mae)
        default_secs.append(secs)

        mae, secs = _run_once(_tuned_knn)
        tuned_maes.append(mae)
        tuned_secs.append(secs)

    mae_a12_ = a12_utility.doSlowA12(default_maes, tuned_maes)
    time_a12_ = a12_utility.doSlowA12(tuned_secs, default_secs)
    print("MAE comaprison: is default worse than 'best combo' ?" + " " + str(mae_a12_))
    print("time comaprison: is 'best' combo slower than default ?" + " " + str(time_a12_))