import time

from sklearn.ensemble import RandomForestClassifier

# IO_ (a timing context manager), perform_cross_validation and a12_utility are
# project-local helpers; their imports are not shown in this excerpt.


def runRandomForest(trainDataParam, testDataParam):
  res_combo_dict = {}
  #n_estimators_list=[500]
  n_estimators_list             = [75, 80, 85]
  criterion_list                = ['gini', 'entropy']
  #max_features_list             = ['auto', 'sqrt', 'log2', None]
  max_depth_list                = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, None]
  max_leaf_nodes_list           = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, None]
  bootstrap_list                = [True, False]
  #min_samples_split_list        = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100]
  #oob_score_list                = [True, False]
  min_weight_fraction_leaf_list = [0.1, 0.2, 0.3, 0.4, 0.5] # cannot be more than 0.50


  ### setting the parameters: test purpose
#  n_estimators_list=[50, 50000]
#  criterion_list = ['gini', 'entropy']
#  max_features_list=['auto',  None]
#  max_depth_list = [1, 1000 ]
#  max_leaf_nodes_list = [None, 5, 1000] # in our dataset only 549 legit samples, so this should be limited to 549
#  bootstrap_list=[True, False]
#  min_samples_split_list = [1,  1000]  # in our dataset only 549 legit samples, so this should be limited to 549
#  oob_score_list=[True, False]
#  min_weight_fraction_leaf_list=[0.0,  0.5] # must be between 0.0 and 0.50
#  warm_start_list=[True, False]
  ###

  for eti in n_estimators_list:
    for crit in criterion_list:
      for max_depth_ in max_depth_list:
        for max_leaf in max_leaf_nodes_list:
          for bootstrap_ in bootstrap_list:
            for mwfratleaf in min_weight_fraction_leaf_list:
              ## display params:
              # n_jobs has been set to -1 to use all the available cores; it is not part of the experiment
              print "##########"
              print "n_estimators={}, criterion={}, max_depth={}, max_leaf_nodes={}".format(eti, crit, max_depth_, max_leaf)
              print "bootstrap={},  min-wt-frac={}".format(bootstrap_, mwfratleaf)
              key_str_1 = str(eti) + "_" + crit + "_" + str(max_depth_) + "_" + str(max_leaf) + "_"
              key_str_2 = str(bootstrap_) + "_" + str(mwfratleaf) + "_"
              key_for_dict = key_str_1 + key_str_2
              ## fire up the model
              with IO_.duration():
                theRndForestModel = RandomForestClassifier(
                                        n_estimators=eti, criterion=crit,
                                        max_depth=max_depth_,
                                        min_weight_fraction_leaf=mwfratleaf,
                                        max_leaf_nodes=max_leaf, bootstrap=bootstrap_,
                                        n_jobs=-1
                                        )
                res_tuple = perform_cross_validation(theRndForestModel, trainDataParam, testDataParam, 10)
                res_combo_dict[key_for_dict] = res_tuple
              print "##########"
  return res_combo_dict
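

# A minimal sketch (not part of the original script) of how the dictionary
# returned by runRandomForest could be ranked. It assumes the tuple produced by
# perform_cross_validation keeps MAE at index 1, which is how the comparison
# code further down indexes it; the helper name pick_best_combo is ours.
def pick_best_combo(res_combo_dict):
  # sort the "param-combo key -> result tuple" entries by MAE, ascending
  ranked = sorted(res_combo_dict.items(), key=lambda kv: kv[1][1])
  best_key, best_tuple = ranked[0]
  print "best combo:", best_key, "MAE:", best_tuple[1]
  return best_key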
# (the construction of the_Model_2 is truncated here; only the final argument
#  of that RandomForestClassifier call, max_leaf_nodes=10000, survives)

        mae_for_param_combo_2 = perform_cross_validation(
            the_Model_2, trainingData, testData, cv_param)[1]
        t2 = time.time()
        time_for_param_comb_2 = t2 - t1
        mae_list_2.append(mae_for_param_combo_2)
        time_list_2.append(time_for_param_comb_2)

    mae_a12_ = a12_utility.doSlowA12(mae_list_1, mae_list_2)
    time_a12_ = a12_utility.doSlowA12(time_list_2, time_list_1)
    print "MAE  comaprison: is default worse than 'best combo' ?", mae_a12_
    print "time comaprison: is 'best' combo slower than default ?", time_a12_


datasetFileName = "13_NonZeroDataset_Aggolo.csv"
iterations = 10000
cv_param = 5
print "========== Random Forest =========="
with IO_.duration():
    runRFTest(datasetFileName, iterations, cv_param)
print "========== KNN =========="
with IO_.duration():
    runknnTest(datasetFileName, iterations, cv_param)
print "========== SVM =========="
with IO_.duration():
    runsvmTest(datasetFileName, iterations, cv_param)
print "========== CART =========="
with IO_.duration():
    runCARTTest(datasetFileName, iterations, cv_param)
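
# IO_.duration() is used above as a timing context manager; a minimal sketch of
# what such a helper might look like (ours, for illustration only):
import contextlib

@contextlib.contextmanager
def duration_sketch():
  start = time.time()
  yield
  print "duration (secs):", time.time() - start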


def runRandomForest(trainDataParam, testDataParam):
  res_combo_dict ={}  
#  ### setting the parameters
  n_estimators_list=[500]
  #n_estimators_list=[10, 50, 100, 500]
  criterion_list = ['gini', 'entropy']
  max_features_list=['auto', 'sqrt', 'log2', None]
  max_depth_list = [5, 15,  50, None ]
  max_leaf_nodes_list = [None,  25, 50, 75] # in our dataset only 549 legit samples, so this should be limited to 549
  bootstrap_list=[True, False] 
  min_samples_split_list = [1, 25, 50,  100] # in our dataset only 549 legit samples, so this should be limited to 549 (note: min_samples_split=1 is rejected by newer scikit-learn releases, which require an int >= 2)
  oob_score_list=[True, False]
  min_weight_fraction_leaf_list=[0.0, 0.2, 0.3, 0.4] # must be between 0.0 and 0.50 
  warm_start_list=[True, False]
#  ###   
  
  ### setting the parameters: test purpose
#  n_estimators_list=[50, 50000]
#  criterion_list = ['gini', 'entropy']
#  max_features_list=['auto',  None]
#  max_depth_list = [1, 1000 ]
#  max_leaf_nodes_list = [None, 5, 1000] # in our dataset only 549 legit samples, so this should be limited to 549
#  bootstrap_list=[True, False] 
#  min_samples_split_list = [1,  1000]  # in our dataset only 549 legit samples, so this should be limited to 549
#  oob_score_list=[True, False]
#  min_weight_fraction_leaf_list=[0.0,  0.5] # must be between 0.0 and 0.50 
#  warm_start_list=[True, False]
  ###     
  
  for eti in n_estimators_list:
    for crit in criterion_list:
      for maxfeat in max_features_list: 
        for max_depth_ in max_depth_list:
          for max_leaf in max_leaf_nodes_list:
            for bootstrap_ in bootstrap_list:
              for min_sample in min_samples_split_list: 
                # oob_score is only valid with bootstrap sampling, so restrict the
                # inner loop to False in that case; use a local list so that
                # oob_score_list itself is not clobbered for later iterations
                oob_options = [False] if bootstrap_ == False else oob_score_list
                for oob_ in oob_options:
                  for mwfratleaf in min_weight_fraction_leaf_list: 
                    for warm_start_ in warm_start_list:  
                      ## display params:
                      # n_jobs has been set to -1 to use all the available cores; it is not part of the experiment
                      print "##########"
                      print "n_estimators={}, criterion={}, max_features={}, max_depth={}, max_leaf_nodes={}".format(eti, crit, maxfeat, max_depth_, max_leaf)
                      print "bootstrap={}, min-sample-split={}, oob_score={}, min-wt-frac={}, warm-start={}".format(bootstrap_, min_sample, oob_, mwfratleaf, warm_start_)
                      key_str_1 = str(eti) + "_" + crit + "_" + str(maxfeat) + "_" + str(max_depth_) + "_" + str(max_leaf) + "_" 
                      key_str_2 = str(bootstrap_) + "_" + str(min_sample) + "_" + str(oob_) + "_" + str(mwfratleaf) + "_" +str(warm_start_) 
                      key_for_dict = key_str_1 + key_str_2 
                      ## fire up the model 
                      with IO_.duration():
                        theRndForestModel = RandomForestClassifier( 
                                                            n_estimators=eti, criterion=crit, 
                                                            max_depth=max_depth_, min_samples_split=min_sample, 
                                                            max_features=maxfeat, min_weight_fraction_leaf=mwfratleaf,  
                                                            max_leaf_nodes=max_leaf, bootstrap=bootstrap_, 
                                                            oob_score=oob_, n_jobs=-1 , warm_start=warm_start_ 
                                                            )
                        res_tuple = perform_cross_validation(theRndForestModel, trainDataParam, testDataParam, 2) 
                        res_combo_dict[key_for_dict] = res_tuple
                      print "##########" 
  return res_combo_dict
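

# The same grid could be walked without ten levels of nesting; a sketch (ours,
# not part of the original) using itertools.product, mirroring the
# bootstrap/oob_score constraint handled in the loop above.
import itertools

def iter_param_grid(n_estimators_list, criterion_list, max_features_list,
                    max_depth_list, max_leaf_nodes_list, bootstrap_list,
                    min_samples_split_list, oob_score_list,
                    min_weight_fraction_leaf_list, warm_start_list):
  for combo in itertools.product(n_estimators_list, criterion_list,
                                 max_features_list, max_depth_list,
                                 max_leaf_nodes_list, bootstrap_list,
                                 min_samples_split_list, oob_score_list,
                                 min_weight_fraction_leaf_list, warm_start_list):
    bootstrap_, oob_ = combo[5], combo[7]
    if oob_ and not bootstrap_:
      continue  # oob_score requires bootstrap samples
    yield combo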