# Grid search: cross-validated decision-tree classifiers for every combination
# of response column, leading-feature count and maximum tree depth, followed by
# a tab-separated report of the mean score per combination.

CROSS_VALIDATION_FOLDS = 4
num_feature_list = [4, 8, 15]
# Alternative grid kept for reference: range(1,8) + range(8,21,2)
depth_list = range(8, 21, 2)

# accuracy_results[response][num_features][max_depth] -> mean of the per-fold
# scores returned by cross_val_score. No `scoring` argument is passed, so the
# estimator's default scorer is used (reported below as "accuracy").
accuracy_results = {}

for response in response_columns:
    # The target column depends only on the response, so slice it once here.
    y = one_minute_dataframe[response]
    for num_features in num_feature_list:
        # Use the first `num_features` entries of the feature list.
        feature_columns = all_feature_columns[:num_features]
        X = one_minute_dataframe[feature_columns]
        for max_depth in depth_list:
            # Single parenthesised argument: prints identically on Python 2
            # and is also valid Python 3 syntax.
            print(
                "---working on response %s | num_features %s | max_depth %s"
                % (response, num_features, max_depth)
            )
            clf = DecisionTreeClassifier(max_depth=max_depth)
            # tscvsplit is defined elsewhere; presumably it yields
            # time-ordered train/test splits -- TODO confirm.
            cross_val_score_list = cross_val_score(
                clf,
                X,
                y,
                cv=tscvsplit(one_minute_dataframe, CROSS_VALIDATION_FOLDS),
                n_jobs=-1,
            )
            # setdefault creates the intermediate dicts on first use, replacing
            # the hand-written three-branch "key present?" ladder.
            by_depth = accuracy_results.setdefault(response, {}).setdefault(
                num_features, {}
            )
            by_depth[max_depth] = np.mean(cross_val_score_list)

print("response\tnum_features\tmax_depth\taccuracy")
for response in response_columns:
    for num_features in num_feature_list:
        for max_depth in depth_list:
            print(
                "%s\t%s\t\t%s\t\t%0.15f"
                % (
                    response,
                    num_features,
                    max_depth,
                    accuracy_results[response][num_features][max_depth],
                )
            )
# Grid search: cross-validated decision-tree regressors scored with the custom
# trade z-score function, for every combination of response column,
# leading-feature count and maximum tree depth, followed by a tab-separated
# report of the mean score per combination.

# z_results[response][num_features][max_depth] -> mean of the values returned
# by cross_val_custom_score for that configuration.
z_results = {}

for response in response_columns:
    # The target column depends only on the response, so slice it once here.
    y = one_minute_dataframe[response]
    for num_features in num_feature_list:
        # Use the first `num_features` entries of the feature list.
        feature_columns = all_feature_columns[:num_features]
        X = one_minute_dataframe[feature_columns]
        for max_depth in depth_list:
            # Single parenthesised argument: prints identically on Python 2
            # and is also valid Python 3 syntax.
            print(
                "---working on response %s | num_features %s | max_depth %s"
                % (response, num_features, max_depth)
            )
            regressor = DecisionTreeRegressor(max_depth=max_depth)
            # Disabled in-sample check, kept for reference:
            #   regressor.fit(X,y)
            #   trade_z_score_func(y, regressor.predict(X))
            # cross_val_custom_score, tscvsplit and trade_z_score_func are
            # defined elsewhere; presumably this returns one score per fold
            # -- TODO confirm.
            cross_val_z_list = cross_val_custom_score(
                regressor,
                X,
                y,
                tscvsplit(one_minute_dataframe, CROSS_VALIDATION_FOLDS),
                trade_z_score_func,
            )
            # Disabled alternative via sklearn's cross_val_score, kept for reference:
            #   cross_val_sse_list = cross_val_score(regressor, X, y, scoring=trade_z_score)
            #   #cv=tscvsplit(one_minute_dataframe,CROSS_VALIDATION_FOLDS), n_jobs = 1)
            # setdefault creates the intermediate dicts on first use, replacing
            # the hand-written three-branch "key present?" ladder.
            by_depth = z_results.setdefault(response, {}).setdefault(
                num_features, {}
            )
            by_depth[max_depth] = np.mean(cross_val_z_list)

print("response\tnum_features\tmax_depth\ttrade_z")
for response in response_columns:
    for num_features in num_feature_list:
        for max_depth in depth_list:
            print(
                "%s\t%s\t\t%s\t\t%0.15f"
                % (
                    response,
                    num_features,
                    max_depth,
                    z_results[response][num_features][max_depth],
                )
            )
# Grid search: cross-validated decision-tree regressors scored by root mean
# squared error, for every combination of response column, leading-feature
# count and maximum tree depth.

# rmse_results[response][num_features][max_depth] -> mean over folds of
# sqrt(per-fold mean squared error).
rmse_results = {}

for response in response_columns:
    # The target column depends only on the response, so slice it once here.
    y = one_minute_dataframe[response]
    for num_features in num_feature_list:
        # Use the first `num_features` entries of the feature list.
        feature_columns = all_feature_columns[:num_features]
        X = one_minute_dataframe[feature_columns]
        for max_depth in depth_list:
            # Single parenthesised argument: prints identically on Python 2
            # and is also valid Python 3 syntax.
            print(
                "---working on response %s | num_features %s | max_depth %s"
                % (response, num_features, max_depth)
            )
            regressor = DecisionTreeRegressor(max_depth=max_depth)
            # The scorer reports negated MSE (higher is better), hence the
            # leading minus sign. Despite its name, cross_val_sse_list holds
            # per-fold *mean* squared errors; the name is kept so any later
            # code reading it keeps working.
            # NOTE(review): newer scikit-learn releases spell this scorer
            # "neg_mean_squared_error"; update the string when upgrading.
            cross_val_sse_list = -cross_val_score(
                regressor,
                X,
                y,
                scoring="mean_squared_error",
                cv=tscvsplit(one_minute_dataframe, CROSS_VALIDATION_FOLDS),
                n_jobs=-1,
            )
            # Square-root each fold's MSE first, then average the RMSEs;
            # computed once instead of once per insertion branch.
            mean_fold_rmse = np.mean(np.sqrt(cross_val_sse_list))
            # setdefault creates the intermediate dicts on first use, replacing
            # the hand-written three-branch "key present?" ladder.
            by_depth = rmse_results.setdefault(response, {}).setdefault(
                num_features, {}
            )
            by_depth[max_depth] = mean_fold_rmse