def _read_samples(path):
    """Load the feature matrix and target vector from one CSV file.

    The first row is treated as a header and skipped.  For every remaining
    row, columns 2 through 9 (named hour, minutes, v_occ_min, w_occ_min,
    v_occ, win, wout and raw_v_occ by the original code) become the features
    and the last column becomes the target.

    Returns:
        A ``(x, y)`` pair of numpy arrays built from the parsed rows.

    Raises:
        ValueError: if a timestamp or a numeric field cannot be parsed.
        IndexError: if a row has fewer columns than are read here.
        StopIteration: if the file has no header row at all.
    """
    features, targets = [], []
    # The context manager closes the handle even when a row fails to parse.
    with open(path) as handle:
        rows = csv.reader(handle)
        next(rows)  # skip the header row
        for row in rows:
            # The parsed timestamp is not used as a feature; the call is kept
            # so that a malformed value in column 1 still raises ValueError.
            datetime.datetime.strptime(row[1], '%Y-%m-%d %H:%M:%S')
            # Every feature, including hour and minutes, is converted to
            # float so the resulting array is numeric rather than being
            # coerced to a string dtype by the two text fields.
            features.append([float(row[col]) for col in range(2, 10)])
            targets.append(float(row[-1]))
    return np.array(features), np.array(targets)


def _print_scores(label, model, x_test, y_test, x, y):
    """Print MAE and RMS of *model* on the test split and on the full data.

    The output is one space-separated line: the label, then f_mae and f_rms
    on the test split, then f_mae and f_rms on all samples.
    """
    predict = model.predict(x_test)
    predict_all = model.predict(x)
    scores = [
        f_mae(predict, y_test),
        f_rms(predict, y_test),
        f_mae(predict_all, y),
        f_rms(predict_all, y),
    ]
    # Joining str() values reproduces what ``print a, b, c`` emits in
    # Python 2 while remaining valid syntax in Python 3.
    print(' '.join([label] + [str(score) for score in scores]))


def process_one_file(f):
    """Fit and report a GBRT baseline and a stacked ensemble for one CSV.

    Prints the file name, then one score line for a default
    GradientBoostingRegressor ('gbrt') and one for a StackedGeneralizer
    ('stack'), followed by an empty line.  Both models are fitted on the
    same 70/30 train/test split (random_state=233).

    Args:
        f: path of the CSV file to read.

    Returns:
        None.  Results are only printed.
    """
    print(f)
    x, y = _read_samples(f)
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.3, random_state=233)

    t = GradientBoostingRegressor()
    t.fit(x_train, y_train)
    _print_scores('gbrt', t, x_test, y_test, x, y)

    # define base models
    base_models = [
        GradientBoostingRegressor(n_estimators=100),
        RandomForestRegressor(n_estimators=100, n_jobs=-1),
        ExtraTreesRegressor(n_estimators=100, n_jobs=-1),
    ]

    # define blending model
    blending_model = LinearRegression()

    # initialize multi-stage model
    sg = StackedGeneralizer(base_models, blending_model,
                            n_folds=N_FOLDS, verbose=VERBOSE)

    # fit model
    sg.fit(x_train, y_train)
    _print_scores('stack', sg, x_test, y_test, x, y)
    print('')
# Base learners: five separately constructed gradient-boosting classifiers
# with identical settings.
n_base_models = 5
base_models = [GradientBoostingClassifier(n_estimators=100)
               for _ in range(n_base_models)]

# The blender that combines the base learners' outputs.
blending_model = LogisticRegression()

# Assemble the multi-stage (stacked) model.
sg = StackedGeneralizer(
    base_models,
    blending_model,
    n_folds=N_FOLDS,
    verbose=VERBOSE,
)

# Fit on the first n_train samples only.
sg.fit(train_samples[:n_train], labels[:n_train])

# Generate Test Data: everything from index n_train onwards is held out.
test_locations = locations[n_train:]
test_times = times[n_train:]
test_data_img = train_data_img[n_train:]

# Zero-filled buffers with two rows per held-out entry; the column count is
# taken from the third axis of test_locations.
n_test = 2 * test_locations.shape[0]
n_location_columns = test_locations.shape[2]
isolated_test_locations = np.zeros((n_test, n_location_columns))
isolated_test_times = np.zeros(n_test)
# define base models base_models = [ GradientBoostingClassifier(n_estimators=100), GradientBoostingClassifier(n_estimators=100), GradientBoostingClassifier(n_estimators=100), GradientBoostingClassifier(n_estimators=100), GradientBoostingClassifier(n_estimators=100) ] # define blending model blending_model = LogisticRegression() # initialize multi-stage model sg = StackedGeneralizer(base_models, blending_model, n_folds=N_FOLDS, verbose=VERBOSE) # fit model sg.fit(train_samples, labels) # Generate Test Data def get_detection_locations_and_scores(frame_idx, detection_boxes): i = 0 is_first_time = True result = None while (detection_boxes[i][0] <= frame_idx): if (detection_boxes[i][0] == frame_idx): if is_first_time:
# Reorder samples and labels with one shared random permutation so that each
# row stays paired with its label.
shuffle_idx = np.random.permutation(y.shape[0])
X, y = train_sample[shuffle_idx], y[shuffle_idx]

# The first 80 percent of the shuffled rows are used for fitting; the
# remaining 20 percent are held out for testing accuracy.
train_prct = 0.8
n_train = int(round(train_prct * X.shape[0]))

# Base learners: three separately constructed gradient-boosting classifiers.
base_models = [GradientBoostingClassifier(n_estimators=100)
               for _ in range(3)]

# The blender that combines the base learners' outputs.
blending_model = LogisticRegression()

# Assemble the multi-stage (stacked) model and fit it on the training rows.
sg = StackedGeneralizer(
    base_models,
    blending_model,
    n_folds=N_FOLDS,
    verbose=VERBOSE,
)
sg.fit(X[:n_train], y[:n_train])

# Score the held-out rows: np.argmax picks the index of the largest entry in
# each prediction (presumably per-class scores -- confirm against
# StackedGeneralizer.predict).
pred = sg.predict(X[n_train:])
pred_classes = list(map(np.argmax, pred))
_ = sg.evaluate(y[n_train:], pred_classes)
# Targets come from the loaded dataset; X is expected to be defined already.
y = data.target

# Reorder features and targets with one shared random permutation so that
# each row stays paired with its target.
shuffle_idx = np.random.permutation(y.size)
X, y = X[shuffle_idx], y[shuffle_idx]

# The first 80 percent of the shuffled rows are used for fitting; the
# remaining 20 percent are held out for testing accuracy.
train_prct = 0.8
n_train = int(round(train_prct * X.shape[0]))

# Base learners: two random forests (gini / entropy split criteria) and one
# extra-trees ensemble, all sharing the same size and parallelism settings.
forest_options = dict(n_estimators=100, n_jobs=-1)
base_models = [
    RandomForestClassifier(criterion='gini', **forest_options),
    RandomForestClassifier(criterion='entropy', **forest_options),
    ExtraTreesClassifier(criterion='gini', **forest_options),
]

# The blender that combines the base learners' outputs.
blending_model = LogisticRegression()

# Assemble the multi-stage (stacked) model and fit it on the training rows.
sg = StackedGeneralizer(
    base_models,
    blending_model,
    n_folds=N_FOLDS,
    verbose=VERBOSE,
)
sg.fit(X[:n_train], y[:n_train])

# Score the held-out rows: np.argmax picks the index of the largest entry in
# each prediction (presumably per-class scores -- confirm against
# StackedGeneralizer.predict).
pred = sg.predict(X[n_train:])
pred_classes = list(map(np.argmax, pred))
_ = sg.evaluate(y[n_train:], pred_classes)