Example #1
0
def process_one_file(f):
    """Fit and report a GBRT baseline and a stacked ensemble for one CSV file.

    The file at path ``f`` is read with ``csv.reader`` and its first row is
    skipped as a header.  For every remaining row, columns 2 through 9 are
    converted to floats and used as the feature vector (hour, minutes,
    v_occ_min, w_occ_min, v_occ, win, wout, raw_v_occ) and the last column is
    used as the target.  The data is split with ``train_test_split``
    (``test_size=0.3``, ``random_state=233``).  A default
    ``GradientBoostingRegressor`` and a ``StackedGeneralizer`` (three base
    regressors plus a ``LinearRegression`` blending model) are each fitted on
    the training split, and for each one a line is printed containing a tag
    followed by ``f_mae`` and ``f_rms`` on the test split and then on the
    whole data set.

    Args:
        f: Path of the CSV file to process.

    Returns:
        None.  All results are printed.

    Raises:
        StopIteration: If the file has no header row to skip.
        IndexError: If a data row has fewer than ten columns.
        ValueError: If column 1 of a row does not match
            ``%Y-%m-%d %H:%M:%S`` or a feature/target cell is not numeric.
    """
    print(f)

    x, y = [], []
    # Use a context manager so the file handle is closed deterministically
    # (the reader previously held an anonymous, never-closed file object).
    with open(f) as csv_file:
        csv_r = csv.reader(csv_file)
        next(csv_r)  # jump header
        for r in csv_r:
            # The timestamp is not a feature; it is parsed only so that a
            # malformed row still fails loudly here.
            datetime.datetime.strptime(r[1], '%Y-%m-%d %H:%M:%S')
            # Columns 2..9: hour, minutes, v_occ_min, w_occ_min, v_occ, win,
            # wout, raw_v_occ.  Every cell (including hour and minutes) is
            # converted to float so the feature matrix is numeric instead of
            # being coerced to a string array by the mixed str/float rows.
            # Explicit indexing keeps the IndexError on short rows.
            x.append([float(r[i]) for i in range(2, 10)])
            y.append(float(r[-1]))

    x, y = np.array(x), np.array(y)
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.3, random_state=233)

    def _fit_and_report(tag, model):
        # Fit on the training split, then print the metrics for the test
        # split followed by the metrics for the full data set.
        model.fit(x_train, y_train)
        predict = model.predict(x_test)
        predict_all = model.predict(x)
        # A single %-formatted string prints the same space-separated line
        # under both Python 2 and Python 3.
        print('%s %s %s %s %s' % (tag,
                                  f_mae(predict, y_test),
                                  f_rms(predict, y_test),
                                  f_mae(predict_all, y),
                                  f_rms(predict_all, y)))

    _fit_and_report('gbrt', GradientBoostingRegressor())

    # define base models
    base_models = [GradientBoostingRegressor(n_estimators=100),
                   RandomForestRegressor(n_estimators=100, n_jobs=-1),
                   ExtraTreesRegressor(n_estimators=100, n_jobs=-1)]

    # define blending model
    blending_model = LinearRegression()

    # initialize multi-stage model
    sg = StackedGeneralizer(base_models, blending_model,
                            n_folds=N_FOLDS, verbose=VERBOSE)

    _fit_and_report('stack', sg)

    print('')
# First stage: five separately constructed gradient-boosting classifiers,
# all with the same configuration.
base_models = [GradientBoostingClassifier(n_estimators=100) for _ in range(5)]

# Second stage: the model that combines the base models' outputs.
blending_model = LogisticRegression()

# Wire both stages into the multi-stage (stacked) model.
sg = StackedGeneralizer(
    base_models,
    blending_model,
    n_folds=N_FOLDS,
    verbose=VERBOSE,
)

# Fit on the first n_train samples only; the rest is kept for testing.
fit_rows = slice(None, n_train)
sg.fit(train_samples[fit_rows], labels[fit_rows])

# Generate Test Data: everything from index n_train onwards.
held_out_rows = slice(n_train, None)
test_locations = locations[held_out_rows]
test_times = times[held_out_rows]
test_data_img = train_data_img[held_out_rows]

# Two output rows are reserved per held-out entry.
# NOTE(review): presumably each entry holds a pair of detections -- confirm.
n_test = 2 * test_locations.shape[0]

# Zero-filled buffers: one row per isolated test item, with as many columns
# as the last indexed axis (axis 2) of test_locations.
isolated_test_locations = np.zeros((n_test, test_locations.shape[2]))
isolated_test_times = np.zeros(n_test)
# First stage: five separately constructed gradient-boosting classifiers,
# all with the same configuration.
base_models = [GradientBoostingClassifier(n_estimators=100) for _ in range(5)]

# Second stage: the model that combines the base models' outputs.
blending_model = LogisticRegression()

# Wire both stages into the multi-stage (stacked) model.
sg = StackedGeneralizer(
    base_models,
    blending_model,
    n_folds=N_FOLDS,
    verbose=VERBOSE,
)

# Fit on every available training sample (no hold-out split here).
sg.fit(train_samples, labels)

# Generate Test Data


def get_detection_locations_and_scores(frame_idx, detection_boxes):
    i = 0
    is_first_time = True
    result = None
    while (detection_boxes[i][0] <= frame_idx):
        if (detection_boxes[i][0] == frame_idx):
            if is_first_time:
# Draw one random ordering and apply it to samples and labels alike so the
# two stay aligned.
shuffle_idx = np.random.permutation(y.shape[0])
X, y = train_sample[shuffle_idx], y[shuffle_idx]

# Keep 80 percent for fitting; the remaining 20 percent measures accuracy.
train_prct = 0.8
n_train = int(round(train_prct * X.shape[0]))

# First stage: three separately constructed gradient-boosting classifiers.
base_models = [GradientBoostingClassifier(n_estimators=100) for _ in range(3)]

# Second stage: the model that combines the base models' outputs.
blending_model = LogisticRegression()

# Wire both stages into the multi-stage (stacked) model.
sg = StackedGeneralizer(
    base_models,
    blending_model,
    n_folds=N_FOLDS,
    verbose=VERBOSE,
)

# Fit on the leading n_train rows.
sg.fit(X[:n_train], y[:n_train])

# Predict the held-out rows and take, for each prediction, the index of its
# largest entry as the predicted class.
pred = sg.predict(X[n_train:])
pred_classes = [np.argmax(scores) for scores in pred]

# Score the predicted classes against the held-out labels; the return value
# is intentionally discarded.
_ = sg.evaluate(y[n_train:], pred_classes)
Example #5
0
y = data.target

# Draw one random ordering and apply it to samples and labels alike so the
# two stay aligned.
shuffle_idx = np.random.permutation(y.size)
X, y = X[shuffle_idx], y[shuffle_idx]

# Keep 80 percent for fitting; the remaining 20 percent measures accuracy.
train_prct = 0.8
n_train = int(round(train_prct * X.shape[0]))

# First stage: two random forests (gini and entropy split criteria) and one
# extra-trees ensemble, all with the same size and parallelism settings.
base_models = [
    RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion='gini'),
    RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion='entropy'),
    ExtraTreesClassifier(n_estimators=100, n_jobs=-1, criterion='gini'),
]

# Second stage: the model that combines the base models' outputs.
blending_model = LogisticRegression()

# Wire both stages into the multi-stage (stacked) model.
sg = StackedGeneralizer(
    base_models,
    blending_model,
    n_folds=N_FOLDS,
    verbose=VERBOSE,
)

# Fit on the leading n_train rows.
sg.fit(X[:n_train], y[:n_train])

# Predict the held-out rows and take, for each prediction, the index of its
# largest entry as the predicted class.
pred = sg.predict(X[n_train:])
pred_classes = [np.argmax(scores) for scores in pred]

# Score the predicted classes against the held-out labels; the return value
# is intentionally discarded.
_ = sg.evaluate(y[n_train:], pred_classes)