def model_pred(tr_x, tr_y, te_x, tr_x17, tr_y17, te_x17, quarter):
    """Fit a distance-weighted KNN regressor and predict three test sets.

    The training data is obtained from ``data_quarter`` and min-max scaled.
    The selected test frame has its ``parcelid`` column renamed to ``date``
    and overwritten with a constant value before being min-max scaled and
    passed to the regressor.

    Args:
        tr_x, tr_y: Training features/targets forwarded to ``data_quarter``.
        te_x: Test features used when ``quarter == 3``.
        tr_x17, tr_y17: Additional training data forwarded to
            ``data_quarter``.
        te_x17: Test features used when ``quarter == 7``.
        quarter: ``3`` (date values 10, 11, 12) or ``7`` (date value 22).

    Returns:
        A tuple ``(pid1, pid2, pid3)`` of frames built from the ``pid`` value
        returned by ``data_quarter``, each with an added ``f_knn`` column of
        predictions. For ``quarter == 7`` all three frames carry the same
        prediction array (only the first date value is predicted).

    Raises:
        ValueError: If ``quarter`` is neither 3 nor 7.
    """
    if quarter == 3:
        test_frame, date_values = te_x, (10, 11, 12)
    elif quarter == 7:
        # Only the first date value was ever predicted for this quarter; the
        # other two outputs reuse that prediction, so the matrices for 23 and
        # 24 are not built at all.
        test_frame, date_values = te_x17, (22,)
    else:
        # Previously this fell through and died later with a NameError.
        raise ValueError('quarter must be 3 or 7, got {!r}'.format(quarter))

    tr_x, tr_y, _, _, pid = data_quarter(tr_x, tr_y, tr_x17, tr_y17, quarter, False)
    # `.values` replaces `as_matrix()`, which newer pandas no longer provides.
    tr_x = MinMaxScaler().fit_transform(tr_x.values)
    tr_y = np.squeeze(tr_y.values)

    # rename() hands back a new frame, so the caller's frame is not modified
    # by the in-place 'date' assignments below.
    test_frame = test_frame.rename(columns={'parcelid': 'date'})
    scaled_tests = []
    for date_value in date_values:
        test_frame.loc[:, 'date'] = date_value
        # NOTE(review): every test matrix is scaled by its own freshly fitted
        # MinMaxScaler rather than the one fitted on the training data, and
        # 'date' is constant within a frame, so the date value probably has
        # no effect on the scaled matrix -- confirm whether reusing the
        # training scaler was intended.
        scaled_tests.append(MinMaxScaler().fit_transform(test_frame.values))

    np.random.seed(0)
    clf = KNeighborsRegressor(n_neighbors=int(np.sqrt(tr_x.shape[0])),
                              weights='distance', p=1)
    if quarter == 7:
        # Call form prints the same text on Python 2 and Python 3.
        print('training all done!')
    clf.fit(tr_x, tr_y)

    preds = [clf.predict(scaled) for scaled in scaled_tests]
    if len(preds) == 1:
        # Quarter 7: reuse the single prediction for all three outputs.
        preds = preds * 3
    pred1, pred2, pred3 = preds

    pred_train = clf.predict(tr_x)
    print('train mae score: {}'.format(mean_absolute_error(tr_y, pred_train)))

    pid1 = pid.to_frame().assign(f_knn=pred1)
    pid2 = pid.to_frame().assign(f_knn=pred2)
    pid3 = pid.to_frame().assign(f_knn=pred3)
    return pid1, pid2, pid3
def model_pred(tr_x, tr_y, te_x, tr_x17, tr_y17, te_x17, quarter, clf):
    """Fit the supplied regressor and predict three date-stamped test sets.

    The training data is obtained from ``data_quarter`` and min-max scaled.
    The selected test frame has its ``parcelid`` column renamed to ``date``
    and overwritten with 10, 11 and 12 in turn; each variant is min-max
    scaled and passed to ``clf.predict``.

    Args:
        tr_x, tr_y: Training features/targets forwarded to ``data_quarter``.
        te_x: Test features used when ``quarter == 3``.
        tr_x17, tr_y17: Additional training data forwarded to
            ``data_quarter``.
        te_x17: Test features used when ``quarter == 7``.
        quarter: ``3`` or ``7``; selects which test frame is predicted.
        clf: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            fitted in place by this function.

    Returns:
        A tuple ``(pid1, pid2, pid3)`` of frames built from the ``pid`` value
        returned by ``data_quarter``, each with an added ``f_svm`` column
        holding the predictions for date values 10, 11 and 12 respectively.

    Raises:
        ValueError: If ``quarter`` is neither 3 nor 7.
    """
    if quarter == 3:
        test_frame = te_x
    elif quarter == 7:
        test_frame = te_x17
    else:
        # Previously this fell through and failed later with an unrelated
        # error (unfitted estimator or undefined predictions).
        raise ValueError('quarter must be 3 or 7, got {!r}'.format(quarter))

    tr_x, tr_y, _, _, pid = data_quarter(tr_x, tr_y, tr_x17, tr_y17, quarter, False)
    # `.values` replaces `as_matrix()`, which newer pandas no longer provides.
    tr_x = MinMaxScaler().fit_transform(tr_x.values)
    tr_y = np.squeeze(tr_y.values)

    # rename() hands back a new frame, so the caller's frame is not modified
    # by the in-place 'date' assignments below.
    test_frame = test_frame.rename(columns={'parcelid': 'date'})
    scaled_tests = []
    # NOTE(review): both quarters use 10/11/12 here -- confirm that this is
    # the intended 'date' encoding for the quarter-7 test frame as well.
    for date_value in (10, 11, 12):
        test_frame.loc[:, 'date'] = date_value
        # NOTE(review): every test matrix is scaled by its own freshly fitted
        # MinMaxScaler rather than the one fitted on the training data, and
        # 'date' is constant within a frame, so the date value probably has
        # no effect on the scaled matrix -- confirm whether reusing the
        # training scaler was intended.
        scaled_tests.append(MinMaxScaler().fit_transform(test_frame.values))

    np.random.seed(0)
    clf.fit(tr_x, tr_y)
    pred1, pred2, pred3 = [clf.predict(scaled) for scaled in scaled_tests]

    pred_train = clf.predict(tr_x)
    print('train mae score: {}'.format(mean_absolute_error(tr_y, pred_train)))

    pid1 = pid.to_frame().assign(f_svm=pred1)
    pid2 = pid.to_frame().assign(f_svm=pred2)
    pid3 = pid.to_frame().assign(f_svm=pred3)
    return pid1, pid2, pid3