# Hyperparameters for the fully-connected PyTorch regression model.
epochs = 10000
batch_size = 64
layer_dims = {"in": 13, "fc1": 10, "fc2": 10, "fc3": 10, "fc4": 10, "out": 1}
dtype = torch.Tensor


def rmsle(y_pred, y_true):
    """Root Mean Squared Logarithmic Error between two tensors.

    Uses torch.log1p, which is numerically more accurate than
    torch.log(x + 1) for values close to zero.

    Args:
        y_pred: predicted values (non-negative tensor).
        y_true: ground-truth values (non-negative tensor, same shape).

    Returns:
        Scalar tensor holding the RMSLE.
    """
    log_err = torch.log1p(y_pred) - torch.log1p(y_true)
    return torch.sqrt(torch.mean(torch.pow(log_err, 2)))


if __name__ == '__main__':
    # NOTE(review): assumes du.get_processed_df returns this 10-tuple — confirm
    # against data_utils; only the train/val/test splits are used here.
    _, _, _, X_train, Y_train, Y_train_log, X_val, Y_val, X_test, test_date_df = du.get_processed_df(
        'data/train.csv', 'data/test.csv')
    train_data_size = X_train.shape[0]
    print(train_data_size)
    steps_in_epoch = train_data_size // batch_size

    # Dividing data to train and val datasets, conversion from DF to numpy array
    X_train = np.array(X_train)
    Y_train = np.array(Y_train)
    Y_train_log = np.array(Y_train_log)
    X_val = np.array(X_val)
    Y_val = np.array(Y_val)
    X_test = np.array(X_test)

    # Create random Tensors to hold inputs and outputs, and wrap them in Variables.
    X_train = Variable(torch.Tensor(X_train))
# Hyperparameter sweep for an RBF-kernel SVR on the bike-sharing data.
# NOTE(review): Cs, gamma, rmsle and du are defined elsewhere in this file.
reverse_opts = [False]
tested_params = Cs
# Accumulated RMSLE per tested C value (summed over reverse_opts passes).
val_error_hist = np.zeros(len(tested_params))
train_error_hist = np.zeros(len(tested_params))

# Definitions of other kernels one may want to use
# svr_lin = SVR(kernel='linear', C=1000)
# svr_poly = SVR(kernel='poly', C=1000, degree=2, gamma=gamma)

for i, C in enumerate(tested_params):
    for name in ["Gaussian"]:
        for reverse in reverse_opts:
            # Getting separate train and val datasets to control data distribution
            # train_x,train_y,Y_train_log,val_x,val_y = du.get_sep_datasets(datasetX,datasetY,TRAIN_SIZE,reverse_data_order=reverse)
            df_x, _, df_y_log, train_x, train_y, train_y_log, val_x, val_y, test_x, test_date_df = du.get_processed_df(
                'data/train.csv', 'data/test.csv')

            # Training our regression model (fit on log-transformed targets)
            regressor = SVR(kernel='rbf', C=C, gamma=gamma)
            # regressor.fit(X_train, Y_train_log)
            regressor.fit(train_x, train_y_log)

            # Making predictions on train set; undo the log transform and
            # clamp negatives to zero before scoring with RMSLE.
            predictions_train_log = regressor.predict(train_x)
            predictions_train = np.exp(predictions_train_log) - 1
            predictions_train = np.maximum(0, predictions_train)
            train_error = rmsle(predictions_train, train_y)
            train_error_hist[i] += train_error

            # Making predictions on val set
            predictions_val_log = regressor.predict(val_x)
import numpy as np
from sklearn.ensemble import RandomForestRegressor

from data_utils import DataUtils as du


def get_rmsle(y_pred, y_actual):
    """Root Mean Squared Logarithmic Error between two arrays.

    np.log1p is numerically more accurate than np.log(x + 1) near zero.

    Args:
        y_pred: predicted values (non-negative array).
        y_actual: ground-truth values (non-negative array, same shape).

    Returns:
        Scalar RMSLE as a float.
    """
    diff = np.log1p(y_pred) - np.log1p(y_actual)
    mean_error = np.square(diff).mean()
    return np.sqrt(mean_error)


if __name__ == '__main__':
    df_x, _, df_y_log, train_x, train_y, train_y_log, val_x, val_y, test_x, test_date_df = du.get_processed_df(
        '../data/train.csv', '../data/test.csv',
        output_cols=['registered', 'casual', 'count'], model="rrf",
        normalize=False)

    # DataFrame.as_matrix() was removed in pandas 1.0; .to_numpy() is the
    # documented replacement with the same semantics.
    train_y_log_reg = train_y_log['registered'].to_numpy()
    train_y_log_cas = train_y_log['casual'].to_numpy()
    train_y = train_y['count'].to_numpy()

    # Candidate hyperparameter grids and the fixed values used when a given
    # parameter is not the one being swept.
    max_depth_params = np.arange(1, 35, 1)
    max_depth = 20
    n_estimators_params = np.arange(1, 1000, 10)
    n_estimators = 300
    min_split_params = np.arange(12, 13, 1)
    min_split = 12
    tested_params = min_split_params
TOTAL_DATASET_SIZE = 10887


def rmsle(y_true, y_pred):
    """RMSLE on total counts via the Keras backend.

    Per-sample outputs (e.g. 'registered' and 'casual' columns) are summed
    over axis 1 first, so the error is computed on the combined count.
    """
    total_pred = KB.sum(y_pred, axis=1)
    total_true = KB.sum(y_true, axis=1)
    log_diff = KB.log(total_pred + 1) - KB.log(total_true + 1)
    return KB.sqrt(KB.mean(KB.square(log_diff)))


if __name__ == '__main__':
    output_columns = ['registered', 'casual']
    # NOTE(review): du is defined elsewhere in this file — this chunk relies on it.
    df_x, _, df_y_log, train_x, train_y, train_y_log, val_x, val_y, test_x, test_date_df = \
        du.get_processed_df('../data/train.csv', '../data/test.csv',
                            output_cols=output_columns, model="rrf",
                            randomize=True)
    print("Dataset loaded, train_setX:", train_x.shape, ", train_setY:",
          train_y.shape, ", val_setX:", val_x.shape, ", val_setY:", val_y.shape)

    # Convert every split from DataFrame to plain numpy arrays.
    df_x = np.array(df_x)
    df_y_log = np.array(df_y_log)
    train_x = np.array(train_x)
    train_y = np.array(train_y)
    train_y_log = np.array(train_y_log)
    val_x = np.array(val_x)
    val_y = np.array(val_y)
    # Validation targets become an explicit column vector.
    val_y = np.reshape(val_y, newshape=(val_y.shape[0], 1))
    test_x = np.array(test_x)
# "Best error so far" sentinel for the k-NN sweep below.
test_error = 1000


def rmsle(y_pred, y_true):
    """Root Mean Squared Logarithmic Error between two arrays.

    np.log1p is numerically more accurate than np.log(x + 1) near zero.

    Args:
        y_pred: predicted values (non-negative array).
        y_true: ground-truth values (non-negative array, same shape).

    Returns:
        Scalar RMSLE as a float.
    """
    log_err = np.log1p(y_pred) - np.log1p(y_true)
    squared_le = np.power(log_err, 2)
    return np.sqrt(np.mean(squared_le))


if __name__ == '__main__':
    df_x, df_y, df_y_log, train_x, train_y, train_y_log, val_x, val_y, test_x, test_date_df = du.get_processed_df(
        '../data/train.csv', '../data/test.csv', output_cols=['count'],
        normalize=False)

    # DataFrame.as_matrix() was removed in pandas 1.0; .to_numpy() is the
    # documented replacement with the same semantics.
    val_y = np.reshape(val_y.to_numpy(), newshape=(val_y.shape[0], 1))

    # NOTE(review): start_k_neighbours / num_k_neighbours are defined
    # elsewhere in this file.
    k_parameters = range(start_k_neighbours, start_k_neighbours + num_k_neighbours)
    for i, k in enumerate(k_parameters):
        # Training Knn regressor on log-transformed targets
        knn_regressor = KNeighborsRegressor(n_neighbors=k, weights='uniform')
        knn_regressor.fit(train_x, train_y_log)

        # Making predictions on train set
        predictions_train_log = knn_regressor.predict(train_x)
def get_predicions(model_reg, model_cas, model_name):
    """Predict total bike counts from separate 'registered'/'casual' models.

    Both models were fit on log-transformed targets, so predictions are
    mapped back with np.expm1 (numerically stable inverse of log1p, more
    accurate than np.exp(x) - 1), summed, rounded, and clamped at zero.

    Args:
        model_reg: fitted regressor for the 'registered' target (log scale).
        model_cas: fitted regressor for the 'casual' target (log scale).
        model_name: model identifier forwarded to du.get_processed_df.

    Returns:
        Tuple of (val predictions, train predictions, test predictions,
        val_y, train_y['count'], test_date_df).
    """
    df_x, _, df_y_log, train_x, train_y, train_y_log, val_x, val_y, test_x, test_date_df = du.get_processed_df(
        '../data/train.csv', '../data/test.csv',
        output_cols=['registered', 'casual', 'count'], model=model_name,
        normalize=False)

    def _predict_counts(x):
        # Invert the log transform for each sub-model, combine, then round
        # and clamp negatives to zero (np.maximum replaces in-place masking).
        total = np.expm1(model_reg.predict(x)) + np.expm1(model_cas.predict(x))
        return np.maximum(0, np.round(total))

    y_pred_val = _predict_counts(val_x)
    y_pred_train = _predict_counts(train_x)
    y_pred_test = _predict_counts(test_x)

    return y_pred_val, y_pred_train, y_pred_test, val_y, train_y['count'], test_date_df