mape = abs(pred[i] - target_train[i]) / target_train[i] cleaned.append((feature_train[i], target_train[i], pred[i], mape)) cleaned.sort(key = lambda x: x[3]) cleaned = cleaned[: int(len(pred) * 0.80)] feature_train, target_train, pred, mape = zip(* cleaned) start_time = time() model.fit(feature_train, target_train) print("re-training time after cleaning outliers", round(time() - start_time, 3), "s") # Start prediction using cross validation set start_time = time() pred = model.predict(feature_test) print("predition time", round(time() - start_time, 3), "s") # Show MAPE score of trained model with cross validation set score = mape_score(pred, target_test) print('MAPE score ', score) # Save model for later use save_model(model, './model', MODEL_NAME) from result_helper import save_result data_file = '../../processed_data/prediction_data/to_predict_features' test_metadata_list, test_features_list, test_target_list = load_and_split_data(data_file) test_features_list = feature_selection.transform(test_features_list)
if __name__ == "__main__":
    # Read the prepared training file and unpack it into metadata,
    # features and target.
    data_file = '../../processed_data/features_data/training_data/features_ready'
    metadata_list, features_list, target_list = load_and_split_data(data_file)

    # Divide features/target into a training set and a cross validation set.
    (feature_train, feature_test,
     target_train, target_test) = train_test_split(
        features_list,
        target_list,
        test_size=TEST_SIZE,
        random_state=RANDOM_STATE,
    )

    # Train the model. The clock is started before create_model(), so the
    # reported figure covers model construction as well as fitting.
    fit_started = time()
    model = create_model()
    model.fit(feature_train, target_train)
    fit_elapsed = round(time() - fit_started, 3)
    print("training time", fit_elapsed, "s")

    # Run prediction on the cross validation set and report how long it took.
    predict_started = time()
    pred = model.predict(feature_test)
    predict_elapsed = round(time() - predict_started, 3)
    print("predition time", predict_elapsed, "s")

    # Report the MAPE score of the trained model on the cross validation set.
    score = mape_score(pred, target_test)
    print('MAPE score ', score)

    # Persist the trained model under ./model for later use.
    save_model(model, './model', MODEL_NAME)