Exemple #1
0
def test_generate_input_rows_df():
    """Check the shape and column names of the generated prediction inputs.

    Calls generate_input_rows_df for NUM_DAYS_PREDICTIONS days and the
    configured weather station, then verifies that the frame is non-empty,
    that it holds only the expected feature columns (and as many of them as
    expected), and that it contains (NUM_DAYS_PREDICTIONS + 1) * 24 rows
    (presumably one row per hour -- confirm against the generator).
    """
    expected_features = ["TEMPERATURE", "NIEDERSCHLAG", "HOUR", "WEEK_DAY", "DAY", "MONTH", "YEAR"]

    # Rows that will later be fed to the neural networks for prediction
    generated_df = generate_input_rows_df(NUM_DAYS_PREDICTIONS, INDEX_WEATHER_STATION)

    assert len(generated_df) > 0

    # Every generated column must be one of the expected features
    column_names = list(generated_df.columns)
    for column_name in column_names:
        assert column_name in expected_features

    # The number of columns must match the number of expected features
    assert len(column_names) == len(expected_features)

    # The number of rows must match the requested prediction horizon
    expected_row_count = (NUM_DAYS_PREDICTIONS + 1) * 24
    assert len(generated_df) == expected_row_count
Exemple #2
0
def test_perform_predictions_ensemble_next_days():
    """Check the ensemble traffic-level predictions for the next days.

    Loads the ensemble models for "TRAFFIC_1" and "TRAFFIC_2", runs
    perform_predictions_ensemble_next_days on the generated feature rows and
    verifies that each output has one label per input row and that every
    label passes is_int_valid_in_range(label, 1, 5).
    """
    feature_columns = ["TEMPERATURE", "NIEDERSCHLAG", "HOUR", "WEEK_DAY"]
    features_X = generate_input_rows_df(NUM_DAYS_PREDICTIONS, INDEX_WEATHER_STATION)[feature_columns]

    # Load the models for both traffic labels, TRAFFIC_1 first
    models_traffic_1, models_traffic_2 = (
        load_ensemble_models_from_disk(traffic_label, SITI_CODSITO, MODELS_PATH)
        for traffic_label in ("TRAFFIC_1", "TRAFFIC_2")
    )

    predicted_1, predicted_2 = perform_predictions_ensemble_next_days(
        models_traffic_1, models_traffic_2, features_X
    )

    # One prediction per input row is expected for each label
    expected_count = len(features_X)
    assert len(predicted_1) == expected_count
    assert len(predicted_2) == expected_count

    # Every predicted label must be accepted by the range validator
    for predicted_label in predicted_1:
        assert is_int_valid_in_range(predicted_label, 1, 5)
    for predicted_label in predicted_2:
        assert is_int_valid_in_range(predicted_label, 1, 5)
Exemple #3
0
def test_perform_predictions_numeric_nn_next_days():
    """Check the numeric (count) predictions produced for the next days.

    Loads the objects for "COUNT_1" and "COUNT_2", runs
    perform_predictions_numeric_nn_next_days on the generated feature rows
    and verifies that each output has one value per input row and that every
    value is a non-negative integer.
    """
    input_rows_df = generate_input_rows_df(NUM_DAYS_PREDICTIONS, INDEX_WEATHER_STATION)
    input_rows_X = input_rows_df[["TEMPERATURE", "NIEDERSCHLAG", "HOUR", "WEEK_DAY"]]

    list_objects_count_1 = load_numeric_nn_model_scal_enc("COUNT_1", SITI_CODSITO, models_path=MODELS_PATH)
    list_objects_count_2 = load_numeric_nn_model_scal_enc("COUNT_2", SITI_CODSITO, models_path=MODELS_PATH)

    # Both labels are expected to come with the same number of loaded objects
    assert len(list_objects_count_1) == len(list_objects_count_2)

    y_num_pred_count_1, y_num_pred_count_2 = perform_predictions_numeric_nn_next_days(
        list_objects_count_1, list_objects_count_2, input_rows_X
    )

    # One prediction per input row for each label (this also implies that
    # both outputs have the same length)
    expected_rows = len(input_rows_X)
    assert len(y_num_pred_count_1) == expected_rows
    assert len(y_num_pred_count_2) == expected_rows

    for predictions in (y_num_pred_count_1, y_num_pred_count_2):
        # Predicted values are non-negative: a prediction of 0 is accepted,
        # as the stated intent is ">= 0" rather than "strictly positive"
        assert all(y_num_pred >= 0 for y_num_pred in predictions)

        # Predicted values are integers (isinstance also accepts int subclasses)
        assert all(isinstance(y_num_pred, int) for y_num_pred in predictions)
Exemple #4
0
def test_perform_numeric_nn_predictions():
    """Check perform_numeric_nn_prediction for each numeric traffic label.

    For "COUNT_1" and "COUNT_2" in turn, loads the objects needed for a
    neural-network prediction, runs perform_numeric_nn_prediction on the
    generated feature rows and verifies that there is one prediction per
    input row and that every prediction is a non-negative integer.
    """
    input_rows_df = generate_input_rows_df(NUM_DAYS_PREDICTIONS, INDEX_WEATHER_STATION)
    input_rows_X = input_rows_df[["TEMPERATURE", "NIEDERSCHLAG", "HOUR", "WEEK_DAY"]]

    list_num_traffic_labels = ["COUNT_1", "COUNT_2"]

    for traffic_num_label in list_num_traffic_labels:
        # Load the objects for performing a prediction with a neural network
        list_objects = load_numeric_nn_model_scal_enc(traffic_num_label, SITI_CODSITO, models_path=MODELS_PATH)
        assert list_objects is not None

        # Use these objects for performing predictions
        y_num_predictions = perform_numeric_nn_prediction(list_objects, input_rows_X)

        # There must be as many predicted values as input rows
        assert len(y_num_predictions) == len(input_rows_X)

        # Predicted values must be >= 0 (a prediction of 0 is accepted)
        assert all(y_num_pred >= 0 for y_num_pred in y_num_predictions)

        # Predicted values must be integers (isinstance also accepts int subclasses)
        assert all(isinstance(y_num_pred, int) for y_num_pred in y_num_predictions)
Exemple #5
0
def test_perform_models_ensemble_prediction():
    """Check individual and ensemble traffic-level predictions.

    For "TRAFFIC_1" and "TRAFFIC_2" in turn, loads the three ensemble models,
    verifies that each model alone yields one valid label per input row, and
    then verifies the same for perform_models_ensemble_prediction.
    """
    input_rows_df = generate_input_rows_df(NUM_DAYS_PREDICTIONS, INDEX_WEATHER_STATION)
    input_rows_X = input_rows_df[["TEMPERATURE", "NIEDERSCHLAG", "HOUR", "WEEK_DAY"]]
    expected_rows = len(input_rows_X)

    list_traffic_labels = ["TRAFFIC_1", "TRAFFIC_2"]
    # NOTE(review): earlier comments in this test labelled the loaded models
    # at positions 0, 1, 2 as Random Forest, KNN and Decision Tree, which does
    # not match the order of the names below -- confirm the order returned by
    # load_ensemble_models_from_disk.
    list_model_names = ["Optimized Decision Tree", "Simple Random Forest", "Best KNN"]

    for traffic_level_label in list_traffic_labels:
        # Load the ensemble models for the current label
        list_models = load_ensemble_models_from_disk(traffic_level_label, SITI_CODSITO, MODELS_PATH)

        # Check for None first so that a failed load fails this assertion
        # instead of raising TypeError from len()
        assert list_models is not None
        assert len(list_models) == 3

        # Each model alone must return one label per input row, and every
        # label must pass is_int_valid_in_range(label, 1, 5).
        # NOTE(review): the original comment stated the range as [1,4];
        # whether the upper bound 5 is exclusive depends on
        # is_int_valid_in_range -- confirm.
        for model in list_models:
            y_labels = model.predict(input_rows_X)
            assert len(y_labels) == expected_rows
            assert all(is_int_valid_in_range(y_label, 1, 5) for y_label in y_labels)

        # Perform the ensemble predictions
        y_labels_ensemble = perform_models_ensemble_prediction(input_rows_X, list_models, list_model_names)

        # Identity check: `!= None` would be evaluated element-wise if the
        # result is a NumPy array, making the assertion's truth value ambiguous
        assert y_labels_ensemble is not None
        assert len(y_labels_ensemble) == expected_rows
        assert all(is_int_valid_in_range(y_label, 1, 5) for y_label in y_labels_ensemble)