Example No. 1
def test_evaluate_inputs():
    """Test the evaluate_model script handles invalid inputs as expected."""
    with pytest.raises(ValueError) as excinfo1:
        # test data input
        score_input = {'pred_prob': ['0.998','0','0.99','0.914','0.993','0','0.006','0.999','0.00046','0.999'],
                       'pred': [1,0,1,1,1,0,0,1,0,1]}
        label_input = {'class':[0,1,0,1,0,1,0,0,1,0]}
        score_df = pd.DataFrame(score_input)
        label_df = pd.DataFrame(label_input)
        pre_defined_kwargs = {'metrics':["confusion_matrix"]}
        evaluate_model(label_df, score_df, **pre_defined_kwargs)

    with pytest.raises(ValueError) as excinfo2:
        # test data input
        score_input2 = {'pred_prob': [0.998,0,0.99,-1,0.993,0,0.006,0.999,0.00046,3],
                        'pred': [1,0,1,1,1,0,0,1,0,1]}
        label_input2 = {'class':[0,1,0,1,0,1,0,0,1,0]}
        score_df2 = pd.DataFrame(score_input2)
        label_df2 = pd.DataFrame(label_input2)
        pre_defined_kwargs2 = {'metrics':["confusion_matrix"]}
        evaluate_model(label_df2, score_df2, **pre_defined_kwargs2)

    # raise AssertionError if error message is not as expected
    assert str(excinfo1.value) == 'Input dataframe can only have numeric or boolean types!'
    assert str(excinfo2.value) == 'Probabilities needs to be in 0-1 range!'
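
This test assumes pytest, pandas, and the project's evaluate_model are in scope. A minimal sketch of the input validation the two cases exercise, with the helper name and internals assumed rather than taken from the project:

import pandas as pd

def _validate_inputs(label_df, score_df):
    # Case 1: every column must be numeric or boolean.
    for df in (label_df, score_df):
        if not all(pd.api.types.is_numeric_dtype(dtype) or
                   pd.api.types.is_bool_dtype(dtype)
                   for dtype in df.dtypes):
            raise ValueError('Input dataframe can only have numeric or boolean types!')
    # Case 2: predicted probabilities must lie in [0, 1].
    probs = score_df['pred_prob']
    if ((probs < 0) | (probs > 1)).any():
        raise ValueError('Probabilities needs to be in 0-1 range!')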
Example No. 2
def test_evaluate_indexing():
    """Test the evaluate_model script handles index out of bounds as expected."""
    with pytest.raises(IndexError) as excinfo:
        # test data input
        score_input = {'pred_prob': [0.998,0,0.99,0.914,0.993,0,0.006,0.999,0.00046,0.999]}
        label_input = {'class':[0,1,0,1,0,1,0,0,1,0]}
        score_df = pd.DataFrame(score_input)
        label_df = pd.DataFrame(label_input)
        pre_defined_kwargs = {'metrics':["confusion_matrix"]}
        evaluate_model(label_df, score_df, **pre_defined_kwargs)
    # raise AssertionError if error message is not as expected
    assert str(excinfo.value) == 'Index out of bounds!'
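
Here score_df carries only a 'pred_prob' column, so the script presumably fails when it reaches for a second column positionally. A hedged sketch of the guard this implies (the helper name is an assumption):

def _check_score_columns(score_df):
    # Positional access such as score_df.iloc[:, 1] needs two columns;
    # surface the problem with the message the test asserts on.
    if score_df.shape[1] < 2:
        raise IndexError('Index out of bounds!')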
Example No. 3
def test_evaluate_model_2():
    # Test that garbage values are handled
    inputs = {
        'DATE': [
            'ABC', '2019-05-02', '2019-05-03', '2019-05-06', '2019-05-07',
            '2019-05-08', '2019-05-09', '2019-05-10', '2019-05-13',
            '2019-05-14', '2019-05-15', '2019-05-16'
        ],
        'EUR': [
            0.88, 0.88, 0.90, 0.89, 0.91, 0.91, 0.92, 0.91, 0.91, 0.90, 0.91,
            0.89
        ],
        'GBP': [
            0.78, 0.81, 0.8, 0.79, 0.795, 0.798, 0.785, 0.777, 0.782, 0.788,
            0.804, 0.79
        ],
        'INR': [
            69.27, 69.30, 69.98, 69.56, 69.24, 69.11, 69.32, 69.41, 69.64,
            69.83, 69.44, 69.22
        ],
    }
    rates = pd.DataFrame(data=inputs)

    ARIMA_models = {
        'P': [1, 0, 0, 2, 0],
        'D': [1, 1, 1, 1, 1],
        'Q': [0, 0, 1, 0, 2]
    }
    forecast_period = "DEF"  #Pass a garbage value in the forecast period

    with pytest.raises(SystemExit) as pytest_wrapped_e:
        actual_result = evaluate_model(rates, forecast_period, ARIMA_models)

    assert pytest_wrapped_e.type == SystemExit
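
pytest.raises(SystemExit) catches sys.exit calls, so evaluate_model presumably bails out when the forecast period is unusable. A sketch of such a guard, purely an assumption about the implementation:

import sys

def _check_forecast_period(forecast_period):
    # A garbage value such as "DEF" is not a positive integer, so the
    # script exits; pytest surfaces this as a SystemExit exception.
    if not isinstance(forecast_period, int) or forecast_period <= 0:
        sys.exit('forecast_period must be a positive integer')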
Example No. 4
def test_evaluate_model_1():
    # Test that the output is correct when proper values are passed
    inputs = {
        'DATE': [
            '2019-05-01', '2019-05-02', '2019-05-03', '2019-05-06',
            '2019-05-07', '2019-05-08', '2019-05-09', '2019-05-10',
            '2019-05-13', '2019-05-14', '2019-05-15', '2019-05-16'
        ],
        'EUR': [
            0.88, 0.88, 0.90, 0.89, 0.91, 0.91, 0.92, 0.91, 0.91, 0.90, 0.91,
            0.89
        ],
        'GBP': [
            0.78, 0.81, 0.8, 0.79, 0.795, 0.798, 0.785, 0.777, 0.782, 0.788,
            0.804, 0.79
        ],
        'INR': [
            69.27, 69.30, 69.98, 69.56, 69.24, 69.11, 69.32, 69.41, 69.64,
            69.83, 69.44, 69.22
        ],
    }
    rates = pd.DataFrame(data=inputs)

    ARIMA_models = {
        'P': [1, 0, 0, 2, 0],
        'D': [1, 1, 1, 1, 1],
        'Q': [0, 0, 1, 0, 2]
    }
    forecast_period = 3

    expected = {
        'P': [1, 0, 0, 2, 0],
        'D': [1, 1, 1, 1, 1],
        'Q': [0, 0, 1, 0, 2],
        'MAPE_INR': [0.469204, 0.477241, 0.370169, 0.245564, 0.355792],
        'MAPE_EUR': [2.281700, 1.944444, 3.387799, 0.620243, 1.302068],
        'MAPE_GBP': [2.549810, 1.448363, 1.751258, 2.291886, 0.701285]
    }
    expected_output = pd.DataFrame(data=expected)

    actual_result = evaluate_model(rates, forecast_period,
                                   ARIMA_models)[expected_output.columns]
    # Check type
    assert isinstance(actual_result, pd.DataFrame)

    # Check expected output
    assert_frame_equal(expected_output, actual_result)
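
assert_frame_equal here is pandas' own frame-comparison helper; the import below is its standard location, and the tolerance remark explains why six-decimal MAPE values can be compared safely:

from pandas.testing import assert_frame_equal

# assert_frame_equal checks values, dtypes, index, and columns. With the
# default check_exact=False, float columns such as MAPE_INR/EUR/GBP are
# compared under a small relative tolerance rather than bit-for-bit.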
Example No. 5
def test_evaluate_model():
    """Test the functionality of evaluate_model."""
    # test data input
    score_input = {'pred_prob': [0.998,0,0.99,0.914,0.993,0,0.006,0.999,0.00046,0.999],
                   'pred': [1,0,1,1,1,0,0,1,0,1]}
    label_input = {'class':[0,1,0,1,0,1,0,0,1,0]}

    score_df = pd.DataFrame(score_input)
    label_df = pd.DataFrame(label_input)

    # desired output dataframe
    output = confusion_matrix(label_df, score_df.iloc[:,1])
    output_df = pd.DataFrame(output,
        index=['Actual Negative','Actual Positive'],
        columns=['Predicted Negative', 'Predicted Positive'])
    
    # add kwargs for function
    pre_defined_kwargs = {'metrics':["confusion_matrix"]}
    # raise AssertionError if dataframes do not match
    assert output_df.equals(evaluate_model(label_df, score_df, **pre_defined_kwargs))
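
confusion_matrix comes from scikit-learn; its row/column convention is what motivates the index and column labels above. A tiny self-contained illustration:

from sklearn.metrics import confusion_matrix

# Rows are actual classes, columns are predicted classes:
# [[TN, FP],
#  [FN, TP]]
cm = confusion_matrix([0, 1, 0, 1], [1, 1, 0, 0])
# cm == [[1, 1],
#        [1, 1]]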
Example No. 6
            utils.save_results(im_data, gt_data, density_map, output_dir)
            log_text = 'epoch: %4d, step %4d, Time: %.4fs, gt_cnt: %4.1f, et_cnt: %4.1f' % (
                epoch, step, 1. / fps, gt_count, et_count)
            log_print(log_text, color='green', attrs=['bold'])
            re_cnt = True

        if re_cnt:
            t.tic()
            re_cnt = False

    if (epoch % 2 == 0):
        save_name = os.path.join(
            output_dir, '{}_{}_{}.h5'.format(method, dataset_name, epoch))
        network.save_net(save_name, net)
        # calculate error on the validation dataset
        mae, mse = evaluate_model(save_name, data_loader_val)
        if mae < best_mae:
            best_mae = mae
            best_mse = mse
            best_model = '{}_{}_{}.h5'.format(method, dataset_name, epoch)
        log_text = 'EPOCH: %d, MAE: %.1f, MSE: %0.1f' % (epoch, mae, mse)
        log_print(log_text, color='green', attrs=['bold'])
        log_text = 'BEST MAE: %0.1f, BEST MSE: %0.1f, BEST MODEL: %s' % (
            best_mae, best_mse, best_model)
        log_print(log_text, color='green', attrs=['bold'])
        if use_tensorboard:
            exp.add_scalar_value('MAE', mae, step=epoch)
            exp.add_scalar_value('MSE', mse, step=epoch)
            exp.add_scalar_value('train_loss',
                                 train_loss / data_loader.get_num_samples(),
                                 step=epoch)
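
In this training loop, evaluate_model(save_name, data_loader_val) reloads the checkpoint and returns MAE and MSE over the validation set. A minimal sketch of that computation, with the loader protocol and prediction callable assumed rather than taken from the repo:

import numpy as np

def evaluate_counts(predict, data_loader):
    # predict maps an input image array to an estimated density map;
    # the crowd count is the sum over the density map.
    abs_err, sq_err, n = 0.0, 0.0, 0
    for im_data, gt_data in data_loader:
        gt_count = float(np.sum(gt_data))
        et_count = float(np.sum(predict(im_data)))
        abs_err += abs(gt_count - et_count)
        sq_err += (gt_count - et_count) ** 2
        n += 1
    return abs_err / n, np.sqrt(sq_err / n)  # MAE and (root) MSE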
Example No. 7
            utils.save_results(im_data, gt_data, density_map, output_dir)
            log_text = 'epoch: %4d, step %4d, Time: %.4fs, gt_cnt: %4.1f, et_cnt: %4.1f' % (
                epoch, step, 1. / fps, gt_count, et_count)
            log_print(log_text, color='green', attrs=['bold'])
            re_cnt = True

        if re_cnt:
            t.tic()
            re_cnt = False

    if (epoch % 2 == 0):
        save_name = os.path.join(
            output_dir, '{}_{}_{}.h5'.format(method, dataset_name, epoch))
        network.save_net(save_name, net)
        # calculate error on the validation dataset
        mae, mse = evaluate_model(save_name, data_loader_val, "CrowdCounter_MSCNN")
        if mae < best_mae:
            best_mae = mae
            best_mse = mse
            best_model = '{}_{}_{}.h5'.format(method, dataset_name, epoch)
        log_text = 'EPOCH: %d, MAE: %.1f, MSE: %0.1f' % (epoch, mae, mse)
        log_print(log_text, color='green', attrs=['bold'])
        log_text = 'BEST MAE: %0.1f, BEST MSE: %0.1f, BEST MODEL: %s' % (
            best_mae, best_mse, best_model)
        log_print(log_text, color='green', attrs=['bold'])
        if use_tensorboard:
            exp.add_scalar_value('MAE', mae, step=epoch)
            exp.add_scalar_value('MSE', mse, step=epoch)
            exp.add_scalar_value('train_loss',
                                 train_loss / data_loader.get_num_samples(),
                                 step=epoch)
Example No. 8
                    print(
                        'epoch: %4d, step %6d, ground truth: %6.1f, estimate: %6.1f'
                        % (epoch, step, ground_truth_count, estimate_count),
                        flush=True)

            if is_save_model_in_epoch and number_of_train_samples % steps_to_save_model == 0:
                model_name = '{}_{}_{}.h5'.format(original_dataset_name, epoch,
                                                  step)
                save_model_path = os.path.join(output_dir, model_name)
                network.save_net(save_model_path, net)

                # evaluate the model of this epoch
                evaluate_result_dict = dict()
                for data_name in best_result_dict:
                    evaluate_data = all_data[data_name]
                    result = evaluate_model(save_model_path,
                                            evaluate_data['data'])
                    evaluate_result_dict[data_name] = result
                    txt_log_info.append(
                        'evaluate %s on %s: mae: %6.2f, mse: %6.2f, psnr: %6.2f, ssim: %6.2f, game: %6.2f, %6.2f, %6.2f, %6.2f'
                        % (result['name'], data_name, result['mae'],
                           result['mse'], result['psnr'], result['ssim'],
                           result['game_0'], result['game_1'],
                           result['game_2'], result['game_3']))

                # check if this model is new best model
                best_result_dict = compare_result(evaluate_result_dict,
                                                  best_result_dict, key_error)
                for data_name in best_result_dict:
                    result = best_result_dict[data_name]
                    txt_log_info.append(
                        'best model on %s is %s with %s of %.2f' %
                        (data_name, result['name'], key_error,
                         result[key_error]))
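
compare_result is the repo's own helper, so its behavior can only be inferred from this call site. A hedged sketch consistent with that usage, where key_error names the metric to minimize (e.g. 'mae'); this is an assumption, not the repo's code:

def compare_result(new_results, best_results, key_error):
    # Keep, per dataset, whichever result has the lower error under
    # key_error; all of the metrics logged above are errors where
    # lower is better.
    for data_name, result in new_results.items():
        if result[key_error] < best_results[data_name][key_error]:
            best_results[data_name] = result
    return best_results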
Example No. 9
def main():
    # extract_face_from_image('zhen', 'zhen', False)
    X_test, y_test = get_X_y_test('test', 'fake_face')
    # X_test, y_test = get_test_data_from_image()
    evaluate_model(X_test, y_test, model_name='AsNet_2.npz')
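
Here model_name points at an .npz weight file, so this evaluate_model presumably loads saved parameters and scores the test set. A runnable sketch under that assumption, with the function name and the array keys 'W' and 'b' invented for illustration:

import numpy as np

def evaluate_model_sketch(X_test, y_test, model_name):
    params = np.load(model_name)              # weights saved via np.savez
    logits = X_test @ params['W'] + params['b']
    preds = (logits > 0).astype(int).ravel()  # binary real/fake decision
    accuracy = float((preds == np.asarray(y_test).ravel()).mean())
    print('accuracy on %d samples: %.4f' % (len(preds), accuracy))
    return accuracy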