def test_evaluate_inputs():
    """Test the evaluate_model script handles invalid inputs as expected."""
    # Case 1: probabilities supplied as strings — non-numeric dtype must be rejected.
    with pytest.raises(ValueError) as excinfo1:
        string_scores = pd.DataFrame({
            'pred_prob': ['0.998', '0', '0.99', '0.914', '0.993',
                          '0', '0.006', '0.999', '0.00046', '0.999'],
            'pred': [1, 0, 1, 1, 1, 0, 0, 1, 0, 1],
        })
        labels = pd.DataFrame({'class': [0, 1, 0, 1, 0, 1, 0, 0, 1, 0]})
        evaluate_model(labels, string_scores, metrics=["confusion_matrix"])

    # Case 2: numeric probabilities outside the [0, 1] range must be rejected.
    with pytest.raises(ValueError) as excinfo2:
        out_of_range_scores = pd.DataFrame({
            'pred_prob': [0.998, 0, 0.99, -1, 0.993,
                          0, 0.006, 0.999, 0.00046, 3],
            'pred': [1, 0, 1, 1, 1, 0, 0, 1, 0, 1],
        })
        labels2 = pd.DataFrame({'class': [0, 1, 0, 1, 0, 1, 0, 0, 1, 0]})
        evaluate_model(labels2, out_of_range_scores, metrics=["confusion_matrix"])

    # raise AssertionError if error message is not as expected
    assert str(excinfo1.value) == 'Input dataframe can only have numeric or boolean types!'
    assert str(excinfo2.value) == 'Probabilities needs to be in 0-1 range!'
def test_evaluate_indexing():
    """Test the evaluate_model script handles index out of bounds as expected."""
    with pytest.raises(IndexError) as excinfo:
        # Score frame deliberately contains only the probability column, so
        # positional column access inside evaluate_model runs out of bounds.
        scores = pd.DataFrame({
            'pred_prob': [0.998, 0, 0.99, 0.914, 0.993,
                          0, 0.006, 0.999, 0.00046, 0.999],
        })
        labels = pd.DataFrame({'class': [0, 1, 0, 1, 0, 1, 0, 0, 1, 0]})
        evaluate_model(labels, scores, metrics=["confusion_matrix"])

    # raise AssertionError if error message is not as expected
    assert str(excinfo.value) == 'Index out of bounds!'
def test_evaluate_model_2():
    """Test that garbage values (bad date, bad forecast period) exit cleanly."""
    rates = pd.DataFrame(data={
        'DATE': [
            'ABC', '2019-05-02', '2019-05-03', '2019-05-06',
            '2019-05-07', '2019-05-08', '2019-05-09', '2019-05-10',
            '2019-05-13', '2019-05-14', '2019-05-15', '2019-05-16',
        ],
        'EUR': [0.88, 0.88, 0.90, 0.89, 0.91, 0.91,
                0.92, 0.91, 0.91, 0.90, 0.91, 0.89],
        'GBP': [0.78, 0.81, 0.8, 0.79, 0.795, 0.798,
                0.785, 0.777, 0.782, 0.788, 0.804, 0.79],
        'INR': [69.27, 69.30, 69.98, 69.56, 69.24, 69.11,
                69.32, 69.41, 69.64, 69.83, 69.44, 69.22],
    })
    arima_models = {
        'P': [1, 0, 0, 2, 0],
        'D': [1, 1, 1, 1, 1],
        'Q': [0, 0, 1, 0, 2],
    }
    # Pass a garbage value as the forecast period; the model is expected to
    # abort the program rather than return a result.
    with pytest.raises(SystemExit) as pytest_wrapped_e:
        evaluate_model(rates, "DEF", arima_models)
    assert pytest_wrapped_e.type == SystemExit
def test_evaluate_model_1():
    """Test that the output is correct when proper values are passed.

    Feeds a small exchange-rate frame and a set of ARIMA (P, D, Q) orders to
    evaluate_model and compares the returned MAPE columns against known values.
    """
    inputs = {
        'DATE': [
            '2019-05-01', '2019-05-02', '2019-05-03', '2019-05-06',
            '2019-05-07', '2019-05-08', '2019-05-09', '2019-05-10',
            '2019-05-13', '2019-05-14', '2019-05-15', '2019-05-16',
        ],
        'EUR': [0.88, 0.88, 0.90, 0.89, 0.91, 0.91,
                0.92, 0.91, 0.91, 0.90, 0.91, 0.89],
        'GBP': [0.78, 0.81, 0.8, 0.79, 0.795, 0.798,
                0.785, 0.777, 0.782, 0.788, 0.804, 0.79],
        'INR': [69.27, 69.30, 69.98, 69.56, 69.24, 69.11,
                69.32, 69.41, 69.64, 69.83, 69.44, 69.22],
    }
    rates = pd.DataFrame(data=inputs)
    ARIMA_models = {
        'P': [1, 0, 0, 2, 0],
        'D': [1, 1, 1, 1, 1],
        'Q': [0, 0, 1, 0, 2],
    }
    forecast_period = 3
    expected = {
        'P': [1, 0, 0, 2, 0],
        'D': [1, 1, 1, 1, 1],
        'Q': [0, 0, 1, 0, 2],
        'MAPE_INR': [0.469204, 0.477241, 0.370169, 0.245564, 0.355792],
        'MAPE_EUR': [2.281700, 1.944444, 3.387799, 0.620243, 1.302068],
        'MAPE_GBP': [2.549810, 1.448363, 1.751258, 2.291886, 0.701285],
    }
    expected_output = pd.DataFrame(data=expected)

    actual_result = evaluate_model(rates, forecast_period,
                                   ARIMA_models)[expected_output.columns]

    # Check type of the *computed* result. (Bug fix: the original asserted
    # isinstance(expected_output, ...), i.e. the hand-built expected frame,
    # which is trivially true and never exercised the function's return type.)
    assert isinstance(actual_result, pd.DataFrame)
    # Check expected output
    assert_frame_equal(expected_output, actual_result)
def test_evaluate_model():
    """Test the functionality of evaluate_model."""
    # Fixture data: predicted probabilities/classes and true labels.
    scores = pd.DataFrame({
        'pred_prob': [0.998, 0, 0.99, 0.914, 0.993,
                      0, 0.006, 0.999, 0.00046, 0.999],
        'pred': [1, 0, 1, 1, 1, 0, 0, 1, 0, 1],
    })
    labels = pd.DataFrame({'class': [0, 1, 0, 1, 0, 1, 0, 0, 1, 0]})

    # Build the reference confusion matrix directly from sklearn's output,
    # labelled the same way evaluate_model is expected to label it.
    expected_df = pd.DataFrame(
        confusion_matrix(labels, scores.iloc[:, 1]),
        index=['Actual Negative', 'Actual Positive'],
        columns=['Predicted Negative', 'Predicted Positive'])

    # raise AssertionError if dataframes do not match
    assert expected_df.equals(
        evaluate_model(labels, scores, metrics=["confusion_matrix"]))
# NOTE(review): fragment of a training loop — the enclosing function/loop
# headers are outside this view, so the nesting below is reconstructed from
# syntax; confirm against the full file.
utils.save_results(im_data, gt_data, density_map, output_dir)
log_text = 'epoch: %4d, step %4d, Time: %.4fs, gt_cnt: %4.1f, et_cnt: %4.1f' % (
    epoch, step, 1. / fps, gt_count, et_count)
log_print(log_text, color='green', attrs=['bold'])
re_cnt = True  # request a timer restart below

if re_cnt:
    # restart the step timer and clear the flag
    t.tic()
    re_cnt = False

if (epoch % 2 == 0):
    # Checkpoint every other epoch, then score the checkpoint on validation.
    save_name = os.path.join(
        output_dir, '{}_{}_{}.h5'.format(method, dataset_name, epoch))
    network.save_net(save_name, net)
    #calculate error on the validation dataset
    mae, mse = evaluate_model(save_name, data_loader_val)
    if mae < best_mae:
        # track the best checkpoint so far (lowest validation MAE)
        best_mae = mae
        best_mse = mse
        best_model = '{}_{}_{}.h5'.format(method, dataset_name, epoch)
    log_text = 'EPOCH: %d, MAE: %.1f, MSE: %0.1f' % (epoch, mae, mse)
    log_print(log_text, color='green', attrs=['bold'])
    log_text = 'BEST MAE: %0.1f, BEST MSE: %0.1f, BEST MODEL: %s' % (
        best_mae, best_mse, best_model)
    log_print(log_text, color='green', attrs=['bold'])
    if use_tensorboard:
        # mirror validation metrics and mean train loss to the dashboard
        exp.add_scalar_value('MAE', mae, step=epoch)
        exp.add_scalar_value('MSE', mse, step=epoch)
        exp.add_scalar_value('train_loss',
                             train_loss / data_loader.get_num_samples(),
                             step=epoch)
# NOTE(review): fragment of a training loop (MSCNN variant — note the extra
# "CrowdCounter_MSCNN" argument to evaluate_model). Enclosing headers are not
# visible; nesting is reconstructed from syntax — confirm against the full file.
utils.save_results(im_data,gt_data,density_map, output_dir)
log_text = 'epoch: %4d, step %4d, Time: %.4fs, gt_cnt: %4.1f, et_cnt: %4.1f' % (
    epoch, step, 1./fps, gt_count,et_count)
log_print(log_text, color='green', attrs=['bold'])
re_cnt = True  # request a timer restart below

if re_cnt:
    # restart the step timer and clear the flag
    t.tic()
    re_cnt = False

if (epoch % 2 == 0):
    # Checkpoint every other epoch, then score the checkpoint on validation.
    save_name = os.path.join(output_dir,
                             '{}_{}_{}.h5'.format(method,dataset_name,epoch))
    network.save_net(save_name, net)
    #calculate error on the validation dataset
    mae,mse = evaluate_model(save_name, data_loader_val, "CrowdCounter_MSCNN")
    if mae < best_mae:
        # track the best checkpoint so far (lowest validation MAE)
        best_mae = mae
        best_mse = mse
        best_model = '{}_{}_{}.h5'.format(method,dataset_name,epoch)
    log_text = 'EPOCH: %d, MAE: %.1f, MSE: %0.1f' % (epoch,mae,mse)
    log_print(log_text, color='green', attrs=['bold'])
    log_text = 'BEST MAE: %0.1f, BEST MSE: %0.1f, BEST MODEL: %s' % (
        best_mae,best_mse, best_model)
    log_print(log_text, color='green', attrs=['bold'])
    if use_tensorboard:
        # mirror validation metrics and mean train loss to the dashboard
        exp.add_scalar_value('MAE', mae, step=epoch)
        exp.add_scalar_value('MSE', mse, step=epoch)
        exp.add_scalar_value('train_loss',
                             train_loss/data_loader.get_num_samples(),
                             step=epoch)
print( 'epoch: %4d, step %6d, ground truth: %6.1f, estimate: %6.1f' % (epoch, step, ground_truth_count, estimate_count), flush=True) if is_save_model_in_epoch and number_of_train_samples % steps_to_save_model == 0: model_name = '{}_{}_{}.h5'.format(original_dataset_name, epoch, step) save_model_path = os.path.join(output_dir, model_name) network.save_net(save_model_path, net) # evaluate the model of this epoch evaluate_result_dict = dict() for data_name in best_result_dict: evaluate_data = all_data[data_name] result = evaluate_model(save_model_path, evaluate_data['data']) evaluate_result_dict[data_name] = result txt_log_info.append( 'evaluate %s on %s: mae: %6.2f, mse: %6.2f, psnr: %6.2f, ssim: %6.2f, game: %6.2f, %6.2f, %6.2f, %6.2f' % (result['name'], data_name, result['mae'], result['mse'], result['psnr'], result['ssim'], result['game_0'], result['game_1'], result['game_2'], result['game_3'])) # check if this model is new best model best_result_dict = compare_result(evaluate_result_dict, best_result_dict, key_error) for data_name in best_result_dict: result = best_result_dict[data_name] txt_log_info.append( 'best model on %s is %s with %s of %.2f' %
def main():
    """Evaluate the saved AsNet model on the held-out fake-face test set."""
    # Alternative data sources, kept for reference:
    # extract_face_from_image('zhen', 'zhen', False)
    # X_test, y_test = get_test_data_from_image()
    features, labels = get_X_y_test('test', 'fake_face')
    evaluate_model(features, labels, model_name='AsNet_2.npz')