def test_seperation_results():
    """Train the separation model through its CLI and check its predictions.

    The example gamma and proton files are copied into a temporary
    directory, the ``train_separation_model`` command is invoked on them,
    and the pickled model is then applied to both files.  The resulting
    predictions are compared against the reference columns stored in
    ``tests/expected_results.csv``.
    """
    from aict_tools.scripts.train_separation_model import main as train
    from aict_tools.io import read_telescope_data
    from aict_tools.apply import predict_separator
    import joblib
    from aict_tools.configuration import AICTConfig

    configuration_path = 'examples/full_config.yaml'
    expectation = pd.read_csv('tests/expected_results.csv')

    with tempfile.TemporaryDirectory(prefix='aict_tools_test_') as d:
        gamma_path = os.path.join(d, 'gamma.hdf5')
        proton_path = os.path.join(d, 'proton.hdf5')
        model_path = os.path.join(d, 'test.pkl')

        # Work on copies so the example files are never touched by the CLI.
        shutil.copy('examples/gamma.hdf5', gamma_path)
        shutil.copy('examples/proton.hdf5', proton_path)

        runner = CliRunner()
        result = runner.invoke(train, [
            configuration_path,
            gamma_path,
            proton_path,
            os.path.join(d, 'test.hdf5'),
            model_path,
        ])
        assert result.exit_code == 0

        config = AICTConfig.from_yaml(configuration_path)
        # The model under test is the separator, so columns, features and
        # feature generation must come from the separator section of the
        # configuration (not from the energy section).
        model_config = config.separator
        model = joblib.load(model_path)

        df = read_telescope_data(
            proton_path,
            config,
            model_config.columns_to_read_apply,
            feature_generation_config=model_config.feature_generation,
        )
        protons_prediction = predict_separator(
            df[model_config.features],
            model,
        )

        df = read_telescope_data(
            gamma_path,
            config,
            model_config.columns_to_read_apply,
            feature_generation_config=model_config.feature_generation,
        )
        gammas_prediction = predict_separator(
            df[model_config.features],
            model,
        )

        np.testing.assert_array_almost_equal(
            protons_prediction,
            expectation['separator_prediction_on_protons'],
        )
        np.testing.assert_array_almost_equal(
            gammas_prediction,
            expectation['separator_prediction_on_gammas'],
        )
def test_read_chunks_cta_dl1(cta_file, cta_config):
    """Chunked and one-shot reads of the CTA DL1 file must yield equal frames.

    The file is read once in chunks of 500 rows and once in a single call,
    both restricted to the same column selection.  The concatenated chunks
    are compared to the full frame, and the telescope ids present in the
    full frame are compared to the ids parsed from ``cta_config.telescopes``.
    """
    from aict_tools.io import read_telescope_data, read_telescope_data_chunked
    import pandas as pd
    from pandas.testing import assert_frame_equal

    # choose some columns from different tables in the file
    columns = [
        "true_energy",
        "azimuth",
        "equivalent_focal_length",
        "hillas_width",
        "tel_id",
        "event_id",
        "obs_id",
    ]
    chunk_size = 500
    cta_file = str(cta_file)

    chunks = []
    for chunk, _, _ in read_telescope_data_chunked(
        cta_file, cta_config, chunk_size, columns=columns
    ):
        chunks.append(chunk)
    chunked_frame = pd.concat(chunks)

    full_frame = read_telescope_data(cta_file, cta_config, columns=columns)
    assert_frame_equal(chunked_frame, full_frame)

    # make sure we only loaded the telescopes we wanted
    loaded_tel_ids = full_frame.tel_id.unique()
    # the id is the integer that follows the first underscore of each entry
    wanted_tel_ids = [int(entry.split("_")[1]) for entry in cta_config.telescopes]
    np.testing.assert_array_equal(loaded_tel_ids, wanted_tel_ids)
def test_read_chunks_cta(cta_file, cta_config, chunk_size):
    """Chunked and one-shot reads of the CTA file must yield equal frames.

    The file is read in chunks of ``chunk_size`` rows and in a single call,
    both restricted to the same three columns.  The concatenated chunks
    (with a fresh index) are compared to the frame from the single read.
    """
    from aict_tools.io import read_telescope_data, read_telescope_data_chunked
    import pandas as pd
    # pandas.util.testing was deprecated in pandas 1.0 and removed in 2.0;
    # pandas.testing is the supported public location of this helper.
    from pandas.testing import assert_frame_equal

    columns = ['width', 'num_triggered_telescopes', 'telescope_id']

    generator = read_telescope_data_chunked(
        cta_file, cta_config, chunk_size, columns=columns
    )
    # Each item yielded by the generator is a 3-tuple; only the frame is used.
    df1 = pd.concat([df for df, _, _ in generator]).reset_index(drop=True)
    df2 = read_telescope_data(cta_file, cta_config, columns=columns)

    assert_frame_equal(df1, df2)
def test_read_default_columns_chunked(hdf5_file):
    """Chunked and one-shot reads must agree when no columns are specified.

    ``hdf5_file`` is unpacked into a path, a table name and a configuration;
    only the path and the configuration are needed here.  The file is read
    in chunks of 100 rows and in a single call, and both results are
    compared after their indices have been reset.
    """
    from aict_tools.io import read_telescope_data, read_telescope_data_chunked
    import pandas as pd
    # pandas.util.testing was deprecated in pandas 1.0 and removed in 2.0;
    # pandas.testing is the supported public location of this helper.
    from pandas.testing import assert_frame_equal

    path, _, config = hdf5_file

    generator = read_telescope_data_chunked(path, config, 100)
    # Each item yielded by the generator is a 3-tuple; only the frame is used.
    df_chunked = pd.concat([df for df, _, _ in generator]).reset_index(drop=True)

    df = read_telescope_data(path, config).reset_index(drop=True)

    assert_frame_equal(df, df_chunked)
def test_energy_regression_results():
    """Train the energy regressor through its CLI and check its predictions.

    The example gamma file is copied into a temporary directory, the
    ``train_energy_regressor`` command is run on that copy, and the pickled
    model is applied to the same data.  The predictions are compared with
    the ``energy_prediction`` column of ``tests/expected_results.csv``.
    """
    from aict_tools.scripts.train_energy_regressor import main as train
    from aict_tools.io import read_telescope_data
    from aict_tools.apply import predict_energy
    import joblib
    from aict_tools.configuration import AICTConfig

    configuration_path = "examples/full_config.yaml"

    with tempfile.TemporaryDirectory(prefix="aict_tools_test_") as workdir:
        data_path = os.path.join(workdir, "gamma.hdf5")
        model_path = os.path.join(workdir, "test.pkl")
        # Train on a copy so the example file itself is left alone.
        shutil.copy("examples/gamma.hdf5", data_path)

        cli_arguments = [
            configuration_path,
            data_path,
            os.path.join(workdir, "test.hdf5"),
            model_path,
        ]
        outcome = CliRunner().invoke(train, cli_arguments)
        assert outcome.exit_code == 0

        config = AICTConfig.from_yaml(configuration_path)
        energy_config = config.energy
        regressor = joblib.load(model_path)

        events = read_telescope_data(
            data_path,
            config,
            energy_config.columns_to_read_apply,
            feature_generation_config=energy_config.feature_generation,
        )
        features = events[energy_config.features]
        energy_prediction = predict_energy(
            features,
            regressor,
            log_target=energy_config.log_target,
        )

        reference = pd.read_csv("tests/expected_results.csv")
        np.testing.assert_array_almost_equal(
            energy_prediction,
            reference["energy_prediction"],
        )
def test_read_telescope_data_feature_gen(hdf5_file, fact_config):
    """Reading with feature generation yields exactly features plus target.

    The training columns of the energy configuration are read with that
    configuration's feature generation enabled.  The resulting frame must
    contain the configured features and the target column and nothing
    else, and it must include the generated ``area`` column.
    """
    from aict_tools.io import read_telescope_data

    columns_to_read = fact_config.energy.columns_to_read_train
    path, _table, _config = hdf5_file

    frame = read_telescope_data(
        path,
        fact_config,
        columns=columns_to_read,
        feature_generation_config=fact_config.energy.feature_generation,
    )

    expected_columns = set(fact_config.energy.features)
    expected_columns = expected_columns | {fact_config.energy.target_column}
    assert set(frame.columns) == expected_columns

    # new column with name 'area' should exist after feature generation
    assert 'area' in frame.columns
def test_read_chunks(hdf5_file):
    """Chunked and one-shot reads of two columns must yield equal frames.

    ``hdf5_file`` is unpacked into a path, a table name and a configuration;
    only the path and the configuration are needed here.  The file is read
    in chunks of 125 rows, every chunk is checked to be non-empty, and the
    concatenated chunks are compared to a single full read after both
    indices have been reset.
    """
    from aict_tools.io import read_telescope_data_chunked, read_telescope_data
    import pandas as pd
    # pandas.util.testing was deprecated in pandas 1.0 and removed in 2.0;
    # pandas.testing is the supported public location of this helper.
    from pandas.testing import assert_frame_equal

    path, _, config = hdf5_file
    cols = ['width', 'length']

    chunk_size = 125
    generator = read_telescope_data_chunked(path, config, chunk_size, cols)

    dfs = []
    # Each item yielded by the generator is a 3-tuple; only the frame is used.
    for df, _, _ in generator:
        dfs.append(df)
        # The reader should never hand out a chunk without rows.
        assert not df.empty

    df_chunked = pd.concat(dfs).reset_index(drop=True)
    df = read_telescope_data(path, config, columns=cols).reset_index(drop=True)

    assert_frame_equal(df, df_chunked)
import argparse import numpy as np if __name__ == '__main__': parser = argparse.ArgumentParser(description='PATH AND STUFF') parser.add_argument('df_path', type=str) parser.add_argument('config_path', type=str) parser.add_argument('output_folder', type=str) args = parser.parse_args() #df_array = read_data(args.df_path, 'array_events') #df_tel = read_data(args.df_path, 'telescope_events') #df_tel = df_tel.merge(df_array[['run_id', 'array_event_id', 'mc_energy', 'mc_alt', 'mc_az', 'num_triggered_telescopes']], on=['run_id', 'array_event_id'], how='left') config = AICTConfig.from_yaml(args.config_path) df_tel = read_telescope_data(args.df_path, config) theta = angular_separation( df_tel['mc_az'].values * u.deg, df_tel['mc_alt'].values * u.deg, df_tel['source_az_prediction'].values * u.deg, df_tel['source_alt_prediction'].values * u.deg).to(u.deg) df_tel['tel_theta'] = theta true_x, true_y = horizontal_to_camera_cta_simtel(df_tel) true_disp, true_sign = calc_true_disp(true_x, true_y, df_tel['x'], df_tel['y'], np.deg2rad(df_tel['psi'])) predicted_sign_mask = (df_tel['disp_prediction'] > 0) true_sign_mask = (true_sign > 0) correct_mask = (predicted_sign_mask == true_sign_mask) for tel_id in df_tel['telescope_type_id'].unique():