Python write_dataframe Examples, lstchain.io.io.write_dataframe Python Examples

Example #1

0

Show file

def test_write_dataframe():
    """Round-trip a small DataFrame through ``write_dataframe``.

    The frame is written to a temporary HDF5 file and read back three ways
    (``tables.open_file``, ``pd.read_hdf`` and ``Table.read``); every read
    is compared against the original columns.
    """
    from lstchain.io.io import write_dataframe

    table_key = "data/awesome_table"
    columns = {
        "x": np.random.normal(size=10),
        "N": np.random.poisson(5, size=10),
    }
    df = pd.DataFrame(columns)

    with tempfile.NamedTemporaryFile() as tmp:
        path = tmp.name
        write_dataframe(df, path, table_key)

        with tables.open_file(path) as h5_file:
            data_group = h5_file.root.data
            # the group must contain the table and nothing else
            # (pandas, for example, writes additional _i_ tables)
            assert data_group._v_children.keys() == {"awesome_table"}

            stored = data_group.awesome_table[:]
            for name in df.columns:
                np.testing.assert_array_equal(stored[name], df[name])

        # the table must also be readable by pandas directly
        round_trip = pd.read_hdf(path, table_key)
        assert df.equals(round_trip)

        # ... and by astropy
        astropy_table = Table.read(path, table_key)
        for name in df.columns:
            np.testing.assert_array_equal(astropy_table[name], df[name])

Example #2

0

Show file

def main():
    """Add source-dependent parameters to an existing DL1 file.

    Reads the DL1 parameter table (``dl1_params_lstcam_key``) from
    ``args.input_file``, passes it to ``get_source_dependent_parameters``
    and writes the column-wise concatenation of the result back into the
    same file under ``dl1_params_src_dep_lstcam_key``, together with the
    configuration and the global metadata.

    If ``args.config_file`` is given but cannot be loaded, a warning is
    logged and the standard configuration is used.
    """
    import logging

    args = parser.parse_args()

    dl1_filename = os.path.abspath(args.input_file)

    # Start from the standard configuration; a custom file replaces it.
    config = get_standard_config()
    if args.config_file is not None:
        try:
            config = read_configuration_file(os.path.abspath(args.config_file))
        except Exception as err:
            # An `except "<message>"` clause is not valid: a string is not an
            # exception class, so Python raises TypeError as soon as loading
            # fails. Catch a real exception class, report the problem and
            # keep the standard configuration (the best-effort fallback the
            # handler was written for).
            logging.getLogger(__name__).warning(
                "Custom configuration could not be loaded !!! (%s)", err)

    dl1_params = pd.read_hdf(dl1_filename, key=dl1_params_lstcam_key)
    subarray_info = SubarrayDescription.from_hdf(dl1_filename)
    # Use the first allowed telescope when configured, telescope 1 otherwise.
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    src_dep_params = get_source_dependent_parameters(
        dl1_params, config, focal_length=focal_length)
    src_dep_df = pd.concat(src_dep_params, axis=1)

    metadata = global_metadata()
    write_dataframe(src_dep_df,
                    dl1_filename,
                    dl1_params_src_dep_lstcam_key,
                    config=config,
                    meta=metadata)

Example #3

0

Show file

File: test_io.py Project: gabemery/cta-lstchain

def test_write_dataframe():
    """Write a one-column frame and read the column back from the HDF5 file."""
    expected = np.ones(3)
    df = pd.DataFrame(expected, columns=['a'])
    with tempfile.NamedTemporaryFile() as tmp:
        path = tmp.name
        io.write_dataframe(df, path, 'data/awesome_table')
        with tables.open_file(path) as h5_file:
            stored = h5_file.root.data.awesome_table[:]
            np.testing.assert_array_equal(stored['a'], expected)

Example #4

0

Show file

File: lstchain_add_source_dependent_parameters.py Project: satoshifukami0115/cta-lstchain

def main():
    """Add source-dependent parameters to an existing DL1 file.

    Reads the DL1 parameter table (``dl1_params_lstcam_key``) from
    ``args.input_file``, computes the source-dependent parameters with
    ``get_source_dependent_parameters`` and writes the result back into the
    same file under ``dl1_params_src_dep_lstcam_key``.

    If ``args.config_file`` is given but cannot be loaded, a warning is
    logged and an empty configuration is used.
    """
    import logging

    dl1_filename = os.path.abspath(args.input_file)

    config = {}
    if args.config_file is not None:
        try:
            config = read_configuration_file(os.path.abspath(args.config_file))
        except Exception as err:
            # An `except "<message>"` clause is not valid: a string is not an
            # exception class, so Python raises TypeError as soon as loading
            # fails. Catch a real exception class, report the problem and
            # continue with the empty configuration (the best-effort fallback
            # the handler was written for).
            logging.getLogger(__name__).warning(
                "Custom configuration could not be loaded !!! (%s)", err)

    dl1_params = pd.read_hdf(dl1_filename, key=dl1_params_lstcam_key)
    src_dep_df = get_source_dependent_parameters(dl1_params, config)
    write_dataframe(src_dep_df, dl1_filename, dl1_params_src_dep_lstcam_key)

Example #5

0

Show file

File: test_io.py Project: garciagenrique/cta-lstchain

def test_write_dataframe_index():
    """Check that the index is written too when ``index=True`` is passed.

    The named index must be readable from the stored table under its own
    name, next to the regular columns.
    """
    from lstchain.io.io import write_dataframe

    columns = {
        'x': np.random.normal(size=10),
        'N': np.random.poisson(5, size=10),
    }
    df = pd.DataFrame(columns)
    df.index.name = 'event_id'

    with tempfile.NamedTemporaryFile() as tmp:
        path = tmp.name
        write_dataframe(df, path, 'data/awesome_table', index=True)

        with tables.open_file(path) as h5_file:
            stored = h5_file.root.data.awesome_table[:]

            # regular columns first ...
            for name in df.columns:
                np.testing.assert_array_equal(stored[name], df[name])

            # ... then the index, stored under the index name
            np.testing.assert_array_equal(stored['event_id'], df.index)

Example #6

0

Show file

File: test_io.py Project: Hckjs/cta-lstchain

def test_write_dataframe():
    """Round-trip a DataFrame, its config and metadata through ``write_dataframe``.

    Besides the column values, the test checks that every metadata entry
    and the configuration are found in the attributes of the stored table,
    and that the table can be read back with ``pd.read_hdf`` and
    ``Table.read``.
    """
    from lstchain.io import config, global_metadata
    from lstchain.io.io import write_dataframe

    table_key = "data/awesome_table"
    columns = {
        "x": np.random.normal(size=10),
        "N": np.random.poisson(5, size=10),
    }
    df = pd.DataFrame(columns)
    standard_config = config.get_standard_config()

    with tempfile.NamedTemporaryFile() as tmp:
        path = tmp.name
        meta = global_metadata()
        write_dataframe(
            df,
            path,
            table_key,
            config=standard_config,
            meta=meta,
        )

        with tables.open_file(path) as h5_file:
            data_group = h5_file.root.data
            # the group must contain the table and nothing else
            # (pandas, for example, writes additional _i_ tables)
            assert data_group._v_children.keys() == {"awesome_table"}

            stored = data_group.awesome_table[:]
            for name in df.columns:
                np.testing.assert_array_equal(stored[name], df[name])

            # global metadata and config must be stored as table attributes
            for key in meta.keys():
                assert meta[key] == h5_file.root.data.awesome_table.attrs[key]
            assert standard_config == h5_file.root.data.awesome_table.attrs["config"]

        # the table must also be readable by pandas directly
        round_trip = pd.read_hdf(path, table_key)
        assert df.equals(round_trip)

        # ... and by astropy
        astropy_table = Table.read(path, table_key)
        for name in df.columns:
            np.testing.assert_array_equal(astropy_table[name], df[name])

Example #7

0

Show file

File: lstchain_dl1_to_dl2.py Project: Hckjs/cta-lstchain

def main():
    """Apply the trained models to a DL1 file and write the DL2 file.

    Steps, in order:

    1. Build the configuration from ``standard_config`` and the optional
       custom file. If the custom file cannot be loaded, a warning is
       logged and only the standard configuration is used.
    2. Read the DL1 parameters from ``args.input_file`` (plus the likelihood
       fit parameters when ``lh_fit_config`` is configured).
    3. Apply the models found in ``args.path_models``, either once
       (source-independent) or once per assumed source position
       (source-dependent).
    4. Create the output file in ``args.output_dir``, copy the remaining
       DL1 nodes into it and write the DL2 tables.

    Raises
    ------
    ValueError
        If ``config['disp_method']`` is neither ``'disp_vector'`` nor
        ``'disp_norm_sign'``.
    IOError
        If the output file already exists.
    """
    import logging

    args = parser.parse_args()

    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(
                os.path.abspath(args.config_file))
        except Exception as err:
            # An `except "<message>"` clause is not valid: a string is not an
            # exception class, so Python raises TypeError as soon as loading
            # fails. Catch a real exception class, report the problem and
            # continue without custom settings (the best-effort fallback the
            # handler was written for).
            logging.getLogger(__name__).warning(
                "Custom configuration could not be loaded !!! (%s)", err)

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    if 'lh_fit_config' in config.keys():
        lhfit_data = pd.read_hdf(args.input_file,
                                 key=dl1_likelihood_params_lstcam_key)
        # Drop the event identifiers from the likelihood table only when
        # they match the ones of the main table row by row.
        if np.all(lhfit_data['obs_id'] == data['obs_id']) & np.all(
                lhfit_data['event_id'] == data['event_id']):
            lhfit_data.drop(['obs_id', 'event_id'], axis=1, inplace=True)
        # Remembered so these columns can be split off again when writing.
        lhfit_keys = lhfit_data.keys()
        data = pd.concat([data, lhfit_data], axis=1)

    # if real data, add deltat t to dataframe keys
    data = add_delta_t_key(data)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is a least one good pointing value to interp from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    # Get trained RF path for reconstruction:
    file_reg_energy = os.path.join(args.path_models, 'reg_energy.sav')
    file_cls_gh = os.path.join(args.path_models, 'cls_gh.sav')

    # Keyword arguments selecting the disp model(s) handed to apply_models.
    disp_method = config['disp_method']
    if disp_method == 'disp_vector':
        disp_models = {
            'reg_disp_vector': os.path.join(args.path_models,
                                            'reg_disp_vector.sav'),
        }
    elif disp_method == 'disp_norm_sign':
        disp_models = {
            'reg_disp_norm': os.path.join(args.path_models,
                                          'reg_disp_norm.sav'),
            'cls_disp_sign': os.path.join(args.path_models,
                                          'cls_disp_sign.sav'),
        }
    else:
        # Fail before anything is written: with any other value no model is
        # applied and the function would only fail later on an unbound
        # variable.
        raise ValueError(
            "Unknown disp_method: {!r}".format(disp_method))

    subarray_info = SubarrayDescription.from_hdf(args.input_file)
    # Use the first allowed telescope when configured, telescope 1 otherwise.
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    # Parameters passed to filter_events as `finite_params`.
    finite_params = (config['energy_regression_features'] +
                     config['disp_regression_features'] +
                     config['particle_classification_features'] +
                     config['disp_classification_features'])

    # Apply the models to the data

    # Source-independent analysis
    if not config['source_dependent']:
        data = filter_events(
            data,
            filters=config["events_filters"],
            finite_params=finite_params,
        )

        dl2 = dl1_to_dl2.apply_models(data,
                                      file_cls_gh,
                                      file_reg_energy,
                                      focal_length=focal_length,
                                      custom_config=config,
                                      **disp_models)

    # Source-dependent analysis
    if config['source_dependent']:

        # if source-dependent parameters are already in dl1 data, just read those data.
        if dl1_params_src_dep_lstcam_key in get_dataset_keys(args.input_file):
            data_srcdep = get_srcdep_params(args.input_file)

        # if not, source-dependent parameters are added now
        else:
            data_srcdep = pd.concat(dl1_to_dl2.get_source_dependent_parameters(
                data, config, focal_length=focal_length),
                                    axis=1)

        dl2_srcdep_dict = {}
        srcindep_keys = data.keys()
        srcdep_assumed_positions = data_srcdep.columns.levels[0]

        for i, k in enumerate(srcdep_assumed_positions):
            data_with_srcdep_param = pd.concat([data, data_srcdep[k]], axis=1)
            data_with_srcdep_param = filter_events(
                data_with_srcdep_param,
                filters=config["events_filters"],
                finite_params=finite_params,
            )

            dl2_df = dl1_to_dl2.apply_models(data_with_srcdep_param,
                                             file_cls_gh,
                                             file_reg_energy,
                                             focal_length=focal_length,
                                             custom_config=config,
                                             **disp_models)

            # Columns that were not in the source-independent input are the
            # ones stored per assumed source position.
            dl2_srcdep = dl2_df.drop(srcindep_keys, axis=1)
            dl2_srcdep_dict[k] = dl2_srcdep

            # The source-independent columns are taken from the first
            # assumed position only.
            if i == 0:
                dl2_srcindep = dl2_df[srcindep_keys]

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(
        args.output_dir,
        os.path.basename(args.input_file).replace('dl1', 'dl2', 1))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    dl1_keys = get_dataset_keys(args.input_file)

    # These DL1 tables are not copied to the output file.
    for excluded_key in (dl1_images_lstcam_key,
                         dl1_params_lstcam_key,
                         dl1_params_src_dep_lstcam_key,
                         dl1_likelihood_params_lstcam_key):
        if excluded_key in dl1_keys:
            dl1_keys.remove(excluded_key)

    metadata = global_metadata()
    write_metadata(metadata, output_file)

    with open_file(args.input_file, 'r') as h5in:
        with open_file(output_file, 'a') as h5out:

            # Write the selected DL1 info
            for k in dl1_keys:
                if not k.startswith('/'):
                    k = '/' + k

                path, name = k.rsplit('/', 1)
                if path not in h5out:
                    grouppath, groupname = path.rsplit('/', 1)
                    g = h5out.create_group(grouppath,
                                           groupname,
                                           createparents=True)
                else:
                    g = h5out.get_node(path)

                h5in.copy_node(k, g, overwrite=True)

    # need container to use lstchain.io.add_global_metadata and lstchain.io.add_config_metadata
    if not config['source_dependent']:
        if 'lh_fit_config' not in config.keys():
            write_dl2_dataframe(dl2, output_file, config=config, meta=metadata)
        else:
            # Likelihood fit columns go to their own table.
            dl2_onlylhfit = dl2[lhfit_keys]
            dl2.drop(lhfit_keys, axis=1, inplace=True)
            write_dl2_dataframe(dl2, output_file, config=config, meta=metadata)
            write_dataframe(dl2_onlylhfit,
                            output_file,
                            dl2_likelihood_params_lstcam_key,
                            config=config,
                            meta=metadata)

    else:
        write_dl2_dataframe(dl2_srcindep,
                            output_file,
                            config=config,
                            meta=metadata)
        write_dataframe(pd.concat(dl2_srcdep_dict, axis=1),
                        output_file,
                        dl2_params_src_dep_lstcam_key,
                        config=config,
                        meta=metadata)

Example #8

0

Show file

File: lstchain_dl1_to_dl2.py Project: garciagenrique/cta-lstchain

def main():
    """Apply the trained models to a DL1 file and write the DL2 file.

    Steps, in order:

    1. Build the configuration from ``standard_config`` and the optional
       custom file. If the custom file cannot be loaded, a warning is
       logged and only the standard configuration is used.
    2. Read the DL1 parameters from ``args.input_file``.
    3. Load the three models from ``args.path_models`` and apply them,
       either once (source-independent) or once per first-level column
       group of the source-dependent table (source-dependent).
    4. Create the output file in ``args.output_dir``, copy the remaining
       DL1 nodes into it and write the DL2 tables.

    Raises
    ------
    IOError
        If the output file already exists.
    """
    import logging

    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(
                os.path.abspath(args.config_file))
        except Exception as err:
            # An `except "<message>"` clause is not valid: a string is not an
            # exception class, so Python raises TypeError as soon as loading
            # fails. Catch a real exception class, report the problem and
            # continue without custom settings (the best-effort fallback the
            # handler was written for).
            logging.getLogger(__name__).warning(
                "Custom configuration could not be loaded !!! (%s)", err)

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    # if real data, add deltat t to dataframe keys
    data = add_delta_t_key(data)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is a least one good pointing value to interp from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    # Load the trained RF for reconstruction.
    # os.path.join avoids hand-built separators (and a path rooted at "/"
    # when path_models is empty).
    file_reg_energy = os.path.join(args.path_models, "reg_energy.sav")
    file_reg_disp_vector = os.path.join(args.path_models,
                                        "reg_disp_vector.sav")
    file_cls_gh = os.path.join(args.path_models, "cls_gh.sav")

    # NOTE(security): joblib.load unpickles the files, which can execute
    # arbitrary code; only load models from a trusted location.
    reg_energy = joblib.load(file_reg_energy)
    reg_disp_vector = joblib.load(file_reg_disp_vector)
    cls_gh = joblib.load(file_cls_gh)

    # Parameters passed to filter_events as `finite_params`.
    finite_params = (config['regression_features'] +
                     config['classification_features'])

    # Apply the models to the data

    # Source-independent analysis
    if not config['source_dependent']:
        data = filter_events(
            data,
            filters=config["events_filters"],
            finite_params=finite_params,
        )

        dl2 = dl1_to_dl2.apply_models(data,
                                      cls_gh,
                                      reg_energy,
                                      reg_disp_vector,
                                      custom_config=config)

    # Source-dependent analysis
    if config['source_dependent']:
        data_srcdep = pd.read_hdf(args.input_file,
                                  key=dl1_params_src_dep_lstcam_key)
        # Column labels are read as strings of the form "('a', 'b')";
        # strip the parentheses, quotes and blanks and split on the comma
        # to rebuild a two-level column index.
        data_srcdep.columns = pd.MultiIndex.from_tuples([
            tuple(col[1:-1].replace('\'', '').replace(' ', '').split(","))
            for col in data_srcdep.columns
        ])

        dl2_srcdep_dict = {}

        for i, k in enumerate(data_srcdep.columns.levels[0]):
            data_with_srcdep_param = pd.concat([data, data_srcdep[k]], axis=1)
            data_with_srcdep_param = filter_events(
                data_with_srcdep_param,
                filters=config["events_filters"],
                finite_params=finite_params,
            )
            dl2_df = dl1_to_dl2.apply_models(data_with_srcdep_param,
                                             cls_gh,
                                             reg_energy,
                                             reg_disp_vector,
                                             custom_config=config)

            # Everything that was not in the source-independent input is
            # stored per first-level key.
            dl2_srcdep = dl2_df.drop(data.keys(), axis=1)
            dl2_srcdep_dict[k] = dl2_srcdep

            # The source-independent table is built from the first key only,
            # by removing that key's source-dependent columns.
            if i == 0:
                dl2_srcindep = dl2_df.drop(data_srcdep[k].keys(), axis=1)

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(
        args.output_dir,
        os.path.basename(args.input_file).replace('dl1', 'dl2'))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    dl1_keys = get_dataset_keys(args.input_file)

    # These DL1 tables are not copied to the output file.
    for excluded_key in (dl1_images_lstcam_key,
                         dl1_params_lstcam_key,
                         dl1_params_src_dep_lstcam_key):
        if excluded_key in dl1_keys:
            dl1_keys.remove(excluded_key)

    with open_file(args.input_file, 'r') as h5in:
        with open_file(output_file, 'a') as h5out:

            # Write the selected DL1 info
            for k in dl1_keys:
                if not k.startswith('/'):
                    k = '/' + k

                path, name = k.rsplit('/', 1)
                if path not in h5out:
                    grouppath, groupname = path.rsplit('/', 1)
                    g = h5out.create_group(grouppath,
                                           groupname,
                                           createparents=True)
                else:
                    g = h5out.get_node(path)

                h5in.copy_node(k, g, overwrite=True)

    if not config['source_dependent']:
        write_dl2_dataframe(dl2, output_file)

    else:
        write_dl2_dataframe(dl2_srcindep, output_file)
        write_dataframe(pd.concat(dl2_srcdep_dict, axis=1), output_file,
                        dl2_params_src_dep_lstcam_key)