Python li_ma_significance Beispiele, fact.analysis.li_ma_significance Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: detection_gridsearch.py Projekt: KevSed/OpenData_PhotonStream_Analysis

def main(data_path, key):
    """Grid-search the cuts that maximise the Li&Ma significance.

    Scans prediction thresholds from 0.75 up to (excluding) 1 and theta²
    cuts from 0.1 down to (excluding) 0 in steps of 0.001, and prints the
    combination with the highest significance.

    Parameters:
        data_path: path of the HDF5 file handed to ``read_h5py``.
        key: name of the HDF5 group holding the events.
    """
    events = read_h5py(data_path, key=key, columns=columns)

    theta2_cuts = np.arange(0.1, 0.0, -0.001)
    prediction_thresholds = np.arange(0.75, 1, 0.001)

    max_significance = 0
    # Pre-bind the results so the final prints cannot hit an unbound name
    # when no grid point passes the comparison (e.g. all-NaN significances).
    best_threshold = None
    best_theta2_cut = None

    selected = events
    for threshold in tqdm(prediction_thresholds):
        # Thresholds ascend, so narrowing the previous selection gives the
        # same rows as filtering the full table again, only cheaper.
        selected = selected.query('gamma_prediction >= {}'.format(threshold))

        theta2_on = selected.theta_deg**2
        theta2_off = pd.concat(
            [selected['theta_deg_off_{}'.format(i)] for i in range(1, 6)])**2

        for theta2_cut in theta2_cuts:
            # Cuts descend, so the arrays can likewise be narrowed in place.
            theta2_on = theta2_on[theta2_on <= theta2_cut]
            theta2_off = theta2_off[theta2_off <= theta2_cut]

            n_on = len(theta2_on)
            n_off = len(theta2_off)

            # 0.2 is the on/off exposure ratio for the five off columns.
            sig = li_ma_significance(n_on, n_off, 0.2)
            if sig >= max_significance:
                max_significance = sig
                best_threshold = threshold
                best_theta2_cut = theta2_cut

    print('Threshold:', best_threshold)
    print('θ² cut:   ', best_theta2_cut)
    print('Li&Ma    :', max_significance)

Beispiel #2

0

Datei anzeigen

def calculate_significance(signal_events,
                           background_events,
                           theta_cut,
                           alpha=0.2):
    """Return the Li&Ma significance for the given events and theta cut.

    The on and off counts are the first and third of the four values
    returned by ``calculate_n_on_n_off``; the other two are ignored.
    """
    counts = calculate_n_on_n_off(
        signal_events,
        background_events,
        theta_cut,
        alpha=alpha,
    )
    n_on, _, n_off, _ = counts
    return li_ma_significance(n_on, n_off, alpha=alpha)

Beispiel #3

0

Datei anzeigen

Datei: sensitivity_fixed_theta.py Projekt: kbruegge/cta_performance_plots

def find_best_prediction_cut(prediction_cuts,
                             signal_events,
                             background_events,
                             angular_resolution,
                             alpha=1,
                             silent=False):
    """Pick the prediction cut with the smallest relative sensitivity.

    For every candidate cut the signal and off counts, the relative
    sensitivity and the Li&Ma significance are computed. Cuts rejected by
    ``check_validity`` get significance 0 and infinite relative sensitivity.

    Returns:
        ``(best_prediction_cut, best_significance,
        best_relative_sensitivity)``, or three NaNs when every candidate
        ended up with a significance of exactly 0.
    """
    results = []
    for pc in tqdm(prediction_cuts, disable=silent):
        signal_mask = signal_events.gamma_prediction_mean >= pc
        selected_signal = signal_events[signal_mask]
        background_mask = background_events.gamma_prediction_mean >= pc
        selected_background = background_events[background_mask]

        # NOTE(review): the theta cuts are derived from the *selected*
        # events, but the counts below are taken from the full, unselected
        # tables -- confirm against calculate_n_signal / calculate_n_off
        # that this is intended.
        signal_theta_cut = angular_resolution(
            selected_signal.gamma_energy_prediction_mean)
        n_signal, n_signal_count = calculate_n_signal(
            signal_events, signal_theta_cut)

        background_theta_cut = angular_resolution(
            selected_background.gamma_energy_prediction_mean)
        n_off, n_off_count, total_bkg_counts = calculate_n_off(
            background_events, background_theta_cut, alpha=alpha)

        # On-region expectation: signal plus the scaled background.
        n_on = n_signal + alpha * n_off
        n_on_count = n_signal_count + alpha * n_off_count

        relative_sensitivity = find_relative_sensitivity(
            n_signal, n_off, alpha=alpha)
        significance = li_ma_significance(n_on, n_off, alpha=alpha)

        is_valid = check_validity(
            n_signal,
            n_off,
            total_bkg_counts=total_bkg_counts,
            alpha=alpha,
        )
        if not is_valid:
            significance = 0
            relative_sensitivity = np.inf

        results.append(
            [relative_sensitivity, significance, pc, n_on_count, n_off_count])

    relative_sensitivities = np.array([row[0] for row in results])
    significances = np.array([row[1] for row in results])

    if np.all(significances == 0):
        return np.nan, np.nan, np.nan

    # Smaller relative sensitivity is better; NaN entries are skipped.
    best_row = results[np.nanargmin(relative_sensitivities)]
    best_relative_sensitivity = best_row[0]
    best_significance = best_row[1]
    best_prediction_cut = best_row[2]
    return best_prediction_cut, best_significance, best_relative_sensitivity

Beispiel #4

0

Datei anzeigen

def main(gammas, protons, output):
    """Print the weighted on/off counts and Li&Ma significance per energy bin.

    Parameters:
        gammas: path of the gamma file; its ``array_events`` and ``runs``
            tables are both read.
        protons: path of the proton file; same two tables are read.
        output: if truthy, the current matplotlib figure is saved there,
            otherwise it is shown.
    """
    t_obs = 50 * u.h

    # Keep the path arguments untouched: the original code rebound them to
    # the event tables and then passed those DataFrames to ``read_data``
    # when loading the ``runs`` tables.
    gamma_events = fact.io.read_data(gammas, key='array_events').dropna()
    gamma_runs = fact.io.read_data(gammas, key='runs')
    mc_production_gamma = MCSpectrum.from_cta_runs(gamma_runs)

    proton_events = fact.io.read_data(protons, key='array_events').dropna()
    proton_runs = fact.io.read_data(protons, key='runs')
    mc_production_proton = MCSpectrum.from_cta_runs(proton_runs)

    crab = CrabSpectrum()
    cosmic = CosmicRaySpectrum()

    # Reweight the simulated events to the assumed observation time.
    gamma_events['weight'] = mc_production_gamma.reweigh_to_other_spectrum(
        crab, gamma_events.mc_energy.values * u.TeV, t_assumed_obs=t_obs)
    proton_events['weight'] = mc_production_proton.reweigh_to_other_spectrum(
        cosmic, proton_events.mc_energy.values * u.TeV, t_assumed_obs=t_obs)

    bin_edges, _, _ = make_energy_bins(
        gamma_events.mc_energy.values * u.TeV, bins=20)
    on, off, alpha = coordinates.split_on_off(gamma_events,
                                              proton_events,
                                              on_region_radius=0.4 * u.deg)
    print(f'alpha:{alpha}')
    on['energy_bin'] = pd.cut(on.mc_energy, bin_edges)
    off['energy_bin'] = pd.cut(off.mc_energy, bin_edges)
    for ((_, g_on), (_, g_off)) in zip(on.groupby('energy_bin'),
                                       off.groupby('energy_bin')):
        n_on = g_on.weight.sum()
        n_off = g_off.weight.sum()
        print('----' * 20)
        print(n_on, n_off)
        print(g_on.size, g_off.size)
        # NOTE(review): alpha is fixed to 1 here although split_on_off
        # returned its own alpha above -- confirm which one is intended.
        print(li_ma_significance(n_on, n_off, alpha=1))

    if output:
        plt.savefig(output)
    else:
        plt.show()

Beispiel #5

0

Datei anzeigen

def model_significance(estimator, data):
    '''
    Evaluate the significance of a trained model on a given dataset.

    Note that a ``gamma_prediction`` column is written into ``data``.

    Parameters:
        estimator: sklearn.model
            Trained model that can make predictions on the dataset.
        data: pd.DataFrame
            The dataset on which the significance is calculated.
    Returns:
        max(significance): float
            Maximal significance reached over 99 thresholds in [0.01, 0.99].
    '''
    feature = load_feature()
    # Second column of the predict_proba output is used as the gamma score.
    data['gamma_prediction'] = estimator.predict_proba(data[feature])[:, 1]

    def significance_at(threshold):
        # Li&Ma significance of the events scoring above ``threshold``.
        gamma_like = data.query('gamma_prediction >' + threshold.astype(str))
        on_data, off_data = split_on_off_source_independent(
            gamma_like, theta2_cut=0.03)
        return li_ma_significance(len(on_data), len(off_data), 0.2)

    return max(
        significance_at(threshold)
        for threshold in np.linspace(0.01, 0.99, 99)
    )

Beispiel #6

0

Datei anzeigen

def plot_significance(estimator, data, save=True, path='significance.pdf'):
    '''
    Plot the significance as a function of the prediction threshold.

    Note that a ``gamma_prediction`` column is written into ``data``.

    Parameters:
        estimator: sklearn.model
            Trained model that can make predictions on the dataset.
        data: pd.DataFrame
            The dataset on which the significance is calculated.
        save: bool
            If equal to True, title and axis labels are set and the
            figure is written to ``path``.
        path: str
            Output file used when ``save`` is set.
    '''
    feature = load_feature()
    # Second column of the predict_proba output is used as the gamma score.
    data['gamma_prediction'] = estimator.predict_proba(data[feature])[:, 1]

    thresholds = np.linspace(0.01, 0.99, 99)
    significance = []
    for threshold in thresholds:
        gamma_like = data.query('gamma_prediction >' + threshold.astype(str))
        on_data, off_data = split_on_off_source_independent(
            gamma_like, theta2_cut=0.03)
        significance.append(
            li_ma_significance(len(on_data), len(off_data), 0.2))

    plt.plot(thresholds, significance)

    # Equality with True (not plain truthiness) is kept deliberately so
    # that existing callers see unchanged behaviour.
    if save == True:  # noqa: E712
        best = round(max(significance), 2)
        plt.title('max(' + str(best) + ')')
        plt.xlabel('threshold')
        plt.ylabel('confidence')
        plt.savefig(path)

Beispiel #7

0

Datei anzeigen

Datei: plot_theta_squared.py Projekt: kbruegge/cnn_cherenkov

def main(predictions, threshold, theta_cut, net):
    """Draw the theta² distribution of on and off events and show it.

    Parameters:
        predictions: file handed to ``fio.read_data`` (``events`` key).
        threshold: events must score strictly above this value.
        theta_cut: cut on theta (not theta²) used for the on/off counts;
            its square is marked in the plot.
        net: if truthy, select on ``predictions_convnet`` instead of
            ``gamma_prediction``.
    """
    bins = 40
    alpha = 0.2
    limits = [0, 0.3]

    df = fio.read_data(predictions, key='events')
    print(df.columns)

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)

    # Pick the score column (and matching messages) for the chosen model.
    if net:
        message = 'using cnn predictions'
        score_column = 'predictions_convnet'
        title = 'Neural Net predictions'
    else:
        message = 'using standard predictions'
        score_column = 'gamma_prediction'
        title = 'RF predictions'
    print(message)
    selected = df.query('{} > {}'.format(score_column, threshold))
    ax.set_title(title)

    theta_on = selected.theta_deg
    off_columns = ['theta_deg_off_' + str(i) for i in range(1, 6)]
    theta_off = pd.concat([selected[name] for name in off_columns])

    theta2_on = theta_on.apply(lambda t: t**2).values
    theta2_off = theta_off.apply(lambda t: t**2).values

    h_on, bin_edges = np.histogram(theta2_on, bins=bins, range=limits)
    # Off events are scaled by alpha (0.2 here) while being drawn.
    h_off, bin_edges, _ = ax.hist(
        theta2_off,
        bins=bin_edges,
        range=limits,
        weights=np.full(len(theta_off), alpha),
        histtype='stepfilled',
        color='lightgray',
    )

    bin_width = np.diff(bin_edges)
    bin_center = bin_edges[1:] - bin_width * 0.5
    half_width = bin_width / 2

    ax.errorbar(
        bin_center,
        h_on,
        yerr=np.sqrt(h_on) / 2,
        xerr=half_width,
        linestyle='',
        label='On',
    )
    ax.errorbar(
        bin_center,
        h_off,
        yerr=alpha * np.sqrt(h_off) / 2,
        xerr=half_width,
        linestyle='',
        label='Off',
        color='darkgray',
    )

    ax.axvline(theta_cut**2, color='black', alpha=0.3, linestyle='--')

    n_on = np.sum(theta_on < theta_cut)
    n_off = np.sum(theta_off < theta_cut)
    significance = li_ma_significance(n_on, n_off, alpha=alpha)

    print('N_on', n_on)
    print('N_off', n_off)
    print('Li&Ma: {}'.format(significance))

    stats_text = stats_box_template.format(
        n_on=n_on,
        n_off=n_off,
        alpha=alpha,
        n_excess=n_on - alpha * n_off,
        n_excess_err=np.sqrt(n_on + alpha**2 * n_off),
        significance=significance,
    )
    ax.text(
        0.5,
        0.95,
        stats_text,
        transform=ax.transAxes,
        va='top',
        ha='center',
    )

    ax.set_xlim(*limits)
    ax.legend(loc='lower right')
    fig.tight_layout(pad=0)

    plt.show()

Beispiel #8

0

Datei anzeigen

Datei: plot_theta_squared.py Projekt: nbiederbeck/fact_plots

def main(data_path, threshold, theta2_cut, key, bins, alpha, start, end,
         preliminary, ymax, config, output):
    '''
    Given the DATA_PATH to a data hdf5 file (e.g. the output of ERNAs gather scripts)
    this script will create the infamous theta square plot.

    This plot shows the events of (selected gamma-like) events which have been
    reconstructed as coming from the source region and the one coming from a
    (more or less arbitrary) off region.

    In a traditional IACT analysis this plot is used to calculate the significance of
    detection.

    The HDF files are expected to have a group called 'runs' and an events
    group (named by KEY).
    The events group has to have the columns:
        'theta_deg',
        'theta_deg_off_1',
        'theta_deg_off_2',
        'theta_deg_off_3',
        'theta_deg_off_4',
        'theta_deg_off_5',

    If a prediction threshold is to be used, also 'gamma_prediction',
    must be in the group.
    The 'gamma_prediction' column can be added to the data using
    'klaas_apply_separation_model' for example.

    The off events are weighted with ALPHA in the histogram and ALPHA is
    also used for the Li&Ma significance. If OUTPUT is given the figure is
    saved there, otherwise it is shown.
    '''
    if config:
        with open(config) as f:
            plot_config.update(yaml.safe_load(f))

    theta_cut = np.sqrt(theta2_cut)

    # Work on a per-call copy: appending to the module-level ``columns``
    # list would add another 'gamma_prediction' entry on every call.
    event_columns = list(columns)
    if threshold > 0.0:
        event_columns.append('gamma_prediction')

    events = read_h5py(data_path, key=key, columns=event_columns)

    if start or end:
        events['timestamp'] = read_timestamp(data_path)

    try:
        runs = read_h5py(data_path, key='runs')
        runs['run_start'] = pd.to_datetime(runs['run_start'])
        runs['run_stop'] = pd.to_datetime(runs['run_stop'])
    except IOError:
        # Fall back to an empty run table so the stats box can still be built.
        runs = pd.DataFrame(
            columns=['run_start', 'run_stop', 'ontime', 'source'])

    if start is not None:
        events = events.query('timestamp >= @start')
        runs = runs.query('run_start >= @start')
    if end is not None:
        events = events.query('timestamp <= @end')
        runs = runs.query('run_stop <= @end')

    if threshold > 0:
        selected = events.query('gamma_prediction >= {}'.format(threshold))
    else:
        selected = events
    theta_on = selected.theta_deg
    theta_off = pd.concat(
        [selected['theta_deg_off_{}'.format(i)] for i in range(1, 6)])

    del events

    # Adjust the bin width so that theta2_cut falls exactly on a bin edge.
    max_theta2 = 0.3
    width = max_theta2 / bins
    rounded_width = theta2_cut / np.round(theta2_cut / width)
    bins = np.arange(0, max_theta2 + 0.1 * rounded_width, rounded_width)

    print('Using {} bins to get theta_cut on a bin edge'.format(len(bins) - 1))

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    h_on, bin_edges = np.histogram(
        theta_on.apply(lambda x: x**2).values,
        bins=bins,
    )
    # Scale the off events with the requested alpha rather than a fixed
    # 0.2, so the histogram agrees with the numbers in the stats box.
    h_off, bin_edges, _ = ax.hist(
        theta_off.apply(lambda x: x**2).values,
        bins=bin_edges,
        weights=np.full(len(theta_off), alpha),
        histtype='stepfilled',
        color='lightgray',
        zorder=0,
    )

    bin_center = bin_edges[1:] - np.diff(bin_edges) * 0.5
    bin_width = np.diff(bin_edges)

    ax.errorbar(
        bin_center,
        h_on,
        yerr=np.sqrt(h_on),
        xerr=bin_width / 2,
        linestyle='',
        label='On',
    )

    ax.errorbar(bin_center,
                h_off,
                yerr=alpha * np.sqrt(h_off),
                xerr=bin_width / 2,
                linestyle='',
                label='Off',
                zorder=1)

    ax.axvline(theta_cut**2, color='black', alpha=0.3, linestyle='--')

    n_on = np.sum(theta_on < theta_cut)
    n_off = np.sum(theta_off < theta_cut)
    significance = li_ma_significance(n_on, n_off, alpha=alpha)

    print('N_on', n_on)
    print('N_off', n_off)
    print('Li&Ma: {}'.format(significance))

    ax.text(
        0.5,
        0.95,
        stats_box_template.format(
            source=runs.source.iloc[0] if len(runs) > 0 else '',
            # ontime is divided by 3600 for display (seconds to hours).
            t_obs=runs.ontime.sum() / 3600,
            n_on=n_on,
            n_off=n_off,
            alpha=alpha,
            n_excess=n_on - alpha * n_off,
            n_excess_err=np.sqrt(n_on + alpha**2 * n_off),
            significance=significance,
        ),
        transform=ax.transAxes,
        va='top',
        ha='center',
    )

    if preliminary:
        add_preliminary(
            plot_config['preliminary_position'],
            size=plot_config['preliminary_size'],
            color=plot_config['preliminary_color'],
            ax=ax,
        )

    if ymax:
        ax.set_ylim(0, ymax)

    ax.set_xlim(0, bins.max())
    ax.set_xlabel(plot_config['xlabel'])
    ax.legend(loc=plot_config['legend_loc'])
    fig.tight_layout(pad=0)

    if output:
        fig.savefig(output, dpi=300)
    else:
        plt.show()

Beispiel #9

0

Datei anzeigen

Datei: 08a_Plot_Theta.py Projekt: JMBehnken/Gamma_Hadron_Separation_With_Deep_Learning_for_the_First_G-APD_Cherenkov_Telescope

def theta_square_plot(theta2_cut=0.8,
                      data_path=plotting_path,
                      key='events',
                      start=None,
                      end=None,
                      threshold=0.5,
                      bins=40,
                      alpha=0.2,
                      output=False):
    """Draw a theta² plot with a Li&Ma significance box.

    Parameters:
        theta2_cut: cut on theta²; ignored (set to infinity) when the file
            contains source dependent predictions.
        data_path: HDF5 file with an events group and a ``runs`` group.
        key: name of the events group.
        start, end: optional time limits applied to events and runs.
        threshold: minimum ``gamma_prediction`` of selected events.
        bins: number of histogram bins.
        alpha: weight of the off events, used for the histogram and for
            the significance.
        output: if truthy, the figure is saved there; otherwise nothing
            is shown or saved.
    """
    import pandas as pd
    import matplotlib.pyplot as plt
    import numpy as np
    import h5py

    from fact.io import read_h5py
    from fact.analysis import (
        li_ma_significance,
        split_on_off_source_dependent,
    )

    columns = [
        'gamma_prediction',
        'theta_deg',
        'theta_deg_off_1',
        'theta_deg_off_2',
        'theta_deg_off_3',
        'theta_deg_off_4',
        'theta_deg_off_5',
        'unix_time_utc',
    ]

    stats_box_template = r'''Source: {source}, $t_\mathrm{{obs}} = {t_obs:.2f}\,\mathrm{{h}}$
    $N_\mathrm{{On}} = {n_on}$, $N_\mathrm{{Off}} = {n_off}$, $\alpha = {alpha}$
    $N_\mathrm{{Exc}} = {n_excess:.1f} \pm {n_excess_err:.1f}$, $S_\mathrm{{Li&Ma}} = {significance:.1f}\,\sigma$
    '''

    theta_cut = np.sqrt(theta2_cut)

    # Off-position predictions in the file mark a source dependent analysis.
    with h5py.File(data_path, 'r') as f:
        source_dependent = 'gamma_prediction_off_1' in f[key].keys()

    if source_dependent:
        print('Separation was using source dependent features')
        columns.extend('gamma_prediction_off_' + str(i) for i in range(1, 6))
        theta_cut = np.inf
        theta2_cut = np.inf

    # Read the events from the same group that was inspected above.
    events = read_h5py(data_path, key=key, columns=columns)
    events['timestamp'] = pd.to_datetime(
        events['unix_time_utc_0'] * 1e6 + events['unix_time_utc_1'],
        unit='us',
    )
    # NOTE(review): ``runs`` is loaded and time-filtered but not used for
    # the stats box below, which shows fixed source and t_obs values.
    runs = read_h5py(data_path, key='runs')
    runs['run_start'] = pd.to_datetime(runs['run_start'])
    runs['run_stop'] = pd.to_datetime(runs['run_stop'])

    if start is not None:
        events = events.query('timestamp >= @start')
        runs = runs.query('run_start >= @start')
    if end is not None:
        events = events.query('timestamp <= @end')
        runs = runs.query('run_stop <= @end')

    if source_dependent:
        on_data, off_data = split_on_off_source_dependent(events, threshold)
        theta_on = on_data.theta_deg
        theta_off = off_data.theta_deg
    else:
        selected = events.query('gamma_prediction >= {}'.format(threshold))
        theta_on = selected.theta_deg
        theta_off = pd.concat(
            [selected['theta_deg_off_{}'.format(i)] for i in range(1, 6)])

    del events

    if source_dependent:
        limits = [
            0,
            max(
                np.percentile(theta_on, 99)**2,
                np.percentile(theta_off, 99)**2),
        ]
    else:
        limits = [0, 0.3]

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    h_on, bin_edges = np.histogram(theta_on.apply(lambda x: x**2).values,
                                   bins=bins,
                                   range=limits)
    # Weight the off events with ``alpha`` rather than a fixed 0.2 so the
    # histogram matches the significance computed below.
    h_off, bin_edges, _ = ax.hist(
        theta_off.apply(lambda x: x**2).values,
        bins=bin_edges,
        range=limits,
        weights=np.full(len(theta_off), alpha),
        histtype='stepfilled',
        color='lightgray',
    )

    bin_center = bin_edges[1:] - np.diff(bin_edges) * 0.5
    bin_width = np.diff(bin_edges)

    ax.errorbar(
        bin_center,
        h_on,
        yerr=np.sqrt(h_on) / 2,
        xerr=bin_width / 2,
        linestyle='',
        label='On',
    )
    ax.errorbar(
        bin_center,
        h_off,
        yerr=alpha * np.sqrt(h_off) / 2,
        xerr=bin_width / 2,
        linestyle='',
        label='Off',
    )

    if not source_dependent:
        ax.axvline(theta_cut**2, color='gray', linestyle='--')

    n_on = np.sum(theta_on < theta_cut)
    n_off = np.sum(theta_off < theta_cut)
    significance = li_ma_significance(n_on, n_off, alpha=alpha)

    ax.text(
        0.5,
        0.95,
        stats_box_template.format(
            source='Crab',
            t_obs=83.656,
            n_on=n_on,
            n_off=n_off,
            alpha=alpha,
            n_excess=n_on - alpha * n_off,
            n_excess_err=np.sqrt(n_on + alpha**2 * n_off),
            significance=significance,
        ),
        transform=ax.transAxes,
        fontsize=12,
        va='top',
        ha='center',
    )

    ax.set_xlabel(r'$(\theta / {}^\circ )^2$')
    ax.legend()
    fig.tight_layout()
    plt.xlim(0.0, 0.3)

    # Without an output path the figure is neither shown nor saved.
    if output:
        fig.savefig(output, dpi=300)
Beispiel #10

0

Datei anzeigen

Datei: create_truee_files.py Projekt: fact-project/truee-wrapper

def main(data_path, gamma_path, corsika_path, config_template, output_base,
         threshold, theta2_cut, gamma_fraction, title, start, end, zd_min,
         zd_max):
    """Write the on, off and MC ROOT files plus a filled-in config file.

    Events are selected by prediction threshold (and by theta² cut unless
    the data file contains source dependent predictions), restricted to the
    requested time and zenith range, and written to
    ``<output_base>_on.root``, ``_off.root`` and ``_mc.root``. The template
    at ``config_template`` is formatted and saved as
    ``<output_base>.config``.

    Returns early, without writing the config, when the table ``'table'``
    is missing from ``corsika_path``.
    """
    # Off-position predictions in the file mark a source dependent analysis.
    with h5py.File(data_path, 'r') as f:
        source_dependent = 'gamma_prediction_off_1' in f['events'].keys()

    # Use per-call lists; extending the module-level lists in place would
    # change them for every later call.
    extra_columns = list(other_columns)
    if source_dependent:
        extra_columns.extend(bg_prediction_columns)
        theta2_cut = np.inf
        print('Source dependent separation, ignoring theta cut')

    # sqrt(inf) is inf, so this also covers the source dependent case.
    theta_cut = np.sqrt(theta2_cut)

    data = read_h5py(data_path,
                     key='events',
                     columns=data_columns + output_columns + extra_columns)

    gammas = read_h5py(
        gamma_path,
        key='events',
        columns=mc_columns + output_columns + extra_columns,
    )
    gammas.rename(
        columns={'corsika_evt_header_total_energy': 'true_energy'},
        inplace=True,
    )

    runs = read_h5py(data_path, key='runs')

    data['timestamp'] = pd.to_datetime(
        data['unix_time_utc_0'] * 1e6 + data['unix_time_utc_1'],
        unit='us',
    )

    if start:
        data = data.query('timestamp >= @start')
        runs = runs.query('run_start >= @start')
    if end:
        data = data.query('timestamp <= @end')
        # NOTE(review): compares run_start (not run_stop) with ``end`` --
        # confirm this is the intended run selection.
        runs = runs.query('run_start <= @end')

    min_zenith = runs.zenith.min()
    max_zenith = runs.zenith.max()

    # Tighten the zenith range with the optional user limits.
    if zd_min:
        min_zenith = max(min_zenith, zd_min)

    if zd_max:
        max_zenith = min(max_zenith, zd_max)

    print('Zenith range of the input data:', min_zenith, max_zenith)

    if source_dependent:
        on_data, off_data = split_on_off_source_dependent(data, threshold)
        on_gammas = gammas.query('gamma_prediction >= {}'.format(threshold))
    else:
        on_data, off_data = split_on_off_source_independent(
            data.query('gamma_prediction >= {}'.format(threshold)),
            theta2_cut=theta2_cut,
        )
        on_gammas = gammas.query(
            '(theta_deg <= {}) & (gamma_prediction >= {})'.format(
                theta_cut,
                threshold,
            ))

    query = '(zd_tracking >= {}) and (zd_tracking <= {})'.format(
        min_zenith, max_zenith)
    on_gammas = on_gammas.query(query).copy()

    # theta_deg is written out as well, without touching the shared list.
    written_columns = output_columns + ['theta_deg']
    on_gammas = on_gammas.loc[:, written_columns + ['true_energy']]
    on_data = on_data.loc[:, written_columns + data_columns]
    off_data = off_data.loc[:, written_columns + data_columns]

    off_data['weight'] = 0.2
    on_data['weight'] = 1.0
    on_gammas['weight'] = 1.0

    rpd.to_root(on_data, output_base + '_on.root', key='events')
    rpd.to_root(off_data, output_base + '_off.root', key='events')
    rpd.to_root(on_gammas, output_base + '_mc.root', key='events')

    print('N_on: {}'.format(len(on_data)))
    print('N_off: {}'.format(len(off_data)))
    print('S(Li&Ma): {}'.format(
        li_ma_significance(len(on_data), len(off_data), 0.2)))
    print('N_mc: {}'.format(len(on_gammas)))

    # Excess events relative to the number of selected simulated gammas.
    n_excess = len(on_data) - 0.2 * len(off_data)
    fraction = n_excess / len(on_gammas)

    print('N_excess:', n_excess)
    print('Fraction: {:1.4f}'.format(fraction))

    with open(config_template) as f:
        template = f.read()

    t_obs = runs.ontime.sum()

    try:
        corsika = pd.read_hdf(corsika_path, key='table')
    except KeyError:
        # Open read-only and close the handle again after listing the keys.
        with h5py.File(corsika_path, 'r') as f:
            print("given key not in file: possible keys are: {}".format(
                list(f.keys())))
        return

    corsika['zenith'] = np.rad2deg(corsika['zenith'])
    corsika = corsika.query('(zenith >= {}) and (zenith <= {})'.format(
        min_zenith, max_zenith))
    print('Simulated events after zenith cut: {}'.format(len(corsika)))

    config = template.format(
        t_obs=t_obs,
        selection_fraction=gamma_fraction,
        n_gamma=len(corsika),
        source_file_on=output_base + '_on.root',
        source_file_off=output_base + '_off.root',
        source_file_mc=output_base + '_mc.root',
        tree_name='events',
        output_file=output_base + '_result.root',
        fraction=fraction,
        min_zenith=min_zenith,
        max_zenith=max_zenith,
        title=title,
    )

    with open(output_base + '.config', 'w') as f:
        f.write(config)

Beispiel #11

0

Datei anzeigen

# Fit both mc_* classifiers on mc_data, using its 'label' column as target.
for clf in (mc_Tree, mc_xgbc):
    clf.fit(mc_data.drop('label', axis=1), mc_data.label)

# Score the evaluation sample with all four classifiers; the second column
# of the predict_proba output is kept.
pred_mess_tree, pred_mess_xgbc, pred_mc_tree, pred_mc_xgbc = (
    model.predict_proba(eval_data[feature])[:, 1]
    for model in (mess_Tree, mess_xgbc, mc_Tree, mc_xgbc)
)

sig_mess_tree, sig_mess_xgbc, sig_mc_tree, sig_mc_xgbc = [], [], [], []

# Each prediction array is paired with the list collecting its
# significances; the order matches the sequence of the original scan.
scan_targets = (
    (pred_mess_tree, sig_mess_tree),
    (pred_mess_xgbc, sig_mess_xgbc),
    (pred_mc_tree, sig_mc_tree),
    (pred_mc_xgbc, sig_mc_xgbc),
)
for threshold in np.linspace(0.01, 0.99, 99):
    for scores, significances in scan_targets:
        on_data, off_data = split_on_off_source_independent(
            eval_data[threshold <= scores], theta2_cut=0.03)
        significances.append(
            li_ma_significance(len(on_data), len(off_data), 0.2))

data = pd.DataFrame({
    'sig_mess_tree': np.transpose(sig_mess_tree),
    'sig_mess_xgbc': np.transpose(sig_mess_xgbc),
    'sig_mc_tree': np.transpose(sig_mc_tree),
    'sig_mc_xgbc': np.transpose(sig_mc_xgbc)

Beispiel #12

0

Datei anzeigen

Datei: calc_significance.py Projekt: fact-project/open_data_talk

from fact.analysis import li_ma_significance, split_on_off_source_independent
from fact.io import read_data


# Load the events table of the Crab DL3 file.
df = read_data('crab_gammas_dl3.hdf5', key='events')


# Split the events scoring above 0.85 into on and off samples; 0.025 is
# passed as the second positional argument (presumably the theta² cut --
# confirm against split_on_off_source_independent).
on, off = split_on_off_source_independent(
    df.query('gamma_prediction > 0.85'),
    0.025,
)

# Compute the value before opening the file so that a failure here cannot
# leave a half-written .tex file behind.
significance = li_ma_significance(len(on), len(off), 0.2)

# The output contains a non-ASCII 'σ'; write it as UTF-8 explicitly instead
# of relying on the platform's default encoding.
with open('build/significance.tex', 'w', encoding='utf-8') as f:
    f.write(r'\SI{' + '{:.1f}'.format(significance) + r'}{σ}')

Beispiel #13

0

Datei anzeigen

def _target(scaling_factor, n_signal, n_background, alpha=0.2, sigma=5):
    """Return the squared gap between ``sigma`` and the Li&Ma significance.

    The on count is the alpha-scaled background plus the signal multiplied
    by ``scaling_factor``; the off count is the unscaled background.
    """
    expected_background = n_background * alpha
    scaled_signal = n_signal * scaling_factor
    significance = li_ma_significance(
        expected_background + scaled_signal,
        n_background,
        alpha=alpha,
    )
    deviation = sigma - significance
    return deviation**2