def save_livetime_plot(df, config, time_bins):
    fig, axarr = plt.subplots(3, 4, figsize=(10, 8), sharex=True, sharey=True)
    for month, ax in zip(df.index, axarr.flatten()):
        row = df.loc[month]
        counts = row['counts']
        I0_fit = row['I0_fit']
        T_fit = row['T_fit']
        livetime = row['livetime']
        livetime_err = row['livetime_err']
        livetime_str = 'Livetime [s]:\n{:0.2e} +/- {:0.1f}'.format(
            livetime, livetime_err)
        # Plot time difference histogram and corresponding fit
        comp.plot_steps(time_bins, counts, ax=ax)
        time_midpoints = (time_bins[1:] + time_bins[:-1]) / 2
        ax.plot(time_midpoints,
                livetime_fit_func(time_midpoints, I0_fit, T_fit),
                marker='None', ls='-', c='C1')
        month_str = datetime.date(2000, month, 1).strftime('%B')
        ax.set_title(month_str)
        ax.set_xlim((0, 2))
        ax.set_yscale('log', nonposy='clip')
        ax.text(0.6, 2.5e5, livetime_str, fontsize=10)
        ax.grid()
    fig.text(0.5, 0, 'Time between events [s]', ha='center', fontsize=16)
    fig.text(0, 0.5, 'Counts', va='center', rotation='vertical', fontsize=16)
    plt.tight_layout()
    outfile = os.path.join(comp.paths.figures_dir, 'livetime',
                           'livetime-array-{}.png'.format(config))
    comp.check_output_dir(outfile)
    plt.savefig(outfile)
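# livetime_fit_func is used above but not defined in this section. For a
# Poisson process, the time between consecutive events is exponentially
# distributed, so a minimal sketch consistent with the two fit parameters
# (I0_fit, T_fit) would be:
import numpy as np

def livetime_fit_func(t, I0, T):
    # counts(t) = I0 * exp(-t / T), with T the mean time between events
    return I0 * np.exp(-t / T)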
import argparse
import os
import time
from collections import defaultdict

import pandas as pd

from icecube import dataio, dataclasses, icetray, phys_services
from icecube.frame_object_diff.segments import uncompress
from I3Tray import *

import comptools as comp

if __name__ == "__main__":
    # Setup global path names
    comp.check_output_dir(comp.paths.comp_data_dir)

    p = argparse.ArgumentParser(
        description='Saves tank xy coordinates for plotting purposes')
    p.add_argument('-o', '--outfile', dest='outfile',
                   default=os.path.join(comp.paths.comp_data_dir,
                                        'tankcoordinates.hdf'),
                   help='Output file')
    args = p.parse_args()

    t0 = time.time()

    file_list = ['/data/ana/CosmicRay/IceTop_level3/sim/IC79/GCD/Level3_7006_GCD.i3.gz']

    tray = I3Tray()
    tray.Add('I3Reader', FileNameList=file_list)
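    # The fragment above ends before the module that actually harvests the
    # tank coordinates. A minimal sketch of one way to finish the script,
    # assuming the standard IceTop geometry layout (I3Geometry.stationgeo
    # maps station IDs to lists of I3TankGeo objects, each carrying an
    # I3Position); the function name and HDF layout below are illustrative,
    # not from the original source.
    def extract_tank_xy(frame, coords):
        # Runs on the Geometry frame; collect the x-y position of every tank
        geometry = frame['I3Geometry']
        for station_id, station in geometry.stationgeo:
            for tank in station:
                coords['x'].append(tank.position.x)
                coords['y'].append(tank.position.y)

    coords = defaultdict(list)
    tray.Add(extract_tank_xy, 'extract_tank_xy',
             coords=coords,
             Streams=[icetray.I3Frame.Geometry])
    tray.Execute()

    pd.DataFrame(coords).to_hdf(args.outfile, 'dataframe', mode='w')
    print('Took {:0.2f} seconds'.format(time.time() - t0))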
               default=False, action='store_true',
               help='Option to overwrite reference map file, '
                    'if it already exists')
args = p.parse_args()

# profile = LineProfiler(anisotropy.make_skymaps, get_random_times,
#                        get_batch_start_stop_rows)
# profile.enable_by_count()

if args.outfile_sample_0 is None or args.outfile_sample_1 is None:
    raise ValueError('Expecting two output files to be specified')
else:
    for outfile in [args.outfile_sample_0, args.outfile_sample_1]:
        comp.check_output_dir(outfile)

# Load DataFrame for config
df_file = os.path.join(comp.paths.comp_data_dir,
                       args.config + '_data',
                       'anisotropy_dataframe.hdf')
with pd.HDFStore(df_file, mode='r') as store:
    n_rows = store.get_storer('dataframe').nrows

start_row, stop_row = get_batch_start_stop_rows(n_rows, args.n_batches,
                                                args.batch_idx)

splits = train_test_split(np.arange(n_rows), test_size=0.4,
                          random_state=args.trial_idx)
np.random.seed(args.trial_idx)
for split_idx, split_indices in enumerate(splits):
    # print('On split {}...'.format(split_idx))
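# get_batch_start_stop_rows is used above but not defined in this section.
# A minimal sketch, assuming batches are contiguous, (nearly) equal-sized
# row ranges over the stored DataFrame:
def get_batch_start_stop_rows(n_rows, n_batches, batch_idx):
    # Return the [start, stop) row range for batch batch_idx of n_batches
    if not 0 <= batch_idx < n_batches:
        raise ValueError('batch_idx must be in [0, n_batches)')
    batch_size = -(-n_rows // n_batches)  # ceiling division
    start_row = batch_idx * batch_size
    stop_row = min(start_row + batch_size, n_rows)
    return start_row, stop_row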
p.add_argument('--outfile', dest='outfile',
               help='Output reference map file')
p.add_argument('--overwrite', dest='overwrite',
               default=False, action='store_true',
               help='Option to overwrite reference map file, '
                    'if it already exists')
args = p.parse_args()

if args.outfile is None:
    raise ValueError('Outfile must be specified')
else:
    comp.check_output_dir(args.outfile)

# Load DataFrame for config
df_file = os.path.join(comp.paths.comp_data_dir,
                       args.config + '_data',
                       'anisotropy_dataframe.hdf')
nrows = get_nrows(df_file)
data_df = get_dataframe_batch(df_file, args.n_splits, args.split_idx,
                              nrows=nrows)
times = get_random_times(df_file, data_df.shape[0], n_resamples=20,
                         nrows=nrows)
mask = np.ones(data_df.shape[0], dtype=bool)
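# get_nrows and get_dataframe_batch are not defined in this fragment. A
# minimal sketch using the pandas HDFStore API (the batching convention is
# an assumption; see the get_batch_start_stop_rows sketch above):
import pandas as pd

def get_nrows(df_file):
    # Number of rows in the stored table, without loading it into memory
    with pd.HDFStore(df_file, mode='r') as store:
        return store.get_storer('dataframe').nrows

def get_dataframe_batch(df_file, n_splits, split_idx, nrows=None):
    # Load only the contiguous block of rows belonging to this split
    if nrows is None:
        nrows = get_nrows(df_file)
    start_row, stop_row = get_batch_start_stop_rows(nrows, n_splits, split_idx)
    with pd.HDFStore(df_file, mode='r') as store:
        return store.select('dataframe', start=start_row, stop=stop_row)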
                    dest='output',
                    help='Path to output hdf5 file')
args = parser.parse_args()

# Validate user input
if args.type == 'sim' and args.config not in comp.simfunctions.get_sim_configs():
    raise ValueError('Invalid simulation config {} entered'.format(args.config))
elif args.type == 'data' and args.config not in comp.datafunctions.get_data_configs():
    raise ValueError('Invalid data config {} entered'.format(args.config))
if args.sim is not None and args.type == 'data':
    raise ValueError('Cannot process detector data when a simulation '
                     'dataset is specified')

comp.check_output_dir(args.output)

print('\ninput:\n\t{}'.format(args.input))
print('\noutput:\n\t{}'.format(args.output))

with comp.localized(inputs=args.input, output=args.output) as (inputs, output):
    print('local inputs:\n{}'.format(inputs))
    print('local output:\n{}'.format(output))
    df = process_i3_hdf(input_file=inputs,
                        config=args.config,
                        datatype=args.type,
                        sim=args.sim)
    df.to_hdf(output, key='dataframe', mode='w', format='table')
                    help='Sigmoid function to fit to effective area')
args = parser.parse_args()

config = args.config
num_groups = args.num_groups
sigmoid = args.sigmoid
n_samples = args.n_samples

eff_fit = fit_efficiencies(df_file=args.df_file,
                           config=config,
                           num_groups=num_groups,
                           sigmoid=sigmoid,
                           n_samples=n_samples)
print(eff_fit)

eff_outfile = os.path.join(
    comp.paths.comp_data_dir, config, 'efficiencies',
    'efficiency_fit_num_groups_{}_sigmoid-{}.hdf'.format(num_groups, sigmoid),
)
comp.check_output_dir(eff_outfile)

# Only want to save fitted efficiencies for energies in analysis range
bin_midpoints_mask = np.logical_and(
    bin_midpoints >= energybins.log_energy_min,
    bin_midpoints <= energybins.log_energy_max)
# eff_fit.loc[bin_midpoints_mask, :]
#        .reset_index(drop=True)
#        .to_hdf(eff_outfile, 'dataframe')
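# The --sigmoid option selects the functional form fit to the detection
# efficiency turn-on. Two common parametrizations, as a hedged sketch (the
# names and signatures are illustrative; fit_efficiencies may use others):
import numpy as np

def sigmoid_flat(log_energy, eff_max, mu, sigma):
    # Efficiency rises to a constant plateau eff_max
    return eff_max / (1 + np.exp(-(log_energy - mu) / sigma))

def sigmoid_slant(log_energy, eff_max, mu, sigma, slope):
    # Same turn-on, but the plateau is allowed a linear tilt in log-energy
    plateau = eff_max + slope * (log_energy - mu)
    return plateau / (1 + np.exp(-(log_energy - mu) / sigma))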
true_target = df_sim_test['comp_target_{}'.format(num_groups)].values
res_normalized, res_normalized_err = comp.normalized_response_matrix(
    true_energy=log_true_energy_sim_test,
    reco_energy=log_reco_energy_sim_test,
    true_target=true_target,
    pred_target=pred_target,
    efficiencies=efficiencies,
    efficiencies_err=efficiencies_err,
    energy_bins=energybins.log_energy_bins)

res_mat_outfile = os.path.join(comp.paths.comp_data_dir, config, 'unfolding',
                               'response_{}-groups.txt'.format(num_groups))
res_mat_err_outfile = os.path.join(
    comp.paths.comp_data_dir, config, 'unfolding',
    'response_err_{}-groups.txt'.format(num_groups))
comp.check_output_dir(res_mat_outfile)
comp.check_output_dir(res_mat_err_outfile)
np.savetxt(res_mat_outfile, res_normalized)
np.savetxt(res_mat_err_outfile, res_normalized_err)

# Priors array
print('Calculating priors...')
priors_list = [
    'H3a',
    'H4a',
    'simple_power_law',
    'broken_power_law',
]

color_dict = comp.get_color_dict()
for prior_name, marker in zip(priors_list, '.^*ox'):
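# comp.normalized_response_matrix comes from the author's comptools package.
# As a rough sketch of the underlying idea only (not the actual
# implementation): histogram (true, reco) bin pairs in the joint
# energy-and-composition binning, then scale each true-bin column so it
# sums to that bin's detection efficiency.
import numpy as np

def sketch_response_matrix(true_bin, reco_bin, efficiencies, n_bins):
    # true_bin / reco_bin: per-event joint (energy, composition) bin indices
    response = np.zeros((n_bins, n_bins))
    np.add.at(response, (reco_bin, true_bin), 1)
    col_sums = response.sum(axis=0)
    col_sums[col_sums == 0] = 1  # avoid 0/0 in empty true bins
    return response * (efficiencies / col_sums)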
ax1.legend()

for idx, config in enumerate(args.config):
    if config == 'IC86.2012':
        continue
    counts = config_counts_dict[config]
    frequency = counts / np.sum(counts)
    frequency_err = np.sqrt(counts) / np.sum(counts)

    counts_2012 = config_counts_dict['IC86.2012']
    frequency_2012 = counts_2012 / np.sum(counts_2012)
    frequency_err_2012 = np.sqrt(counts_2012) / np.sum(counts_2012)

    ratio, ratio_err = comp.analysis.ratio_error(frequency, frequency_err,
                                                 frequency_2012,
                                                 frequency_err_2012)
    plotting.plot_steps(energybins.log_energy_bins, ratio, yerr=ratio_err,
                        color='C{}'.format(idx), label=config, alpha=0.8,
                        ax=ax2)

ax2.axhline(1, marker='None', linestyle='-.', color='k', lw=1.5)
ax2.set_ylabel('$\mathrm{f/f_{2012}}$')
# ax2.set_ylabel('Ratio with IC86.2012')
ax2.set_xlabel('$\mathrm{\log_{10}(E_{reco}/GeV)}$')
# ax2.set_ylim(0)
ax2.set_xlim(energybins.log_energy_min, energybins.log_energy_max)
ax2.grid()

energy_dist_outfile = os.path.join(comp.paths.figures_dir,
                                   'yearly_data_comparisons',
                                   'energy_dist.png')
comp.check_output_dir(energy_dist_outfile)
plt.savefig(energy_dist_outfile)
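# comp.analysis.ratio_error presumably implements standard uncorrelated
# error propagation for a ratio r = a/b; a minimal sketch of that formula:
import numpy as np

def ratio_error(a, a_err, b, b_err):
    # delta(a/b) = |a/b| * sqrt((da/a)^2 + (db/b)^2) for uncorrelated errors
    ratio = a / b
    ratio_err = np.abs(ratio) * np.sqrt((a_err / a)**2 + (b_err / b)**2)
    return ratio, ratio_err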
    score_counts_2012 = np.histogram(config_scores_dict['IC86.2012'],
                                     bins=score_bins)[0]
    score_freq_2012 = score_counts_2012 / np.sum(score_counts_2012)
    score_freq_err_2012 = np.sqrt(score_counts_2012) / np.sum(score_counts_2012)

    ratio, ratio_err = comp.analysis.ratio_error(score_freq, score_freq_err,
                                                 score_freq_2012,
                                                 score_freq_err_2012)
    plotting.plot_steps(score_bins, ratio, yerr=ratio_err,
                        color='C{}'.format(idx), label=config, alpha=0.8,
                        ax=ax2)

ax2.axhline(1, marker='None', linestyle='-.', color='k', lw=1.5)
ax2.set_ylabel('$\mathrm{f/f_{2012}}$')
# ax2.set_ylabel('Ratio with IC86.2012')
ax2.set_xlabel('BDT score')
# ax2.set_ylim(0)
ax2.set_xlim(min_score, max_score)
ax2.grid()

score_outfile = os.path.join(comp.paths.figures_dir,
                             'yearly_data_comparisons', 'BDT_scores.png')
comp.check_output_dir(score_outfile)
plt.savefig(score_outfile)
                    lw=1, color=color_dict['heavy'], label='heavy', ax=ax2)
ax2.set_ylabel(
    '1$\mathrm{\sigma}$ of $\mathrm{\log_{10}(E_{reco}/E_{true})}$')
ax2.set_xlabel('$\mathrm{\log_{10}(E_{true}/GeV)}$')
ax2.set_ylim(0)
ax2.set_xlim(energybins.log_energy_min, energybins.log_energy_max)
ax2.grid()

energy_res_outfile = os.path.join(comp.paths.figures_dir,
                                  'laputop_performance',
                                  'energy_res_{}.png'.format(config))
comp.check_output_dir(energy_res_outfile)
plt.savefig(energy_res_outfile)

# Core resolution
fig, ax = plt.subplots()
for composition in comp_list:
    core_diff = np.sqrt(
        (df_sim[MC_comp_mask[composition]]['lap_x'] -
         df_sim[MC_comp_mask[composition]]['MC_x'])**2 +
        (df_sim[MC_comp_mask[composition]]['lap_y'] -
         df_sim[MC_comp_mask[composition]]['MC_y'])**2)
    energy = df_sim[MC_comp_mask[composition]]['MC_energy']
    core_res = comp.analysis.get_resolution(energy, core_diff,
                                            energybins.energy_bins)
    plotting.plot_steps(energybins.log_energy_bins, core_res, lw=1,
                        color=color_dict[composition], label=composition,
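# comp.analysis.get_resolution is not defined in this section. A minimal
# sketch, assuming "resolution" means half the central 68% spread of the
# residuals in each energy bin (the 1-sigma equivalent for a Gaussian):
import numpy as np

def get_resolution(energy, residuals, energy_bins):
    residuals = np.asarray(residuals)
    bin_indices = np.digitize(energy, energy_bins) - 1
    resolution = np.full(len(energy_bins) - 1, np.nan)
    for i in range(len(energy_bins) - 1):
        in_bin = residuals[bin_indices == i]
        if in_bin.size:
            low, high = np.percentile(in_bin, [16, 84])
            resolution[i] = (high - low) / 2
    return resolution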
response_err = np.loadtxt(response_err_file)

# Plot response matrix
fig, ax = plt.subplots()
plt.imshow(response, origin='lower', cmap='viridis')
ax.plot([0, response.shape[0] - 1], [0, response.shape[1] - 1],
        marker='None', ls=':', color='C1')
ax.set_xlabel('True bin')
ax.set_ylabel('Reconstructed bin')
ax.set_title('Response matrix')
plt.colorbar(label='$\mathrm{P(E_i|C_{\mu})}$')
response_plot_outfile = os.path.join(
    comp.paths.figures_dir, 'unfolding', config, 'response_matrix',
    'response-matrix_{}-groups.png'.format(num_groups))
comp.check_output_dir(response_plot_outfile)
plt.savefig(response_plot_outfile)

# Plot response matrix error
fig, ax = plt.subplots()
plt.imshow(response_err, origin='lower', cmap='viridis')
ax.plot([0, response_err.shape[0] - 1], [0, response_err.shape[1] - 1],
        marker='None', ls=':', color='C1')
ax.set_xlabel('True bin')
ax.set_ylabel('Reconstructed bin')
ax.set_title('Response matrix error')
plt.colorbar(label='$\mathrm{\delta P(E_i|C_{\mu})}$')
response_plot_outfile = os.path.join(
    comp.paths.figures_dir, 'unfolding', config, 'response_matrix',
    'response_err-matrix_{}-groups.png'.format(num_groups))
def main(config, num_groups, prior, ts_stopping, case):
    figures_dir = os.path.join(comp.paths.figures_dir, 'unfolding', config,
                               'datachallenge', '{}_case'.format(case),
                               '{}_prior'.format(prior),
                               'ts_stopping_{}'.format(ts_stopping))

    # Calculate desired counts distribution for test case
    counts_true = pd.DataFrame(index=range(num_ebins), columns=comp_list)
    # counts_true = pd.DataFrame(index=energybins.log_energy_midpoints,
    #                            columns=comp_list)
    for composition in comp_list:
        flux_to_counts_scaling = (eff_area[composition] * livetime *
                                  solid_angle * energybins.energy_bin_widths)
        counts_true[composition] = get_test_counts(case,
                                                   composition,
                                                   num_groups,
                                                   energybins.energy_midpoints,
                                                   flux_to_counts_scaling)
    counts_true['total'] = counts_true.sum(axis=1)

    # Plot true flux and H4a flux (as a visual reference)
    fig, axarr = plt.subplots(nrows=1, ncols=num_groups + 1,
                              sharex=True, sharey=True, figsize=(15, 5))
    for idx, composition in enumerate(comp_list + ['total']):
        ax = axarr[idx]
        model_flux = comp.model_flux(model='H4a',
                                     energy=energybins.energy_midpoints,
                                     num_groups=num_groups)
        model_comp_flux = model_flux['flux_{}'.format(composition)].values
        ax.plot(energybins.log_energy_midpoints,
                energybins.energy_midpoints**2.7 * model_comp_flux,
                color=color_dict[composition], ls='-.', lw=2,
                marker='None', label='H4a')
        comp_flux, _ = counts_to_flux(counts_true[composition],
                                      composition=composition)
        ax.plot(energybins.log_energy_midpoints, comp_flux,
                color=color_dict[composition], ls='-', lw=2,
                marker='None', label='Test case')
        ax.set_yscale("log", nonposy='clip')
        ax.set_xlabel('$\mathrm{\log_{10}(E/GeV)}$')
        if idx == 0:
            ax.set_ylabel(
                '$\mathrm{ E^{2.7} \ J(E) \ [GeV^{1.7} m^{-2} sr^{-1} s^{-1}]}$')
        ax.set_title(composition)
        ax.grid(lw=1, which='both')
        ax.legend()
    true_flux_outfile = os.path.join(
        figures_dir,
        'true_flux_{}-groups_{}-case.png'.format(num_groups, case))
    comp.check_output_dir(true_flux_outfile)
    plt.savefig(true_flux_outfile)

    # Run analysis pipeline on simulation
    counts_observed = pd.DataFrame(0, index=range(num_ebins),
                                   columns=comp_list)
    counts_observed_err = pd.DataFrame(0, index=range(num_ebins),
                                       columns=comp_list)
    weights = pd.DataFrame(0, index=range(num_ebins), columns=comp_list)
    # Construct mask for energy bin
    energy_bins = np.digitize(df_sim_data['MC_log_energy'],
                              bins=energybins.log_energy_bins) - 1
    for idx_log_energy, composition in itertools.product(
            range(len(energybins.log_energy_midpoints)), comp_list):
        log_energy = energybins.log_energy_midpoints[idx_log_energy]
        # Filter out events that don't pass composition & energy mask
        comp_mask = df_sim_data['comp_group_{}'.format(num_groups)] == composition
        energy_mask = energy_bins == idx_log_energy
        df_sim_bin = df_sim_data.loc[comp_mask & energy_mask, :]
        # Reweight simulation events to get desired number of events
        weight = counts_true[composition][idx_log_energy] / df_sim_bin.shape[0]
        # weight = counts_true.loc[log_energy, composition] / df_sim_bin.shape[0]
        weights.loc[idx_log_energy, composition] = weight
        # Get predicted composition
        pred_target = pipeline.predict(df_sim_bin[feature_list].values)
        pred_comp = np.array(
            comp.decode_composition_groups(pred_target, num_groups=num_groups))
        assert len(pred_comp) == df_sim_bin.shape[0]
        for p_comp in np.unique(pred_comp):
            pred_comp_mask = pred_comp == p_comp
            comp_counts, _ = np.histogram(
                df_sim_bin.loc[pred_comp_mask, 'reco_log_energy'],
                bins=energybins.log_energy_bins)
            counts_observed[p_comp] += weight * comp_counts
            # Accumulate the variance: weight**2 for each weighted event
            counts_observed_err[p_comp] += weight**2 * comp_counts

    # Square root the sum of squares of the weight errors
    for composition in comp_list:
        counts_observed_err[composition] = np.sqrt(
            counts_observed_err[composition])
    counts_observed_err['total'] = np.sqrt(
        sum(counts_observed_err[composition]**2 for composition in comp_list))
    # Calculate total counts
    counts_observed['total'] = counts_observed.sum(axis=1)

    # Plot weights for each composition and energy bin
    fig, ax = plt.subplots()
    for composition in comp_list:
        weights[composition].plot(ls=':', label=composition,
                                  color=color_dict[composition], ax=ax)
    ax.set_xlabel('$\mathrm{\log_{10}(E/GeV)}$')
    ax.set_ylabel('Weights')
    ax.set_yscale("log", nonposy='clip')
    ax.grid(lw=1)
    ax.legend()
    weights_outfile = os.path.join(
        figures_dir, 'weights_{}-groups_{}.png'.format(num_groups, case))
    comp.check_output_dir(weights_outfile)
    plt.savefig(weights_outfile)

    # Format observed counts, detection efficiencies, and priors for PyUnfold use
    counts_pyunfold = np.empty(num_groups * len(energybins.energy_midpoints))
    counts_err_pyunfold = np.empty(num_groups * len(energybins.energy_midpoints))
    efficiencies = np.empty(num_groups * len(energybins.energy_midpoints))
    efficiencies_err = np.empty(num_groups * len(energybins.energy_midpoints))
    for idx, composition in enumerate(comp_list):
        counts_pyunfold[idx::num_groups] = counts_observed[composition]
        counts_err_pyunfold[idx::num_groups] = counts_observed_err[composition]
        efficiencies[idx::num_groups] = df_eff['eff_median_{}'.format(composition)]
        efficiencies_err[idx::num_groups] = df_eff['eff_err_low_{}'.format(composition)]

    formatted_df = pd.DataFrame({'counts': counts_pyunfold,
                                 'counts_err': counts_err_pyunfold,
                                 'efficiencies': efficiencies,
                                 'efficiencies_err': efficiencies_err})
    formatted_file = os.path.join(
        os.getcwd(), 'test_{}_{}_{}.hdf'.format(prior, case, ts_stopping))
    formatted_df.to_hdf(formatted_file, 'dataframe', mode='w')
    root_file = os.path.join(
        os.getcwd(), 'test_{}_{}_{}.root'.format(prior, case, ts_stopping))
    save_pyunfold_root_file(config=config,
                            num_groups=num_groups,
                            outfile=root_file,
                            formatted_df_file=formatted_file)

    if prior == 'Jeffreys':
        prior_pyunfold = 'Jeffreys'
    else:
        model_flux = comp.model_flux(model=prior,
                                     energy=energybins.energy_midpoints,
                                     num_groups=num_groups)
        prior_pyunfold = np.empty(num_groups * len(energybins.energy_midpoints))
        for idx, composition in enumerate(comp_list):
            prior_pyunfold[idx::num_groups] = model_flux['flux_{}'.format(composition)]

    df_unfolding_iter = unfold(config_name=os.path.join(config, 'config.cfg'),
                               priors=prior_pyunfold,
                               input_file=root_file,
                               ts_stopping=ts_stopping)
    # Delete temporary ROOT file needed for PyUnfold
    os.remove(root_file)
    os.remove(formatted_file)

    # print('\n{} case (prior {}): {} iterations'.format(
    #     case, prior, df_unfolding_iter.shape[0]))
    output = {'prior': prior, 'ts_stopping': ts_stopping, 'case': case}
    counts, counts_sys_err, counts_stat_err = comp.unfolded_counts_dist(
        df_unfolding_iter, iteration=-1, num_groups=num_groups)
    for idx, composition in enumerate(comp_list + ['total']):
        # Pre-unfolding flux plot
        initial_counts = counts_observed[composition].values
        initial_counts_err = counts_observed_err[composition].values
        # initial_counts_err = np.sqrt(initial_counts)
        initial_flux, initial_flux_err_stat = counts_to_flux(
            initial_counts, initial_counts_err, composition=composition)
        initial_flux_err_sys = np.zeros_like(initial_flux)

        # Unfolded flux plot
        flux, flux_err_sys = unfolded_counts_to_flux(
            counts[composition], counts_sys_err[composition])
        flux, flux_err_stat = unfolded_counts_to_flux(
            counts[composition], counts_stat_err[composition])

        # True flux
        true_counts = counts_true[composition].values
        true_counts_err = np.sqrt(true_counts)
        true_flux, true_flux_err_stat = counts_to_flux(
            true_counts, true_counts_err, composition=composition)
        true_flux_err_sys = np.zeros_like(true_flux)

        output['flux_{}'.format(composition)] = flux
        output['flux_err_stat_{}'.format(composition)] = flux_err_stat
        output['flux_err_sys_{}'.format(composition)] = flux_err_sys
        output['true_flux_{}'.format(composition)] = true_flux
        output['true_flux_err_stat_{}'.format(composition)] = true_flux_err_stat
        output['true_flux_err_sys_{}'.format(composition)] = true_flux_err_sys
        output['initial_flux_{}'.format(composition)] = initial_flux
        output['initial_flux_err_stat_{}'.format(composition)] = initial_flux_err_stat
        output['initial_flux_err_sys_{}'.format(composition)] = initial_flux_err_sys

    # Don't want to consume too much memory by keeping too many figures open
    plt.close('all')

    return output
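# counts_to_flux and unfolded_counts_to_flux are defined elsewhere in the
# repository. A minimal sketch of counts_to_flux that simply inverts the
# flux_to_counts_scaling used in main() above and applies the E^2.7 scaling
# the plots expect; treat the exact form as an assumption:
def counts_to_flux_sketch(counts, counts_err=None, composition=None):
    if counts_err is None:
        counts_err = np.sqrt(counts)  # default to Poisson errors
    # counts = J(E) * A_eff * livetime * solid_angle * dE, so divide it out
    scaling = (eff_area[composition] * livetime * solid_angle *
               energybins.energy_bin_widths)
    flux = energybins.energy_midpoints**2.7 * counts / scaling
    flux_err = energybins.energy_midpoints**2.7 * counts_err / scaling
    return flux, flux_err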
    ax_ratio.errorbar(energybins.log_energy_midpoints, frac_diff,
                      yerr=frac_diff_stat, color=color, ls='None',
                      marker=marker, label='Flux ratio ({})'.format(label),
                      alpha=0.8)
    ax_ratio.axhline(0, ls='-.', lw=1, marker='None', color='k')
    ax_ratio.grid(linestyle='dotted', which="both", lw=1)
    ax_ratio.set_yticks(np.arange(-1, 1.5, 0.25))
    ax_ratio.set_ylim(-1, 1)
    if idx == 0:
        ax_ratio.set_ylabel('$\mathrm{(J - J_{true}) / J_{true}}$',
                            fontsize=10)
    else:
        plt.setp(ax_ratio.get_yticklabels(), visible=False)
    ax_ratio.set_xlabel('$\mathrm{\log_{10}(E/GeV)}$', fontsize=10)
    ax_ratio.tick_params(axis='both', which='major', labelsize=10)
    # ax_ratio.legend(fontsize=8)

plt.tight_layout()
flux_outfile = os.path.join(
    figures_dir, 'flux_ratio_{}-groups_{}-case.png'.format(num_groups, case))
comp.check_output_dir(flux_outfile)
plt.savefig(flux_outfile)
# Don't want to consume too much memory by keeping too many figures open
plt.close('all')
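# frac_diff and frac_diff_stat are computed upstream of this fragment. A
# minimal sketch of the quantity plotted above, assuming uncorrelated
# statistical errors between the unfolded and true fluxes:
import numpy as np

def fractional_difference(flux, flux_err, true_flux, true_flux_err):
    # (J - J_true) / J_true; the additive -1 carries no uncertainty, so the
    # error is that of the ratio J / J_true
    ratio = flux / true_flux
    ratio_err = np.abs(ratio) * np.sqrt((flux_err / flux)**2 +
                                        (true_flux_err / true_flux)**2)
    return ratio - 1, ratio_err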