def print_percentages(what, df_sel, df, color='red'):
    """Print the size of a selection and its percentage of the full DataFrame."""
    msg = f"{len(df_sel)} = {round(100 * len(df_sel) / len(df), 1)}%"
    if color == 'red':
        lu.print_red(f"{what} ", msg)
    elif color == 'blue':
        lu.print_blue(f"{what} ", msg)
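# Usage sketch (hypothetical DataFrames, not part of this module): given a
# full prediction DataFrame `df_pred` and a selection `df_sel`,
# print_percentages reports the selection size and its fraction, e.g.
#
#   df_sel = df_pred[df_pred['predicted_target'] == 0]
#   print_percentages("photo Ia candidates", df_sel, df_pred, color='blue')
#   # -> "photo Ia candidates  1234 = 12.3%"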
def do_classification(skim_dir, list_models, sntypes):
    """ SNN classification """
    import SuperNNova.supernnova.conf as conf
    from SuperNNova.supernnova.data import make_dataset
    from SuperNNova.supernnova.visualization import early_prediction
    from SuperNNova.supernnova.validation import validate_rnn, metrics

    lu.print_blue(f"Classifying {skim_dir}")

    # get config args
    snn_args = conf.get_args()

    # create database
    snn_args.data = True
    snn_args.data_testing = True
    snn_args.dump_dir = f"{skim_dir}/"
    snn_args.raw_dir = f"{skim_dir}/"
    snn_args.fits_dir = "./"
    snn_args.sntypes = sntypes
    settings = conf.get_settings(snn_args)

    #
    # make dataset
    #
    make_dataset.make_dataset(settings)

    for model in list_models:
        # add model file to settings
        snn_args.model_files = model
        settings = conf.get_settings(snn_args)
        model_files = [model]

        #
        # classify
        #
        snn_args.validate_rnn = True
        model_settings = conf.get_settings_from_dump(
            settings,
            snn_args.model_files,
            override_source_data=settings.override_source_data,
        )
        # fetch predictions
        prediction_file = validate_rnn.get_predictions(
            model_settings, model_file=snn_args.model_files)
        # Compute metrics
        metrics.get_metrics_singlemodel(model_settings,
                                        prediction_file=prediction_file,
                                        model_type="rnn")
        # plot lcs
        model_settings.model_files = snn_args.model_files
        early_prediction.make_early_prediction(model_settings, nb_lcs=20)

        # evaluate classifications
        df = eu.fetch_prediction_info(settings, model_settings, skim_dir)

        # plots init
        path_plot = f"{snn_args.dump_dir}/figures/"
        eu.plot_efficiency(df, skim_dir, path_plot)
        # "the classified sample"
        eu.pair_plots(df, path_plot)
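# Usage sketch for do_classification (the values below are assumptions for
# illustration, not taken from this repository's configuration). The skim
# directory must already contain the HEAD/PHOT FITS files written by
# skim_data so SuperNNova can build its database from raw_dir:
#
#   sntypes = {"101": "Ia", "120": "II", "132": "Ibc"}   # hypothetical mapping
#   list_models = glob.glob("path/to/trained_models/*/*.pt")
#   do_classification("./dumps/real/clump/", list_models, sntypes)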
def skim_data(raw_dir, dump_dir, fits_file, timevar, debug=False):
    """ Skim PHOT and HEAD .FITS files """
    list_files = glob.glob(os.path.join(f"{raw_dir}", "*PHOT.FITS"))
    if debug:
        lu.print_yellow('Debugging mode')
        list_files = list_files[:1]
    lu.print_green(
        f"Starting data skimming, found {len(list_files)} files to operate on")

    # load Bazin fit parameters if available
    df_fits = None
    if Path(fits_file).exists():
        df_fits = du.load_fits(fits_file)

    tmp_type_list = []
    filenames = []
    # skim each FITS file
    for fname in list_files:
        # fetch data year as prefix
        dump_prefix = Path(fname).name.split("_")[0]
        lu.print_blue(f"Processing: {dump_prefix}")

        df_header, df_phot = du.read_fits(fname)
        if df_fits is not None:
            df_header = pd.merge(df_header, df_fits, on='SNID')
            # drop spurious 'Unnamed' index columns picked up by the merge
            df_header = df_header[[
                k for k in df_header.keys() if 'Unnamed' not in k
            ]]

        # apply cuts
        unique_types, filename = apply_cut_save(df_header,
                                                df_phot,
                                                timevar=timevar,
                                                dump_dir=dump_dir,
                                                dump_prefix=dump_prefix)
        tmp_type_list += unique_types
        filenames.append(filename)
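# Usage sketch for skim_data (all paths and the timevar value are placeholders,
# not taken from this repository):
#
#   skim_data(raw_dir="$DES_DATA/DESALL_forcePhoto_real_snana_fits/",
#             dump_dir="./dumps/real/clump/",
#             fits_file="./fits/bazin_fits.csv",
#             timevar="clump",
#             debug=True)   # debug=True processes a single PHOT.FITS file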
def get_sample_stats_and_plots(df_pred,
                               photo_Ia,
                               photo_nonIa,
                               skim_dir,
                               model_files=None,
                               out_dir=None,
                               plot=False,
                               dtype='real',
                               cut_type='clump'):
    """Inspect the photometric sample: print statistics, dump csv files and
    make control plots.

    dtype ('real' or 'fake') and cut_type (name of the selection window) are
    only used for reporting.
    """
    # inspect sample
    path_plots = f"{skim_dir}/figures/"
    Path(path_plots).mkdir(parents=True, exist_ok=True)

    vars_to_plot = [
        k for k in [
            'REDSHIFT_FINAL', 'PRIVATE(DES_numepochs_ml)', 'all_class0',
            'PRIVATE(DES_cand_type)', 'TYPE', 'PRIVATE(DES_mjd_trigger)',
            'PKMJDINI'
        ] if k in photo_Ia['all'].keys()
    ]
    if 'fake' in skim_dir:
        df_dic = {'all_lcs': df_pred, 'photo Ia sample': photo_Ia['all']}
        vars_to_plot += [
            'PRIVATE(DES_fake_salt2x1)', 'PRIVATE(DES_fake_salt2c)'
        ]
    else:
        df_dic = {
            'all_lcs': df_pred,
            'photo Ia sample': photo_Ia['all'],
            'contaminants': photo_Ia['spec_nonIa'],
            'photo other but spec Ia ': photo_nonIa['spec_Ia']
        }
    for var in vars_to_plot:
        vu.plot_superimposed_hist(df_dic,
                                  var,
                                  nameout=f"{path_plots}/hist_{var}_dist.png",
                                  log=True)
    for var in ['FLUXCAL_max', 'SNRMAX1']:
        # photo sample zoom
        vu.plot_superimposed_hist(df_dic,
                                  var,
                                  nameout=f"{path_plots}/hist_{var}_dist.png",
                                  log=True,
                                  limits_from_photo_sample=True)

    # Stats
    lu.print_green(cut_type)
    eu.print_percentages("photo Ias ", photo_Ia['all'], df_pred, color='blue')
    if dtype == 'real':
        lu.print_blue(" are spec Ias ", len(photo_Ia['spec_Ia']))
        lu.print_blue(' are spec other', len(photo_Ia['spec_nonIa']))
        lu.print_blue(
            ' gals ',
            len(photo_Ia['spec_nonIa'][photo_Ia['spec_nonIa']['TYPE'] == 81]))
        lu.print_red("missed Ias ", len(photo_nonIa['spec_Ia']))

    # dump sample
    if not out_dir:
        out_dir = f"{skim_dir}/sample/"
    Path(out_dir).mkdir(parents=True, exist_ok=True)

    # fill NaNs before dumping
    photo_Ia['all_no_spec_nonIa'] = photo_Ia['all_no_spec_nonIa'].fillna(0)
    photo_Ia['spec_nonIa'] = photo_Ia['spec_nonIa'].fillna(0)

    cols_to_dump = [
        'SNID', 'HOSTGAL_OBJID', 'DEC', 'RA', 'TYPE', 'REDSHIFT_FINAL',
        'HOSTGAL_PHOTOZ', 'HOSTGAL_SPECZ', 'all_class0', 'c', 'x1'
    ]
    # dump photometric Ia sample
    photo_Ia['all_no_spec_nonIa'][cols_to_dump].to_csv(
        f'{out_dir}/photo_Ia.csv')
    # dump contaminants
    photo_Ia['spec_nonIa'][cols_to_dump].to_csv(
        f'{out_dir}/photo_Ia_spec_contamination.csv')
    # dump missed Ias
    photo_nonIa['spec_Ia'][cols_to_dump].to_csv(
        f'{out_dir}/photo_nonIa_spec_Ia.csv')

    # plot lcs
    if plot:
        vu.plot_early_classification(skim_dir,
                                     prefix='photo_Ia_',
                                     df=photo_Ia['all'],
                                     model_files=model_files,
                                     out_dir=out_dir)
        vu.plot_early_classification(skim_dir,
                                     prefix='photo_Ia_spec_contamination',
                                     df=photo_Ia['spec_nonIa'],
                                     model_files=model_files,
                                     out_dir=out_dir)
        vu.plot_early_classification(skim_dir,
                                     prefix='photo_nonIa_',
                                     df=photo_nonIa['all'],
                                     model_files=model_files,
                                     out_dir=out_dir)
        vu.plot_early_classification(skim_dir,
                                     prefix='photo_nonIa_spec_Ia_',
                                     df=photo_nonIa['spec_Ia'],
                                     model_files=model_files,
                                     out_dir=out_dir)

    # sample histograms of type
    vu.plot_hist(photo_Ia['all'],
                 'TYPE',
                 nameout=f"{path_plots}/photo_Ia_hist_type.png",
                 log=True)
    vu.plot_hist(photo_nonIa['all'],
                 'TYPE',
                 nameout=f"{path_plots}/photo_nonIa_hist_type.png",
                 log=True)

    df_dic = {
        'photo Ia sample': photo_Ia['all'],
        'photo & spec Ia ': photo_Ia['spec_Ia'],
        'photo other but spec Ia ': photo_nonIa['spec_Ia']
    }
    for var in [
            'REDSHIFT_FINAL', 'HOSTGAL_PHOTOZ', 'HOSTGAL_SPECZ', 'all_class0'
    ]:
        vu.plot_superimposed_hist(
            df_dic,
            var,
            nameout=f"{path_plots}/hist_{var}_dist_spec.png",
            log=True,
            only_positive_x=True)
    for var in ['FLUXCAL_max', 'c', 'x1']:
        vu.plot_superimposed_hist(
            df_dic,
            var,
            nameout=f"{path_plots}/hist_{var}_dist_spec.png",
            log=True,
            only_positive_x=False,
            bins=20)
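# Usage sketch for get_sample_stats_and_plots (assumes photo_Ia / photo_nonIa
# are dictionaries of DataFrames keyed as 'all', 'spec_Ia', 'spec_nonIa',
# 'all_no_spec_nonIa', as used above; variable names follow the loop below):
#
#   get_sample_stats_and_plots(df_pred[cut_type],
#                              photo_Ia[cut_type],
#                              photo_nonIa[cut_type],
#                              skim_dir=f"./dumps/{dtype}/{cut_type}/",
#                              model_files=[model],
#                              plot=True,
#                              dtype=dtype,
#                              cut_type=cut_type)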
path_des_data = os.environ.get("DES_DATA")
model_name = "vanilla_S_0_CLF_2_R_None_photometry_DF_1.0_N_global_lstm_32x2_0.05_128_True_mean_C"
list_models = glob.glob("../SuperNNova_general/trained_models_mutant/*/*.pt")

for model in list_models:
    model_files = [model]
    for dtype in ["real", "fake"]:
        df_pred = {}
        photo_Ia = {}
        photo_nonIa = {}
        for cut_type in ['clump']:  # ['bazin', 'clump', 'trigger']
            print()
            lu.print_blue(
                f'_____STATS FOR {dtype} with window {cut_type} model {Path(model).name.split("_")[0]}_____'
            )
            print()
            skim_dir = f"./dumps/{dtype}/{cut_type}/"

            # fetch predictions
            df_pred[cut_type] = du.load_predictions_and_info(
                skim_dir, model_name)

            # add salt2 fit parameters
            raw_dir = f"{path_des_data}/DESALL_forcePhoto_{dtype}_snana_fits/"
            saltfit = du.load_fitres(raw_dir)
            saltfit = saltfit[['SNID'] + [
                k for k in saltfit.keys()
                if k not in df_pred[cut_type].keys()
            ]]
# load preds & enrich df_pred = du.load_predictions(fname_preds) df_pred = du.enrich_predictions(df_pred, path_dtype_data) # save the preds dic_pred[dtype][name_model] = df_pred # Select a default "photometric" sample photo_sample = df_pred[df_pred['predicted_target'] == 0] dic_pred[dtype][name_model]['photo_sample'] = np.array([df_pred['predicted_target'] == 0])[0] # save photo sample cols_to_save = ['SNID','HOSTGAL_OBJID', 'DEC', 'RA', 'SNTYPE', 'REDSHIFT_FINAL', 'HOSTGAL_PHOTOZ', 'HOSTGAL_SPECZ'] cols_to_save += ['all_class0'] if 'vanilla' in name_model else ['all_class0_median','all_class0_std'] photo_sample[cols_to_save].to_csv(f"{out_dir}/photo_sample.csv") lu.print_blue(name_model) print(f'photo sample {len(photo_sample)} representing {int(len(photo_sample)/len(df_pred)*100)}%') # metrics if dtype == 'real': # Ias dic_pred[dtype][name_model]['photo_spec_Ia'] = np.array([(df_pred['predicted_target'] == 0) & ((df_pred['SNTYPE'] == 1) | ( df_pred['SNTYPE'] == 101))])[0] spec_Ia = df_pred[(df_pred['SNTYPE'] == 1) | (df_pred['SNTYPE'] == 101)] print('spec Ia', len(dic_pred[dtype][name_model][dic_pred[dtype][name_model]['photo_spec_Ia']==True]), f"from {len(spec_Ia)}") # non Ias dic_pred[dtype][name_model]['photo_spec_nonIa'] = np.array([(df_pred['predicted_target'] == 0) & (df_pred['SNTYPE'] != 0) & (df_pred['SNTYPE'] != 1) & ( df_pred['SNTYPE'] != 101)])[0] spec_non_Ia = df_pred[(df_pred['SNTYPE'] != 0) & (df_pred['SNTYPE'] != 1) & (df_pred['SNTYPE'] != 101)]