def generate_worker_command(line, command_template, run_only, per_ad):
    """Format the command template into one command, or one command per AD."""
    if run_only:
        run, site = get_run_info_run_only(line)
        args = {'run': run, 'site': site}
    else:
        run, fileno, site = get_run_info(line)
        if 'filepath' in command_template:
            infile = get_file_location(run, fileno)
            args = {
                'run': run,
                'fileno': fileno,
                'site': site,
                'filepath': infile,
            }
        else:
            args = {'run': run, 'fileno': fileno, 'site': site}
    if per_ad:
        ads = dets_for(site, run)
        commands = []
        for ad in ads:
            args['ad'] = ad
            commands.append(command_template.format(**args))
        return commands
    else:
        command = command_template.format(**args)
        return command

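# A usage sketch (hedged): the template and worker line below are
# hypothetical, illustrating only the per-AD expansion. The real template
# and line formats come from the job producer and are not defined here.
#
#   template = 'python process.py --run {run} --fileno {fileno} --ad {ad}'
#   commands = generate_worker_command(line, template,
#                                      run_only=False, per_ad=True)
#   # -> one fully-formatted command string per AD active during the run
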
def _apply_process(run, site, fileno, input_prefix, processed_output_path):
    """Multiprocessing-friendly call of process (all ADs within a hall together)."""
    logging.debug('[process] Running on Run %d, file %d', run, fileno)
    muons_location = os.path.join(input_prefix, f'muons_{run}_{fileno:>04}.root')
    num_events = -1
    debug = False
    ads = common.dets_for(site, run)
    for ad in ads:
        logging.debug(
            '[process] Running on Run %d, file %d, EH%d-AD%d',
            run, fileno, site, ad
        )
        events_location = os.path.join(
            input_prefix, f'events_ad{ad}_{run}_{fileno:>04}.root'
        )
        output_location = os.path.join(
            processed_output_path,
            f'EH{site}',
            f'processed_ad{ad}',
            f'out_ad{ad}_{run}_{fileno:>04}.root',
        )
        if os.path.isfile(output_location):
            logging.debug(
                '[process] Found existing file. Skipping. %s', output_location
            )
        else:
            process.main(
                num_events,
                events_location,
                muons_location,
                output_location,
                (run, fileno),
                ad,
                debug,
            )
    logging.debug('[process] Finished Run %d, file %d', run, fileno)
    return

def run_create_accidentals(run, site, processed_output_path):
    """Create a synthetic accidentals sample using the sequential pairing."""
    path_prefix = os.path.join(processed_output_path, f'EH{site}')
    ads = common.dets_for(site, run)
    ttree_name = 'singles'
    pairing = 'sequential'
    pairing_note = ''
    update_db = None
    for ad in ads:
        infile = os.path.join(
            path_prefix,
            f'singles_ad{ad}/singles_ad{ad}_{run}.root',
        )
        outfile = os.path.join(
            path_prefix,
            f'acc_ad{ad}/acc_ad{ad}_{run}.root',
        )
        if os.path.isfile(outfile):
            logging.debug(
                '[create_accidentals] Found existing file. Skipping. %s', outfile
            )
        else:
            create_accidentals.main(
                infile,
                outfile,
                ttree_name,
                pairing,
                pairing_note,
                update_db,
            )
    return

def run_compute_singles(run, site, processed_output_path, database):
    """Compute the singles (underlying uncorrelated) rate."""
    path_prefix = os.path.join(processed_output_path, f'EH{site}')
    ads = common.dets_for(site, run)
    update_db = True
    iteration = 0
    extra_cut = '1'
    for ad in ads:
        infile = os.path.join(
            path_prefix,
            f'hadded_ad{ad}/out_ad{ad}_{run}.root',
        )
        # Ideally should check to see if the rate has been computed before.
        # But this is so fast that I will just re-compute every time.
        # Retry the DB write with backoff since concurrent workers can hit
        # transient sqlite3 errors (e.g. "database is locked").
        for attempt in tenacity.Retrying(
            reraise=True,
            wait=tenacity.wait_random_exponential(max=60),
            retry=tenacity.retry_if_exception_type(sqlite3.Error),
            before_sleep=tenacity.before_sleep_log(logging, logging.DEBUG),
        ):
            with attempt:
                compute_singles.main(
                    infile,
                    database,
                    GENERAL_LABEL,
                    update_db,
                    iteration,
                    extra_cut,
                )
    return

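# Note on the retry loop above: tenacity.Retrying retries only the body of
# the ``with attempt:`` block, waiting a randomized exponential backoff
# (capped at 60 s) whenever a sqlite3.Error escapes; with no stop condition
# it keeps retrying until the write succeeds. Minimal sketch of the same
# pattern (do_db_write is hypothetical):
#
#   for attempt in tenacity.Retrying(
#       reraise=True,
#       wait=tenacity.wait_random_exponential(max=60),
#       retry=tenacity.retry_if_exception_type(sqlite3.Error),
#   ):
#       with attempt:
#           do_db_write()  # any sqlite3.Error triggers another attempt
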
def run_subtract_accidentals(run, site, processed_output_path, database):
    """Subtract the accidentals spectrum from each AD's coincidence spectrum."""
    path_prefix = os.path.join(processed_output_path, f'EH{site}')
    ads = common.dets_for(site, run)
    override_acc_rate = None
    label = 'test'
    update_db = True
    for ad in ads:
        datafile = os.path.join(
            path_prefix,
            f'hadded_ad{ad}/out_ad{ad}_{run}.root',
        )
        accfile = os.path.join(
            path_prefix,
            f'acc_ad{ad}/acc_ad{ad}_{run}.root',
        )
        outfile = os.path.join(
            path_prefix,
            f'sub_ad{ad}/sub_ad{ad}_{run}.root',
        )
        if os.path.isfile(outfile):
            logging.debug(
                '[subtract_accidentals] Found existing file. Skipping. %s', outfile
            )
        else:
            subtract_accidentals.main(
                outfile,
                datafile,
                accfile,
                database,
                ad,
                override_acc_rate,
                label,
                update_db,
            )
    return

def _should_run(run, site, name, progress):
    """Return True if any AD has not yet finished the given processing step."""
    ads = common.dets_for(site, run)
    should_run = False
    for ad in ads:
        run_progress = progress.get((run, ad), None)
        if run_progress is None:
            should_run = True
        elif run_progress[name] == 0 or run_progress[name] is None:
            should_run = True
    return should_run

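# A hedged sketch of the ``progress`` mapping consumed by _should_run.
# Keys are (run, ad) pairs; values are row-like objects indexable by the
# progress column name (e.g. sqlite3.Row objects from the
# processing_progress table). The column names below are illustrative,
# not authoritative:
#
#   progress = {
#       (21221, 1): {'hadd': 1, 'create_singles': 1},  # both steps done
#       (21221, 2): {'hadd': 1, 'create_singles': 0},  # singles pending
#   }
#   _should_run(21221, 1, 'create_singles', progress)  # -> True (AD 2 pending)
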
def post_processing(processed_output_path, database):
    """Run the scripts that rely on all data being processed through many_runs."""
    sites = (1, 2, 3)
    for site in sites:
        dets = common.dets_for(site)
        run_hadd_sub_files(site, processed_output_path)
        for det in dets:
            run_fit_delayed(site, det, processed_output_path, database)
    run_compute_num_coincidences(processed_output_path, database)
    run_compute_total_acc_efficiency(processed_output_path, database)
    run_baserate_uncertainty(database)
    run_dist_time_eff(processed_output_path, database)
    run_delayed_eff_uncertainty(processed_output_path, database)
    return

def run_hadd(run, site, filenos, processed_output_path):
    """Combine the processed output files into 1 per run."""
    import ROOT
    path_prefix = os.path.join(processed_output_path, f'EH{site}')
    input_template = os.path.join(
        path_prefix,
        'processed_ad{ad}/out_ad{ad}_' f'{run}' '_{fileno:>04}.root',
    )
    output_template = os.path.join(
        path_prefix,
        'hadded_ad{ad}/out_ad{ad}_' f'{run}.root',
    )
    ads = common.dets_for(site, run)
    for ad in ads:
        infiles = []
        for fileno in filenos:
            infiles.append(input_template.format(ad=ad, fileno=fileno))
        outfile = output_template.format(ad=ad)
        # Check to see if the file already exists
        if os.path.isfile(outfile):
            outfile_opened = ROOT.TFile(outfile, 'READ')
            try:
                if not outfile_opened.IsZombie():
                    # Check to see if the outfile has the right number of events
                    outfile_events = outfile_opened.Get('ad_events').GetEntries()
                    outfile_opened.Close()
                    infile_events = 0
                    for infile in infiles:
                        infile_opened = ROOT.TFile(infile, 'READ')
                        infile_events += infile_opened.Get('ad_events').GetEntries()
                        infile_opened.Close()
                    if infile_events == outfile_events:
                        logging.debug(
                            '[hadd] Found existing file. Skipping. %s', outfile
                        )
                        continue
            except (ReferenceError, AttributeError):
                # The file doesn't have the correct TTree; fall through to re-hadd
                pass
            finally:
                outfile_opened.Close()
        command = ['hadd', '-f', '-v', '0', outfile] + infiles
        subprocess.run(command, check=True)
    return

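# Shape of the generated hadd command (hypothetical paths and numbers,
# with the prefix directories elided):
#
#   hadd -f -v 0 EH1/hadded_ad1/out_ad1_21221.root \
#        EH1/processed_ad1/out_ad1_21221_0001.root \
#        EH1/processed_ad1/out_ad1_21221_0002.root
#
# -f overwrites any stale output file and -v 0 silences hadd's per-file log.
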
def _update_progress_db(database, run, site, ad, column):
    """Update the progress tracker by checking off the given column.

    If ad is None then use all ADs active during that run.
    """
    with common.get_db(database) as conn:
        cursor = conn.cursor()
        if isinstance(column, str):
            # Need to use unsafe string interpolation for column name
            update_string = f'''
                INSERT INTO
                    processing_progress (RunNo, Hall, DetNo, {column})
                VALUES
                    (?, ?, ?, 1)
                ON CONFLICT
                    (RunNo, DetNo)
                DO UPDATE SET
                    {column} = 1;
            '''
        else:  # other iterable
            column_list_commas = ', '.join(column)
            column_set_equal_1 = ', '.join([f'{c} = 1' for c in column])
            ones = ', '.join(['1'] * len(column))
            update_string = f'''
                INSERT INTO
                    processing_progress (RunNo, Hall, DetNo, {column_list_commas})
                VALUES
                    (?, ?, ?, {ones})
                ON CONFLICT
                    (RunNo, DetNo)
                DO UPDATE SET
                    {column_set_equal_1};
            '''
        if ad is None:
            ads = common.dets_for(site, run)
            rows = [(run, site, det) for det in ads]
            cursor.executemany(update_string, rows)
        else:
            cursor.execute(update_string, (run, site, ad))
    if ad is None:
        logging.debug(
            'Updated progress db for Run %d, all ADs, script %s', run, column
        )
    else:
        logging.debug(
            'Updated progress db for Run %d, AD %d, script %s', run, ad, column
        )

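# A hedged sketch of the SQL rendered by the multi-column branch above,
# using illustrative column names (not taken from the real schema):
#
#   _update_progress_db(db, run=21221, site=1, ad=None,
#                       column=['hadd', 'create_singles'])
#
# renders (modulo whitespace):
#
#   INSERT INTO processing_progress (RunNo, Hall, DetNo, hadd, create_singles)
#   VALUES (?, ?, ?, 1, 1)
#   ON CONFLICT (RunNo, DetNo)
#   DO UPDATE SET hadd = 1, create_singles = 1;
#
# and is executed once per AD active during the run. The f-string
# interpolation is only tolerable because the column names originate in
# this module, never from user input.
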
def run_hadd_sub_files(site, processed_output_path):
    """Combine all accidentals-subtracted files for a given AD."""
    path_prefix = os.path.join(processed_output_path, f'EH{site}')
    # The ????? glob matches the 5-digit run number in each per-run filename
    input_template = os.path.join(
        path_prefix, 'sub_{label}_ad{ad}/sub_ad{ad}_?????.root'
    )
    output_template = os.path.join(
        path_prefix, 'sub_{label}_ad{ad}/sub_ad{ad}.root'
    )
    ads = common.dets_for(site)
    num_cores = str(min(os.cpu_count(), 10))
    for ad in ads:
        for label in ['nominal', 'using_adtime']:
            input_glob = input_template.format(ad=ad, label=label)
            infiles = glob.glob(input_glob)
            outfile = output_template.format(ad=ad, label=label)
            command = ['hadd', '-f', '-v', '0', '-j', num_cores, outfile] + infiles
            subprocess.run(command, check=True)
    return

def run_create_singles(run, site, processed_output_path):
    """Create a sample of isolated singles (for acc spectrum & DT, not for rate)."""
    path_prefix = os.path.join(processed_output_path, f'EH{site}')
    ads = common.dets_for(site, run)
    ttree_name = 'ad_events'
    for ad in ads:
        infile = os.path.join(
            path_prefix,
            f'hadded_ad{ad}/out_ad{ad}_{run}.root',
        )
        outfile = os.path.join(
            path_prefix,
            f'singles_ad{ad}/singles_ad{ad}_{run}.root',
        )
        if os.path.isfile(outfile):
            logging.debug(
                '[create_singles] Found existing file. Skipping. %s', outfile
            )
        else:
            create_singles.main(infile, outfile, ttree_name)
    return

def _apply_process(run, site, fileno, input_prefix, processed_output_path):
    """Multiprocessing-friendly call of process (all ADs within a hall together)."""
    logging.info('[process] Running on Run %d, file %d', run, fileno)
    muons_location = os.path.join(input_prefix, f'muons_{run}_{fileno:>04}.root')
    num_events = -1
    debug = False
    ads = common.dets_for(site, run)
    for ad in ads:
        logging.debug(
            '[process] Running on Run %d, file %d, EH%d-AD%d',
            run, fileno, site, ad
        )
        events_location = os.path.join(
            input_prefix, f'events_ad{ad}_{run}_{fileno:>04}.root'
        )
        flashers_location = os.path.join(
            input_prefix, 'flashers', f'q1q2_ad{ad}_{run}_{fileno:>04}.root'
        )
        output_location = os.path.join(
            processed_output_path,
            f'EH{site}',
            f'processed_ad{ad}',
            f'out_ad{ad}_{run}_{fileno:>04}.root',
        )
        if process_adtime.is_complete(events_location, output_location):
            logging.debug(
                '[process] Already completed Run %d, file %d', run, fileno
            )
        else:
            try:
                process_adtime.main(
                    num_events,
                    events_location,
                    muons_location,
                    flashers_location,
                    output_location,
                    (run, fileno),
                    ad,
                    debug,
                )
            except Exception:
                logging.exception(
                    'Error in Run %d, file %d, ad %d, input muons location %s',
                    run, fileno, ad, muons_location
                )
                raise
    logging.info('[process] Finished Run %d, file %d', run, fileno)
    return

def run_compute_singles(run, site, processed_output_path, database):
    """Compute the singles (underlying uncorrelated) rate."""
    path_prefix = os.path.join(processed_output_path, f'EH{site}')
    ads = common.dets_for(site, run)
    update_db = True
    iteration = 0
    extra_cut = '1'
    for ad in ads:
        infile = os.path.join(
            path_prefix,
            f'hadded_ad{ad}/out_ad{ad}_{run}.root',
        )
        with common.get_db(database) as conn:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT COUNT(*)
                FROM singles_rates
                WHERE RunNo = ? AND DetNo = ?
                ''',
                (run, ad)
            )
            num_existing, = cursor.fetchone()
        if num_existing > 0:
            logging.debug(
                '[compute_singles] Found existing rate. Skipping. Run %d, AD %d',
                run, ad
            )
        else:
            compute_singles.main(
                infile,
                database,
                update_db,
                iteration,
                extra_cut,
            )
    return

def run_aggregate_stats(run, site, filenos, processed_output_path, database):
    """Aggregate the muon / livetime statistics for each run."""
    path_prefix = os.path.join(processed_output_path, f'EH{site}')
    input_template = os.path.join(
        path_prefix,
        'processed_ad{ad}/out_ad{ad}_' f'{run}' '_{fileno:>04}.json',
    )
    output_template = os.path.join(
        path_prefix,
        'hadded_ad{ad}/out_ad{ad}_' f'{run}.json',
    )
    ads = common.dets_for(site, run)
    for ad in ads:
        infiles = []
        for fileno in filenos:
            infiles.append(input_template.format(ad=ad, fileno=fileno))
        outfile = output_template.format(ad=ad)
        if os.path.isfile(outfile):
            logging.debug(
                '[aggregate_stats] Found existing file. Skipping. %s', outfile
            )
        else:
            aggregate_stats.main2(run, infiles, site, ad, outfile, database)
    return

def run_hadd(run, site, filenos, processed_output_path):
    """Combine the processed output files into 1 per run."""
    path_prefix = os.path.join(processed_output_path, f'EH{site}')
    input_template = os.path.join(
        path_prefix,
        'processed_ad{ad}/out_ad{ad}_' f'{run}' '_{fileno:>04}.root',
    )
    output_template = os.path.join(
        path_prefix,
        'hadded_ad{ad}/out_ad{ad}_' f'{run}.root',
    )
    ads = common.dets_for(site, run)
    for ad in ads:
        infiles = []
        for fileno in filenos:
            infiles.append(input_template.format(ad=ad, fileno=fileno))
        outfile = output_template.format(ad=ad)
        if os.path.isfile(outfile):
            logging.debug('[hadd] Found existing file. Skipping. %s', outfile)
        else:
            command = ['hadd', '-f', '-v', '0', outfile] + infiles
            subprocess.run(command, check=True)
    return

def _apply_extract_flashers(run, site, fileno, raw_output_path):
    """Extract flasher-identification variables for a single run/file."""
    logging.debug('[extract_flashers] Running on Run %d, file %d', run, fileno)
    subdir = str(run)[0:3] + '00'  # e.g. 21221 -> 21200
    output_location = os.path.join(raw_output_path, f'EH{site}', subdir, 'flashers')
    os.makedirs(output_location, exist_ok=True)
    ads = common.dets_for(site, run)
    events_files = [
        os.path.join(
            raw_output_path,
            f'EH{site}',
            subdir,
            f'events_ad{ad}_{run}_{fileno:>04}.root',
        )
        for ad in ads
    ]
    num_events = -1
    debug = False
    infile_location = job_producer.get_file_location(run, fileno)
    extract_flasher_vars.main(
        num_events,
        infile_location,
        output_location,
        (run, fileno, site),
        events_files,
        debug,
    )
    logging.debug('[extract_flashers] Finished Run %d, file %d', run, fileno)
    return

def run_aggregate_stats(run, site, filenos, processed_output_path, database):
    """Aggregate the muon / livetime statistics for each run."""
    path_prefix = os.path.join(processed_output_path, f'EH{site}')
    input_template = os.path.join(
        path_prefix,
        'processed_ad{ad}/out_ad{ad}_' f'{run}' '_{fileno:>04}.json',
    )
    output_template = os.path.join(
        path_prefix,
        'hadded_ad{ad}/out_ad{ad}_' f'{run}.json',
    )
    ads = common.dets_for(site, run)
    for ad in ads:
        infiles = []
        for fileno in filenos:
            infiles.append(input_template.format(ad=ad, fileno=fileno))
        outfile = output_template.format(ad=ad)
        if aggregate_stats.is_complete(run, ad, outfile, GENERAL_LABEL, database):
            logging.debug(
                '[aggregate_stats] Found existing file. Skipping. %s', outfile
            )
        else:
            for attempt in tenacity.Retrying(
                reraise=True,
                wait=tenacity.wait_random_exponential(max=60),
                retry=tenacity.retry_if_exception_type(sqlite3.Error),
                before_sleep=tenacity.before_sleep_log(logging, logging.DEBUG),
            ):
                with attempt:
                    aggregate_stats.main2(
                        run, infiles, site, ad, outfile, GENERAL_LABEL, database
                    )
    return

def main(database, datafile_base, hall_constraint, det_constraint, update_db):
    """Count double coincidences passing the delayed-energy and DT cuts per run/AD."""
    import ROOT
    with sqlite3.Connection(database) as conn:
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        cursor.execute('''SELECT RunNo, Hall FROM runs ORDER BY Hall, RunNo''')
        #cursor.execute('''SELECT runs.RunNo, Hall FROM runs
        #    LEFT JOIN num_coincidences_by_run
        #    ON runs.RunNo = num_coincidences_by_run.RunNo
        #    WHERE num_coincidences_by_run.RunNo IS NULL;''')
        runs = cursor.fetchall()
        cursor.execute(
            '''SELECT Hall, DetNo, Peak, Resolution FROM delayed_energy_fits'''
        )
        delayed_fits = cursor.fetchall()
    energy_bounds = {}
    for row in delayed_fits:
        mean = row['Peak']
        width = row['Resolution']
        upper = mean + 3 * width
        lower = mean - 3 * width
        energy_bounds[row['Hall'], row['DetNo']] = (lower, upper)
    coincidences_by_det = {halldet: 0 for halldet in common.all_ads}
    for run, hall in runs:
        if hall_constraint is not None and hall != hall_constraint:
            continue
        # Fetch AD numbers given EH and 6/8/7AD period
        dets = common.dets_for(hall, run)
        for det in dets:
            if det_constraint is not None and det != det_constraint:
                continue
            filename = os.path.join(
                datafile_base,
                f'EH{hall}',
                f'hadded_ad{det}',
                f'out_ad{det}_{run}.root',
            )
            delayed_bounds = energy_bounds[hall, det]
            datafile = ROOT.TFile(filename, 'READ')
            events = datafile.Get('ad_events')
            num_candidates = events.Draw(
                'energy[0]',
                'multiplicity == 2 && '
                f'energy[1] > {delayed_bounds[0]} && '
                f'energy[1] < {delayed_bounds[1]} && '
                f'({delayeds._NH_THU_DIST_TIME_CUT_STR})',
                'goff'
            )
            if update_db:
                with sqlite3.Connection(database) as conn:
                    cursor = conn.cursor()
                    cursor.execute(
                        '''INSERT OR REPLACE INTO num_coincidences_by_run
                        VALUES (?, ?, ?, ?, "Nominal rate-only 9/17/2020")''',
                        (run, det, json.dumps([num_candidates]),
                            json.dumps([1500, 12000]))
                    )
            else:
                print(f'Finished Run {run} EH{hall}-AD{det}: {num_candidates}')
            coincidences_by_det[hall, det] += num_candidates
    if update_db:
        with sqlite3.Connection(database) as conn:
            cursor = conn.cursor()
            for (hall, det), num in coincidences_by_det.items():
                if num > 0:
                    cursor.execute(
                        '''INSERT OR REPLACE INTO num_coincidences
                        VALUES (?, ?, ?, ?, "Nominal rate-only 9/17/2020")''',
                        (hall, det, json.dumps([num]), json.dumps([1500, 12000]))
                    )
    else:
        pprint.pprint(coincidences_by_det)

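# Worked example of the delayed-energy window computed above (hypothetical
# fit values, not the real calibration numbers): a fit with Peak = 2.30 MeV
# and Resolution = 0.14 MeV gives
#
#   lower = 2.30 - 3 * 0.14 = 1.88 MeV
#   upper = 2.30 + 3 * 0.14 = 2.72 MeV
#
# so delayed candidates must satisfy 1.88 < energy[1] < 2.72 in the Draw cut.
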
def run_subtract_accidentals(run, site, processed_output_path, database):
    """Subtract accidentals for each AD, for both the nominal and AdTime samples."""
    path_prefix = os.path.join(processed_output_path, f'EH{site}')
    ads = common.dets_for(site, run)
    override_acc_rate = None
    update_db = True
    for ad in ads:
        datafile = os.path.join(
            path_prefix,
            f'hadded_ad{ad}/out_ad{ad}_{run}.root',
        )
        accfile = os.path.join(
            path_prefix,
            f'acc_nominal_ad{ad}/acc_ad{ad}_{run}.root',
        )
        outfile = os.path.join(
            path_prefix,
            f'sub_nominal_ad{ad}/sub_ad{ad}_{run}.root',
        )
        if os.path.isfile(outfile):
            logging.debug(
                '[subtract_accidentals] Found existing file. Skipping. %s', outfile
            )
        else:
            label = NOMINAL_LABEL
            for attempt in tenacity.Retrying(
                reraise=True,
                wait=tenacity.wait_random_exponential(max=60),
                retry=tenacity.retry_if_exception_type(sqlite3.Error),
                before_sleep=tenacity.before_sleep_log(logging, logging.DEBUG),
            ):
                with attempt:
                    subtract_accidentals.main(
                        outfile,
                        datafile,
                        accfile,
                        database,
                        ad,
                        override_acc_rate,
                        label,
                        GENERAL_LABEL,
                        update_db,
                    )
        outfile = os.path.join(
            path_prefix,
            f'sub_using_adtime_ad{ad}/sub_ad{ad}_{run}.root',
        )
        accfile = os.path.join(
            path_prefix,
            f'acc_using_adtime_ad{ad}/acc_ad{ad}_{run}.root',
        )
        if os.path.isfile(outfile):
            logging.debug(
                '[subtract_accidentals] Found existing file. Skipping. %s', outfile
            )
        else:
            label = ADTIME_LABEL
            for attempt in tenacity.Retrying(
                reraise=True,
                wait=tenacity.wait_random_exponential(max=60),
                retry=tenacity.retry_if_exception_type(sqlite3.Error),
                before_sleep=tenacity.before_sleep_log(logging, logging.DEBUG),
            ):
                with attempt:
                    subtract_accidentals.main(
                        outfile,
                        datafile,
                        accfile,
                        database,
                        ad,
                        override_acc_rate,
                        label,  # A label containing 'adtime' will use AdTime positions
                        GENERAL_LABEL,
                        update_db,
                    )
    return