def create_fake_fact_dir(path, runinfo):
    """
    Populate ``path`` with a fake FACT directory tree ('raw' and 'aux')
    holding dummy files for every run listed in ``runinfo``.

    Parameters
    ----------
    path : str
        Target directory; the 'raw' and 'aux' subtrees are created inside.
    runinfo : pandas.DataFrame
        Run table with columns fNight, fRunID and fRunTypeKey.
    """
    raw_prefix = join(path, 'raw')
    aux_prefix = join(path, 'aux')
    last_night = -1
    for _, row in runinfo.iterrows():
        night = int(np.round(row['fNight']))
        run = int(np.round(row['fRunID']))
        run_type = int(np.round(row['fRunTypeKey']))

        # First run of a new night: create the nightly aux dir and put
        # one fake file per known drive-aux key into it.
        if night != last_night:
            last_night = night
            nightly_aux_dir = dirname(
                tree_path(night, run, prefix=aux_prefix, suffix=''))
            os.makedirs(nightly_aux_dir, exist_ok=True, mode=0o755)
            for daux in DRIVE_AUX_FILE_KEYS:
                aux_file_path = join(
                    nightly_aux_dir, str(night) + '_' + daux + '.fits')
                with open(aux_file_path, 'w') as auxf:
                    auxf.write('I am a fake ' + daux + 'aux file.')

        if run_type == DRS_RUN_TYPE_KEY:
            drs_path = tree_path(
                night, run, prefix=raw_prefix, suffix='.drs.fits.gz')
            os.makedirs(dirname(drs_path), exist_ok=True, mode=0o755)
            with open(drs_path, 'w') as drs_file:
                drs_file.write('I am a fake FACT drs file.')

        if run_type == OBSERVATION_RUN_TYPE_KEY:
            run_path = tree_path(
                night, run, prefix=raw_prefix, suffix='.fits.fz')
            os.makedirs(dirname(run_path), exist_ok=True, mode=0o755)
            with open(run_path, 'w') as raw_file:
                dummy_run = {
                    'fNight': night,
                    'fRunID': run,
                    'NumExpectedPhsEvents': np.random.randint(0, 25000),
                }
                raw_file.write(json.dumps(dummy_run))
def returnPathIfExists(rawfolder, night, runId):
    """
    Build the raw-file path for the given run and return it with
    whichever compression suffix ('.fz' checked first, then '.gz')
    exists on disk. Returns None when neither variant is present.
    """
    base = tree_path(night, runId, rawfolder, ".fits")
    for extension in (".fz", ".gz"):
        candidate = base + extension
        if os.path.exists(candidate):
            return candidate
    return None
def status(obs_dir=join('/gpfs0', 'fact', 'processing', 'public', 'phs', 'obs'),
           max_jobs_in_qsub=256,
           queue=QUEUE_NAME,
           runs_in_qstat=None):
    """
    Update the runstatus.csv bookkeeping of the photon-stream
    observation directory.

    Under an exclusive file-lock on runstatus.csv this:
    1) merges the status snippets worker jobs dropped into .tmp_status,
    2) records the sizes of the qsub stdout/stderr files,
    3) records the sizes of the .phs.jsonl.gz output files and submits
       qsub worker jobs that count the actual photon-stream events
       (their results are collected from .tmp_status on the next call).

    Parameters
    ----------
    obs_dir : str
        Observation output directory containing runstatus.csv.
    max_jobs_in_qsub : int
        Upper bound on the number of concurrently running status jobs.
    queue : str
        Name of the qsub queue to submit to.
    runs_in_qstat : DataFrame or None
        Pre-fetched qstat listing; queried via qstat() when None.
    """
    jsonlog('Start')
    runstatus_path = join(obs_dir, 'runstatus.csv')
    runstatus_lock_path = join(obs_dir, '.lock.runstatus.csv')
    tmp_status_dir = join(obs_dir, '.tmp_status')
    obs_std_dir = obs_dir + '.std'
    assert exists(runstatus_path)
    os.makedirs(tmp_status_dir, exist_ok=True)
    try:
        runstatus_lock = FileLock(runstatus_lock_path)
        with runstatus_lock.acquire(timeout=1):
            jsonlog('Aquired lock for runstatus.csv')

            # Merge the status snippets written by finished worker jobs.
            tmp_status = read_and_remove_tmp_status(tmp_status_dir)
            runstatus = rs.read(runstatus_path)
            runstatus = add_tmp_status_to_runstatus(tmp_status, runstatus)
            ri.write(runstatus, runstatus_path)
            jsonlog('Add ' + str(len(tmp_status)) + ' new stati')

            # StdOutSize and StdErrorSize
            # ---------------------------
            std = [
                {'key': 'StdOutSize', 'suffix': '.o'},
                {'key': 'StdErrorSize', 'suffix': '.e'},
            ]
            for s in std:
                jsonlog('Collect {:s}'.format(s['key']))
                # Only stat files for runs whose size is still unknown.
                no_std_yet = np.isnan(runstatus[s['key']])
                for i, run in runstatus[no_std_yet].iterrows():
                    fNight = int(np.round(run.fNight))
                    fRunID = int(np.round(run.fRunID))
                    std_path = tree_path(
                        fNight, fRunID,
                        prefix=obs_std_dir,
                        suffix=s['suffix'])
                    std_size = np.nan
                    if exists(std_path):
                        std_size = os.stat(std_path).st_size
                    runstatus.at[i, s['key']] = std_size

            # PhsSize and NumActualPhsEvents
            # ------------------------------
            jsonlog('Collect NumActualPhsEvents')
            runs_to_be_checked_now, runstatus = (
                runs_to_be_checked_now_and_incremented_runstatus(runstatus))
            jsonlog(
                '{:d} runs are not checked again for NumActualPhsEvents'.format(
                    len(runstatus) - len(runs_to_be_checked_now)))
            if runs_in_qstat is None:
                runs_in_qstat = qstat(is_in_JB_name=QSUB_OBS_STATUS_PREFIX)
            jsonlog('{:d} satus-jobs for NumActualPhs are running now'.format(
                len(runs_in_qstat)))
            runs_to_be_checked_now = ri.remove_from_first_when_also_in_second(
                first=runs_to_be_checked_now,
                second=runs_in_qstat,
            )
            jsonlog('{:d} runs are checked now for NumActualPhsEvents'.format(
                len(runs_to_be_checked_now)))

            # Leave head-room for the jobs already in the queue.
            num_runs_for_qsub = max_jobs_in_qsub - len(runs_in_qstat)
            runstatus = runstatus.set_index(ri.ID_RUNINFO_KEYS)
            i = 0
            for index, run in runs_to_be_checked_now.iterrows():
                # BUGFIX: was 'i > num_runs_for_qsub', which submitted one
                # job too many and could exceed max_jobs_in_qsub in total.
                if i >= num_runs_for_qsub:
                    break
                fNight = int(np.round(run.fNight))
                fRunID = int(np.round(run.fRunID))
                phs_path = tree_path(
                    fNight, fRunID,
                    prefix=obs_dir,
                    suffix='.phs.jsonl.gz')
                if np.isnan(run.PhsSize):
                    if exists(phs_path):
                        phs_size = os.stat(phs_path).st_size
                        runstatus.at[(fNight, fRunID), 'PhsSize'] = phs_size
                        # Submit the intense task of event counting to qsub,
                        # and collect the output next time in
                        # phs/obs/.tmp_status. Worker std out/err is dropped.
                        job = {
                            'name': template_to_path(
                                fNight, fRunID,
                                QSUB_OBS_STATUS_PREFIX + '_{N}_{R}'),
                            'o_path': None,
                            'e_path': None,
                            '--phs_path': phs_path,
                            '--status_path': tree_path(
                                fNight, fRunID,
                                prefix=tmp_status_dir,
                                suffix='.json'),
                        }
                        qsub(
                            job=job,
                            exe_path=which('phs.isdc.obs.status.worker'),
                            queue=queue)
                        i += 1
                    else:
                        # No phs output yet: clear the fields and count
                        # another check attempt for this run.
                        runstatus.at[(fNight, fRunID), 'PhsSize'] = np.nan
                        runstatus.at[
                            (fNight, fRunID), 'NumActualPhsEvents'] = np.nan
                        runstatus.at[(fNight, fRunID), 'StatusIteration'] = (
                            run['StatusIteration'] + 1)
            runstatus = runstatus.reset_index()
            # Keep iteration counters small by shifting to a zero minimum.
            runstatus['StatusIteration'] -= runstatus['StatusIteration'].min()
            runstatus = set_is_ok(runstatus)
            ri.write(runstatus, runstatus_path)
            jsonlog('{:d} status requests submitted to qsub'.format(i))
    except Timeout:
        jsonlog('Could not aquire lock for ' + runstatus_path)
    jsonlog('End')
def get_auxfile_path(self, night, auxfile):
    """
    Return the path of the nightly aux file ``auxfile`` below
    ``self.aux_path``. Aux files are per night, so no run id is
    encoded in the path (run=None).
    """
    suffix = '.{}.fits'.format(auxfile)
    return tree_path(night, run=None, prefix=self.aux_path, suffix=suffix)
def jobs_and_directory_tree(
    runstatus,
    phs_dir='/gpfs0/fact/processing/public/phs',
    only_a_fraction=1.0,
    fact_raw_dir='/fact/raw',
    fact_drs_dir='/fact/raw',
    fact_aux_dir='/fact/aux',
    java_path='/home/guest/relleums/java8/jdk1.8.0_111',
    fact_tools_jar_path=_fact_tools_jar_path,
    fact_tools_xml_path=_fact_tools_xml_path,
    tmp_dir_base_name='phs_obs_',
):
    """
    Return ``(jobs, tree)``: a list of job dicts with all paths needed
    to convert each raw FACT observation run in ``runstatus`` into the
    photon-stream representation, and a dict ``tree`` describing the
    output directory layout.

    A run is skipped (with a jsonlog message) when its raw file is
    missing, no drs-run is assigned, the drs file is missing, or its
    aux directory is incomplete.

    Parameters
    ----------
    runstatus : pandas.DataFrame
        FACT run-info table used as the reference for the runs to be
        processed; remove rows to restrict processing to specific runs.
    phs_dir : str
        Photon-stream output base directory; './obs' and './obs.std'
        are placed inside it.
    only_a_fraction : float
        Process only a random fraction in [0.0, 1.0] of the runs.
        Useful for debugging over long periods of observations.
    fact_raw_dir, fact_drs_dir, fact_aux_dir : str
        FACT raw, drs-calibration and auxiliary input directories.
    java_path : str
        JAVA runtime environment used for fact-tools.
    fact_tools_jar_path, fact_tools_xml_path : str
        fact-tools executable jar and steering xml.
    tmp_dir_base_name : str
        NOTE(review): currently unused; the job dicts carry
        QSUB_OBS_PRODUCE_PREFIX as '--tmp_dir_basename' instead.
    """
    phs_dir = abspath(phs_dir)
    fact_raw_dir = abspath(fact_raw_dir)
    fact_drs_dir = abspath(fact_drs_dir)
    fact_aux_dir = abspath(fact_aux_dir)
    java_path = abspath(java_path)
    fact_tools_jar_path = abspath(fact_tools_jar_path)
    fact_tools_xml_path = abspath(fact_tools_xml_path)

    # Directory layout shared by all jobs (insertion order preserved).
    tree = {'phs_dir': phs_dir}
    tree['obs_dir'] = join(tree['phs_dir'], 'obs')
    tree['std_dir'] = join(tree['phs_dir'], 'obs.std')
    tree['fact_tools_jar_path'] = fact_tools_jar_path
    tree['fact_tools_xml_path'] = fact_tools_xml_path
    tree['phs_readme_path'] = join(tree['phs_dir'], 'README.md')
    tree['phs_introduction_path'] = join(
        tree['phs_dir'], 'phs_introduction.pdf')

    # Random boolean mask selecting only_a_fraction of the runs.
    pick = np.random.uniform(size=runstatus.shape[0]) < only_a_fraction

    jobs = []
    for _, run in runstatus[pick].iterrows():
        night = int(np.round(run.fNight))
        runid = int(np.round(run.fRunID))

        job = {
            'name': fact.path.template_to_path(
                night, runid, QSUB_OBS_PRODUCE_PREFIX + '_{N}_{R}'),
            '--raw_path': tree_path(
                night, runid, prefix=fact_raw_dir, suffix='.fits.fz'),
        }
        if not exists(job['--raw_path']):
            jsonlog('{n:d}-{r:03d} raw-file does not exist.'.format(
                n=night, r=runid))
            continue

        if np.isnan(run.DrsRunID):
            jsonlog('{n:d}-{r:03d} no drs-run assigned.'.format(
                n=night, r=runid))
            continue
        drs_runid = int(np.round(run.DrsRunID))
        job['--drs_path'] = tree_path(
            night, drs_runid, prefix=fact_drs_dir, suffix='.drs.fits.gz')
        if not exists(job['--drs_path']):
            jsonlog('{n:d}-{r:03d} drs-file does not exist.'.format(
                n=night, r=runid))
            continue

        aux_dir = dirname(
            tree_path(night, runid, prefix=fact_aux_dir, suffix=''))
        if not is_aux_dir_pointing_complete(aux_dir):
            jsonlog('{n:d}-{r:03d} aux-dir is not complete.'.format(
                n=night, r=runid))
            continue
        job['--aux_dir'] = aux_dir

        job['--out_basename'] = fact.path.template_to_path(
            night, runid, '{N}_{R}')
        job['--out_dir'] = dirname(
            tree_path(night, runid, prefix=tree['obs_dir'], suffix=''))
        job['--tmp_dir_basename'] = QSUB_OBS_PRODUCE_PREFIX
        job['--java_path'] = java_path
        job['--fact_tools_jar_path'] = fact_tools_jar_path
        job['--fact_tools_xml_path'] = fact_tools_xml_path
        job['o_path'] = tree_path(
            night, runid, prefix=tree['std_dir'], suffix='.o')
        job['e_path'] = tree_path(
            night, runid, prefix=tree['std_dir'], suffix='.e')
        jobs.append(job)

    return jobs, tree