def create_fake_fact_dir(path, runinfo):
    """
    Create a fake FACT directory tree filled with dummy files.

    Below path, a 'raw' and an 'aux' tree are populated. The file locations
    are computed with tree_path().

    Parameters
    ----------

    path                Directory in which the 'raw' and 'aux' trees are
                        created.

    runinfo             A pandas DataFrame with the columns fNight, fRunID
                        and fRunTypeKey. Each row describes one run.
    """
    fact_raw = join(path, 'raw')
    fact_aux = join(path, 'aux')
    current_night = -1
    # Read the values from the row yielded by iterrows() instead of looking
    # them up again by label: a label lookup returns a Series when the index
    # holds duplicates, and int() can not convert that.
    for _, row in runinfo.iterrows():
        night = int(np.round(row['fNight']))
        run = int(np.round(row['fRunID']))
        run_type = int(np.round(row['fRunTypeKey']))

        # The aux files are written whenever the night differs from the
        # night of the previous row.
        if current_night != night:
            current_night = night
            nightly_aux_dir = dirname(
                tree_path(night, run, prefix=fact_aux, suffix=''))
            os.makedirs(nightly_aux_dir, exist_ok=True, mode=0o755)
            for daux in DRIVE_AUX_FILE_KEYS:
                aux_path = join(nightly_aux_dir,
                                str(night) + '_' + daux + '.fits')
                with open(aux_path, 'w') as auxf:
                    auxf.write('I am a fake ' + daux + 'aux file.')

        if run_type == DRS_RUN_TYPE_KEY:
            drs_path = tree_path(night,
                                 run,
                                 prefix=fact_raw,
                                 suffix='.drs.fits.gz')
            os.makedirs(dirname(drs_path), exist_ok=True, mode=0o755)
            with open(drs_path, 'w') as drs_file:
                drs_file.write('I am a fake FACT drs file.')

        if run_type == OBSERVATION_RUN_TYPE_KEY:
            run_path = tree_path(night,
                                 run,
                                 prefix=fact_raw,
                                 suffix='.fits.fz')
            os.makedirs(dirname(run_path), exist_ok=True, mode=0o755)
            with open(run_path, 'w') as raw_file:
                # The fake raw file is a small json document, not a fits
                # file. The number of expected events is drawn at random.
                dummy_run = {
                    'fNight': night,
                    'fRunID': run,
                    'NumExpectedPhsEvents': np.random.randint(0, 25000),
                }
                raw_file.write(json.dumps(dummy_run))
Beispiel #2
0
def returnPathIfExists(rawfolder, night, runId):
    """
    Build the '.fits' path of a run with tree_path() and return it with the
    first extension, '.fz' and then '.gz', for which a file exists.

    Returns None when neither of the two candidates exists.
    """
    base_path = tree_path(night, runId, rawfolder, ".fits")

    for extension in (".fz", ".gz"):
        candidate = base_path + extension
        if os.path.exists(candidate):
            return candidate
    return None
Beispiel #3
0
def status(obs_dir=join('/gpfs0', 'fact', 'processing', 'public', 'phs',
                        'obs'),
           max_jobs_in_qsub=256,
           queue=QUEUE_NAME,
           runs_in_qstat=None):
    """
    Update runstatus.csv in obs_dir and submit event-counting jobs to qsub.

    While holding the lock '.lock.runstatus.csv', the function
    1) merges the results found in '.tmp_status' into the runstatus,
    2) fills StdOutSize and StdErrorSize where they are still NaN,
    3) fills PhsSize and submits a status worker job for runs which have a
       phs file but no PhsSize yet,
    4) writes the runstatus back to runstatus.csv.

    When the lock can not be acquired within one second, nothing is changed
    and only a log message is written.

    Parameters
    ----------

    obs_dir             The observations directory. It must contain
                        runstatus.csv. The std-out and std-error files are
                        searched in the sibling directory obs_dir + '.std'.

    max_jobs_in_qsub    Upper limit for the number of status jobs in qsub.
                        At most max_jobs_in_qsub - len(runs_in_qstat) new
                        jobs are submitted by one call.

    queue               The queue which is handed over to qsub().

    runs_in_qstat       The runs which already have a status job. When it
                        is None, it is queried using qstat() and
                        QSUB_OBS_STATUS_PREFIX.
    """
    jsonlog('Start')

    runstatus_path = join(obs_dir, 'runstatus.csv')
    runstatus_lock_path = join(obs_dir, '.lock.runstatus.csv')
    tmp_status_dir = join(obs_dir, '.tmp_status')
    obs_std_dir = obs_dir + '.std'

    assert exists(runstatus_path)
    os.makedirs(tmp_status_dir, exist_ok=True)

    try:
        runstatus_lock = FileLock(runstatus_lock_path)
        with runstatus_lock.acquire(timeout=1):
            jsonlog('Aquired lock for runstatus.csv')

            tmp_status = read_and_remove_tmp_status(tmp_status_dir)
            runstatus = rs.read(runstatus_path)
            runstatus = add_tmp_status_to_runstatus(tmp_status, runstatus)
            # Write right away, the tmp status files are already removed.
            ri.write(runstatus, runstatus_path)
            jsonlog('Add ' + str(len(tmp_status)) + ' new stati')

            std = [{
                'key': 'StdOutSize',
                'suffix': '.o'
            }, {
                'key': 'StdErrorSize',
                'suffix': '.e'
            }]

            # StdOutSize and StdErrorSize
            #----------------------------
            for s in std:
                jsonlog('Collect {:s}'.format(s['key']))
                no_std_yet = np.isnan(runstatus[s['key']])
                for i, run in runstatus[no_std_yet].iterrows():
                    fNight = int(np.round(run.fNight))
                    fRunID = int(np.round(run.fRunID))
                    std_path = tree_path(fNight,
                                         fRunID,
                                         prefix=obs_std_dir,
                                         suffix=s['suffix'])
                    # Stays NaN as long as the std file does not exist.
                    std_size = np.nan
                    if exists(std_path):
                        std_size = os.stat(std_path).st_size
                    runstatus.at[i, s['key']] = std_size

            # PhsSize and NumActualPhsEvents
            #-------------------------------
            jsonlog('Collect NumActualPhsEvents')
            runs_to_be_checked_now, runstatus = (
                runs_to_be_checked_now_and_incremented_runstatus(runstatus))

            jsonlog('{:d} runs are not checked again for NumActualPhsEvents'.
                    format(len(runstatus) - len(runs_to_be_checked_now)))

            if runs_in_qstat is None:
                runs_in_qstat = qstat(is_in_JB_name=QSUB_OBS_STATUS_PREFIX)

            jsonlog('{:d} satus-jobs for NumActualPhs are running now'.format(
                len(runs_in_qstat)))

            # Runs which already have a status job are not checked again.
            runs_to_be_checked_now = ri.remove_from_first_when_also_in_second(
                first=runs_to_be_checked_now,
                second=runs_in_qstat,
            )

            jsonlog('{:d} runs are checked now for NumActualPhsEvents'.format(
                len(runs_to_be_checked_now)))

            num_runs_for_qsub = max_jobs_in_qsub - len(runs_in_qstat)
            runstatus = runstatus.set_index(ri.ID_RUNINFO_KEYS)

            # i counts the jobs submitted to qsub by this call.
            i = 0
            for index, run in runs_to_be_checked_now.iterrows():
                # Stop as soon as the budget is used up. The comparison must
                # be '>=': with '>' one job more than allowed is submitted,
                # and one job is submitted even when the budget is zero.
                if i >= num_runs_for_qsub:
                    break

                fNight = int(np.round(run.fNight))
                fRunID = int(np.round(run.fRunID))

                phs_path = tree_path(fNight,
                                     fRunID,
                                     prefix=obs_dir,
                                     suffix='.phs.jsonl.gz')
                if np.isnan(run.PhsSize):
                    if exists(phs_path):
                        phs_size = os.stat(phs_path).st_size
                        runstatus.at[(fNight, fRunID), 'PhsSize'] = phs_size
                        # Submitt the intense task of event counting to qsub,
                        # and collect the output next time in
                        # phs/obs/.tmp_status
                        job = {
                            'name':
                            template_to_path(
                                fNight, fRunID,
                                QSUB_OBS_STATUS_PREFIX + '_{N}_{R}'),
                            'o_path':
                            None,
                            #tree_path(fNight, fRunID, tmp_status_dir, '.o'),
                            'e_path':
                            None,
                            #tree_path(fNight, fRunID, tmp_status_dir, '.e'),
                            '--phs_path':
                            phs_path,
                            '--status_path':
                            tree_path(fNight,
                                      fRunID,
                                      prefix=tmp_status_dir,
                                      suffix='.json'),
                        }
                        qsub(job=job,
                             exe_path=which('phs.isdc.obs.status.worker'),
                             queue=queue)
                        i += 1
                    else:
                        runstatus.at[(fNight, fRunID), 'PhsSize'] = np.nan
                        runstatus.at[(fNight, fRunID),
                                     'NumActualPhsEvents'] = np.nan
                runstatus.at[(fNight, fRunID),
                             'StatusIteration'] = run['StatusIteration'] + 1

            runstatus = runstatus.reset_index()
            # Shift the counter so that the smallest StatusIteration is zero.
            runstatus['StatusIteration'] -= runstatus['StatusIteration'].min()
            runstatus = set_is_ok(runstatus)
            ri.write(runstatus, runstatus_path)
            jsonlog('{:d} status requests submitted to qsub'.format(i))
    except Timeout:
        jsonlog('Could not aquire lock for ' + runstatus_path)
    jsonlog('End')
Beispiel #4
0
 def get_auxfile_path(self, night, auxfile):
     """
     Return the tree_path() of the aux file named auxfile for night.

     The path is built with self.aux_path as prefix, run=None, and the
     suffix '.<auxfile>.fits'.
     """
     aux_suffix = '.{}.fits'.format(auxfile)
     return tree_path(night,
                      run=None,
                      prefix=self.aux_path,
                      suffix=aux_suffix)
Beispiel #5
0
def jobs_and_directory_tree(
    runstatus,
    phs_dir='/gpfs0/fact/processing/public/phs',
    only_a_fraction=1.0,
    fact_raw_dir='/fact/raw',
    fact_drs_dir='/fact/raw',
    fact_aux_dir='/fact/aux',
    java_path='/home/guest/relleums/java8/jdk1.8.0_111',
    fact_tools_jar_path=_fact_tools_jar_path,
    fact_tools_xml_path=_fact_tools_xml_path,
    tmp_dir_base_name='phs_obs_',
):
    """
    Returns the tuple (jobs, p). jobs is a list of job dicts, one for each
    run which can be converted from a raw FACT run into the photon-stream
    representation. p is a dict with the paths of the output directory tree.

    A run is skipped, and a message is logged, when its raw file or its drs
    file does not exist, when no drs run is assigned to it (DrsRunID is NaN),
    or when is_aux_dir_pointing_complete() rejects its aux directory.

    Parameters
    ----------

    runstatus           A pandas DataFrame() of the FACT run-info-database
                        which is used as a reference for the runs to be
                        processed. The columns fNight, fRunID and DrsRunID
                        are read. If you want to process specific runs,
                        remove the other runs from runstatus.

    phs_dir             Output directory of the photon-stream. The
                        observations go to ./obs and their std-out and
                        std-error to ./obs.std

    only_a_fraction     A ratio between 0.0 and 1.0 to only process a
                        random fraction of the runs. Useful for debugging
                        over long periods of observations. (default 1.0)

    fact_raw_dir        The path to the FACT raw observation directory.

    fact_drs_dir        The path to the FACT drs calibration directory.

    fact_aux_dir        The path to the FACT auxiliary directory.

    java_path           The path to the JAVA run time environment to be used
                        for fact-tools.

    fact_tools_jar_path The path to the fact-tools java-jar executable file.

    fact_tools_xml_path The path to the fact-tools steering xml file.

    tmp_dir_base_name   Not used by this function. The '--tmp_dir_basename'
                        of every job is QSUB_OBS_PRODUCE_PREFIX.
                        (default 'phs_obs_')
    """

    phs_dir = abspath(phs_dir)
    fact_raw_dir = abspath(fact_raw_dir)
    fact_drs_dir = abspath(fact_drs_dir)
    fact_aux_dir = abspath(fact_aux_dir)
    java_path = abspath(java_path)
    fact_tools_jar_path = abspath(fact_tools_jar_path)
    fact_tools_xml_path = abspath(fact_tools_xml_path)

    obs_dir = join(phs_dir, 'obs')
    std_dir = join(phs_dir, 'obs.std')
    p = {
        'phs_dir': phs_dir,
        'obs_dir': obs_dir,
        'std_dir': std_dir,
        'fact_tools_jar_path': fact_tools_jar_path,
        'fact_tools_xml_path': fact_tools_xml_path,
        'phs_readme_path': join(phs_dir, 'README.md'),
        'phs_introduction_path': join(phs_dir, 'phs_introduction.pdf'),
    }

    # One uniform random number for each run decides if the run is selected.
    is_selected = np.random.uniform(
        size=runstatus.shape[0]) < only_a_fraction

    jobs = []
    for _, r in runstatus[is_selected].iterrows():
        night = int(np.round(r.fNight))
        runid = int(np.round(r.fRunID))

        job_name = fact.path.template_to_path(
            night, runid, QSUB_OBS_PRODUCE_PREFIX + '_{N}_{R}')

        raw_path = tree_path(night,
                             runid,
                             prefix=fact_raw_dir,
                             suffix='.fits.fz')
        if not exists(raw_path):
            jsonlog('{n:d}-{r:03d} raw-file does not exist.'.format(n=night,
                                                                    r=runid))
            continue

        if np.isnan(r.DrsRunID):
            jsonlog('{n:d}-{r:03d} no drs-run assigned.'.format(n=night,
                                                                r=runid))
            continue

        drs_path = tree_path(night,
                             int(np.round(r.DrsRunID)),
                             prefix=fact_drs_dir,
                             suffix='.drs.fits.gz')
        if not exists(drs_path):
            jsonlog('{n:d}-{r:03d} drs-file does not exist.'.format(n=night,
                                                                    r=runid))
            continue

        aux_dir = dirname(
            tree_path(night, runid, prefix=fact_aux_dir, suffix=''))
        if not is_aux_dir_pointing_complete(aux_dir):
            jsonlog('{n:d}-{r:03d} aux-dir is not complete.'.format(n=night,
                                                                    r=runid))
            continue

        # The keys are inserted in the same order as before.
        jobs.append({
            'name': job_name,
            '--raw_path': raw_path,
            '--drs_path': drs_path,
            '--aux_dir': aux_dir,
            '--out_basename': fact.path.template_to_path(
                night, runid, '{N}_{R}'),
            '--out_dir': dirname(
                tree_path(night, runid, prefix=obs_dir, suffix='')),
            '--tmp_dir_basename': QSUB_OBS_PRODUCE_PREFIX,
            '--java_path': java_path,
            '--fact_tools_jar_path': fact_tools_jar_path,
            '--fact_tools_xml_path': fact_tools_xml_path,
            'o_path': tree_path(night,
                                runid,
                                prefix=std_dir,
                                suffix='.o'),
            'e_path': tree_path(night,
                                runid,
                                prefix=std_dir,
                                suffix='.e'),
        })

    return jobs, p