Esempio n. 1
0
def _run_state():
    """Run incrementally, driven by the timestamp bookmark in a state file.

    The bookmark decides which files get listed from the CSA ftp host. The
    work is keyed on the fully-qualified file names reported by that host,
    since those names are hard to reconstruct any other way.

    :return the value returned by rc.run_by_state.
    """
    name_builder = nbc.FileNameBuilder(NEOSSatName)
    config = mc.Config()
    config.get_executors()
    bookmark = mc.State(config.state_fqn).get_bookmark(NEOS_BOOKMARK)
    # the scrape is handed a POSIX timestamp, not a datetime
    start_timestamp = mc.increment_time(bookmark, 0).timestamp()
    todo_list, max_timestamp = scrape.build_todo(
        start_timestamp, config.working_directory, config.state_fqn
    )
    return rc.run_by_state(
        config=config,
        name_builder=name_builder,
        command_name=APPLICATION,
        bookmark_name=NEOS_BOOKMARK,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        # the largest timestamp found by the scrape bounds the run
        end_time=datetime.fromtimestamp(max_timestamp),
        chooser=None,
        source=data_source.IncrementalSource(todo_list),
        store_transfer=tc.FtpTransfer(config.data_source),
    )
Esempio n. 2
0
def _run_by_public():
    """Process observations that are public, but that have no artifacts
    representing the previews in CAOM, or no FITS file in ad.

    Invoked as gem_run_public. Time-boxing relies on the timestamps in a
    state.yml file. Run once per day, because data release timestamps carry
    a time of 00:00:00.000.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()
    external_metadata.init_global(config=config)
    builder = nbc.FileNameBuilder(gem_name.GemName)
    public_source = data_source.PublicIncremental(config)
    visitors = _define_meta_visitors(config)
    return rc.run_by_state(
        config=config,
        name_builder=builder,
        command_name=main_app.APPLICATION,
        bookmark_name=data_source.GEM_BOOKMARK,
        meta_visitors=visitors,
        data_visitors=DATA_VISITORS,
        end_time=None,
        source=public_source,
        chooser=None,
    )
Esempio n. 3
0
def _run_state():
    """Run incrementally: a timestamp in a state file controls which entries
    are processed.

    :return the value returned by rc.run_by_state.
    """
    # NOTE(review): this Config is loaded but not handed to run_by_state -
    # confirm whether that is intentional.
    config = mc.Config()
    config.get_executors()
    builder = nbc.FileNameBuilder(VliteName)
    return rc.run_by_state(
        name_builder=builder,
        command_name=APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
    )
Esempio n. 4
0
def _run():
    """
    Process the work listed in a todo file.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    return rc.run_by_todo(
        name_builder=nbc.FileNameBuilder(get_storage_name),
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
    )
Esempio n. 5
0
def _run_state():
    """Run incrementally: a timestamp in a state file controls which entries
    are processed.

    :return the value returned by rc.run_by_state.
    """
    config = mc.Config()
    config.get_executors()
    # NOTE(review): config reaches the data source only; it is not passed to
    # run_by_state - confirm whether that is intentional.
    time_box_source = dsc.QueryTimeBoxDataSource(config, preview_suffix='png')
    builder = nbc.FileNameBuilder(dao_name.DAOName)
    return rc.run_by_state(
        name_builder=builder,
        command_name=APPLICATION,
        bookmark_name=DAO_BOOKMARK,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        source=time_box_source,
    )
Esempio n. 6
0
def _run():
    """
    Process the work listed in a todo file.

    config and chooser are passed explicitly as None.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    return rc.run_by_todo(
        config=None,
        name_builder=nbc.FileNameBuilder(PHANGSName),
        command_name=APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        chooser=None,
    )
Esempio n. 7
0
def _run_state():
    """Run incrementally: a timestamp in a state file controls which entries
    are processed.

    config, bookmark_name, end_time, source and chooser are all passed
    explicitly as None.

    :return the value returned by rc.run_by_state.
    """
    return rc.run_by_state(
        config=None,
        name_builder=nbc.FileNameBuilder(PHANGSName),
        command_name=APPLICATION,
        bookmark_name=None,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        end_time=None,
        source=None,
        chooser=None,
    )
Esempio n. 8
0
def _run():
    """
    Process the work listed in a todo file, using an FTP transfer built from
    the configured data source for storing.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    name_builder = nbc.FileNameBuilder(NEOSSatName)
    config = mc.Config()
    config.get_executors()
    ftp_transfer = tc.FtpTransfer(config.data_source)
    return rc.run_by_todo(
        name_builder=name_builder,
        config=config,
        command_name=APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        store_transfer=ftp_transfer,
    )
Esempio n. 9
0
def _run_remote():
    """
    Process the work listed in a todo file. One client, created with the
    configured proxy certificate, is shared by the data source and the
    store transfer.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()
    builder = nbc.FileNameBuilder(GemProcName)
    client = Client(vospace_certfile=config.proxy_fqn)
    fits_transfer = tc.VoFitsTransfer(client)
    vault_source = dsc.VaultListDirDataSource(client, config)
    return rc.run_by_todo(
        config=config,
        name_builder=builder,
        command_name=APPLICATION,
        source=vault_source,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        store_transfer=fits_transfer,
    )
Esempio n. 10
0
def _run_by_incremental():
    """Run incremental processing for observations that are posted on the site
    archive.gemini.edu. TODO in the future this will depend on the incremental
    query endpoint.

    The run ends at the 'end_timestamp' value stored under the bookmark in
    the state file, or at the current time when that value is absent.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()
    state = mc.State(config.state_fqn)
    gem_bookmark = state.bookmarks.get(data_source.GEM_BOOKMARK)
    end_timestamp_s = gem_bookmark.get('end_timestamp', datetime.now())
    end_timestamp_dt = mc.make_time_tz(end_timestamp_s)
    logging.info(f'{main_app.APPLICATION} will end at {end_timestamp_s}')
    external_metadata.init_global(config=config)
    builder = nbc.FileNameBuilder(gem_name.GemName)
    source = data_source.IncrementalSource()
    visitors = _define_meta_visitors(config)
    result = rc.run_by_state(
        config=config,
        name_builder=builder,
        command_name=main_app.APPLICATION,
        bookmark_name=data_source.GEM_BOOKMARK,
        meta_visitors=visitors,
        data_visitors=DATA_VISITORS,
        end_time=end_timestamp_dt,
        source=source,
        chooser=None,
    )
    if not source.max_records_encountered:
        return result

    # hitting the record limit is reported as a failure, however the run
    # itself turned out
    banner = '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
    logging.warning(banner)
    logging.warning('Encountered maximum records!!')
    logging.warning(banner)
    result |= -1
    return result
Esempio n. 11
0
def test_run_state_v(client_mock, repo_mock):
    """Integration-style check of rc.run_by_state with STORE, INGEST and
    MODIFY task types.

    The test writes a state file and a proxy file into a working directory,
    runs one time-boxed pass, then checks the copy call made on the mocked
    client, the bookmark saved in the state file, the log/xml files and the
    counts in the report file. Everything under the working directory is
    removed afterwards, whether or not the assertions passed.

    :param client_mock: mock standing in for the storage client -
        presumably injected by a patch decorator outside this view.
    :param repo_mock: mock standing in for the metadata repository client -
        presumably injected by a patch decorator outside this view.
    """
    repo_mock.return_value.read.side_effect = tc.mock_read
    client_mock.get_node.side_effect = tc.mock_get_node
    # the test file is length 0
    client_mock.return_value.copy.return_value = 0

    test_wd = '/usr/src/app/caom2pipe/int_test'
    caom2pipe_bookmark = 'caom2_timestamp'
    test_config = mc.Config()
    test_config.working_directory = test_wd
    test_config.collection = 'TEST'
    test_config.interval = 10
    test_config.log_file_directory = f'{test_wd}/logs'
    test_config.failure_fqn = \
        f'{test_config.log_file_directory}/failure_log.txt'
    test_config.log_to_file = True
    test_config.logging_level = 'DEBUG'
    test_config.progress_file_name = 'progress.txt'
    test_config.proxy_file_name = f'{test_wd}/cadcproxy.pem'
    test_config.rejected_file_name = 'rejected.yml'
    test_config.rejected_directory = f'{test_wd}/rejected'
    test_config._report_fqn = f'{test_config.log_file_directory}/app_report.txt'
    test_config.resource_id = 'ivo://cadc.nrc.ca/sc2repo'
    test_config.retry_file_name = 'retries.txt'
    test_config.retry_fqn = \
        f'{test_config.log_file_directory}/{test_config.retry_file_name}'
    test_config.state_file_name = 'state.yml'
    test_config.success_fqn = \
        f'{test_config.log_file_directory}/success_log.txt'
    test_config.tap_id = 'ivo://cadc.nrc.ca/sc2tap'
    test_config.task_types = [
        mc.TaskType.STORE, mc.TaskType.INGEST, mc.TaskType.MODIFY
    ]
    test_config.features.use_file_names = True
    test_config.features.use_urls = False
    test_config.features.supports_latest_client = True
    test_config.use_local_files = False

    # the cleanup below removes the working directory, so re-create it
    os.makedirs(test_wd, exist_ok=True)

    # if this test is failing, did the docker container get
    # restarted recently?
    # first create /caom2pipe_test/1000003f.fits.fz,
    # then check that the test_start_time and test_end_time values
    # correspond somewhat to the timestamp on that file
    #
    # this timestamp is 15 minutes earlier than the timestamp of the
    # file in /caom2pipe_test
    #
    test_start_time = '2021-05-08 02:25:09'
    with open(test_config.state_fqn, 'w') as f:
        f.write('bookmarks:\n')
        f.write(f'  {caom2pipe_bookmark}:\n')
        f.write(f'    last_record: {test_start_time}\n')
    test_end_time = datetime(
        2021, 5, 8, 2, 41, 27, 965132, tzinfo=timezone.utc
    )

    with open(test_config.proxy_fqn, 'w') as f:
        f.write('test content\n')

    test_data_source = TestListDirTimeBoxDataSource()
    test_builder = nbc.FileNameBuilder(tc.TestStorageName)
    transferrer = TestTransfer()

    # label found in a 'Number of ...' report line -> expected count
    expected_counts = {
        'Inputs': '1',
        'Successes': '1',
        'Timeouts': '0',
        'Retries': '0',
        'Errors': '0',
        'Rejections': '0',
    }

    try:
        test_result = rc.run_by_state(
            bookmark_name=caom2pipe_bookmark,
            command_name='collection2caom2',
            config=test_config,
            end_time=test_end_time,
            name_builder=test_builder,
            source=test_data_source,
            modify_transfer=None,
            store_transfer=transferrer,
        )

        assert test_result is not None, 'expect a result'
        assert test_result == 0, 'expect success'
        assert client_mock.return_value.copy.called, 'expect put call'
        args, kwargs = client_mock.return_value.copy.call_args
        assert args[0] == 'ad:TEST/test_obs_id.fits.gz', 'wrong args[0]'
        assert (args[1] == '/usr/src/app/caom2pipe/int_test/test_obs_id/'
                'test_obs_id.fits'), 'wrong args[1]'

        # state file checking
        test_state = mc.State(test_config.state_fqn)
        assert test_state is not None, 'expect state content'
        test_checkpoint = test_state.get_bookmark(caom2pipe_bookmark)
        assert test_checkpoint == test_end_time, 'wrong bookmark'

        # success file testing
        assert os.path.exists(test_config.log_file_directory), 'log directory'
        assert os.path.exists(test_config.success_fqn), 'success fqn'
        assert os.path.exists(test_config.progress_fqn), 'progress fqn'
        log_file = f'{test_config.log_file_directory}/test_obs_id.log'
        # the directory listing only feeds the assertion messages
        actual = glob.glob(f'{test_config.log_file_directory}/**')
        assert os.path.exists(log_file), f'specific log file {actual}'
        xml_file = f'{test_config.log_file_directory}/test_obs_id.fits.xml'
        assert os.path.exists(xml_file), f'xml file {actual}'

        # reporting testing
        report_file = f'{test_config.log_file_directory}/app_report.txt'
        assert os.path.exists(report_file), f'report file {actual}'
        pass_through_test = False
        with open(report_file, 'r') as f:
            for line in f:
                pass_through_test = True
                if 'Number' not in line:
                    continue
                bits = line.split(':')
                label = next(
                    (key for key in expected_counts if key in bits[0]),
                    None,
                )
                # every 'Number' line must be one of the known counters
                assert label is not None, f'{line}'
                assert bits[1].strip() == expected_counts[label], \
                    f'wrong {label.lower()}'
        assert pass_through_test, 'found a report file and checked it'
    finally:
        # glob lists a directory ahead of its contents. A path always sorts
        # after its parent, so walking the reverse-sorted list removes
        # children first and rmdir finds each directory already empty.
        f_list = glob.glob(f'{test_wd}/**', recursive=True)
        for entry in sorted(f_list, reverse=True):
            try:
                if os.path.isdir(entry):
                    os.rmdir(entry)
                else:
                    os.unlink(entry)
            except OSError as e:
                # best-effort cleanup: report and keep going
                logging.error(f'failed to delete {e}')