def _run_by_public():
    """Process observations that are public but incompletely stored.

    Targets observations that are public, yet have no artifacts representing
    the previews in CAOM, or no FITS file in ad.

    Invoked as gem_run_public. The time-boxing comes from the timestamps in
    a state.yml file. Because data release timestamps carry a time of
    00:00:00.000, this should be called once/day.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    cfg = mc.Config()
    cfg.get_executors()
    external_metadata.init_global(config=cfg)

    file_name_builder = nbc.FileNameBuilder(gem_name.GemName)
    public_source = data_source.PublicIncremental(cfg)
    visitors = _define_meta_visitors(cfg)

    # No end time and no chooser are supplied for the public run.
    run_kwargs = {
        'config': cfg,
        'name_builder': file_name_builder,
        'command_name': main_app.APPLICATION,
        'bookmark_name': data_source.GEM_BOOKMARK,
        'meta_visitors': visitors,
        'data_visitors': DATA_VISITORS,
        'end_time': None,
        'source': public_source,
        'chooser': None,
    }
    return rc.run_by_state(**run_kwargs)
def test_builder(obs_metadata_mock, tap_client_mock):
    """Check the URIs produced by GemObsIDBuilder.build for a file name.

    For both values of ``supports_latest_client`` and for the INGEST and
    SCRAPE task types, the file, preview, and thumbnail URIs are compared
    against the expected scheme and path. With ``use_file_names`` turned
    off, ``build`` is expected to raise ``mc.CadcException``.
    """
    obs_metadata_mock.side_effect = gem_mocks.mock_get_obs_metadata

    config = mc.Config()
    config.working_directory = '/test_files'
    config.proxy_fqn = os.path.join(gem_mocks.TEST_DATA_DIR, 'test_proxy.pem')
    em.init_global(config=config)
    subject = builder.GemObsIDBuilder(config)

    f_name = 'S20050825S0143.fits'
    for support in (False, True):
        config.features.supports_latest_client = support
        config.features.use_file_names = True

        # The expectations only depend on the client support flag.
        if support:
            expected_path, expected_scheme = COLLECTION, V_SCHEME
        else:
            expected_path, expected_scheme = ARCHIVE, A_SCHEME

        for task_type in (mc.TaskType.INGEST, mc.TaskType.SCRAPE):
            config.task_types = [task_type]
            result = subject.build(f_name)
            assert result is not None, f'expect a result support {support}'

            file_uri = f'{SCHEME}:{expected_path}/{f_name}'
            assert result.file_uri == file_uri, 'wrong file uri'

            prev_uri = f'{SCHEME}:{expected_path}/{result.prev}'
            assert result.prev_uri == prev_uri, 'wrong preview uri'

            thumb_uri = f'{expected_scheme}:{expected_path}/{result.thumb}'
            assert result.thumb_uri == thumb_uri, 'wrong thumb uri'

        # Building from a file name must fail when file names are disabled.
        config.task_types = [mc.TaskType.INGEST]
        config.features.use_file_names = False
        with pytest.raises(mc.CadcException):
            subject.build(f_name)
def test_get_obs_metadata_not_at_gemini(tap_client_mock, session_mock):
    """Expect a CadcException when the session GET reports 'not found'.

    The mocked session answers with ``gem_mocks.mock_session_get_not_found``,
    and ``ext_md.get_obs_metadata`` must raise with a message that matches
    the 'Could not find JSON record' pattern.
    """
    session_mock.get.side_effect = gem_mocks.mock_session_get_not_found

    config = mc.Config()
    config.working_directory = gem_mocks.TEST_DATA_DIR
    config.proxy_file_name = 'test_proxy.pem'
    ext_md.init_global(config=config)

    # pytest treats ``match`` as a regular expression, not a glob.
    expected_message = f'Could not find JSON record *'
    with pytest.raises(mc.CadcException, match=expected_message):
        ext_md.get_obs_metadata('test_file_id')
def _run():
    """
    Run the pipeline from a todo file.

    The todo file lists file names, even though Gemini provides
    information about existing data referenced by observation ID.

    :return whatever rc.run_by_todo returns.
    """
    cfg = mc.Config()
    cfg.get_executors()
    external_metadata.init_global(config=cfg)
    obs_id_builder = builder.GemObsIDBuilder(cfg)
    visitors = _define_meta_visitors(cfg)
    return rc.run_by_todo(
        cfg,
        obs_id_builder,
        chooser=None,
        command_name=main_app.APPLICATION,
        meta_visitors=visitors,
    )
Exemple #5
0
def _run():
    """
    Run the pipeline for the work listed in a todo file.

    Names are built with builder.GemProcBuilder, and the module-level
    META_VISITORS and DATA_VISITORS are applied.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    cfg = mc.Config()
    cfg.get_executors()
    external_metadata.init_global(config=cfg)
    todo_kwargs = {
        'config': cfg,
        'name_builder': builder.GemProcBuilder(cfg),
        'command_name': main_app.APPLICATION,
        'meta_visitors': META_VISITORS,
        'data_visitors': DATA_VISITORS,
    }
    return rc.run_by_todo(**todo_kwargs)
def test_repair_provenance(gem_mock, tap_mock):
    """Check main_app._repair_provenance_value against ``test_subjects``.

    Each entry of ``test_subjects`` holds the expected file id at index 0 and
    the value to repair at index 1. ``os.getcwd`` is pointed at the test data
    directory for the duration of the test and restored afterwards.
    """
    copyfile(f'{gem_mocks.TEST_DATA_DIR}/from_paul.txt',
             '/app/data/from_paul.txt')
    saved_getcwd = os.getcwd
    os.getcwd = Mock(return_value=gem_mocks.TEST_DATA_DIR)
    try:
        gem_mock.side_effect = gem_mocks.mock_get_obs_metadata
        tap_mock.side_effect = gem_mocks.mock_query_tap

        # Reset the cached relationship object before re-creating it.
        external_metadata.set_ofr(None)
        external_metadata.get_gofr()

        config = mc.Config()
        config.get_executors()
        external_metadata.init_global(config=config)

        for entry in test_subjects:
            _, repaired_fid = main_app._repair_provenance_value(
                entry[1], 'test obs')
            assert repaired_fid is not None, f'failed lookup {entry}'
            assert repaired_fid == entry[0], f'error {entry[1]}'
    finally:
        os.getcwd = saved_getcwd
def test_preview_augmentation(data_client_mock, tap_mock):
    """Run preview_augmentation.visit on a processed Gemini observation.

    The observation is read from the expected XML for
    ``rnN20140428S0181_ronchi`` and the visit is expected to report two
    artifacts.

    ``os.getcwd`` is replaced with a mock for the duration of the test. All
    of the set-up runs inside the ``try``/``finally`` that restores it, so a
    failure while preparing the inputs cannot leak the mock into other tests.
    """
    getcwd_orig = os.getcwd
    os.getcwd = Mock(return_value=test_main_app.TEST_DATA_DIR)
    try:
        tap_mock.side_effect = _tap_mock
        data_client_mock.return_value.info.side_effect = (
            test_main_app._get_file_info)

        test_f_id = 'rnN20140428S0181_ronchi'
        test_f_name = f'{test_f_id}.fits'
        test_obs = mc.read_obs_from_file(
            f'{test_main_app.TEST_DATA_DIR}/{test_f_id}.expected.xml')
        test_rejected = mc.Rejected(REJECTED_FILE)
        test_config = mc.Config()
        test_config.get_executors()
        test_observable = mc.Observable(
            test_rejected, mc.Metrics(test_config))
        external_metadata.init_global(test_config)
        test_builder = builder.GemProcBuilder(test_config)
        test_fqn = os.path.join(test_main_app.TEST_DATA_DIR, test_f_name)
        test_storage_name = test_builder.build(test_fqn)
        kwargs = {
            'working_directory': TEST_FILES_DIR,
            'cadc_client': None,
            'stream': 'stream',
            'observable': test_observable,
            'storage_name': test_storage_name,
        }

        try:
            start_ts = datetime.utcnow().timestamp()
            test_result = preview_augmentation.visit(test_obs, **kwargs)
            end_ts = datetime.utcnow().timestamp()
            logging.error(
                f'{test_f_name} execution time {end_ts - start_ts}')
        except Exception as e:
            logging.error(e)
            logging.error(traceback.format_exc())
            # Raise explicitly: a bare ``assert False`` is removed under -O
            # and would drop the original cause.
            raise AssertionError(
                f'{test_f_name} preview augmentation failed: {e}') from e
    finally:
        os.getcwd = getcwd_orig

    assert test_result is not None, 'expect a result'
    assert test_result.get('artifacts') == 2, 'wrong result'
def test_caching_relationship(tap_mock, get_obs_mock):
    """Exercise the three lookup paths of CachingObsFileRelationship.

    Covers, in order: an entry found only at archive.gemini.edu, an entry
    found only at CADC, and an entry already present in the file. The length
    of ``name_list`` is checked after each lookup.
    """
    shutil.copyfile(f'{gem_mocks.TEST_DATA_DIR}/from_paul.txt',
                    '/app/data/from_paul.txt')
    saved_getcwd = os.getcwd
    os.getcwd = Mock(return_value=gem_mocks.TEST_DATA_DIR)
    try:
        config = mc.Config()
        config.get_executors()
        ext_md.init_global(config=config)

        baseline = 525
        tap_mock.side_effect = gem_mocks._query_mock_none
        get_obs_mock.side_effect = gem_mocks.mock_get_obs_metadata
        relationship = ext_md.CachingObsFileRelationship()
        relationship.tap_client = Mock()

        # Case 1: not in the file, not at CADC, but known to
        # archive.gemini.edu
        assert len(relationship.name_list) == baseline, 'bad initial length'
        gemini_obs_id = relationship.get_obs_id('N20200210S0077')
        assert gemini_obs_id is not None, 'expect a gemini result'
        assert gemini_obs_id == 'GN-CAL20200210-22-076', \
            'wrong gemini result'
        assert len(relationship.name_list) == baseline + 1, \
            'bad updated length from Gemini'

        # Case 2: not in the file, but known to CADC
        tap_mock.side_effect = gem_mocks.mock_query_tap
        cadc_obs_id = relationship.get_obs_id('x')
        assert cadc_obs_id is not None, 'expect a cadc result'
        assert cadc_obs_id == 'test_data_label', 'wrong cadc result'
        assert len(relationship.name_list) == baseline + 2, \
            'bad updated length from cadc'

        # Case 3: already in the file, so the list length does not change
        file_obs_id = relationship.get_obs_id('N20170616S0540')
        assert file_obs_id is not None, 'expect a file result'
        assert file_obs_id == 'GN-CAL20170616-11-022', 'wrong file result'
        assert len(relationship.name_list) == baseline + 2, \
            'bad updated length from file'
    finally:
        os.getcwd = saved_getcwd
Exemple #9
0
def _run_remote():
    """
    Run the pipeline for the work listed in a todo file, with a
    VaultListDirDataSource as the source and a VoFitsTransfer as the store
    transfer, both sharing one client built from the configured proxy.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    cfg = mc.Config()
    cfg.get_executors()
    external_metadata.init_global(config=cfg)
    proc_builder = builder.GemProcBuilder(cfg)

    vault_client = Client(vospace_certfile=cfg.proxy_fqn)
    fits_transfer = tc.VoFitsTransfer(vault_client)
    vault_source = dsc.VaultListDirDataSource(vault_client, cfg)

    todo_kwargs = {
        'config': cfg,
        'name_builder': proc_builder,
        'command_name': main_app.APPLICATION,
        'source': vault_source,
        'meta_visitors': META_VISITORS,
        'data_visitors': DATA_VISITORS,
        'store_transfer': fits_transfer,
    }
    return rc.run_by_todo(**todo_kwargs)
Exemple #10
0
def _run_by_incremental():
    """Run incremental processing for observations posted on the site
    archive.gemini.edu. TODO in the future this will depend on the incremental
    query endpoint.

    The end of the time box is the 'end_timestamp' of the GEM bookmark in the
    state file, or the current time when the bookmark has no such entry.
    When the source reports that the maximum number of records was
    encountered, a warning is logged and the status is forced to failure.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    cfg = mc.Config()
    cfg.get_executors()

    bookmark = mc.State(cfg.state_fqn).bookmarks.get(data_source.GEM_BOOKMARK)
    end_timestamp_s = bookmark.get('end_timestamp', datetime.now())
    end_timestamp_dt = mc.make_time_tz(end_timestamp_s)
    logging.info(f'{main_app.APPLICATION} will end at {end_timestamp_s}')

    external_metadata.init_global(config=cfg)
    file_name_builder = nbc.FileNameBuilder(gem_name.GemName)
    source = data_source.IncrementalSource()
    visitors = _define_meta_visitors(cfg)

    state_kwargs = {
        'config': cfg,
        'name_builder': file_name_builder,
        'command_name': main_app.APPLICATION,
        'bookmark_name': data_source.GEM_BOOKMARK,
        'meta_visitors': visitors,
        'data_visitors': DATA_VISITORS,
        'end_time': end_timestamp_dt,
        'source': source,
        'chooser': None,
    }
    result = rc.run_by_state(**state_kwargs)

    if not source.max_records_encountered:
        return result

    banner = '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
    for message in (banner, 'Encountered maximum records!!', banner):
        logging.warning(message)
    # OR-ing with -1 yields -1 whether the run reported 0 or -1.
    result |= -1
    return result
Exemple #11
0
def _run_single():
    """
    Run the processing for a single entry.

    The file name comes from sys.argv[1] and the proxy from sys.argv[2].
    When running in airflow, sys.argv[2] is written to a temporary file and
    that file's name is used as the proxy; otherwise sys.argv[2] is used as
    the proxy directly.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    cfg = mc.Config()
    cfg.get_executors()
    cfg.resource_id = 'ivo://cadc.nrc.ca/sc2repo'

    if not cfg.features.run_in_airflow:
        cfg.proxy = sys.argv[2]
    else:
        # Keep the handle in a local for the rest of the function: a
        # NamedTemporaryFile is removed when it is closed.
        proxy_file = tempfile.NamedTemporaryFile()
        mc.write_to_file(proxy_file.name, sys.argv[2])
        cfg.proxy = proxy_file.name
    cfg.stream = 'default'

    if not cfg.features.use_file_names:
        raise mc.CadcException('No code to handle running GEM by obs id.')
    storage_name = gem_name.GemName(file_name=sys.argv[1])

    external_metadata.init_global(config=cfg)
    visitors = _define_meta_visitors(cfg)
    return rc.run_single(
        cfg,
        storage_name,
        main_app.APPLICATION,
        visitors,
        DATA_VISITORS,
    )
def test_main_app_v(client_mock, tap_mock, gemini_client_mock, gemini_pi_mock,
                    svofps_mock, cadc_client_mock, get_file_info_mock,
                    test_name):
    """Run main_app.to_caom2 for ``test_name`` and compare with expectations.

    The command line is assembled in ``sys.argv``: when an ``.in.xml`` file
    exists next to the test file it is used as input, otherwise the
    collection and observation id are given. The observation that gets
    written is compared with the expected file, and any reported difference
    fails the test. ``os.getcwd`` points at the 'si_config' test directory
    while the test runs.
    """
    # client_mock is present because of the global in external_metadata
    cadc_client_mock.get_node.side_effect = gem_mocks.mock_get_node
    gemini_client_mock.side_effect = gem_mocks.mock_get_obs_metadata
    gemini_pi_mock.side_effect = gem_mocks.mock_get_pi_metadata
    svofps_mock.side_effect = gem_mocks.mock_get_votable
    tap_mock.side_effect = gem_mocks.mock_query_tap
    get_file_info_mock.return_value.get_file_info.side_effect = (
        gem_mocks.mock_get_file_info)

    saved_getcwd = os.getcwd
    si_config_dir = os.path.join(gem_mocks.TEST_DATA_DIR, 'si_config')
    os.getcwd = Mock(return_value=si_config_dir)

    try:
        config = mc.Config()
        config.get_executors()
        config.features.supports_latest_client = True

        em.set_ofr(None)
        em.init_global(config)

        # Refresh the application's copy of the cache file when its size
        # differs from the test data copy.
        cache_source = os.path.join(gem_mocks.TEST_DATA_DIR, 'from_paul.txt')
        cache_target = '/app/data/from_paul.txt'
        if os.stat(cache_source).st_size != os.stat(cache_target).st_size:
            copyfile(cache_source, cache_target)

        basename = os.path.basename(test_name)
        dirname = os.path.dirname(test_name)
        file_id = _get_file_id(basename)
        obs_id = _get_obs_id(file_id)
        # The product id is the same as the file id.
        lineage = _get_lineage(dirname, basename, config)
        input_file = f'{file_id}.in.xml'
        actual_fqn = _get_actual_file_name(dirname, file_id)
        local = _get_local(test_name)
        plugin = gem_mocks.PLUGIN

        # Start from a clean slate, so stale output cannot pass the test.
        if os.path.exists(actual_fqn):
            os.remove(actual_fqn)

        common_args = (
            f'{main_app.APPLICATION} --quiet --no_validate --local {local} '
            f'--plugin {plugin} --module {plugin}'
        )
        output_args = f'--out {actual_fqn} --lineage {lineage}'
        if os.path.exists(os.path.join(dirname, input_file)):
            source_args = f'--in {dirname}/{input_file}'
        else:
            source_args = f'--observation {main_app.COLLECTION} {obs_id}'
        sys.argv = f'{common_args} {source_args} {output_args}'.split()
        print(sys.argv)

        main_app.to_caom2()
        expected_fqn = _get_expected_file_name(dirname, file_id)

        differences = _new_si_compare_differences(
            actual_fqn, expected_fqn, config)
        if differences is not None:
            raise AssertionError(differences)
    finally:
        os.getcwd = saved_getcwd