Example #1
def _run_by_state():
    """Uses a state file with a timestamp to control which quicklook
    files will be retrieved from VLASS.

    Ingestion is based on URLs, because a URL that contains the phrase
    'QA_REJECTED' is the only way to tell if the attribute 'requirements'
    should be set to 'fail', or not.
    """
    config = mc.Config()
    config.get_executors()
    state = mc.State(config.state_fqn)
    # the bookmark may be stored as a string or a datetime, depending on the
    # execution environment; increment_time returns a datetime either way
    start_time = mc.increment_time(state.get_bookmark(VLASS_BOOKMARK), 0)
    todo_list, max_date = scrape.build_file_url_list(start_time)
    if len(todo_list) > 0:
        state = mc.State(config.state_fqn)
        work.init_web_log(state, config)
    # still make all subsequent calls if len == 0, for consistent reporting
    source = data_source.NraoPage(todo_list)
    name_builder = builder.VlassInstanceBuilder(config)
    return rc.run_by_state(config=config,
                           command_name=sn.APPLICATION,
                           bookmark_name=VLASS_BOOKMARK,
                           meta_visitors=META_VISITORS,
                           data_visitors=DATA_VISITORS,
                           name_builder=name_builder,
                           source=source,
                           end_time=max_date,
                           store_transfer=tc.HttpTransfer())
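
The 'QA_REJECTED' reasoning in the docstring above never appears in this snippet itself; a minimal sketch of the check it describes, using a hypothetical helper name and returning the bare 'fail' string the docstring mentions (not the project's actual API):

def _requirements_from_url(url):
    # hypothetical helper, not part of the pipeline: a quicklook URL that
    # contains 'QA_REJECTED' is the only signal that the 'requirements'
    # attribute should be set to 'fail'
    return 'fail' if 'QA_REJECTED' in url else None
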
def _setup(test_input, local=False):
    # make sure the working directory TEST_EXEC_DIR has the correct things
    # in it
    if test_input.config_file is not None:
        config_file_target = TEST_EXEC_DIR / 'config.yml'
        shutil.copy(test_input.config_file, config_file_target)

    test_start_time = None
    state_file_target = None
    if test_input.state_file is not None:
        state_file_target = TEST_EXEC_DIR / 'state.yml'
        shutil.copy(test_input.state_file, state_file_target)
        # make sure running from the state file won't take decades to execute
        test_start_time = (
            datetime.now(tz=dateutil.tz.UTC) - timedelta(minutes=5))
        state = mc.State(state_file_target.as_posix())
        state.save_state(test_input.bookmark, test_start_time)

    if test_input.cache_file is not None:
        cache_file_target = TEST_EXEC_DIR / 'cache.yml'
        shutil.copy(test_input.cache_file, cache_file_target)

    with open(TEST_EXEC_DIR / 'cadcproxy.pem', 'w') as f:
        f.write('test content')

    if test_input.test_file is not None and local:
        shutil.copy(test_input.test_file,
                    TEST_DATA_DIR / test_input.test_file.name)

    return test_start_time, state_file_target
def test_run_single(do_mock, test_config):
    _clean_up_log_files(test_config)
    progress_file = os.path.join(tc.TEST_DATA_DIR, 'progress.txt')

    test_config.features.expects_retry = False
    test_config.progress_fqn = progress_file

    test_config.state_fqn = STATE_FILE
    test_config.interval = 5
    test_state = mc.State(test_config.state_fqn)
    test_state.save_state('gemini_timestamp', datetime.utcnow())

    do_mock.return_value = -1

    test_url = 'http://localhost/test_url.fits'
    test_storage_name = mc.StorageName(url=test_url)

    test_result = rc.run_single(
        test_config,
        test_storage_name,
        'test_command',
        meta_visitors=None,
        data_visitors=None,
    )
    assert test_result is not None, 'expect a result'
    assert test_result == -1, 'wrong result'

    assert do_mock.called, 'do mock not called'
    assert do_mock.call_count == 1, do_mock.call_count
    args, kwargs = do_mock.call_args
    test_storage = args[0]
    assert isinstance(test_storage, mc.StorageName), type(test_storage)
    assert test_storage.obs_id is None, 'wrong obs id'
    assert test_storage.url == test_url, test_storage.url
Example #4
def _run_state():
    """Uses a state file with a timestamp to control which quicklook
    files will be retrieved from VLASS.

    Ingestion is based on URLs, because a URL that contains the phrase
    'QA_REJECTED' is the only way to tell if the attribute 'requirements'
    should be set to 'fail', or not.
    """
    config = mc.Config()
    config.get_executors()
    state = mc.State(config.state_fqn)
    # the bookmark may be stored as a string or a datetime, depending on the
    # execution environment; increment_time returns a datetime either way
    start_time = mc.increment_time(state.get_bookmark(VLASS_BOOKMARK), 0)
    todo_list, max_date = scrape.build_file_url_list(start_time)
    source = data_source.NraoPage(todo_list)
    name_builder = nbc.EntryBuilder(storage_name.VlassName)
    storage_name.set_use_storage_inventory(
        config.features.supports_latest_client)
    return rc.run_by_state(
        config=config,
        bookmark_name=VLASS_BOOKMARK,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        name_builder=name_builder,
        source=source,
        end_time=max_date,
        store_transfer=tc.HttpTransfer(),
    )
Example #5
def build_todo(start_date, sidecar_dir, state_fqn):
    """
    Build a list of file names where the modification time for the file
    is >= start_date.

    :param start_date timestamp in seconds since the epoch
    :param sidecar_dir where to cache ftp directory listing progress
    :param state_fqn where to find the configurable list of sub-directories,
        for bookmarked queries
    :return a dict, where keys are file names on the ftp host server, and
        values are timestamps, plus the max timestamp from the ftp host
        server for file addition
    """
    logging.debug(f'Begin build_todo with date {start_date}')
    temp = {}
    state = mc.State(state_fqn)
    sub_dirs = state.get_context(NEOS_CONTEXT)
    # query the sub-directories of the root directory, because the timestamps
    # do not bubble up for modifications, only for additions
    for subdir in sub_dirs:
        query_dir = os.path.join(NEOS_DIR, str(subdir))
        temp.update(
            _append_todo(start_date, sidecar_dir, ASC_FTP_SITE, query_dir, {},
                         {}))
    todo_list, max_date = _remove_dir_names(temp, start_date)
    logging.info(
        f'End build_todo with {len(todo_list)} records, date {max_date}.')
    return todo_list, max_date
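
_remove_dir_names is not shown here; a hedged reconstruction of what the docstring implies it does (keep file entries modified at or after start_date, drop directory names, and report the newest timestamp), purely illustrative rather than the real helper:

def _remove_dir_names(temp, start_date):
    # illustrative guess at the real helper: keep only file entries
    # (assumed here to end in '.fits') whose timestamp is >= start_date,
    # and track the newest timestamp seen
    todo_list = {}
    max_date = start_date
    for entry, timestamp in temp.items():
        if entry.endswith('.fits') and timestamp >= start_date:
            todo_list[entry] = timestamp
            max_date = max(max_date, timestamp)
    return todo_list, max_date
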
Example #6
def _run_state():
    """Uses a state file with a timestamp to control which files will be
    retrieved from the CSA ftp host.

    Ingestion is based on fully-qualified file names from the CSA ftp host,
    because those are difficult to reproduce otherwise.
    """
    builder = nbc.FileNameBuilder(NEOSSatName)
    config = mc.Config()
    config.get_executors()
    state = mc.State(config.state_fqn)
    start_time = state.get_bookmark(NEOS_BOOKMARK)
    temp = mc.increment_time(start_time, 0).timestamp()
    todo_list, max_timestamp = scrape.build_todo(
        temp, config.working_directory, config.state_fqn)
    max_date = datetime.fromtimestamp(max_timestamp)
    incremental_source = data_source.IncrementalSource(todo_list)
    transferrer = tc.FtpTransfer(config.data_source)
    return rc.run_by_state(config=config, name_builder=builder,
                           command_name=APPLICATION,
                           bookmark_name=NEOS_BOOKMARK,
                           meta_visitors=META_VISITORS,
                           data_visitors=DATA_VISITORS,
                           end_time=max_date, chooser=None,
                           source=incremental_source,
                           store_transfer=transferrer)
Example #7
def test_aug_visit_works(query_endpoint_mock, get_mock):
    get_mock.return_value.__enter__.return_value.raw = test_scrape.WL_INDEX
    query_endpoint_mock.side_effect = test_scrape._query_endpoint
    test_config = mc.Config()
    test_config.get_executors()
    test_state = mc.State(test_config.state_fqn)
    work.init_web_log(test_state, test_config)
    test_name = sn.VlassName(
        file_name='VLASS1.2.ql.T07t13.J081828-133000.10.2048.v1.I.iter1.'
        'image.pbcor.tt0.subim.fits',
        entry='VLASS1.2.ql.T07t13.J081828-133000.10.2048.v1.I.iter1.'
        'image.pbcor.tt0.subim.fits')
    test_file = os.path.join(TEST_DATA_DIR, f'{test_name.obs_id}.xml')
    test_obs = mc.read_obs_from_file(test_file)
    assert test_obs is not None, 'unexpected None'

    data_dir = os.path.join(THIS_DIR, '../../data')
    kwargs = {'working_directory': data_dir, 'cadc_client': Mock()}
    test_result = time_bounds_augmentation.visit(test_obs, **kwargs)
    assert test_obs is not None, 'unexpected modification'
    assert test_result is not None, 'should have a result status'
    assert len(test_result) == 1, 'modified artifacts count'
    assert test_result['artifacts'] == 2, 'artifact count'
    plane = test_obs.planes[test_name.product_id]
    chunk = plane.artifacts[test_name.file_uri].parts['0'].chunks[0]
    assert chunk is not None
    assert chunk.time is not None, 'no time information'
    assert chunk.time.axis is not None, 'no axis information'
    assert chunk.time.axis.bounds is not None, 'no bounds information'
    assert len(chunk.time.axis.bounds.samples) == 1, \
        'wrong amount of bounds info'
    assert chunk.time.exposure == 234.0, 'wrong exposure value'
def test_state():
    if os.path.exists(TEST_STATE_FILE):
        os.unlink(TEST_STATE_FILE)
    with open(TEST_STATE_FILE, 'w') as f:
        f.write('bookmarks:\n'
                '  gemini_timestamp:\n'
                '    last_record: 2019-07-23 20:52:03.524443\n'
                'context:\n'
                '  neossat_context:\n'
                '    - NEOSS\n'
                '    - 2020\n')

    with pytest.raises(mc.CadcException):
        test_subject = mc.State('nonexistent')

    test_subject = mc.State(TEST_STATE_FILE)
    assert test_subject is not None, 'expect result'
    test_result = test_subject.get_bookmark('gemini_timestamp')
    assert test_result is not None, 'expect content'
    assert isinstance(test_result, datetime)

    test_context = test_subject.get_context('neossat_context')
    assert test_context is not None, 'expect a result'
    assert isinstance(test_context, list), 'wrong return type'
    assert len(test_context) == 2, 'wrong return length'
    assert 'NEOSS' in test_context, 'wrong content'
    test_context.append('2019')

    test_subject.save_state('gemini_timestamp', test_result + timedelta(3))
    test_subject.save_state('neossat_context', test_context)

    with open(TEST_STATE_FILE, 'r') as f:
        text = f.readlines()
        compare = ''.join(ii for ii in text)
        assert '2019-07-23' not in compare, 'content not updated'
        assert '2019' in compare, 'context content not updated'
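
For orientation, the behaviour this test exercises can be sketched as a small stand-in class: load the YAML, expose bookmarks and context, and persist updates. This is illustrative only, not the caom2pipe implementation (which, among other differences, raises mc.CadcException for a missing file):

import yaml


class SketchState:
    """Illustrative stand-in for mc.State, based only on the test above."""

    def __init__(self, fqn):
        self._fqn = fqn
        with open(fqn) as f:  # the real class wraps failures in CadcException
            self._content = yaml.safe_load(f)

    def get_bookmark(self, key):
        # bookmark entries carry a 'last_record' datetime
        return self._content['bookmarks'][key]['last_record']

    def get_context(self, key):
        # context entries are plain lists
        return self._content['context'][key]

    def save_state(self, key, value):
        # update the bookmark or context entry the key belongs to, then persist
        if key in self._content.get('bookmarks', {}):
            self._content['bookmarks'][key]['last_record'] = value
        else:
            self._content.setdefault('context', {})[key] = value
        with open(self._fqn, 'w') as f:
            yaml.dump(self._content, f)
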
Example #9
def _run():
    """Run the processing for observations using a todo file to identify the
    work to be done, but with the support of a Builder, so that StorageName
    instances can be provided. This is important here, because the
    instrument name needs to be provided to the StorageName constructor.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()
    state = mc.State(config.state_fqn)
    work.init_web_log(state, config)
    name_builder = builder.VlassInstanceBuilder(config)
    return rc.run_by_todo(config=config,
                          name_builder=name_builder,
                          command_name=sn.APPLICATION,
                          meta_visitors=META_VISITORS,
                          data_visitors=DATA_VISITORS,
                          store_transfer=tc.HttpTransfer())
Example #10
def _run_state():
    """Run incremental processing for observations that are posted on the site
    archive.gemini.edu. TODO in the future this will depend on the incremental
    query endpoint.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    (
        clients,
        config,
        metadata_reader,
        meta_visitors,
        name_builder,
    ) = _common_init()
    state = mc.State(config.state_fqn)
    end_timestamp_s = state.bookmarks.get(data_source.GEM_BOOKMARK).get(
        'end_timestamp', datetime.now())
    end_timestamp_dt = mc.make_time_tz(end_timestamp_s)
    logging.info(f'{main_app.APPLICATION} will end at {end_timestamp_s}')
    incremental_source = data_source.IncrementalSource(metadata_reader)
    result = rc.run_by_state(
        config=config,
        name_builder=name_builder,
        bookmark_name=data_source.GEM_BOOKMARK,
        meta_visitors=meta_visitors,
        data_visitors=DATA_VISITORS,
        end_time=end_timestamp_dt,
        source=incremental_source,
        clients=clients,
        metadata_reader=metadata_reader,
    )
    if incremental_source.max_records_encountered:
        logging.warning('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        logging.warning('Encountered maximum records!!')
        logging.warning('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        result |= -1
    return result
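
The end_timestamp lookup above implies a bookmark entry can carry more than last_record. A hedged example of writing such a stanza, mirroring the state-file format used elsewhere in this section (the end_timestamp key and the dates are illustrative, inferred from the code above):

with open(config.state_fqn, 'w') as f:
    f.write('bookmarks:\n')
    f.write(f'  {data_source.GEM_BOOKMARK}:\n')
    f.write('    last_record: 2021-01-01 00:00:00\n')
    f.write('    end_timestamp: 2021-01-02 00:00:00\n')
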
Example #11
def _run_by_incremental():
    """Run incremental processing for observations that are posted on the site
    archive.gemini.edu. TODO in the future this will depend on the incremental
    query endpoint.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()
    state = mc.State(config.state_fqn)
    end_timestamp_s = state.bookmarks.get(data_source.GEM_BOOKMARK).get(
        'end_timestamp', datetime.now())
    end_timestamp_dt = mc.make_time_tz(end_timestamp_s)
    logging.info(f'{main_app.APPLICATION} will end at {end_timestamp_s}')
    external_metadata.init_global(config=config)
    name_builder = nbc.FileNameBuilder(gem_name.GemName)
    incremental_source = data_source.IncrementalSource()
    meta_visitors = _define_meta_visitors(config)
    result = rc.run_by_state(
        config=config,
        name_builder=name_builder,
        command_name=main_app.APPLICATION,
        bookmark_name=data_source.GEM_BOOKMARK,
        meta_visitors=meta_visitors,
        data_visitors=DATA_VISITORS,
        end_time=end_timestamp_dt,
        source=incremental_source,
        chooser=None,
    )
    if incremental_source.max_records_encountered:
        logging.warning('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        logging.warning('Encountered maximum records!!')
        logging.warning('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        result |= -1
    return result
def test_neoss_state(
    data_mock,
    csa_mock,
    caom_mock,
    transfer_mock,
    local_header_mock,
    test_input_name,
):
    if 'NEOSS' not in test_input_name:
        return

    test_input = INPUTS.get(test_input_name)

    # make sure the working directory TEST_EXEC_DIR has nothing in it
    _cleanup()

    # make sure the working directory TEST_EXEC_DIR has the correct things
    # in it
    config_file_target = TEST_EXEC_DIR / 'config.yml'
    shutil.copy(test_input.config_file, config_file_target)
    state_file_target = TEST_EXEC_DIR / 'state.yml'
    shutil.copy(test_input.state_file, state_file_target)

    with open(TEST_EXEC_DIR / 'cadcproxy.pem', 'w') as f:
        f.write('test content')

    # make sure running from the state file won't take decades to execute
    test_start_time = datetime.now(tz=dateutil.tz.UTC) - timedelta(minutes=5)
    state = mc.State(state_file_target.as_posix())
    state.save_state(test_input.bookmark, test_start_time)

    def _csa_mock(start_date, ign1, ign2, ign3, ign4, ign5):
        return {
            '/users/OpenData_DonneesOuvertes/pub/NEOSSAT/ASTRO/2019/256/'
            'NEOS_SCI_2019213215700.fits':
            [False, start_date + timedelta(minutes=5).total_seconds()],
        }

    csa_mock.side_effect = _csa_mock

    def _transfer_get(src, dst):
        assert (src ==
                '/users/OpenData_DonneesOuvertes/pub/NEOSSAT/ASTRO/2019/256/'
                'NEOS_SCI_2019213215700.fits'), 'wrong source'
        assert (
            dst == '/usr/src/app/integration_test/mock_test/data/execution/'
            '2019213215700/NEOS_SCI_2019213215700.fits'), 'wrong dst'
        with open(dst, 'w') as f2:
            f2.write('test content')

    transfer_mock.return_value.get.side_effect = _transfer_get

    caom_mock.return_value.metadata_client.read.side_effect = [
        None,
        SimpleObservation(
            'obs_id',
            'NEOSSAT',
            Algorithm(name='exposure'),
        ),
    ]

    def _local_header(ignore):
        x = """SIMPLE  =                    T / Written by IDL:  Fri Oct  6 01:48:35 2017
BITPIX  =                  -32 / Bits per pixel
NAXIS   =                    2 / Number of dimensions
NAXIS1  =                   14 /
NAXIS2  =                   24 /
RA      = '22:53:27.5'
DEC     = '-30:04:37.6'
MODE    = '14 - FINE_SETTLE'
OBJECT  = '2020-P4-C'
EXPOSURE=             128.0311
DATATYPE= 'REDUC   '           /Data type, SCIENCE/CALIB/REJECT/FOCUS/TEST
END
"""
        delim = '\nEND'
        extensions = \
            [e + delim for e in x.split(delim) if e.strip()]
        headers = [fits.Header.fromstring(e, sep='\n') for e in extensions]
        return headers

    local_header_mock.side_effect = _local_header

    def _info(uri):
        return FileInfo(
            id=uri,
            md5sum='abc',
            size=42,
        )

    data_mock.return_value.info.side_effect = _info

    # import the module for execution
    sys.path.append(test_input.test_path)
    test_module = import_module('composable')

    getcwd_orig = os.getcwd
    os.getcwd = Mock(return_value=TEST_EXEC_DIR)
    try:
        test_result = test_module._run_state()
        assert test_result is not None, f'expect a result {test_input_name}'
        assert test_result == 0, f'wrong test result {test_input_name}'

        # was state updated?
        post_state = mc.State(state_file_target.as_posix())
        assert (post_state.get_bookmark(test_input.bookmark) >
                test_start_time), f'state not updated {test_input_name}'
        assert (caom_mock.return_value.data_client.put.called
                ), f'{test_input_name} put not called'
        caom_mock.return_value.data_client.put.assert_called_with(
            '/usr/src/app/integration_test/mock_test/data/execution/'
            '2019213215700',
            'cadc:NEOSSAT/NEOS_SCI_2019213215700.fits',
            None,
        ), f'{test_input_name} wrong put args'
    except Exception as e:
        logging.error(traceback.format_exc())
        raise e
    finally:
        os.getcwd = getcwd_orig
        del sys.modules['composable']
Example #13
def test_run_state_v(client_mock, repo_mock):
    repo_mock.return_value.read.side_effect = tc.mock_read
    client_mock.get_node.side_effect = tc.mock_get_node
    # the test file is length 0
    client_mock.return_value.copy.return_value = 0

    test_wd = '/usr/src/app/caom2pipe/int_test'
    caom2pipe_bookmark = 'caom2_timestamp'
    test_config = mc.Config()
    test_config.working_directory = test_wd
    test_config.collection = 'TEST'
    test_config.interval = 10
    test_config.log_file_directory = f'{test_wd}/logs'
    test_config.failure_fqn = \
        f'{test_config.log_file_directory}/failure_log.txt'
    test_config.log_to_file = True
    test_config.logging_level = 'DEBUG'
    test_config.progress_file_name = 'progress.txt'
    test_config.proxy_file_name = f'{test_wd}/cadcproxy.pem'
    test_config.rejected_file_name = 'rejected.yml'
    test_config.rejected_directory = f'{test_wd}/rejected'
    test_config._report_fqn = f'{test_config.log_file_directory}/app_report.txt'
    test_config.resource_id = 'ivo://cadc.nrc.ca/sc2repo'
    test_config.retry_file_name = 'retries.txt'
    test_config.retry_fqn = \
        f'{test_config.log_file_directory}/{test_config.retry_file_name}'
    test_config.state_file_name = 'state.yml'
    test_config.success_fqn = \
        f'{test_config.log_file_directory}/success_log.txt'
    test_config.tap_id = 'ivo://cadc.nrc.ca/sc2tap'
    test_config.task_types = [
        mc.TaskType.STORE, mc.TaskType.INGEST, mc.TaskType.MODIFY
    ]
    test_config.features.use_file_names = True
    test_config.features.use_urls = False
    test_config.features.supports_latest_client = True
    test_config.use_local_files = False

    if not os.path.exists(test_wd):
        os.mkdir(test_wd)

    # if this test is failing, did the docker container get
    # restarted recently?
    # first create /caom2pipe_test/1000003f.fits.fz,
    # then check that the test_start_time and test_end_time values
    # correspond somewhat to the timestamp on that file
    #
    # this timestamp is 15 minutes earlier than the timestamp of the
    # file in /caom2pipe_test
    #
    test_start_time = '2021-05-08 02:25:09'
    with open(test_config.state_fqn, 'w') as f:
        f.write('bookmarks:\n')
        f.write(f'  {caom2pipe_bookmark}:\n')
        f.write(f'    last_record: {test_start_time}\n')
    test_end_time = datetime(
        2021, 5, 8, 2, 41, 27, 965132, tzinfo=timezone.utc)

    with open(test_config.proxy_fqn, 'w') as f:
        f.write('test content\n')

    test_data_source = TestListDirTimeBoxDataSource()
    test_builder = nbc.FileNameBuilder(tc.TestStorageName)
    transferrer = TestTransfer()

    try:
        test_result = rc.run_by_state(
            bookmark_name=caom2pipe_bookmark,
            command_name='collection2caom2',
            config=test_config,
            end_time=test_end_time,
            name_builder=test_builder,
            source=test_data_source,
            modify_transfer=None,
            store_transfer=transferrer,
        )

        assert test_result is not None, 'expect a result'
        assert test_result == 0, 'expect success'
        assert client_mock.return_value.copy.called, 'expect put call'
        args, kwargs = client_mock.return_value.copy.call_args
        assert args[0] == 'ad:TEST/test_obs_id.fits.gz', 'wrong args[0]'
        assert (args[1] == '/usr/src/app/caom2pipe/int_test/test_obs_id/'
                'test_obs_id.fits'), 'wrong args[1]'

        # state file checking
        test_state = mc.State(test_config.state_fqn)
        assert test_state is not None, 'expect state content'
        test_checkpoint = test_state.get_bookmark(caom2pipe_bookmark)
        assert test_checkpoint == test_end_time, 'wrong bookmark'

        # success file testing
        assert os.path.exists(test_config.log_file_directory), 'log directory'
        assert os.path.exists(test_config.success_fqn), 'success fqn'
        assert os.path.exists(test_config.progress_fqn), 'progress fqn'
        log_file = f'{test_config.log_file_directory}/test_obs_id.log'
        actual = glob.glob(f'{test_config.log_file_directory}/**')
        assert os.path.exists(log_file), f'specific log file {actual}'
        xml_file = f'{test_config.log_file_directory}/test_obs_id.fits.xml'
        assert os.path.exists(xml_file), f'xml file {actual}'

        # reporting testing
        report_file = f'{test_config.log_file_directory}/app_report.txt'
        assert os.path.exists(report_file), f'report file {actual}'
        pass_through_test = False
        with open(report_file, 'r') as f:
            for line in f:
                pass_through_test = True
                if 'Number' in line:
                    bits = line.split(':')
                    found = False
                    if 'Inputs' in bits[0]:
                        assert bits[1].strip() == '1', 'wrong inputs'
                        found = True
                    elif 'Successes' in bits[0]:
                        assert bits[1].strip() == '1', 'wrong successes'
                        found = True
                    elif 'Timeouts' in bits[0]:
                        assert bits[1].strip() == '0', 'wrong timeouts'
                        found = True
                    elif 'Retries' in bits[0]:
                        assert bits[1].strip() == '0', 'wrong retries'
                        found = True
                    elif 'Errors' in bits[0]:
                        assert bits[1].strip() == '0', 'wrong errors'
                        found = True
                    elif 'Rejections' in bits[0]:
                        assert bits[1].strip() == '0', 'wrong rejections'
                        found = True
                    assert found, f'{line}'
        assert pass_through_test, 'found a report file and checked it'
    finally:
        f_list = glob.glob(f'{test_wd}/**', recursive=True)
        for entry in f_list:
            try:
                if os.path.isdir(entry):
                    os.rmdir(entry)
                else:
                    os.unlink(entry)
            except OSError as e:
                logging.error(f'failed to delete {e}')
def test_run_state(
    fits2caom2_mock,
    fits2caom2_in_out_mock,
    tap_query_mock,
    tap_mock,
    clients_mock,
    test_config,
):
    # tap_mock is used by the data_source_composable class
    fits2caom2_mock.side_effect = _mock_write
    clients_mock.return_value.metadata_client.read.side_effect = Mock(
        return_value=None)
    tap_query_mock.side_effect = _mock_get_work

    test_end_time = datetime.fromtimestamp(1579740838, tz=timezone.utc)
    start_time = test_end_time - timedelta(seconds=900)
    _write_state(start_time)

    test_config.task_types = [mc.TaskType.INGEST]
    test_config.state_fqn = STATE_FILE
    test_config.interval = 10
    individual_log_file = (
        f'{test_config.log_file_directory}/NEOS_SCI_2015347000000_clean.log')
    if os.path.exists(test_config.progress_fqn):
        os.unlink(test_config.progress_fqn)
    if os.path.exists(test_config.success_fqn):
        os.unlink(test_config.success_fqn)
    if os.path.exists(individual_log_file):
        os.unlink(individual_log_file)

    test_chooser = ec.OrganizeChooser()
    # use_local_files set so run_by_state chooses QueryTimeBoxDataSourceTS
    test_config.use_local_files = False
    test_result = rc.run_by_state(
        config=test_config,
        chooser=test_chooser,
        command_name=TEST_COMMAND,
        bookmark_name=TEST_BOOKMARK,
        end_time=test_end_time,
    )
    assert test_result is not None, 'expect a result'
    assert test_result == 0, 'expect success'
    if fits2caom2_mock.called:
        fits2caom2_mock.assert_called_once_with()
    elif fits2caom2_in_out_mock.called:
        fits2caom2_in_out_mock.assert_called_once_with(ANY)

    test_state = mc.State(STATE_FILE)
    test_bookmark = test_state.get_bookmark(TEST_BOOKMARK)
    assert test_bookmark == test_end_time, 'wrong time'
    assert os.path.exists(test_config.progress_fqn), 'expect progress file'
    assert os.path.exists(
        test_config.success_fqn), 'log_to_file set to false, no success file'
    assert not os.path.exists(
        individual_log_file), 'log_to_file is False, no entry log'

    # test that runner does nothing when times haven't changed
    start_time = test_end_time
    _write_state(start_time)
    fits2caom2_mock.reset_mock()
    fits2caom2_in_out_mock.reset_mock()
    test_result = rc.run_by_state(
        config=test_config,
        chooser=test_chooser,
        command_name=TEST_COMMAND,
        bookmark_name=TEST_BOOKMARK,
        end_time=test_end_time,
    )
    assert test_result is not None, 'expect a result'
    assert test_result == 0, 'expect success'
    assert not fits2caom2_mock.called, 'expect no fits2caom2 call'
    assert (
        not fits2caom2_in_out_mock.called), 'expect no update fits2caom2 call'
Example #15
    def run(self):
        """
        Uses an iterable of StateRunnerMeta instances.

        :return: 0 for success, -1 for failure
        """
        self._logger.debug(f'Begin run state for {self._bookmark_name}')
        if not os.path.exists(os.path.dirname(self._config.progress_fqn)):
            os.makedirs(os.path.dirname(self._config.progress_fqn))

        state = mc.State(self._config.state_fqn)
        if self._data_source.start_time_ts is None:
            temp = state.get_bookmark(self._bookmark_name)
            start_time = mc.convert_to_ts(temp)
        else:
            start_time = self._data_source.start_time_ts

        # prev_exec_time must be an offset-aware datetime.timestamp value
        prev_exec_time = start_time
        incremented_ts = mc.increment_time_tz(
            prev_exec_time, self._config.interval).timestamp()
        exec_time = min(incremented_ts, self._end_time)

        self._logger.debug(
            f'Starting at {datetime.utcfromtimestamp(start_time)}, ending at '
            f'{datetime.utcfromtimestamp(self._end_time)}')
        result = 0
        cumulative = 0
        cumulative_correct = 0
        if prev_exec_time == self._end_time:
            self._logger.info(
                f'Start time is the same as end time '
                f'{datetime.utcfromtimestamp(start_time)}, stopping.')
            exec_time = prev_exec_time
        else:
            cumulative = 0
            result = 0
            while exec_time <= self._end_time:
                self._logger.info(
                    f'Processing from '
                    f'{datetime.utcfromtimestamp(prev_exec_time)} to '
                    f'{datetime.utcfromtimestamp(exec_time)}')
                save_time = exec_time
                self._organizer.success_count = 0
                entries = self._data_source.get_time_box_work(
                    prev_exec_time, exec_time)
                num_entries = len(entries)

                if num_entries > 0:
                    self._logger.info(f'Processing {num_entries} entries.')
                    self._organizer.complete_record_count = num_entries
                    self._organizer.set_log_location()
                    pop_action = entries.pop
                    if isinstance(entries, deque):
                        pop_action = entries.popleft
                    while len(entries) > 0:
                        entry = pop_action()
                        result |= self._process_entry(entry.entry_name)
                        save_time = min(mc.convert_to_ts(entry.entry_ts),
                                        exec_time)
                    self._finish_run()

                cumulative += num_entries
                cumulative_correct += self._organizer.success_count
                self._record_progress(num_entries, cumulative, start_time,
                                      save_time)
                state.save_state(self._bookmark_name,
                                 datetime.utcfromtimestamp(save_time))

                if exec_time == self._end_time:
                    # the last interval will always have the exec time
                    # equal to the end time, which will fail the while check
                    # so leave after the last interval has been processed
                    #
                    # but the while <= check is required so that an interval
                    # smaller than exec_time -> end_time will get executed,
                    # so don't get rid of the '=' in the while loop
                    # comparison, just because this one exists
                    break
                prev_exec_time = exec_time
                new_time = mc.increment_time_tz(
                    prev_exec_time, self._config.interval).timestamp()
                exec_time = min(new_time, self._end_time)

        self._reporter.add_entries(cumulative)
        self._reporter.add_successes(cumulative_correct)
        state.save_state(self._bookmark_name,
                         datetime.utcfromtimestamp(exec_time))
        self._logger.info('==================================================')
        self._logger.info(
            f'Done {self._organizer.command_name}, saved state is '
            f'{datetime.utcfromtimestamp(exec_time)}')
        self._logger.info(
            f'{cumulative_correct} of {cumulative} records processed '
            f'correctly.')
        self._logger.info('==================================================')
        return result
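
Stripped of the organizer, reporter, and state bookkeeping, the time-box arithmetic in run() above follows a simple windowing pattern. An illustrative sketch of just that pattern (not the caom2pipe code), where each yielded window would correspond to one get_time_box_work call and one saved checkpoint:

def time_boxed_windows(start_ts, end_ts, interval_s):
    """Yield (prev, curr) timestamp pairs covering [start_ts, end_ts]; the
    last window ends exactly at end_ts, even when it is shorter than
    interval_s, and nothing is yielded when start_ts == end_ts."""
    prev = start_ts
    curr = min(prev + interval_s, end_ts)
    while prev < end_ts:
        yield prev, curr
        if curr == end_ts:
            break
        prev = curr
        curr = min(prev + interval_s, end_ts)
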
collection = sys.argv[1]
COLLECTION_KEYS = {
        'gem': 'gemini_bookmark',
        'dao': 'dao_timestamp',
        'neossat': 'neossat_timestamp',
        'cfht': 'cfht_timestamp',
        'vlass': 'vlass_timestamp'
}
collection_key = COLLECTION_KEYS.get(collection, f'{collection}_bookmark')

config = mc.Config()
config.get_executors()

tomorrow = datetime.utcnow() + timedelta(days=1)
if collection == 'gem':
    # gemini counts back 14 days for incremental harvesting, because that's
    # how their endpoints work
    tomorrow = datetime.utcnow() + timedelta(days=15)

if not os.path.exists(config.state_fqn):
    with open(config.state_fqn, 'w') as f:
        f.write('bookmarks:\n')
        f.write(f'    {collection_key}:\n')
        f.write(f'        last_record: {tomorrow}\n')

state = mc.State(config.state_fqn)
state.save_state(collection_key, tomorrow)

print(f'::: state saved key {collection_key} value {tomorrow}')
sys.exit(0)
Example #17
def test_run_state(
    fits2caom2_mock,
    tap_query_mock,
    tap_mock,
    set_clients_mock,
    repo_get_mock,
    test_config
):
    # tap mock is used by the data_source_composable class
    set_clients_mock.side_effect = _clients_mock
    fits2caom2_mock.side_effect = _mock_write
    repo_get_mock.side_effect = Mock(return_value=None)
    tap_query_mock.side_effect = _mock_get_work

    test_end_time = datetime.fromtimestamp(1579740838, tz=timezone.utc)
    start_time = test_end_time - timedelta(seconds=900)
    _write_state(start_time)

    test_config.task_types = [mc.TaskType.INGEST]
    test_config.state_fqn = STATE_FILE
    test_config.interval = 10
    if os.path.exists(test_config.progress_fqn):
        os.unlink(test_config.progress_fqn)
    if os.path.exists(test_config.success_fqn):
        os.unlink(test_config.success_fqn)

    test_chooser = ec.OrganizeChooser()
    test_result = rc.run_by_state(
        config=test_config,
        chooser=test_chooser,
        command_name=TEST_COMMAND,
        bookmark_name=TEST_BOOKMARK,
        end_time=test_end_time,
    )
    assert test_result is not None, 'expect a result'
    assert test_result == 0, 'expect success'
    assert fits2caom2_mock.called, 'expect fits2caom2 call'
    fits2caom2_mock.assert_called_once_with()

    test_state = mc.State(STATE_FILE)
    test_bookmark = test_state.get_bookmark(TEST_BOOKMARK)
    assert test_bookmark == test_end_time, 'wrong time'
    assert os.path.exists(test_config.progress_fqn), 'expect progress file'
    assert (
        not os.path.exists(test_config.success_fqn)
    ), 'log_to_file set to false, no success file'

    # test that runner does nothing when times haven't changed
    start_time = test_end_time
    _write_state(start_time)
    fits2caom2_mock.reset_mock()
    test_result = rc.run_by_state(
        config=test_config,
        chooser=test_chooser,
        command_name=TEST_COMMAND,
        bookmark_name=TEST_BOOKMARK,
        end_time=test_end_time,
    )
    assert test_result is not None, 'expect a result'
    assert test_result == 0, 'expect success'
    assert not fits2caom2_mock.called, 'expect no fits2caom2 call'
Example #18
def test_run_state_v(client_mock):
    client_mock.metadata_client.read.side_effect = tc.mock_read
    client_mock.data_client.info.return_value = FileInfo(
        id='cadc:TEST/anything.fits',
        size=42,
        md5sum='9473fdd0d880a43c21b7778d34872157',
    )

    test_wd = '/usr/src/app/caom2pipe/int_test'
    caom2pipe_bookmark = 'caom2_timestamp'
    test_config = mc.Config()
    test_config.working_directory = test_wd
    test_config.collection = 'TEST'
    test_config.interval = 10
    test_config.log_file_directory = f'{test_wd}/logs'
    test_config.failure_fqn = (
        f'{test_config.log_file_directory}/failure_log.txt')
    test_config.log_to_file = True
    test_config.logging_level = 'INFO'
    test_config.progress_file_name = 'progress.txt'
    test_config.proxy_file_name = f'{test_wd}/cadcproxy.pem'
    test_config.rejected_file_name = 'rejected.yml'
    test_config.rejected_directory = f'{test_wd}/rejected'
    test_config._report_fqn = (
        f'{test_config.log_file_directory}/app_report.txt')
    test_config.resource_id = 'ivo://cadc.nrc.ca/sc2repo'
    test_config.retry_file_name = 'retries.txt'
    test_config.retry_fqn = (
        f'{test_config.log_file_directory}/{test_config.retry_file_name}')
    test_config.state_file_name = 'state.yml'
    test_config.success_fqn = (
        f'{test_config.log_file_directory}/success_log.txt')
    test_config.tap_id = 'ivo://cadc.nrc.ca/sc2tap'
    test_config.task_types = [
        mc.TaskType.STORE,
        mc.TaskType.INGEST,
        mc.TaskType.MODIFY,
    ]
    test_config.features.use_file_names = True
    test_config.features.use_urls = False
    test_config.features.supports_latest_client = True
    test_config.use_local_files = False
    test_config.storage_inventory_resource_id = 'ivo://cadc.nrc.ca/test'

    if not os.path.exists(test_wd):
        os.mkdir(test_wd)

    test_start_time, test_end_time = _get_times(test_config,
                                                caom2pipe_bookmark)

    with open(test_config.proxy_fqn, 'w') as f:
        f.write('test content\n')

    test_data_source = TestListDirTimeBoxDataSource()
    test_builder = nbc.GuessingBuilder(tc.TestStorageName)
    transferrer = TestTransfer()

    try:
        test_result = rc.run_by_state(
            bookmark_name=caom2pipe_bookmark,
            command_name='collection2caom2',
            config=test_config,
            end_time=test_end_time,
            name_builder=test_builder,
            source=test_data_source,
            modify_transfer=transferrer,
            store_transfer=transferrer,
            clients=client_mock,
        )

        assert test_result is not None, 'expect a result'
        assert test_result == 0, 'expect success'
        assert client_mock.data_client.put.called, 'expect put call'
        client_mock.data_client.put.assert_called_with(
            '/usr/src/app/caom2pipe/int_test/test_obs_id',
            'cadc:TEST/test_file.fits.gz',
            None,
        ), 'wrong call args'

        # state file checking
        test_state = mc.State(test_config.state_fqn)
        assert test_state is not None, 'expect state content'
        test_checkpoint = test_state.get_bookmark(caom2pipe_bookmark)
        assert test_checkpoint == test_end_time, 'wrong bookmark'

        # success file testing
        assert os.path.exists(test_config.log_file_directory), 'log directory'
        assert os.path.exists(test_config.success_fqn), 'success fqn'
        assert os.path.exists(test_config.progress_fqn), 'progress fqn'
        log_file = f'{test_config.log_file_directory}/test_obs_id.log'
        actual = glob.glob(f'{test_config.log_file_directory}/**')
        assert os.path.exists(log_file), f'specific log file {actual}'
        xml_file = f'{test_config.log_file_directory}/test_obs_id.xml'
        assert os.path.exists(xml_file), f'xml file {actual}'

        # reporting testing
        report_file = f'{test_config.log_file_directory}/app_report.txt'
        assert os.path.exists(report_file), f'report file {actual}'
        pass_through_test = False
        with open(report_file, 'r') as f:
            for line in f:
                pass_through_test = True
                if 'Number' in line:
                    bits = line.split(':')
                    found = False
                    if 'Inputs' in bits[0]:
                        assert bits[1].strip() == '1', 'wrong inputs'
                        found = True
                    elif 'Successes' in bits[0]:
                        assert bits[1].strip() == '1', 'wrong successes'
                        found = True
                    elif 'Timeouts' in bits[0]:
                        assert bits[1].strip() == '0', 'wrong timeouts'
                        found = True
                    elif 'Retries' in bits[0]:
                        assert bits[1].strip() == '0', 'wrong retries'
                        found = True
                    elif 'Errors' in bits[0]:
                        assert bits[1].strip() == '0', 'wrong errors'
                        found = True
                    elif 'Rejections' in bits[0]:
                        assert bits[1].strip() == '0', 'wrong rejections'
                        found = True
                    assert found, f'{line}'
        assert pass_through_test, 'found a report file and checked it'
    finally:
        f_list = glob.glob(f'{test_wd}/**', recursive=True)
        for entry in f_list:
            try:
                if os.path.isdir(entry):
                    os.rmdir(entry)
                else:
                    os.unlink(entry)
            except OSError as e:
                logging.error(f'failed to delete {e}')
def test_state(
    data_mock,
    web_log_mock,
    nrao_mock,
    caom_mock,
    transfer_mock,
    local_header_mock,
    qa_mock,
    test_input_name,
):
    if 'TODO' in test_input_name:
        return

    test_input = INPUTS.get(test_input_name)

    # make sure the working directory TEST_EXEC_DIR has nothing in it
    for child in TEST_EXEC_DIR.iterdir():
        if child == TEST_EXEC_DIR:
            continue
        if child.is_dir():
            for child_2 in child.iterdir():
                child_2.unlink()
            child.rmdir()
        else:
            child.unlink()

    # make sure the working directory TEST_EXEC_DIR has the correct things
    # in it
    config_file_target = TEST_EXEC_DIR / 'config.yml'
    shutil.copy(test_input.config_file, config_file_target)
    state_file_target = TEST_EXEC_DIR / 'state.yml'
    shutil.copy(test_input.state_file, state_file_target)

    with open(TEST_EXEC_DIR / 'cadcproxy.pem', 'w') as f:
        f.write('test content')

    # make sure running from the state file won't take decades to execute
    test_start_time = datetime.now(tz=dateutil.tz.UTC) - timedelta(minutes=5)
    state = mc.State(state_file_target.as_posix())
    state.save_state(test_input.bookmark, test_start_time)

    # import the module for execution
    sys.path.append(test_input.test_path)
    test_module = import_module('composable')

    nrao_mock.side_effect = _nrao_mock

    def _web_log_init(ignore):
        global web_log_content
        web_log_content = {
            'VLASS1.1_T07t13.J083838-153000_P68878v1_2020_08_29T21_'
            '48_48.092':
            '2020-09-09 07:53',
        }

    web_log_mock.side_effect = _web_log_init

    def _transfer_get(src, dst):
        assert (src ==
                'https://archive-new.nrao.edu/vlass/quicklook/VLASS1.1/T07t13/'
                'VLASS1.1.ql.T07t13.J083838-153000.10.2048.v1.I.iter1.image.'
                'pbcor.tt0.rms.subim.fits'), 'wrong source'
        assert (
            dst == '/usr/src/app/integration_test/mock_test/data/execution/'
            'VLASS1.1.T07t13.J083838-153000/'
            'VLASS1.1.ql.T07t13.J083838-153000.10.2048.v1.I.iter1.image.'
            'pbcor.tt0.rms.subim.fits'), 'wrong dst'
        with open(dst, 'w') as f2:
            f2.write('test content')

    transfer_mock.return_value.get.side_effect = _transfer_get

    caom_mock.return_value.metadata_client.read.side_effect = [
        None,
        SimpleObservation(
            'obs_id',
            'VLASS',
            Algorithm(name='exposure'),
        ),
    ]

    def _local_header(ignore):
        x = """SIMPLE  =                    T / Written by IDL:  Fri Oct  6 01:48:35 2017
BITPIX  =                  -32 / Bits per pixel
NAXIS   =                    2 / Number of dimensions
NAXIS1  =                 2048 /
NAXIS2  =                 2048 /
TYPE    = 'image'
BMAJ    = 1.09
BMIN    = 0.19
DATATYPE= 'REDUC   '           /Data type, SCIENCE/CALIB/REJECT/FOCUS/TEST
END
"""
        delim = '\nEND'
        extensions = \
            [e + delim for e in x.split(delim) if e.strip()]
        headers = [fits.Header.fromstring(e, sep='\n') for e in extensions]
        return headers

    local_header_mock.side_effect = _local_header

    def _info(uri):
        return FileInfo(
            id=uri,
            md5sum='abc',
            size=42,
        )

    data_mock.return_value.info.side_effect = _info
    qa_mock.return_value = False

    getcwd_orig = os.getcwd
    os.getcwd = Mock(return_value=TEST_EXEC_DIR)
    logging.getLogger('StorageClientWrapper').setLevel(logging.DEBUG)
    try:
        test_result = test_module._run_state()
        assert test_result is not None, f'expect a result {test_input_name}'
        assert test_result == 0, f'wrong test result {test_input_name}'

        # was state updated?
        post_state = mc.State(state_file_target.as_posix())
        assert (post_state.get_bookmark(test_input.bookmark) >
                test_start_time), f'state not updated {test_input_name}'
        assert (caom_mock.return_value.data_client.put.called
                ), f'{test_input_name} put not called'
        caom_mock.return_value.data_client.put.assert_called_with(
            '/usr/src/app/integration_test/mock_test/data/execution/'
            'VLASS1.1.T07t13.J083838-153000',
            'nrao:VLASS/VLASS1.1.ql.T07t13.J083838-153000.10.2048.v1.I.'
            'iter1.image.pbcor.tt0.rms.subim.fits',
            None,
        ), f'{test_input_name} wrong put args'
    finally:
        os.getcwd = getcwd_orig
        del sys.modules['composable']
def test_gem_state(
    data_mock,
    caom_mock,
    local_header_mock,
    json_mock,
    filter_mock,
    http_get_mock,
    endpoint_mock,
    tap_mock,
    external_header_mock,
    test_input_name,
):
    if 'GEM_STATE' not in test_input_name:
        return
    test_input = INPUTS.get(test_input_name)
    _cleanup()

    getcwd_orig = os.getcwd
    os.getcwd = Mock(return_value=TEST_EXEC_DIR)

    test_start_time, state_file_target = _setup(test_input)

    def _json_mock(url, ignore_session):
        response = Mock()
        response.close = Mock()
        fqn = test_input.input_dir / 'input.json'
        with open(fqn) as f:
            response.text = f.read()

        def x():
            return json.loads(response.text)

        response.json = x
        return response

    json_mock.side_effect = _json_mock

    def _endpoint_mock(ignore):
        assert (ignore.startswith(
            'https://archive.gemini.edu/jsonsummary/canonical/NotFail/'
            'notengineering/entrytimedaterange')
                ), 'wrong url for incremental querying'
        return _json_mock(ignore, None)

    endpoint_mock.side_effect = _endpoint_mock

    def _filter_mock():
        from astropy.table import parse_single_table
        fqn = test_input.input_dir / 'filter.xml'
        content = parse_single_table(fqn)
        return content, None

    filter_mock.side_effect = _filter_mock

    caom_mock.return_value.metadata_client.read.side_effect = [
        None,
        SimpleObservation(
            'obs_id',
            'GEMINI',
            Algorithm(name='exposure'),
        ),
    ]

    def _tap_query(
        ignore_query,
        output_file,
        data_only=True,
        response_format='csv',
    ):
        output_file.write(
            'observationID,instrument_name\n'
            'GS-CAL20191214-1-029,F2\n', )

    # caom_mock.return_value.query_client.query.side_effect = _tap_query
    tap_mock.return_value.query.side_effect = _tap_query

    def _local_header(ignore):
        x = """SIMPLE  =                    T / Written by IDL:  Fri Oct  6 01:48:35 2017
BITPIX  =                  -32 / Bits per pixel
NAXIS   =                    2 / Number of dimensions
NAXIS1  =                   14 /
NAXIS2  =                   24 /
INSTRUME= 'F2'
DATALAB = 'GS-CAL20191214-1-029
END
"""
        delim = '\nEND'
        extensions = \
            [e + delim for e in x.split(delim) if e.strip()]
        headers = [fits.Header.fromstring(e, sep='\n') for e in extensions]
        return headers

    local_header_mock.side_effect = _local_header
    external_header_mock.side_effect = _local_header

    def _info(uri):
        return FileInfo(
            id=uri,
            md5sum='abc',
            size=42,
        )

    data_mock.return_value.info.side_effect = _info

    # import the module for execution
    sys.path.append(test_input.test_path)
    test_module = import_module('composable')

    try:
        test_result = test_module._run_state()
        assert test_result is not None, f'expect a result {test_input_name}'
        assert test_result == 0, f'wrong test result {test_input_name}'

        # was state updated?
        post_state = mc.State(state_file_target.as_posix())
        assert (post_state.get_bookmark(test_input.bookmark) >
                test_start_time), f'state not updated {test_input_name}'
        assert (caom_mock.return_value.data_client.put.called
                ), f'{test_input_name} put not called'
        caom_mock.return_value.data_client.put.assert_called_with(
            '/usr/src/app/integration_test/mock_test/data/execution/'
            'GS-CAL20191214-1-029',
            'gemini:GEMINI/S20191214S0301.fits',
        ), f'{test_input_name} wrong put args'
        assert http_get_mock.called, 'expect http get call'
        http_get_mock.assert_called_with(
            'https://archive.gemini.edu/file/S20191214S0301.fits',
            '/usr/src/app/integration_test/mock_test/data/execution/'
            'GS-CAL20191214-1-029/S20191214S0301.fits',
        ), 'wrong http get args'
    except Exception as e:
        logging.error(traceback.format_exc())
        raise e
    finally:
        os.getcwd = getcwd_orig
        del sys.modules['composable']
Example #21
def retrieve_obs_metadata(obs_id):
    """Maybe someday this can be done with astroquery, but the VLASS
    metadata isn't in the database that astroquery.Nrao points to, so
    that day is not today."""
    metadata = {}
    mod_obs_id = obs_id.replace('.', '_', 2).replace('_', '.', 1)
    global web_log_content
    if len(web_log_content) == 0:
        config = mc.Config()
        config.get_executors()
        logging.warning('Initializing from /weblog. This may take a while.')
        state = mc.State(config.state_fqn)
        init_web_log(state)
    latest_key = None
    max_ts = None
    tz_info = tz.gettz('US/Socorro')
    # there may be multiple processing runs for a single obs id, use the
    # most recent
    for key in web_log_content.keys():
        if key.startswith(mod_obs_id):
            dt_bits = '_'.join(
                ii for ii in key.replace('/', '').split('_')[3:]
            )
            dt_tz = make_date_time(dt_bits).replace(tzinfo=tz_info)
            if max_ts is None:
                max_ts = dt_tz
                latest_key = key
            else:
                if max_ts < dt_tz:
                    max_ts = dt_tz
                    latest_key = key

    session = mc.get_endpoint_session()
    if latest_key is not None:
        obs_url = f'{QL_WEB_LOG_URL}{latest_key}'
        logging.debug(f'Querying {obs_url}')
        response = None
        try:
            response = mc.query_endpoint_session(obs_url, session)
            if response is None:
                logging.error(f'Could not query {obs_url}')
            else:
                soup = BeautifulSoup(response.text, features='lxml')
                response.close()
                pipeline_bit = soup.find(string=re.compile('pipeline-'))
                if pipeline_bit is None:
                    logging.error(f'Did not find pipeline on {obs_url}')
                else:
                    pipeline_url = (
                        f'{obs_url}{pipeline_bit.strip()}html/index.html'
                    )
                    logging.debug(f'Querying {pipeline_url}')
                    response = mc.query_endpoint_session(pipeline_url, session)
                    if response is None:
                        logging.error(f'Could not query {pipeline_url}')
                    else:
                        metadata = _parse_single_field(response.text)
                        metadata['reference'] = pipeline_url
                        logging.debug(f'Setting reference to {pipeline_url}')
                    response.close()
        finally:
            if response is not None:
                response.close()
    return metadata
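
The key-to-timestamp parsing above is easiest to see with a concrete weblog key, taken from the VLASS test data earlier in this section; the only assumption is that make_date_time accepts the reassembled string:

key = ('VLASS1.1_T07t13.J083838-153000_P68878v1_2020_08_29T21_'
       '48_48.092')
# replace('/', '') would strip the trailing slash a weblog directory listing
# carries; split on '_' and keep everything after the third piece:
# ['2020', '08', '29T21', '48', '48.092']
dt_bits = '_'.join(ii for ii in key.replace('/', '').split('_')[3:])
assert dt_bits == '2020_08_29T21_48_48.092'
# make_date_time(dt_bits) then yields the processing-run datetime that is
# compared across keys to pick the most recent run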