def test_run_todo_list_dir_data_source_invalid_fname_v(
    set_clients_mock, test_config
):
    """An INGEST run over local files fails when the StorageName is invalid.

    The run is executed twice: first with log_to_file disabled, where no
    failure or retry file may appear, then with log_to_file enabled, where
    both files are expected.
    """
    set_clients_mock.side_effect = _clients_mock
    test_config.working_directory = TEST_DIR
    test_config.use_local_files = True
    test_config.task_types = [mc.TaskType.INGEST]
    test_config.log_to_file = False
    test_config.features.supports_latest_client = True

    # remove left-overs, so the existence checks below are about this run
    for stale_fqn in (test_config.failure_fqn, test_config.retry_fqn):
        if os.path.exists(stale_fqn):
            os.unlink(stale_fqn)

    # make sure there is a file in the working directory to find
    candidate_fqn = f'{TEST_DIR}/abc.fits.gz'
    if not os.path.exists(candidate_fqn):
        with open(candidate_fqn, 'w') as f_out:
            f_out.write('abc')

    class TestStorageName(mc.StorageName):
        """A StorageName that always reports itself as invalid."""

        def __init__(self, entry):
            self._obs_id = entry

        def is_valid(self):
            return False

    class TestStorageNameInstanceBuilder(b.StorageNameInstanceBuilder):
        """Builds a TestStorageName for every entry."""

        def __init__(self):
            pass

        def build(self, entry):
            return TestStorageName(entry)

    test_builder = TestStorageNameInstanceBuilder()
    test_chooser = ec.OrganizeChooser()

    # first pass: no logging to file
    first_result = rc.run_by_todo(
        config=test_config,
        chooser=test_chooser,
        name_builder=test_builder,
        command_name=TEST_COMMAND,
    )
    assert first_result is not None, 'expect a result'
    assert first_result == -1, 'expect failure, because of file naming'
    failure_exists = os.path.exists(test_config.failure_fqn)
    assert not failure_exists, 'no logging, no failure file'
    retry_exists = os.path.exists(test_config.retry_fqn)
    assert not retry_exists, 'no logging, no retry file'

    # second pass: logging to file, and no name builder is provided
    test_config.log_to_file = True
    second_result = rc.run_by_todo(
        config=test_config,
        chooser=test_chooser,
        command_name=TEST_COMMAND,
    )
    assert second_result is not None, 'expect a result'
    assert second_result == -1, 'expect failure, because of file naming'
    assert os.path.exists(test_config.failure_fqn), 'expect failure file'
    assert os.path.exists(test_config.retry_fqn), 'expect retry file'
def test_run_todo_retry(do_one_mock, clients_mock, test_config):
    """A failing todo run with retry_failures enabled calls do_one twice and
    never touches the mocked clients."""
    test_config.features.supports_latest_client = True
    success_fqn, failure_fqn, retry_fqn = _clean_up_log_files(test_config)

    do_one_mock.side_effect = _mock_do_one

    test_config.work_fqn = f'{tc.TEST_DATA_DIR}/todo.txt'
    test_config.log_to_file = True
    test_config.retry_failures = True
    _write_todo(test_config)

    test_result = rc.run_by_todo(
        config=test_config, command_name=TEST_COMMAND
    )

    assert test_result is not None, 'expect a result'
    assert test_result == -1, 'expect failure'
    _check_log_files(test_config, success_fqn, failure_fqn, retry_fqn)
    assert do_one_mock.called, 'expect do_one call'
    assert do_one_mock.call_count == 2, 'wrong number of calls'

    mocked_clients = clients_mock.return_value
    metadata_read = mocked_clients.metadata_client.read
    assert (
        not metadata_read.called
    ), 'do_one is mocked, should be no metadata client call'
    data_get_file_info = mocked_clients.data_client.get_file_info
    assert (
        not data_get_file_info.called
    ), 'do_one is mocked, should be no data client call'
Exemple #3
0
def _run():
    """
    Picks the source of work from the configuration, then hands everything
    to run_by_todo.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config, clients, name_builder, metadata_reader = _common()
    if not config.use_local_files:
        files_source = dsc.TodoFileDataSource(config)
    elif config.cleanup_files_when_storing:
        files_source = data_source.DAOLocalFilesDataSource(
            config, clients.data_client, metadata_reader
        )
    else:
        # local files with no clean-up: no explicit source is passed on
        files_source = None
    return rc.run_by_todo(
        config=config,
        clients=clients,
        name_builder=name_builder,
        source=files_source,
        metadata_reader=metadata_reader,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
    )
def test_run_todo_file_data_source(clients_mock, test_config):
    """A VISIT run over a todo file succeeds and records the observation ID
    and file name in the success log."""
    mocked_clients = clients_mock.return_value
    mocked_clients.data_client.get_file_info.return_value = None
    mocked_clients.metadata_client.read.return_value = SimpleObservation(
        collection=test_config.collection,
        observation_id='def',
        algorithm=Algorithm(str('test')),
    )

    # remove a left-over success log, so its existence is about this run
    if os.path.exists(test_config.success_fqn):
        os.unlink(test_config.success_fqn)

    test_config.work_fqn = f'{TEST_DIR}/todo.txt'
    test_config.task_types = [mc.TaskType.VISIT]
    test_config.log_to_file = True

    test_result = rc.run_by_todo(
        config=test_config,
        chooser=ec.OrganizeChooser(),
        command_name=TEST_COMMAND,
    )
    assert test_result is not None, 'expect a result'
    assert test_result == 0, 'expect success'
    assert os.path.exists(test_config.success_fqn), 'expect success file'

    with open(test_config.success_fqn) as f_in:
        logged = f_in.read()
    # the obs id and file name
    assert 'def def.fits' in logged, 'wrong success message'
def test_run_todo_list_dir_data_source_v(
    write_obs_mock,
    read_obs_mock,
    fits2caom2_in_out_mock,
    fits2caom2_mock,
    clients_mock,
    test_config,
):
    """A SCRAPE run over a local directory succeeds, reads and writes the
    observation, and makes no client calls."""
    read_obs_mock.side_effect = _mock_read
    test_config.working_directory = tc.TEST_DATA_DIR
    test_config.use_local_files = True
    test_config.data_sources = [tc.TEST_FILES_DIR]
    test_config.data_source_extensions = ['.fits']
    test_config.task_types = [mc.TaskType.SCRAPE]
    test_config.features.supports_latest_client = True

    test_result = rc.run_by_todo(
        config=test_config, command_name=TEST_COMMAND
    )

    assert test_result is not None, 'expect a result'
    assert test_result == 0, 'expect success'
    # either of the two fits2caom2 entry points may have been used
    if not fits2caom2_mock.called:
        assert fits2caom2_in_out_mock.called, 'expect fits2caom2 in/out call'
    else:
        fits2caom2_mock.assert_called_with(connected=False)
    assert read_obs_mock.called, 'read_obs not called'
    assert write_obs_mock.called, 'write_obs mock not called'

    mocked_clients = clients_mock.return_value
    assert (
        not mocked_clients.metadata_client.read.called
    ), 'scrape, should be no client access'
    assert (
        not mocked_clients.data_client.get_file.called
    ), 'scrape, should be no client access'
def test_run_todo_list_dir_data_source(
    write_obs_mock,
    read_obs_mock,
    fits2caom2_in_out_mock,
    fits2caom2_mock,
    test_config,
):
    """A SCRAPE run over a local directory succeeds and writes the
    observation."""
    read_obs_mock.side_effect = _mock_read
    test_config.working_directory = tc.TEST_DATA_DIR
    test_config.use_local_files = True
    test_config.task_types = [mc.TaskType.SCRAPE]
    test_config.data_sources = [tc.TEST_FILES_DIR]
    test_config.data_source_extensions = ['.fits']

    test_result = rc.run_by_todo(
        config=test_config,
        chooser=ec.OrganizeChooser(),
        command_name=TEST_COMMAND,
    )

    assert test_result is not None, 'expect a result'
    assert test_result == 0, 'expect success'
    # either of the two fits2caom2 entry points may have been used
    if not fits2caom2_mock.called:
        assert fits2caom2_in_out_mock.called, 'expect fits2caom2 in/out call'
    else:
        fits2caom2_mock.assert_called_with(connected=False)
    assert write_obs_mock.called, 'expect write call'
def test_run_todo_file_data_source_v(
    repo_read_mock, set_clients_mock, test_config
):
    """A VISIT run over a todo file succeeds, records the observation ID
    and file name in the success log, and reads the observation through the
    mocked repository read."""
    set_clients_mock.side_effect = _clients_mock
    test_config.features.supports_latest_client = True
    test_cert_file = os.path.join(TEST_DIR, 'test_proxy.pem')
    test_config.proxy_fqn = test_cert_file

    repo_read_mock.return_value = SimpleObservation(
        collection=test_config.collection,
        observation_id='def',
        algorithm=Algorithm(str('test')),
    )

    # remove a left-over success log, so its existence is about this run
    if os.path.exists(test_config.success_fqn):
        os.unlink(test_config.success_fqn)

    test_config.work_fqn = f'{TEST_DIR}/todo.txt'
    test_config.task_types = [mc.TaskType.VISIT]
    test_config.log_to_file = True

    test_chooser = ec.OrganizeChooser()
    test_result = rc.run_by_todo(
        config=test_config, chooser=test_chooser, command_name=TEST_COMMAND
    )
    assert test_result is not None, 'expect a result'
    assert test_result == 0, 'expect success'
    assert os.path.exists(test_config.success_fqn), 'expect success file'

    with open(test_config.success_fqn) as f:
        content = f.read()
        # the obs id and file name
        assert 'def def.fits' in content, 'wrong success message'
    assert repo_read_mock.called, 'expect e call'
    # assert_called_with raises its own AssertionError on a mismatch; the
    # original trailing ", 'message'" only built a discarded tuple, so it
    # is dropped here.
    repo_read_mock.assert_called_with()
Exemple #8
0
def _run():
    """Process the work listed in a todo file, with a Builder providing the
    StorageName instances. This is important here, because the instrument
    name needs to be provided to the StorageName constructor.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()

    # time_bounds_augmentation and quality_augmentation depend on
    # metadata scraped from the NRAO site, but that only changes if a new
    # file is created, a new version of a file is created, or an old version
    # of a file is replaced. If the pipeline isn't STORE'ing information from
    # the source, files aren't changing, and the related metadata isn't
    # changing, so be polite to the NRAO site, and don't scrape if it's not
    # necessary.
    storing_and_ingesting = (
        mc.TaskType.STORE in config.task_types
        and mc.TaskType.INGEST in config.task_types
    )
    if storing_and_ingesting:
        meta_visitors = META_VISITORS
    else:
        meta_visitors = [cleanup_augmentation]

    name_builder = nbc.EntryBuilder(storage_name.VlassName)
    storage_name.set_use_storage_inventory(
        config.features.supports_latest_client
    )
    http_transfer = tc.HttpTransfer()
    return rc.run_by_todo(
        config=config,
        name_builder=name_builder,
        meta_visitors=meta_visitors,
        data_visitors=DATA_VISITORS,
        store_transfer=http_transfer,
    )
Exemple #9
0
def _run():
    """
    Process the work listed in a todo file, naming entries through
    get_storage_name.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    file_name_builder = nbc.FileNameBuilder(get_storage_name)
    return rc.run_by_todo(
        name_builder=file_name_builder,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
    )
Exemple #10
0
def _run():
    """
    Process the work listed in a todo file, with entries named by
    observation ID.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    obs_id_builder = nbc.ObsIDBuilder(mc.StorageName)
    return rc.run_by_todo(
        config=None,
        name_builder=obs_id_builder,
        command_name='visitCaom2',
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        chooser=None,
    )
Exemple #11
0
def _run():
    """
    Process the work listed in a todo file, naming entries with PHANGSName.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    phangs_builder = nbc.FileNameBuilder(PHANGSName)
    return rc.run_by_todo(
        config=None,
        name_builder=phangs_builder,
        command_name=APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        chooser=None,
    )
Exemple #12
0
def _run():
    """
    Set the collection on StorageName, then process the work listed in a
    todo file.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    # class-level assignment, done before any processing starts
    StorageName.collection = COLLECTION
    run_kwargs = {
        'config': None,
        'name_builder': None,
        'meta_visitors': META_VISITORS,
        'data_visitors': DATA_VISITORS,
        'chooser': None,
    }
    return rc.run_by_todo(**run_kwargs)
Exemple #13
0
def _run():
    """
    Process the file names listed in a todo file, even though Gemini
    provides information about existing data referenced by observation ID.
    """
    config = mc.Config()
    config.get_executors()
    external_metadata.init_global(config=config)
    obs_id_builder = builder.GemObsIDBuilder(config)
    visitors = _define_meta_visitors(config)
    return rc.run_by_todo(
        config,
        obs_id_builder,
        chooser=None,
        command_name=main_app.APPLICATION,
        meta_visitors=visitors,
    )
Exemple #14
0
def _run():
    """
    Process the work listed in a todo file, naming entries with NEOSSatName
    and storing through an FTP transfer built from config.data_source.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    name_builder = nbc.FileNameBuilder(NEOSSatName)
    config = mc.Config()
    config.get_executors()
    ftp_transfer = tc.FtpTransfer(config.data_source)
    return rc.run_by_todo(
        name_builder=name_builder,
        config=config,
        command_name=APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        store_transfer=ftp_transfer,
    )
def test_run_todo_list_dir_data_source(
        read_obs_mock, fits2caom2_in_out_mock, fits2caom2_mock, test_config
):
    """A SCRAPE run over a local directory succeeds, using exactly one of
    the two fits2caom2 entry points."""
    read_obs_mock.side_effect = _mock_read
    test_config.working_directory = tc.TEST_FILES_DIR
    test_config.use_local_files = True
    test_config.task_types = [mc.TaskType.SCRAPE]

    test_result = rc.run_by_todo(
        config=test_config,
        chooser=ec.OrganizeChooser(),
        command_name=TEST_COMMAND,
    )

    assert test_result is not None, 'expect a result'
    assert test_result == 0, 'expect success'
    if not fits2caom2_mock.called:
        assert fits2caom2_in_out_mock.called, 'expect fits2caom2 in/out call'
    else:
        assert not fits2caom2_in_out_mock.called, 'expect no in/out call'
Exemple #16
0
def _run_by_builder():
    """Process the work listed in a todo file, with a Builder providing the
    StorageName instances. This is important here, because the instrument
    name needs to be provided to the StorageName constructor.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()
    name_builder = cfht_builder.CFHTBuilder(config)
    return rc.run_by_todo(
        config,
        name_builder,
        chooser=None,
        command_name=main_app.APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
    )
Exemple #17
0
def _run():
    """
    Process the work listed in a todo file, with names built by
    GemProcBuilder.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()
    # set up the external metadata before the builder is created
    external_metadata.init_global(config=config)
    proc_builder = builder.GemProcBuilder(config)
    return rc.run_by_todo(
        config=config,
        name_builder=proc_builder,
        command_name=main_app.APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
    )
def test_run_todo_list_dir_data_source_exception(do_one_mock, clients_mock,
                                                 test_config):
    """A SCRAPE run where do_one raises CadcException fails, and records the
    entry in the failure and retry logs.

    The scenario is executed once for each value of
    features.supports_latest_client.
    """
    test_config.working_directory = TEST_DIR
    test_config.use_local_files = True
    test_config.task_types = [mc.TaskType.SCRAPE]
    test_config.log_to_file = True

    for entry in [False, True]:
        test_config.features.supports_latest_client = entry
        # forget the calls recorded during the previous pass, otherwise the
        # 'called' check below is trivially true on the second pass
        do_one_mock.reset_mock()
        do_one_mock.side_effect = mc.CadcException

        # remove left-overs, so the existence checks are about this pass
        if os.path.exists(test_config.failure_fqn):
            os.unlink(test_config.failure_fqn)
        if os.path.exists(test_config.retry_fqn):
            os.unlink(test_config.retry_fqn)

        test_chooser = ec.OrganizeChooser()
        test_data_source = dsc.ListDirDataSource(test_config, test_chooser)
        test_result = rc.run_by_todo(
            config=test_config,
            chooser=test_chooser,
            command_name=TEST_COMMAND,
            source=test_data_source,
        )
        assert test_result is not None, 'expect a result'
        assert test_result == -1, 'expect failure'
        assert do_one_mock.called, 'expect do_one call'
        assert os.path.exists(test_config.failure_fqn), 'expect failure file'
        assert os.path.exists(test_config.retry_fqn), 'expect retry file'

        with open(test_config.failure_fqn) as f:
            content = f.read()
            # the obs id and file name
            assert 'abc abc.fits' in content, 'wrong failure message'

        with open(test_config.retry_fqn) as f:
            content = f.read()
            # retry file names
            assert content == 'abc.fits\n', 'wrong retry content'

        # the client mocks are deliberately not reset: no client call is
        # expected in either pass
        assert not (clients_mock.return_value.metadata_client.read.called
                    ), 'scrape, should be no metadata client call'
        assert not (clients_mock.return_value.data_client.get_file_info.called
                    ), 'scrape, should be no data client call'
Exemple #19
0
def _run():
    """Process the work listed in a todo file, with a Builder providing the
    StorageName instances. This is important here, because the instrument
    name needs to be provided to the StorageName constructor.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()
    # the web log is initialized from the state file before names are built
    work.init_web_log(mc.State(config.state_fqn), config)
    instance_builder = builder.VlassInstanceBuilder(config)
    return rc.run_by_todo(
        config=config,
        name_builder=instance_builder,
        command_name=sn.APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        store_transfer=tc.HttpTransfer(),
    )
def _run_remote():
    """
    Process the work found by a vault directory listing, transferring files
    with the same VOSpace client that does the listing.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()
    name_builder = nbc.FileNameBuilder(GemProcName)
    vos_client = Client(vospace_certfile=config.proxy_fqn)
    vo_transfer = tc.VoFitsTransfer(vos_client)
    vault_source = dsc.VaultListDirDataSource(vos_client, config)
    return rc.run_by_todo(
        config=config,
        name_builder=name_builder,
        command_name=APPLICATION,
        source=vault_source,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        store_transfer=vo_transfer,
    )
def test_run_todo_list_dir_data_source_v(
    read_obs_mock,
    fits2caom2_in_out_mock,
    fits2caom2_mock,
    set_clients_mock,
    test_config,
):
    """A SCRAPE run over a local directory succeeds, reads the observation,
    and uses exactly one of the two fits2caom2 entry points."""
    set_clients_mock.side_effect = _clients_mock
    read_obs_mock.side_effect = _mock_read
    test_config.working_directory = tc.TEST_FILES_DIR
    test_config.use_local_files = True
    test_config.task_types = [mc.TaskType.SCRAPE]
    test_config.features.supports_latest_client = True

    test_result = rc.run_by_todo(
        config=test_config, command_name=TEST_COMMAND
    )

    assert test_result is not None, 'expect a result'
    assert test_result == 0, 'expect success'
    if not fits2caom2_mock.called:
        assert fits2caom2_in_out_mock.called, 'expect fits2caom2 in/out call'
    else:
        assert not fits2caom2_in_out_mock.called, 'expect no in/out call'
    assert read_obs_mock.called, 'read_obs not called'
def test_run_todo_retry(do_one_mock, test_config):
    """A failing todo run with retry_failures enabled calls do_one twice."""
    (
        success_fqn,
        failure_fqn,
        retry_fqn,
    ) = _clean_up_log_files(test_config)

    do_one_mock.side_effect = _mock_do_one

    test_config.work_fqn = f'{tc.TEST_DATA_DIR}/todo.txt'
    test_config.log_to_file = True
    test_config.retry_failures = True
    _write_todo(test_config)

    test_result = rc.run_by_todo(config=test_config, command_name=TEST_COMMAND)

    assert test_result is not None, 'expect a result'
    assert test_result == -1, 'expect failure'
    _check_log_files(test_config, success_fqn, failure_fqn, retry_fqn)
    assert do_one_mock.called, 'expect do_one call'
    assert do_one_mock.call_count == 2, 'wrong number of calls'
def test_run_todo_file_data_source(
    caps_mock,
    ad_mock,
    data_client_mock,
    set_clients_mock,
    test_config
):
    """A VISIT run over a todo file succeeds and records the observation ID
    and file name in the success log."""
    set_clients_mock.side_effect = _clients_mock
    caps_mock.return_value = 'https://sc2.canfar.net/sc2repo'

    # canned response, handed out when ad_mock is used as a context manager
    canned_response = Mock()
    canned_response.status_code = 200
    canned_response.iter_content.return_value = [b'fileName\n']
    ad_mock.return_value.__enter__.return_value = canned_response

    data_client_mock.return_value = SimpleObservation(
        collection=test_config.collection,
        observation_id='def',
        algorithm=Algorithm(str('test')),
    )

    # remove a left-over success log, so its existence is about this run
    if os.path.exists(test_config.success_fqn):
        os.unlink(test_config.success_fqn)

    test_config.work_fqn = f'{TEST_DIR}/todo.txt'
    test_config.task_types = [mc.TaskType.VISIT]
    test_config.log_to_file = True

    test_result = rc.run_by_todo(
        config=test_config,
        chooser=ec.OrganizeChooser(),
        command_name=TEST_COMMAND,
    )
    assert test_result is not None, 'expect a result'
    assert test_result == 0, 'expect success'
    assert os.path.exists(test_config.success_fqn), 'expect success file'

    with open(test_config.success_fqn) as f_in:
        logged = f_in.read()
    # the obs id and file name
    assert 'def def.fits' in logged, 'wrong success message'
Exemple #24
0
def _run_vo():
    """
    Process the work found through a VOSpace-backed data source.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config, clients, name_builder, metadata_reader = _common()
    # NOTE(review): the certificate location comes from
    # config.proxy_file_name - confirm this is a path the Client can resolve
    vos_client = Client(vospace_certfile=config.proxy_file_name)
    # the client collection carries the same VOSpace client
    clients.vo_client = vos_client
    vault_source = data_source.DAOVaultDataSource(
        config, clients.vo_client, clients.data_client
    )
    cleanup_transfer = transfer.VoFitsCleanupTransfer(vos_client, config)
    return rc.run_by_todo(
        name_builder=name_builder,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        source=vault_source,
        clients=clients,
        store_transfer=cleanup_transfer,
        metadata_reader=metadata_reader,
    )
Exemple #25
0
def _run():
    """
    Process file names, even though Gemini provides information about
    existing data referenced by observation ID. The names come from a
    directory listing when using local files or scraping, and from a todo
    file otherwise.
    """
    (
        clients,
        config,
        metadata_reader,
        meta_visitors,
        name_builder,
    ) = _common_init()
    list_local = (
        config.use_local_files or mc.TaskType.SCRAPE in config.task_types
    )
    source = (
        dsc.ListDirSeparateDataSource(config)
        if list_local
        else dsc.TodoFileDataSource(config)
    )
    return rc.run_by_todo(
        config=config,
        name_builder=name_builder,
        meta_visitors=meta_visitors,
        source=source,
        metadata_reader=metadata_reader,
        clients=clients,
    )
def test_run_todo_list_dir_data_source_invalid_fname_v(clients_mock,
                                                       test_config):
    """An INGEST run over a local data source fails when the StorageName is
    invalid, and the failure and retry files are expected afterwards.

    The file and directory used as the data source are removed again when
    the test finishes, whether or not it passed.
    """
    test_dir = os.path.join('/test_files', '1')
    test_fqn = os.path.join(test_dir, 'abc.fits.gz')

    test_config.working_directory = tc.TEST_DATA_DIR
    test_config.use_local_files = True
    test_config.data_sources = [test_dir]
    test_config.data_source_extensions = ['.fits', '.fits.gz']
    test_config.task_types = [mc.TaskType.INGEST]
    test_config.log_to_file = False
    test_config.features.supports_latest_client = True

    # remove left-overs, so the existence checks below are about this run
    for stale_fqn in (test_config.failure_fqn, test_config.retry_fqn):
        if os.path.exists(stale_fqn):
            os.unlink(stale_fqn)

    # make sure the data source has one file to find
    if not os.path.exists(test_dir):
        os.mkdir(test_dir)
    if not os.path.exists(test_fqn):
        with open(test_fqn, 'w') as f_out:
            f_out.write('abc')

    class TestStorageName(mc.StorageName):
        """A StorageName that always reports itself as invalid."""

        def __init__(self, entry):
            self._obs_id = os.path.basename(entry)
            self._source_names = [entry]

        def is_valid(self):
            return False

    class TestStorageNameInstanceBuilder(b.StorageNameInstanceBuilder):
        """Builds a TestStorageName for every entry."""

        def __init__(self):
            pass

        def build(self, entry):
            return TestStorageName(entry)

    try:
        test_builder = TestStorageNameInstanceBuilder()
        test_chooser = ec.OrganizeChooser()
        test_result = rc.run_by_todo(
            config=test_config,
            chooser=test_chooser,
            name_builder=test_builder,
            command_name=TEST_COMMAND,
        )
        assert test_result is not None, 'expect a result'
        assert test_result == -1, 'expect failure, because of file naming'
        assert os.path.exists(test_config.failure_fqn), 'expect failure file'
        assert os.path.exists(test_config.retry_fqn), 'expect retry file'
        # NOTE(review): these checks look at attributes of clients_mock
        # itself, not of clients_mock.return_value - confirm that matches
        # how the fixture is patched
        metadata_read = clients_mock.metadata_client.read
        assert (
            not metadata_read.called
        ), 'repo client read access happens after is_valid call'
        data_get_file = clients_mock.data_client.get_file
        assert (
            not data_get_file.called
        ), 'bad file naming, should be no client access'
    finally:
        if os.path.exists(test_fqn):
            os.unlink(test_fqn)
        if os.path.exists(test_dir):
            # log what is left in the directory before trying to remove it
            logging.error(os.listdir(test_dir))
            os.rmdir(test_dir)