Esempio n. 1
0
def _run_by_state():
    """Run the pipeline incrementally, starting from a bookmarked time.

    The bookmark stored under VLASS_BOOKMARK in the state file is the
    starting point for building the list of quicklook file URLs to
    retrieve from VLASS.

    URLs are the unit of work, because the presence of the phrase
    'QA_REJECTED' in a URL is the only way to tell whether the attribute
    'requirements' should be set to 'fail'.

    :return: the value returned by rc.run_by_state.
    """
    config = mc.Config()
    config.get_executors()
    state = mc.State(config.state_fqn)
    # the bookmark may be a string or a datetime, depending on the
    # execution environment; a zero increment is used to obtain a
    # datetime either way
    bookmark = state.get_bookmark(VLASS_BOOKMARK)
    start_time = mc.increment_time(bookmark, 0)
    todo_list, max_date = scrape.build_file_url_list(start_time)
    has_work = len(todo_list) > 0
    if has_work:
        # the web log is only initialized when there is something to do
        state = mc.State(config.state_fqn)
        work.init_web_log(state, config)
    # an empty list still goes through every subsequent call, so that
    # reporting is consistent
    source = data_source.NraoPage(todo_list)
    name_builder = builder.VlassInstanceBuilder(config)
    transfer = tc.HttpTransfer()
    return rc.run_by_state(
        config=config,
        command_name=sn.APPLICATION,
        bookmark_name=VLASS_BOOKMARK,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        name_builder=name_builder,
        source=source,
        end_time=max_date,
        store_transfer=transfer,
    )
Esempio n. 2
0
def _run():
    """Process the entries identified by a todo file.

    A Builder is handed to the runner so that StorageName instances can
    be provided for each entry.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()

    # Some of the META_VISITORS rely on metadata scraped from the NRAO
    # site. That metadata only changes when a file is created, gets a new
    # version, or has an old version replaced. When the pipeline is not
    # STORE'ing from the source, nothing is changing, so be polite to the
    # NRAO site and fall back to the visitor list that is limited to
    # cleanup_augmentation.
    task_types = config.task_types
    scrape_metadata = (
        mc.TaskType.STORE in task_types and mc.TaskType.INGEST in task_types
    )
    meta_visitors = (
        META_VISITORS if scrape_metadata else [cleanup_augmentation]
    )

    name_builder = nbc.EntryBuilder(storage_name.VlassName)
    use_inventory = config.features.supports_latest_client
    storage_name.set_use_storage_inventory(use_inventory)
    transfer = tc.HttpTransfer()
    return rc.run_by_todo(
        config=config,
        name_builder=name_builder,
        meta_visitors=meta_visitors,
        data_visitors=DATA_VISITORS,
        store_transfer=transfer,
    )
Esempio n. 3
0
def _run_state():
    """Run the pipeline incrementally, starting from a bookmarked time.

    The bookmark stored under VLASS_BOOKMARK in the state file is the
    starting point for building the list of quicklook file URLs to
    retrieve from VLASS.

    URLs are the unit of work, because the presence of the phrase
    'QA_REJECTED' in a URL is the only way to tell whether the attribute
    'requirements' should be set to 'fail'.

    :return: the value returned by rc.run_by_state.
    """
    config = mc.Config()
    config.get_executors()
    state = mc.State(config.state_fqn)
    # the bookmark may be a string or a datetime, depending on the
    # execution environment; a zero increment is used to obtain a
    # datetime either way
    bookmark = state.get_bookmark(VLASS_BOOKMARK)
    start_time = mc.increment_time(bookmark, 0)
    todo_list, max_date = scrape.build_file_url_list(start_time)
    source = data_source.NraoPage(todo_list)
    name_builder = nbc.EntryBuilder(storage_name.VlassName)
    use_inventory = config.features.supports_latest_client
    storage_name.set_use_storage_inventory(use_inventory)
    transfer = tc.HttpTransfer()
    return rc.run_by_state(
        config=config,
        bookmark_name=VLASS_BOOKMARK,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        name_builder=name_builder,
        source=source,
        end_time=max_date,
        store_transfer=transfer,
    )
Esempio n. 4
0
def _run_single():
    """Process one file, named by the first command-line argument.

    :return: the value returned by rc.run_single.
    """
    name_builder = nbc.EntryBuilder(storage_name.VlassName)
    # sys.argv[1] is expected to be a single file name
    vlass_name = name_builder.build(sys.argv[1])
    transfer = tc.HttpTransfer()
    return rc.run_single(
        storage_name=vlass_name,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        store_transfer=transfer,
    )
Esempio n. 5
0
def _run_single():
    """Process one entry, named by the first command-line argument.

    The configured features decide how the argument is handed to
    sn.VlassName: as a file name, as a URL, or otherwise as an
    observation ID. The same argument is always passed as 'entry' too.

    :return: the value returned by rc.run_single.
    """
    config = mc.Config()
    config.get_executors()

    # pick the VlassName keyword that the command-line argument feeds
    if config.features.use_file_names:
        id_keyword = 'file_name'
    elif config.features.use_urls:
        id_keyword = 'url'
    else:
        id_keyword = 'obs_id'

    entry = sys.argv[1]
    vlass_name = sn.VlassName(**{id_keyword: entry, 'entry': entry})
    transfer = tc.HttpTransfer()
    return rc.run_single(
        config=config,
        storage_name=vlass_name,
        command_name=sn.APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        store_transfer=transfer,
    )
Esempio n. 6
0
def _run():
    """Process the entries identified by a todo file.

    The web log is initialized from the state file first, and a Builder
    is handed to the runner so that StorageName instances can be provided
    for each entry.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()

    state = mc.State(config.state_fqn)
    work.init_web_log(state, config)

    name_builder = builder.VlassInstanceBuilder(config)
    transfer = tc.HttpTransfer()
    return rc.run_by_todo(
        config=config,
        name_builder=name_builder,
        command_name=sn.APPLICATION,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        store_transfer=transfer,
    )
Esempio n. 7
0
def test_http_transfer(get_mock):
    """Check that HttpTransfer.get raises mc.CadcException.

    :param get_mock: mock standing in for the underlying retrieval call;
        supplied from outside this function (presumably a patch decorator
        -- confirm against the full test module).
    """
    source_url = 'http://localhost/test_file.fits'
    destination_fqn = '/tmp/test_file.fits'
    # make sure something exists at the destination before the transfer
    if not os.path.exists(destination_fqn):
        with open(destination_fqn, 'w') as f:
            f.write('test content')
    get_mock.side_effect = Mock(autospec=True)

    config = mc.Config()
    config.working_directory = test_conf.TEST_DATA_DIR
    config.netrc_file = 'test_netrc'
    config.rejected_fqn = '/tmp/rejected.yml'
    rejected = mc.Rejected(config.rejected_fqn)
    metrics = mc.Metrics(config)
    observable = mc.Observable(rejected, metrics)

    transfer = tc.HttpTransfer()
    assert transfer is not None, 'expect a result'
    transfer.observable = observable

    with pytest.raises(mc.CadcException):
        transfer.get(source_url, destination_fqn)
        # NOTE(review): when get raises as expected, the statements below
        # are skipped, so they only execute on the path where
        # pytest.raises goes on to fail the test. Confirm whether they
        # were meant to follow the with block, and whether the positional
        # indices match the mocked call.
        assert get_mock.called, 'should have been called'
        args, kwargs = get_mock.call_args
        assert args[1] == source_url, 'wrong source name'
        assert args[2] == destination_fqn, 'wrong dest name'