Beispiel #1
0
def test_returned_datatype_no_type_endpoint():
    '''
    The `return_data` listed under `backend_types` for `forkit` is reported even
    though the only configured endpoint carries no `type` entry.
    '''
    settings = ConfigSettings('servicex', 'servicex')
    settings.clear()
    settings['backend_types'] = [
        {
            'type': 'forkit',
            'return_data': 'spoon',
        },
    ]
    settings['api_endpoints'] = [
        {
            'endpoint': 'http://localhost:5000',
        },
    ]

    adaptor = ServiceXConfigAdaptor(settings)
    returned = adaptor.get_default_returned_datatype('forkit')

    assert returned == 'spoon'
def test_sx_adaptor_settings_name_not_type(caplog):
    '''
    Two endpoints are configured: the first has *type* `my-type1`, the second has
    *name* `my-type1`. Asking for `my-type1` must hand back the second one, and
    nothing may be logged while doing so.
    '''
    from confuse import Configuration

    shared_token = 'forkingshirtballs.thegoodplace.bortles'
    typed_entry = {
        'type': 'my-type1',
        'name': 'my-fork1',
        'endpoint': 'http://my-left-foot1.com:5000',
        'token': shared_token,
    }
    named_entry = {
        'type': 'my-type2',
        'name': 'my-type1',
        'endpoint': 'http://my-left-foot2.com:5000',
        'token': shared_token,
    }

    settings = Configuration('bogus', 'bogus')
    settings.clear()
    settings['api_endpoints'] = [typed_entry, named_entry]

    adaptor = ServiceXConfigAdaptor(settings)
    endpoint, token = adaptor.get_servicex_adaptor_config('my-type1')

    assert endpoint == 'http://my-left-foot2.com:5000'
    assert token == 'forkingshirtballs.thegoodplace.bortles'

    # No warning (or any other record) is expected for an exact name match.
    assert not caplog.record_tuples
Beispiel #3
0
def test_sx_adaptor_nothing():
    '''
    With a completely cleared configuration, asking for the adaptor config
    raises `ServiceXException`.
    '''
    empty_settings = ConfigSettings('servicex', 'servicex')
    empty_settings.clear()
    adaptor = ServiceXConfigAdaptor(empty_settings)

    with pytest.raises(ServiceXException):
        adaptor.get_servicex_adaptor_config()
Beispiel #4
0
def test_default_config_endpoint():
    '''
    After clearing the settings and re-adding only the default source, the
    adaptor reports `http://localhost:5000` as the endpoint and no token.
    '''
    settings = ConfigSettings('servicex', 'servicex')
    settings.clear()
    settings._add_default_source()

    adaptor = ServiceXConfigAdaptor(settings)
    url, access_token = adaptor.get_servicex_adaptor_config()

    assert url == 'http://localhost:5000'
    assert access_token is None
def test_sx_adaptor_settings_backend_name_requested_with_unlabeled_type(
        caplog):
    '''
    Request the `xaod` backend when the only configured endpoint carries neither
    a `type` nor a `name`: the lookup must raise `ServiceXException` with a
    message containing "Unable to find".
    '''
    from confuse import Configuration
    c = Configuration('bogus', 'bogus')
    c.clear()
    c['api_endpoints'] = [{
        'endpoint': 'http://my-left-foot.com:5000',
        'token': 'forkingshirtballs.thegoodplace.bortles'
    }]
    x = ServiceXConfigAdaptor(c)
    with pytest.raises(ServiceXException) as e:
        x.get_servicex_adaptor_config('xaod')

    # Inspect the raised exception itself: `str(e)` stringifies pytest's
    # ExceptionInfo wrapper, whose format is not the exception message.
    assert 'Unable to find' in str(e.value)
def test_sx_adaptor_settings_name_worng(caplog):
    '''
    The single endpoint has type `my-type` but name `my-fork`; requesting
    `my-type` must raise `ServiceXException` whose message contains
    "Unable to find type my-type".
    '''
    from confuse import Configuration
    c = Configuration('bogus', 'bogus')
    c.clear()
    c['api_endpoints'] = [{
        'type': 'my-type',
        'name': 'my-fork',
        'endpoint': 'http://my-left-foot.com:5000',
        'token': 'forkingshirtballs.thegoodplace.bortles'
    }]
    x = ServiceXConfigAdaptor(c)
    with pytest.raises(ServiceXException) as e:
        x.get_servicex_adaptor_config('my-type')

    # Inspect the raised exception itself: `str(e)` stringifies pytest's
    # ExceptionInfo wrapper, whose format is not the exception message.
    assert 'Unable to find type my-type' in str(e.value)
Beispiel #7
0
def test_sx_adaptor_settings_backend_name_unlabeled_type():
    '''
    No backend name is requested. Of the two endpoints, one is typed `xaod` and
    one has no type; the untyped one (port 5001) is the one returned.
    '''
    from confuse import Configuration

    labeled = {
        'type': 'xaod',
        'endpoint': 'http://my-left-foot.com:5000',
        'token': 'forkingshirtballs.thegoodplace.bortles',
    }
    unlabeled = {
        'endpoint': 'http://my-left-foot.com:5001',
        'token': 'forkingshirtballs.thegoodplace.bortles1',
    }

    settings = Configuration('bogus', 'bogus')
    settings.clear()
    settings['api_endpoints'] = [labeled, unlabeled]

    adaptor = ServiceXConfigAdaptor(settings)
    endpoint, token = adaptor.get_servicex_adaptor_config()

    assert endpoint == 'http://my-left-foot.com:5001'
    assert token == 'forkingshirtballs.thegoodplace.bortles1'
Beispiel #8
0
def test_sx_adaptor_settings_backend_name_requested_with_unlabeled_type(
        caplog):
    '''
    Request the `xaod` backend when the only endpoint has no type: that endpoint
    is returned and the first log record carries the expected message.
    '''
    from confuse import Configuration

    settings = Configuration('bogus', 'bogus')
    settings.clear()
    settings['api_endpoints'] = [
        {
            'endpoint': 'http://my-left-foot.com:5000',
            'token': 'forkingshirtballs.thegoodplace.bortles',
        },
    ]

    adaptor = ServiceXConfigAdaptor(settings)
    endpoint, token = adaptor.get_servicex_adaptor_config('xaod')

    assert endpoint == 'http://my-left-foot.com:5000'
    assert token == 'forkingshirtballs.thegoodplace.bortles'

    expected_message = (
        "No 'xaod' backend type found, "
        "using http://my-left-foot.com:5000 - please add to "
        "the configuration file (e.g. servicex.yaml)"
    )
    # record_tuples entries are (logger_name, level, message).
    first_record = caplog.record_tuples[0]
    assert first_record[2] == expected_message
Beispiel #9
0
def test_sx_adaptor_settings_no_backend_name_requested_or_listed(caplog):
    '''
    No backend name is requested and the only endpoint has no type: that
    endpoint is returned and the first log record carries the expected message.
    '''
    from confuse import Configuration

    settings = Configuration('bogus', 'bogus')
    settings.clear()
    settings['api_endpoints'] = [
        {
            'endpoint': 'http://my-left-foot.com:5000',
            'token': 'forkingshirtballs.thegoodplace.bortles',
        },
    ]

    adaptor = ServiceXConfigAdaptor(settings)
    endpoint, token = adaptor.get_servicex_adaptor_config()

    assert endpoint == 'http://my-left-foot.com:5000'
    assert token == 'forkingshirtballs.thegoodplace.bortles'

    expected_message = (
        "No backend type requested, "
        "using http://my-left-foot.com:5000 - please be "
        "explicit "
        "in the ServiceXDataset constructor"
    )
    # record_tuples entries are (logger_name, level, message).
    first_record = caplog.record_tuples[0]
    assert first_record[2] == expected_message
Beispiel #10
0
def test_sx_adaptor_settings_env():
    '''
    `${VAR}` references in the endpoint's `type`, `endpoint` and `token` fields
    are filled in from environment variables.

    The variables are set only for the duration of the lookup and the previous
    environment is restored afterwards, so this test cannot leak `ENDPOINT`,
    `SXTYPE` or `SXTOKEN` into other tests running in the same process.
    '''
    from os import environ
    from unittest.mock import patch

    from confuse import Configuration

    c = Configuration('bogus', 'bogus')
    c.clear()
    c['api_endpoints'] = [{
        'type': '${SXTYPE}',
        'endpoint': '${ENDPOINT}:5000',
        'token': '${SXTOKEN}',
    }]

    env_overrides = {
        'ENDPOINT': 'http://tachi.com',
        'SXTYPE': 'mcrn',
        'SXTOKEN': 'protomolecule',
    }

    # patch.dict puts the original contents of os.environ back on exit, even
    # if the body raises.
    with patch.dict(environ, env_overrides):
        x = ServiceXConfigAdaptor(c)
        endpoint, token = x.get_servicex_adaptor_config('mcrn')

    assert endpoint == 'http://tachi.com:5000'
    assert token == 'protomolecule'
Beispiel #11
0
    def __init__(self,
                 dataset: DatasetType,
                 backend_name: Optional[str] = None,
                 image: Optional[str] = None,
                 max_workers: int = 20,
                 servicex_adaptor: Optional[ServiceXAdaptor] = None,
                 minio_adaptor: Optional[Union[MinioAdaptor, MinioAdaptorFactory]] = None,
                 cache_adaptor: Optional[Cache] = None,
                 status_callback_factory: Optional[StatusUpdateFactory] = _run_default_wrapper,
                 local_log: Optional[log_adaptor] = None,
                 session_generator: Optional[
                     Callable[[], Awaitable[aiohttp.ClientSession]]] = None,
                 config_adaptor: Optional[ServiceXConfigAdaptor] = None,
                 data_convert_adaptor: Optional[DataConverterAdaptor] = None,
                 ignore_cache: bool = False):
        '''
        Create and configure a ServiceX object for a dataset.

        Arguments

            dataset                     Name of a dataset from which queries will be selected.
            backend_name                The type of backend. Used to find an end-point when no
                                        `servicex_adaptor` is given, and to look up the default
                                        returned datatype in the configuration. If we do not have
                                        a `servicex_adaptor` then this will default to xaod,
                                        unless you have any endpoint listed in your servicex file.
                                        It will default to best match there, or fail if a name has
                                        been given.
            image                       Name of transformer image to use to transform the data. If
                                        left as default, `None`, then the default image for the
                                        ServiceX backend will be used.
            max_workers                 Maximum number of transformers to run simultaneously on
                                        ServiceX.
            servicex_adaptor            Object to control communication with the servicex instance
                                        at a particular ip address with certain login credentials.
                                        Will be configured via the `config_adaptor` by default.
            minio_adaptor               Object to control communication with the minio servicex
                                        instance. Either a `MinioAdaptor` (which is wrapped in a
                                        factory that always returns it) or a
                                        `MinioAdaptorFactory`.
            cache_adaptor               Runs the caching for data and queries that are sent up and
                                        down.
            status_callback_factory     Factory to create a status notification callback for each
                                        query. One is created per query.
            local_log                   Log adaptor for logging.
            session_generator           If you want to control the `ClientSession` object that
                                        is used for callbacks. Otherwise a single one for all
                                        `servicex` queries is used.
            config_adaptor              Control how configuration options are read from a
                                        configuration file (e.g. servicex.yaml).
            data_convert_adaptor        Manages conversions between root and parquet and `pandas`
                                        and `awkward`, including default settings for expected
                                        datatypes from the backend.
            ignore_cache                Always ignore the cache on any query for this dataset. This
                                        is only meaningful if no cache adaptor is provided.
                                        Defaults to false - the cache is used if possible.

        Notes:

            -  The `status_callback_factory` argument, by default, uses the `tqdm` library to
               render progress bars in a terminal window or a graphic in a Jupyter notebook (with
               proper jupyter extensions installed). If `status_callback_factory` is specified as
               None, no updates will be rendered. A custom callback function can also be specified
               which takes `(total_files, transformed, downloaded, skipped)` as an argument. The
               `total_files` parameter may be `None` until the system knows how many files need to
               be processed (and some files can even be completed before that is known).
            -  The full description of calling parameters is recorded in the local cache, including
               things like `image` name and tag.
        '''
        ServiceXABC.__init__(self, dataset, image, max_workers,
                             status_callback_factory,
                             )

        # Get the local settings
        config = config_adaptor if config_adaptor is not None \
            else ServiceXConfigAdaptor()

        # Establish the cache that will store all our queries. `ignore_cache` only
        # reaches the cache we build ourselves; a supplied adaptor is used as-is.
        self._cache = cache_adaptor if cache_adaptor is not None \
            else Cache(get_configured_cache_path(config.settings), ignore_cache)

        if not servicex_adaptor:
            # No adaptor supplied: look up the end point and token for the
            # requested backend in the configuration and build one.
            end_point, token = config.get_servicex_adaptor_config(backend_name)
            servicex_adaptor = ServiceXAdaptor(end_point, token)
        self._servicex_adaptor = servicex_adaptor

        # Always store a factory: wrap a bare MinioAdaptor so it is the one
        # handed out every time.
        if not minio_adaptor:
            self._minio_adaptor = MinioAdaptorFactory()
        elif isinstance(minio_adaptor, MinioAdaptor):
            self._minio_adaptor = MinioAdaptorFactory(always_return=minio_adaptor)
        else:
            self._minio_adaptor = minio_adaptor

        self._log = log_adaptor() if local_log is None else local_log

        self._session_generator = session_generator if session_generator is not None \
            else default_client_session

        # The configured default return datatype for this backend seeds the
        # converter unless the caller supplied their own.
        self._return_types = [config.get_default_returned_datatype(backend_name)]
        self._converter = data_convert_adaptor if data_convert_adaptor is not None \
            else DataConverterAdaptor(self._return_types[0])
Beispiel #12
0
def test_default_ctor():
    '''
    An adaptor built with no arguments exposes a `ConfigSettings` instance as
    its `settings`.
    '''
    adaptor = ServiceXConfigAdaptor()
    current_settings = adaptor.settings

    assert isinstance(current_settings, ConfigSettings)
Beispiel #13
0
def test_returned_datatype_from_default_dict():
    '''
    With only `backend_types` configured, the `return_data` listed for `forkit`
    is what the adaptor reports for that backend.
    '''
    settings = ConfigSettings('servicex', 'servicex')
    settings.clear()
    settings['backend_types'] = [
        {
            'type': 'forkit',
            'return_data': 'spoon',
        },
    ]

    adaptor = ServiceXConfigAdaptor(settings)
    returned = adaptor.get_default_returned_datatype('forkit')

    assert returned == 'spoon'
Beispiel #14
0
def test_returned_datatype_default():
    '''
    When no backend is named (`None`), the `default_return_data` setting is
    what the adaptor reports.
    '''
    settings = ConfigSettings('servicex', 'servicex')
    settings.clear()
    settings['default_return_data'] = 'root'

    adaptor = ServiceXConfigAdaptor(settings)
    returned = adaptor.get_default_returned_datatype(None)

    assert returned == 'root'
Beispiel #15
0
def test_returned_datatype_nothing():
    '''
    With a cleared configuration and no backend named, asking for the default
    returned datatype raises `ServiceXException`.
    '''
    empty_settings = ConfigSettings('servicex', 'servicex')
    empty_settings.clear()
    adaptor = ServiceXConfigAdaptor(empty_settings)

    with pytest.raises(ServiceXException):
        adaptor.get_default_returned_datatype(None)
Beispiel #16
0
def test_passed_in_settings():
    '''
    Settings handed to the constructor are exposed unchanged: `settings` is the
    very same object, not a copy.
    '''
    supplied = ConfigSettings('servicex', 'servicex')
    adaptor = ServiceXConfigAdaptor(supplied)

    assert adaptor.settings is supplied