def test_returned_datatype_no_type_endpoint():
    '''A `backend_types` entry supplies the return datatype for its type
    even when the only configured api endpoint carries no `type` key.'''
    settings = ConfigSettings('servicex', 'servicex')
    settings.clear()

    # One backend type definition, and one endpoint that is not typed.
    backend_types = [{'type': 'forkit', 'return_data': 'spoon'}]
    endpoints = [{'endpoint': 'http://localhost:5000'}]
    settings['backend_types'] = backend_types
    settings['api_endpoints'] = endpoints

    adaptor = ServiceXConfigAdaptor(settings)
    returned = adaptor.get_default_returned_datatype('forkit')
    assert returned == 'spoon'
def test_sx_adaptor_settings_name_not_type(caplog):
    '''When the requested string equals one endpoint's `name` and another
    endpoint's `type`, the endpoint matched by `name` is returned and
    nothing is logged.'''
    from confuse import Configuration

    shared_token = 'forkingshirtballs.thegoodplace.bortles'
    # 'my-type1' is the *type* of the first entry ...
    typed_entry = {
        'type': 'my-type1',
        'name': 'my-fork1',
        'endpoint': 'http://my-left-foot1.com:5000',
        'token': shared_token,
    }
    # ... and the *name* of the second entry.
    named_entry = {
        'type': 'my-type2',
        'name': 'my-type1',
        'endpoint': 'http://my-left-foot2.com:5000',
        'token': shared_token,
    }

    settings = Configuration('bogus', 'bogus')
    settings.clear()
    settings['api_endpoints'] = [typed_entry, named_entry]

    adaptor = ServiceXConfigAdaptor(settings)
    endpoint, token = adaptor.get_servicex_adaptor_config('my-type1')

    assert endpoint == 'http://my-left-foot2.com:5000'
    assert token == 'forkingshirtballs.thegoodplace.bortles'
    assert caplog.record_tuples == []
def test_sx_adaptor_nothing():
    '''Asking for an endpoint from a cleared (empty) configuration raises
    a `ServiceXException`.'''
    empty_settings = ConfigSettings('servicex', 'servicex')
    empty_settings.clear()

    adaptor = ServiceXConfigAdaptor(empty_settings)

    with pytest.raises(ServiceXException):
        adaptor.get_servicex_adaptor_config()
def test_default_config_endpoint():
    '''With only the default configuration source loaded, the endpoint is
    the local host on port 5000 and there is no token.'''
    settings = ConfigSettings('servicex', 'servicex')
    settings.clear()
    # Re-add just the defaults after wiping everything.
    settings._add_default_source()

    adaptor = ServiceXConfigAdaptor(settings)
    url, access_token = adaptor.get_servicex_adaptor_config()

    assert url == 'http://localhost:5000'
    assert access_token is None
def test_sx_adaptor_settings_backend_name_requested_with_unlabeled_type(
        caplog):
    '''Requesting backend 'xaod' when the only configured endpoint has no
    `type` raises a `ServiceXException` whose message contains
    'Unable to find'.'''
    from confuse import Configuration
    c = Configuration('bogus', 'bogus')
    c.clear()
    c['api_endpoints'] = [{
        'endpoint': 'http://my-left-foot.com:5000',
        'token': 'forkingshirtballs.thegoodplace.bortles'
    }]

    x = ServiceXConfigAdaptor(c)
    with pytest.raises(ServiceXException) as e:
        _ = x.get_servicex_adaptor_config('xaod')

    # `e` is pytest's ExceptionInfo wrapper; the raised exception itself is
    # `e.value`, so check the message there rather than on the wrapper.
    assert 'Unable to find' in str(e.value)
def test_sx_adaptor_settings_name_worng(caplog):
    '''Requesting 'my-type' raises a `ServiceXException` when the only
    endpoint with that `type` also carries a `name` ('my-fork').'''
    from confuse import Configuration
    c = Configuration('bogus', 'bogus')
    c.clear()
    c['api_endpoints'] = [{
        'type': 'my-type',
        'name': 'my-fork',
        'endpoint': 'http://my-left-foot.com:5000',
        'token': 'forkingshirtballs.thegoodplace.bortles'
    }]

    x = ServiceXConfigAdaptor(c)
    with pytest.raises(ServiceXException) as e:
        x.get_servicex_adaptor_config('my-type')

    # `e` is pytest's ExceptionInfo wrapper; the raised exception itself is
    # `e.value`, so check the message there rather than on the wrapper.
    assert 'Unable to find type my-type' in str(e.value)
def test_sx_adaptor_settings_backend_name_unlabeled_type():
    '''With no backend name requested, the endpoint that has no `type` key
    is returned rather than the one typed 'xaod'.'''
    from confuse import Configuration

    typed_endpoint = {
        'type': 'xaod',
        'endpoint': 'http://my-left-foot.com:5000',
        'token': 'forkingshirtballs.thegoodplace.bortles',
    }
    untyped_endpoint = {
        'endpoint': 'http://my-left-foot.com:5001',
        'token': 'forkingshirtballs.thegoodplace.bortles1',
    }

    settings = Configuration('bogus', 'bogus')
    settings.clear()
    settings['api_endpoints'] = [typed_endpoint, untyped_endpoint]

    adaptor = ServiceXConfigAdaptor(settings)
    url, access_token = adaptor.get_servicex_adaptor_config()

    assert url == 'http://my-left-foot.com:5001'
    assert access_token == 'forkingshirtballs.thegoodplace.bortles1'
def test_sx_adaptor_settings_backend_name_requested_with_unlabeled_type(
        caplog):
    '''Requesting backend 'xaod' when the only configured endpoint has no
    `type` returns that endpoint and logs a message asking for the type to
    be added to the configuration file.'''
    from confuse import Configuration

    settings = Configuration('bogus', 'bogus')
    settings.clear()
    settings['api_endpoints'] = [
        {
            'endpoint': 'http://my-left-foot.com:5000',
            'token': 'forkingshirtballs.thegoodplace.bortles',
        },
    ]

    adaptor = ServiceXConfigAdaptor(settings)
    url, access_token = adaptor.get_servicex_adaptor_config('xaod')

    assert url == 'http://my-left-foot.com:5000'
    assert access_token == 'forkingshirtballs.thegoodplace.bortles'

    # The third element of a caplog record tuple is the message text.
    expected_message = (
        "No 'xaod' backend type found, "
        "using http://my-left-foot.com:5000 - please add to "
        "the configuration file (e.g. servicex.yaml)"
    )
    first_message = caplog.record_tuples[0][2]
    assert first_message == expected_message
def test_sx_adaptor_settings_no_backend_name_requested_or_listed(caplog):
    '''With no backend requested and a single endpoint that has no `type`,
    that endpoint is returned and a message asking the caller to be
    explicit is logged.'''
    from confuse import Configuration

    settings = Configuration('bogus', 'bogus')
    settings.clear()
    settings['api_endpoints'] = [
        {
            'endpoint': 'http://my-left-foot.com:5000',
            'token': 'forkingshirtballs.thegoodplace.bortles',
        },
    ]

    adaptor = ServiceXConfigAdaptor(settings)
    url, access_token = adaptor.get_servicex_adaptor_config()

    assert url == 'http://my-left-foot.com:5000'
    assert access_token == 'forkingshirtballs.thegoodplace.bortles'

    # The third element of a caplog record tuple is the message text.
    expected_message = (
        "No backend type requested, "
        "using http://my-left-foot.com:5000 - please be "
        "explicit "
        "in the ServiceXDataset constructor"
    )
    first_message = caplog.record_tuples[0][2]
    assert first_message == expected_message
def test_sx_adaptor_settings_env():
    '''`${VAR}` references in the `type`, `endpoint` and `token` fields of
    an api endpoint are replaced with values from the process environment.

    The environment variables are set only for the duration of the lookup
    so they do not leak into other tests.'''
    from os import environ
    from unittest.mock import patch

    from confuse import Configuration

    c = Configuration('bogus', 'bogus')
    c.clear()
    c['api_endpoints'] = [{
        'type': '${SXTYPE}',
        'endpoint': '${ENDPOINT}:5000',
        'token': '${SXTOKEN}',
    }]

    env_values = {
        'ENDPOINT': 'http://tachi.com',
        'SXTYPE': 'mcrn',
        'SXTOKEN': 'protomolecule',
    }

    # patch.dict restores os.environ to its prior contents on exit, even if
    # the lookup raises.
    with patch.dict(environ, env_values):
        x = ServiceXConfigAdaptor(c)
        endpoint, token = x.get_servicex_adaptor_config('mcrn')

    assert endpoint == 'http://tachi.com:5000'
    assert token == 'protomolecule'
def __init__(self,
             dataset: DatasetType,
             backend_name: Optional[str] = None,
             image: Optional[str] = None,
             max_workers: int = 20,
             servicex_adaptor: Optional[ServiceXAdaptor] = None,
             minio_adaptor: Optional[Union[MinioAdaptor, MinioAdaptorFactory]] = None,
             cache_adaptor: Optional[Cache] = None,
             status_callback_factory: Optional[StatusUpdateFactory] = _run_default_wrapper,
             local_log: Optional[log_adaptor] = None,
             session_generator: Optional[
                 Callable[[], Awaitable[aiohttp.ClientSession]]] = None,
             config_adaptor: Optional[ServiceXConfigAdaptor] = None,
             data_convert_adaptor: Optional[DataConverterAdaptor] = None,
             ignore_cache: bool = False):
    '''
    Create and configure a ServiceX object for a dataset.

    Arguments

        dataset                     Name of a dataset from which queries will be selected.
        backend_name                The type of backend. Used only if we need to find an
                                    end-point. If we do not have a `servicex_adaptor` then this
                                    will default to xaod, unless you have any endpoint listed
                                    in your servicex file. It will default to best match there,
                                    or fail if a name has been given.
        image                       Name of transformer image to use to transform the data. If
                                    left as default, `None`, then the default image for the
                                    ServiceX backend will be used.
        max_workers                 Maximum number of transformers to run simultaneously on
                                    ServiceX.
        servicex_adaptor            Object to control communication with the servicex instance
                                    at a particular ip address with certain login credentials.
                                    Will be configured via the `config_adaptor` by default.
        minio_adaptor               Object to control communication with the minio servicex
                                    instance. Either a `MinioAdaptor` (which is then always
                                    returned by a wrapping `MinioAdaptorFactory`) or a
                                    `MinioAdaptorFactory`.
        cache_adaptor               Runs the caching for data and queries that are sent up and
                                    down.
        status_callback_factory     Factory to create a status notification callback for each
                                    query. One is created per query.
        local_log                   Log adaptor for logging.
        session_generator           If you want to control the `ClientSession` object that
                                    is used for callbacks. Otherwise a single one for all
                                    `servicex` queries is used.
        config_adaptor              Control how configuration options are read from a
                                    configuration file (e.g. servicex.yaml).
        data_convert_adaptor        Manages conversions between root and parquet and `pandas`
                                    and `awkward`, including default settings for expected
                                    datatypes from the backend.
        ignore_cache                Always ignore the cache on any query for this dataset.
                                    This is only meaningful if no cache adaptor is provided.
                                    Defaults to false - the cache is used if possible.

    Notes:

        -  The `status_callback_factory` argument, by default, uses the `tqdm` library to
           render progress bars in a terminal window or a graphic in a Jupyter notebook (with
           proper jupyter extensions installed). If `status_callback_factory` is specified as
           None, no updates will be rendered. A custom callback function can also be specified
           which takes `(total_files, transformed, downloaded, skipped)` as an argument. The
           `total_files` parameter may be `None` until the system knows how many files need to
           be processed (and some files can even be completed before that is known).
        -  The full description of calling parameters is recorded in the local cache, including
           things like `image` name and tag.
    '''
    ServiceXABC.__init__(self, dataset, image, max_workers,
                         status_callback_factory,
                         )

    # Get the local settings
    config = config_adaptor if config_adaptor is not None \
        else ServiceXConfigAdaptor()

    # Establish the cache that will store all our queries. A cache is only
    # built (honouring `ignore_cache`) when the caller did not supply one.
    self._cache = Cache(get_configured_cache_path(config.settings), ignore_cache) \
        if cache_adaptor is None \
        else cache_adaptor

    if not servicex_adaptor:
        # Given servicex adaptor is none, this should be ok. Fixes type checkers
        end_point, token = config.get_servicex_adaptor_config(backend_name)
        servicex_adaptor = ServiceXAdaptor(end_point, token)
    self._servicex_adaptor = servicex_adaptor

    # Always store a factory: build a default one, wrap a concrete adaptor so
    # it is always returned, or use the supplied factory directly.
    if not minio_adaptor:
        self._minio_adaptor = MinioAdaptorFactory()
    elif isinstance(minio_adaptor, MinioAdaptor):
        self._minio_adaptor = MinioAdaptorFactory(always_return=minio_adaptor)
    else:
        self._minio_adaptor = minio_adaptor

    self._log = log_adaptor() if local_log is None else local_log

    self._session_generator = session_generator if session_generator is not None \
        else default_client_session

    # The configured default datatype for this backend also seeds the
    # default data converter.
    self._return_types = [config.get_default_returned_datatype(backend_name)]
    self._converter = data_convert_adaptor if data_convert_adaptor is not None \
        else DataConverterAdaptor(self._return_types[0])
def test_default_ctor():
    '''An adaptor built with no arguments exposes a `ConfigSettings`
    instance as its `settings`.'''
    default_settings = ServiceXConfigAdaptor().settings
    assert isinstance(default_settings, ConfigSettings)
def test_returned_datatype_from_default_dict():
    '''The return datatype for a backend type is read from the matching
    `backend_types` entry.'''
    settings = ConfigSettings('servicex', 'servicex')
    settings.clear()

    backend_types = [{'type': 'forkit', 'return_data': 'spoon'}]
    settings['backend_types'] = backend_types

    adaptor = ServiceXConfigAdaptor(settings)
    returned = adaptor.get_default_returned_datatype('forkit')
    assert returned == 'spoon'
def test_returned_datatype_default():
    '''With no backend type given, the return datatype comes from the
    `default_return_data` setting.'''
    settings = ConfigSettings('servicex', 'servicex')
    settings.clear()
    settings['default_return_data'] = 'root'

    adaptor = ServiceXConfigAdaptor(settings)
    returned = adaptor.get_default_returned_datatype(None)
    assert returned == 'root'
def test_returned_datatype_nothing():
    '''Asking for the default return datatype from a cleared (empty)
    configuration raises a `ServiceXException`.'''
    empty_settings = ConfigSettings('servicex', 'servicex')
    empty_settings.clear()

    adaptor = ServiceXConfigAdaptor(empty_settings)

    with pytest.raises(ServiceXException):
        adaptor.get_default_returned_datatype(None)
def test_passed_in_settings():
    '''Settings handed to the constructor are kept as the very same object
    (identity, not a copy).'''
    supplied = ConfigSettings('servicex', 'servicex')
    adaptor = ServiceXConfigAdaptor(supplied)
    assert adaptor.settings is supplied