def test_put_handlers():
    int_source = IntSource()
    float_store = FloatStore()
    int_float = IntFloatTransformer()
    float_int = FloatIntTransformer()
    string = StringTransformer()

    elements = [float_store, int_source]
    transformers = {int_float, float_int, string}
    # noinspection PyTypeChecker
    pipeline = DataPipeline(elements, transformers)

    handlers = pipeline._put_handlers(str)

    assert type(handlers) is set
    assert len(handlers) == 1

    handler = list(handlers)[0]
    assert handler._sink is float_store
    assert handler._store_type is float

    values = [
        str(random.uniform(-VALUES_MAX, VALUES_MAX))
        for _ in range(VALUES_COUNT)
    ]
    for value in values:
        assert type(handler._transform(data=value)) is float

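# NOTE: These tests rely on module-level constants and fixture classes
# (IntSource, FloatStore, IntFloatTransformer, FloatIntTransformer,
# StringTransformer) defined elsewhere in the test suite.  As a rough,
# assumed sketch of the constants based on how they are used here:
#   VALUES_COUNT = 100
#   VALUES_MAX = 100000
#   VALUE_KEY = "value"
#   COUNT_KEY = "count"
#   GENERATOR_CLASS = type(_ for _ in ())  # the builtin generator type
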
def create_default_pipeline(api_key, verbose=False):
    from datapipelines import DataPipeline, CompositeDataTransformer
    from ..datastores.cache import Cache
    from ..datastores.riotapi import RiotAPI
    from ..datastores.ddragon import DDragonDataSource
    from ..transformers.staticdata import StaticDataTransformer
    from ..transformers.champion import ChampionTransformer
    from ..transformers.championmastery import ChampionMasteryTransformer
    from ..transformers.summoner import SummonerTransformer
    from ..transformers.match import MatchTransformer
    from ..transformers.masteries import MasteriesTransformer
    from ..transformers.runes import RunesTransformer
    from ..transformers.spectator import SpectatorTransformer
    from ..transformers.status import StatusTransformer
    from ..transformers.leagues import LeagueTransformer

    services = [
        Cache(),  # TODO: Add expirations from file
        DDragonDataSource(),  # TODO: Should this be default?
        RiotAPI(api_key=api_key)
    ]

    riotapi_transformer = CompositeDataTransformer([
        StaticDataTransformer(),
        ChampionTransformer(),
        ChampionMasteryTransformer(),
        SummonerTransformer(),
        MatchTransformer(),
        MasteriesTransformer(),
        RunesTransformer(),
        SpectatorTransformer(),
        StatusTransformer(),
        LeagueTransformer()
    ])

    pipeline = DataPipeline(services, [riotapi_transformer])
    pipeline._transformer = riotapi_transformer

    # Manually put the cache on the pipeline.  TODO: Is this the best way?
    for datastore in services:
        if isinstance(datastore, Cache):
            pipeline._cache = datastore
            break
    else:
        pipeline._cache = None

    if verbose:
        for service in services:
            for p in service.provides:
                print("Provides:", p)
        for transformer in riotapi_transformer:
            for t in transformer.transforms.items():
                print("Transformer:", t)
        print()

    return pipeline

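# Illustrative usage of create_default_pipeline (a sketch only; the API key
# below is a placeholder, and calling this directly bypasses any higher-level
# settings machinery the library may provide):
#
#     pipeline = create_default_pipeline(api_key="RGAPI-...", verbose=True)
#     # The returned DataPipeline can then be queried with get()/get_many()
#     # for the types provided by the registered stores and transformers.
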
def test_get_many_streaming():
    int_source = IntSource()
    float_store = FloatStore()
    int_float = IntFloatTransformer()
    float_int = FloatIntTransformer()
    string = StringTransformer()

    elements = [float_store, int_source]
    transformers = {int_float, float_int, string}
    # noinspection PyTypeChecker
    pipeline = DataPipeline(elements, transformers)

    values = [
        str(random.randint(-VALUES_MAX, VALUES_MAX))
        for _ in range(VALUES_COUNT)
    ]
    for value in values:
        query = {VALUE_KEY: value, COUNT_KEY: VALUES_COUNT}
        result = pipeline.get_many(int, query, streaming=True)

        assert type(result) is GENERATOR_CLASS
        for res in result:
            assert type(res) is int
            assert res == int(value)
        assert float(value) in float_store.items

    float_store.items.clear()

    for value in values:
        query = {VALUE_KEY: value, COUNT_KEY: VALUES_COUNT}
        result = pipeline.get_many(float, query, streaming=True)

        assert type(result) is GENERATOR_CLASS
        for res in result:
            assert type(res) is float
            assert res == float(value)
        assert float(value) in float_store.items

    float_store.items.clear()

    for value in values:
        query = {VALUE_KEY: value, COUNT_KEY: VALUES_COUNT}
        result = pipeline.get_many(str, query, streaming=True)

        assert type(result) is GENERATOR_CLASS
        for res in result:
            assert type(res) is str
            assert res == value
        assert float(value) in float_store.items

def test_put_handlers_impossible():
    int_source = IntSource()
    float_store = FloatStore()
    int_float = IntFloatTransformer()

    elements = [float_store, int_source]
    transformers = {int_float}
    # noinspection PyTypeChecker
    pipeline = DataPipeline(elements, transformers)

    with pytest.raises(NoConversionError):
        pipeline._put_handlers(str)

def test_put_many():
    int_source = IntSource()
    float_store = FloatStore()
    int_float = IntFloatTransformer()
    float_int = FloatIntTransformer()
    string = StringTransformer()

    elements = [float_store, int_source]
    transformers = {int_float, float_int, string}
    # noinspection PyTypeChecker
    pipeline = DataPipeline(elements, transformers)

    values = [
        random.randint(-VALUES_MAX, VALUES_MAX)
        for _ in range(VALUES_COUNT)
    ]
    for value in values:
        items = (value for _ in range(VALUES_COUNT))
        result = pipeline.put_many(int, items)

        assert result is None
        assert float(value) in float_store.items

    float_store.items.clear()

    values = [
        random.uniform(-VALUES_MAX, VALUES_MAX)
        for _ in range(VALUES_COUNT)
    ]
    for value in values:
        items = (value for _ in range(VALUES_COUNT))
        result = pipeline.put_many(float, items)

        assert result is None
        assert value in float_store.items

    float_store.items.clear()

    values = [
        str(random.uniform(-VALUES_MAX, VALUES_MAX))
        for _ in range(VALUES_COUNT)
    ]
    for value in values:
        items = (value for _ in range(VALUES_COUNT))
        result = pipeline.put_many(str, items)

        assert result is None
        assert float(value) in float_store.items

def test_get_handlers():
    int_source = IntSource()
    float_store = FloatStore()
    int_float = IntFloatTransformer()
    float_int = FloatIntTransformer()
    string = StringTransformer()

    elements = [float_store, int_source]
    transformers = {int_float, float_int, string}
    # noinspection PyTypeChecker
    pipeline = DataPipeline(elements, transformers)

    handlers = pipeline._get_handlers(str)

    assert type(handlers) is list
    assert len(handlers) == 2

    assert handlers[0]._source is float_store
    assert handlers[0]._source_type is float
    assert handlers[1]._source is int_source
    assert handlers[1]._source_type is int

    values = [
        random.uniform(-VALUES_MAX, VALUES_MAX)
        for _ in range(VALUES_COUNT)
    ]
    for value in values:
        assert type(handlers[0]._transform(data=value)) is str

    values = [
        random.randint(-VALUES_MAX, VALUES_MAX)
        for _ in range(VALUES_COUNT)
    ]
    for value in values:
        assert type(handlers[1]._transform(data=value)) is str

    assert len(handlers[0]._before_transform) == 0
    assert len(handlers[0]._after_transform) == 0
    assert len(handlers[1]._before_transform) == 1
    assert len(handlers[1]._after_transform) == 1

    sink_handler = list(handlers[1]._before_transform)[0]
    assert sink_handler._sink is float_store
    assert sink_handler._store_type is float

    for value in values:
        assert type(sink_handler._transform(data=value)) is float

def test_new_context():
    int_source = IntSource()
    float_store = FloatStore()
    int_float = IntFloatTransformer()
    float_int = FloatIntTransformer()
    string = StringTransformer()

    elements = [float_store, int_source]
    transformers = {int_float, float_int, string}
    # noinspection PyTypeChecker
    pipeline = DataPipeline(elements, transformers)

    context = pipeline._new_context()

    assert type(context) is PipelineContext
    assert context[PipelineContext.Keys.PIPELINE] is pipeline

def create_pipeline(service_configs: Dict, verbose: bool = False) -> DataPipeline:
    transformers = []

    # Always use the Riot API transformers
    from ..transformers import __transformers__ as riotapi_transformer
    transformers.extend(riotapi_transformer)

    # Add sources / sinks by name from config
    services = []
    for store_name, config in service_configs.items():
        package = config.pop("package", None)
        if package is None:
            package = "cassiopeia.datastores"
        module = importlib.import_module(name=package)
        store_cls = getattr(module, store_name)
        store = store_cls(**config)
        services.append(store)
        service_transformers = getattr(module, "__transformers__", [])
        transformers.extend(service_transformers)

    pipeline = DataPipeline(services, transformers)

    # Manually put the cache on the pipeline.
    from ..datastores import Cache
    for datastore in services:
        if isinstance(datastore, Cache):
            pipeline._cache = datastore
            break
    else:
        pipeline._cache = None

    if verbose:
        for service in services:
            print("Service:", service)
            for p in service.provides:
                print("  Provides:", p)
        for transformer in transformers:
            for t in transformer.transforms.items():
                print("Transformer:", t)
        print()

    return pipeline

def create_pipeline(service_configs: Dict, verbose: int = 0) -> DataPipeline:
    transformers = []

    # Always use the Riot API transformers
    from ..transformers import __transformers__ as riotapi_transformer
    transformers.extend(riotapi_transformer)

    # Add sources / sinks by name from config
    services = []
    for store_name, config in service_configs.items():
        package = config.pop("package", None)
        if package is None:
            package = "cassiopeia.datastores"
        module = importlib.import_module(name=package)
        store_cls = getattr(module, store_name)
        store = store_cls(**config)
        services.append(store)
        service_transformers = getattr(module, "__transformers__", [])
        transformers.extend(service_transformers)

    from ..datastores import Cache, MerakiAnalyticsCDN, LolWikia

    # Automatically insert the ghost store if it isn't there
    from ..datastores import UnloadedGhostStore
    found = False
    for datastore in services:
        if isinstance(datastore, UnloadedGhostStore):
            found = True
            break
    if not found:
        if any(isinstance(service, Cache) for service in services):
            # Find the cache and insert the ghost store directly after it
            for i, datastore in enumerate(services):
                if isinstance(datastore, Cache):
                    services.insert(i + 1, UnloadedGhostStore())
                    break
        else:
            # Insert the ghost store at the beginning of the pipeline
            services.insert(0, UnloadedGhostStore())

    services.append(MerakiAnalyticsCDN())
    services.append(LolWikia())

    pipeline = DataPipeline(services, transformers)

    # Manually put the cache on the pipeline.
    for datastore in services:
        if isinstance(datastore, Cache):
            pipeline._cache = datastore
            break
    else:
        pipeline._cache = None

    if verbose > 0:
        for service in services:
            print("Service:", service)
            if verbose > 1:
                if isinstance(service, DataSource):
                    for p in service.provides:
                        print("  Provides:", p)
                if isinstance(service, DataSink):
                    for p in service.accepts:
                        print("  Accepts:", p)
        if verbose > 2:
            for transformer in transformers:
                for t in transformer.transforms.items():
                    print("Transformer:", t)
        print()

    return pipeline

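# A sketch of the service_configs shape create_pipeline expects: each key is
# a store class name resolved from the "package" module (defaulting to
# "cassiopeia.datastores"), and the remaining entries of each config dict are
# passed to that store's constructor as keyword arguments.  The specific
# options shown below are illustrative assumptions, not a definitive config:
#
#     service_configs = {
#         "Cache": {},
#         "RiotAPI": {"api_key": "RGAPI-..."},  # placeholder key
#     }
#     pipeline = create_pipeline(service_configs, verbose=1)
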
def test_pipeline_transform():
    int_source = IntSource()
    float_store = FloatStore()
    int_float = IntFloatTransformer()
    float_int = FloatIntTransformer()
    string = StringTransformer()

    elements = [float_store, int_source]
    transformers = {int_float, float_int, string}
    # noinspection PyTypeChecker
    pipeline = DataPipeline(elements, transformers)

    # Identity of type in graph
    transform, cost = pipeline._transform(int, int)
    assert cost == 0
    assert transform is _identity

    values = [
        random.randint(-VALUES_MAX, VALUES_MAX)
        for _ in range(VALUES_COUNT)
    ]
    for value in values:
        result = transform(data=value)
        assert type(result) is int
        assert result == value

    # Identity of type not in graph
    transform, cost = pipeline._transform(bool, bool)
    assert cost == 0
    assert transform is _identity

    values = [random.choice([True, False]) for _ in range(VALUES_COUNT)]
    for value in values:
        result = transform(data=value)
        assert type(result) is bool
        assert result == value

    # Simple transform
    transform, cost = pipeline._transform(int, float)
    assert cost == 1

    values = [
        random.randint(-VALUES_MAX, VALUES_MAX)
        for _ in range(VALUES_COUNT)
    ]
    for value in values:
        result = transform(data=value)
        assert type(result) is float
        assert result == value

    # Avoid expensive transformer
    transform, cost = pipeline._transform(float, int)
    # It will go through the StringTransformer twice, which is cheaper than the FloatIntTransformer
    assert cost == 2

    values = [
        random.uniform(-VALUES_MAX, VALUES_MAX)
        for _ in range(VALUES_COUNT)
    ]
    for value in values:
        result = transform(data=value)
        assert type(result) is int
        assert result == int(value)