def valid_fetcher_event(benchmark_event, benchmark_doc: BenchmarkDoc) -> FetcherBenchmarkEvent:
    """Derive a ``FetcherBenchmarkEvent`` from ``benchmark_event``.

    The new event gets a ``FetcherPayload`` whose TOML is ``benchmark_doc``
    and whose dataset list is empty.

    :param benchmark_event: event object handed to ``create_from_object`` as the source
    :param benchmark_doc: document stored as the payload's ``toml``
    :return: the result of ``create_from_object`` for ``FetcherBenchmarkEvent``
    """
    return create_from_object(
        FetcherBenchmarkEvent,
        benchmark_event,
        # We don't care about datasets here
        payload=FetcherPayload(toml=benchmark_doc, datasets=[]),
    )
def benchmark_event_with_datasets_and_models(benchmark_doc: BenchmarkDoc, datasets, models) -> BenchmarkEvent:
    """Return a benchmark event whose payload carries the given datasets and models.

    :param benchmark_doc: document stored as the payload's ``toml``
    :param datasets: value passed through as the payload's ``datasets``
    :param models: value passed through as the payload's ``models``
    :return: whatever ``get_benchmark_event`` builds for that payload
    """
    return get_benchmark_event(
        FetcherPayload(toml=benchmark_doc, datasets=datasets, models=models)
    )
def fetcher_event(descriptor_as_adict) -> FetcherBenchmarkEvent:
    """Build a fully populated ``FetcherBenchmarkEvent`` with one file dataset.

    The payload's TOML wraps ``descriptor_as_adict.to_dict()``; scripts come
    from the module-level ``SCRIPTS`` and the single dataset uses
    ``DATASET_S3_URI`` / ``DATASET_ID``.

    :param descriptor_as_adict: object exposing ``to_dict()`` for the doc contents
    :return: the constructed event
    """
    descriptor_doc = BenchmarkDoc(
        contents=descriptor_as_adict.to_dict(),
        doc="",
        sha1="SHA",
    )
    fetched_dataset = DownloadableContent(
        src="http://someserver.com/somedata.zip",
        dst=DATASET_S3_URI,
        path="/mount/path",
        id=DATASET_ID,
        size_info=ContentSizeInfo(total_size=42, file_count=1, max_size=42),
        type=FetchedType.FILE,
    )
    event_payload = FetcherPayload(
        toml=descriptor_doc,
        scripts=SCRIPTS,
        datasets=[fetched_dataset],
    )
    return FetcherBenchmarkEvent(
        action_id=ACTION_ID,
        message_id="MESSAGE_ID",
        client_id="CLIENT_ID",
        client_version="CLIENT_VERSION",
        client_username="******",
        authenticated=False,
        tstamp=42,
        visited=[],
        type="PRODUCER_TOPIC",
        payload=event_payload,
    )
def test_create_from_object(base_event_as_dict):
    """Check that ``create_from_object`` yields a ``FetcherBenchmarkEvent``.

    A ``BenchmarkEvent`` is parsed from the JSON form of ``base_event_as_dict``
    and converted with a minimal ``FetcherPayload``; the result must be exactly
    a ``FetcherBenchmarkEvent`` and keep the source event's ``action_id``.

    :param base_event_as_dict: JSON-serialisable dict describing the base event
    """
    event = BenchmarkEvent.from_json(json.dumps(base_event_as_dict))
    minimal_payload = FetcherPayload(toml="", datasets=[])

    obj = create_from_object(FetcherBenchmarkEvent, event, payload=minimal_payload)

    # Exact-type check on purpose; types are compared by identity, not equality.
    assert type(obj) is FetcherBenchmarkEvent
    assert obj.action_id == event.action_id
def mock_scheduled_run_fetcher_event():
    """Return a ``FetcherBenchmarkEvent`` derived from the scheduled-run benchmark event.

    The source event comes from ``create_benchmark_event(scheduled_run_benchmark())``;
    its payload TOML is reused in a ``FetcherPayload`` with no datasets.
    """
    source_event = create_benchmark_event(scheduled_run_benchmark())
    fetcher_payload = FetcherPayload(datasets=[], toml=source_event.payload.toml)
    return create_from_object(
        FetcherBenchmarkEvent,
        source_event,
        payload=fetcher_payload,
    )
def fetcher_event(benchmark_event: BenchmarkEvent) -> FetcherBenchmarkEvent:
    """Convert ``benchmark_event`` into a ``FetcherBenchmarkEvent`` with a placeholder doc.

    :param benchmark_event: event used as the source for ``create_from_object``
    :return: event whose payload has a fixed dummy TOML and no datasets
    """
    placeholder_doc = BenchmarkDoc(
        contents={"var": "val"},
        doc="DONTCARE",
        sha1="DONTCARE",
    )
    placeholder_payload = FetcherPayload(datasets=[], toml=placeholder_doc)
    return create_from_object(
        FetcherBenchmarkEvent,
        benchmark_event,
        payload=placeholder_payload,
    )
def get_fetcher_benchmark_event(template_event: BenchmarkEvent, dataset_src: str, model_src: str):
    """Copy ``template_event`` with a fetcher payload built from the given sources.

    A falsy ``dataset_src`` / ``model_src`` produces an empty list; otherwise
    the list holds one ``DownloadableContent`` whose ``src`` is
    ``get_salted_src`` of the source and whose path is ``/mount/path``.

    :param template_event: event whose fields are reused via ``dataclasses.replace``
    :param dataset_src: dataset source, or a falsy value for no datasets
    :param model_src: model source, or a falsy value for no models
    :return: copy of ``template_event`` with the new payload
    """
    placeholder_doc = BenchmarkDoc({"var": "val"}, "var = val", "")

    datasets = []
    if dataset_src:
        datasets.append(
            DownloadableContent(src=get_salted_src(dataset_src), path="/mount/path")
        )

    models = []
    if model_src:
        models.append(
            DownloadableContent(src=get_salted_src(model_src), path="/mount/path")
        )

    return dataclasses.replace(
        template_event,
        payload=FetcherPayload(toml=placeholder_doc, datasets=datasets, models=models),
    )
def create_fetcher_benchmark_event_from_dict(
    benchmark_event_dummy_payload: BenchmarkEvent, toml_dict: Dict[str, Any]
) -> FetcherBenchmarkEvent:
    """Create a ``FetcherBenchmarkEvent`` whose doc contents are ``toml_dict``.

    The source event's payload is replaced (we don't care about the initial
    TOML) and underscores in its ``action_id`` are replaced with dashes.

    :param benchmark_event_dummy_payload: source event; only its payload and
        ``action_id`` are overridden here
    :param toml_dict: contents for the new ``BenchmarkDoc``
    :return: the result of ``create_from_object`` for ``FetcherBenchmarkEvent``
    """
    replacement_doc = BenchmarkDoc(toml_dict, "var = val", "")
    replacement_payload = FetcherPayload(toml=replacement_doc, datasets=[])
    dashed_action_id = benchmark_event_dummy_payload.action_id.replace("_", "-")
    return create_from_object(
        FetcherBenchmarkEvent,
        benchmark_event_dummy_payload,
        payload=replacement_payload,
        action_id=dashed_action_id,
    )
def benchmark_event(shared_datadir):
    """Build a ``BenchmarkEvent`` whose payload wraps the ``hello-world.toml`` descriptor.

    :param shared_datadir: path-like directory containing ``hello-world.toml``
    :return: event of type ``BAI_APP_FETCHER`` with a dataset-less ``FetcherPayload``
    """
    # ``toml`` is the parser module here; below it is also a keyword argument name.
    descriptor_contents = toml.load(str(shared_datadir / "hello-world.toml"))
    return BenchmarkEvent(
        action_id=ACTION_ID,
        message_id="MESSAGE_ID",
        client_id="CLIENT_ID",
        client_version="CLIENT_VERSION",
        client_username="******",
        authenticated=False,
        tstamp=42,
        visited=[],
        type="BAI_APP_FETCHER",
        payload=FetcherPayload(
            toml=BenchmarkDoc(contents=descriptor_contents, sha1="SHA1", doc="doc"),
            datasets=[],
        ),
    )
def benchmark_event_without_datasets_or_models(benchmark_doc: BenchmarkDoc) -> BenchmarkEvent:
    """Return a benchmark event whose payload has ``benchmark_doc`` and no datasets.

    ``models`` is not passed, so the payload keeps whatever default
    ``FetcherPayload`` defines for it.

    :param benchmark_doc: document stored as the payload's ``toml``
    :return: whatever ``get_benchmark_event`` builds for that payload
    """
    return get_benchmark_event(FetcherPayload(toml=benchmark_doc, datasets=[]))
def test_crazy_json():
    """``FetcherPayload.from_json`` must raise ``TypeError`` on malformed datasets.

    In the input, ``datasets`` is a list nested inside another list and
    includes an empty object.
    """
    malformed_payload = (
        '{"toml":{"contents": {"name": "doc"},"sha1": "sha1","doc": "dst"}, '
        '"datasets" : [ [ {"src" : "s3://bucket/imagenet/train"}, { } ] ]}'
    )
    with pytest.raises(TypeError):
        FetcherPayload.from_json(malformed_payload)
import dacite import pytest from bai_kafka_utils.events import ( BenchmarkEvent, StatusMessageBenchmarkEvent, StatusMessageBenchmarkEventPayload, VisitedService, DownloadableContent, FetcherPayload, FetcherBenchmarkEvent, Status, ) FETCHER_PAYLOAD = FetcherPayload( datasets=[DownloadableContent(src="SRC", path="/mount/path")], toml=None) FETCHER_EVENT = FetcherBenchmarkEvent( action_id="OTHER_ACTION_ID", parent_action_id="PARENT_ACTION_ID", message_id="OTHER_MESSAGE_ID", client_id="OTHER_CLIENT_ID", client_version="0.1.0-481dad2", client_username="******", authenticated=False, tstamp=1556814924121, visited=[ VisitedService(svc="some", tstamp=1556814924121, version="1.0", node=None)
DownloadableContent("s3://bucket/model/inception", path="/models/inception", md5="5d41402abc4b2a76b9719d911017c592"), DownloadableContent("s3://bucket/models/mnist", path="/models/mnist"), ] EXPECTED_FETCHER_EVENT = FetcherBenchmarkEvent( action_id="ffea52eb-c24b-4dd0-b32e-61230db34ad5", message_id="007bd9f8-f564-4edb-bb48-7380ee562ffc", client_id="97e7eb322342626974fb171fc5793514b0aea789", client_version="0.1.0-481dad2", client_username="******", authenticated=False, tstamp=1556814924121, visited=EXPECTED_FETCHER_VISITED, type="BAI_APP_BFF", payload=FetcherPayload( datasets=EXPECTED_FETCHER_DATASETS, scripts=EXPECTED_FETCHER_SCRIPTS, models=EXPECTED_FETCHER_MODELS, toml=EXPECTED_FETCHER_DOC, ), ) def test_big_fetcher_json(): event = FetcherBenchmarkEvent.from_json(BIG_FETCHER_JSON) print(event) print(EXPECTED_FETCHER_EVENT) assert event == EXPECTED_FETCHER_EVENT