def test_init_options(s3root, pathspecs, expected):
    """Exercise every way of initializing the S3 client against *expected* objects.

    Covers four init modes: (1) ``s3root`` as a prefix, (2) a full object URL as
    ``s3root``, (3) no root with full URLs passed to ``get``, and (4) a
    ``run=``-scoped client driven by the ``current`` singleton.

    NOTE(review): this definition is shadowed by a later duplicate
    ``test_init_options`` in this file; only the last definition is collected by
    pytest. Consider deleting one of the two.
    """
    # Exactly one pathspec is expected; unpack it or fail loudly.
    [pathspec] = pathspecs
    flow_name, run_id = pathspec.split('/')
    plen = len(s3root)

    # option 1) s3root as prefix
    with S3(s3root=s3root) as s3:
        for url, exp in expected.items():
            # s3root should work as a prefix
            s3obj = s3.get(url[plen:])
            assert s3obj.key == url[plen:]
            assert_results([s3obj], {url: exp})
        # A full URL is not a valid key when a root is set.
        with pytest.raises(MetaflowS3URLException):
            s3.get('s3://some/fake/address')

    # option 2) full url as s3root
    for url, exp in expected.items():
        with S3(s3root=url) as s3:
            s3obj = s3.get()
            assert_results([s3obj], {url: exp})

    # option 3) full urls
    with S3() as s3:
        for url, exp in expected.items():
            # without a root, only full s3:// URLs are accepted
            s3obj = s3.get(url)
            assert s3obj.key == url
            assert_results([s3obj], {url: exp})
        # Relative suffixes and malformed URLs must be rejected in every API.
        with pytest.raises(MetaflowS3URLException):
            s3.get('suffix')
        with pytest.raises(MetaflowS3URLException):
            s3.get('s3://nopath')
        with pytest.raises(MetaflowS3URLException):
            s3.get_many(['suffixes'])
        with pytest.raises(MetaflowS3URLException):
            s3.get_recursive(['suffixes'])
        with pytest.raises(MetaflowS3URLException):
            s3.get_all()

    # option 4) 'current' environment (fake a running flow)
    flow = FakeFlow(use_cli=False)
    parsed = urlparse(s3root)
    with pytest.raises(MetaflowS3URLException):
        # current not set yet, so this should fail
        with S3(run=flow):
            pass
    current._set_env(flow_name, run_id, 'no_step', 'no_task',
                     'no_origin_run_id', 'no_ns', 'no_user')
    with S3(bucket=parsed.netloc, prefix=parsed.path, run=flow) as s3:
        for url, exp in expected.items():
            # keys are now relative to the run-scoped prefix
            name = url.split('/')[-1]
            s3obj = s3.get(name)
            assert s3obj.key == name
            assert_results([s3obj], {url: exp})
        names = [url.split('/')[-1] for url in expected]
        s3objs = s3.get_many(names)
        assert {e.key for e in s3objs} == set(names)
        assert_results(s3objs, expected)
        assert_results(s3.get_all(), expected, info_should_be_empty=True)
def test_init_options(s3root, pathspecs, expected):
    """Exercise every way of initializing the S3 client against *expected* objects.

    Covers five init modes: (1) ``s3root`` as a prefix, (2) a full object URL as
    ``s3root``, (3) no root with full URLs passed to ``get``, (4) a
    ``run=``-scoped client driven by the ``current`` singleton, and (5) a
    ``Run`` object (gated on ``DO_TEST_RUN`` since it needs a live metadata
    service with this run registered).
    """
    # Exactly one pathspec is expected; unpack it or fail loudly.
    [pathspec] = pathspecs
    flow_name, run_id = pathspec.split("/")
    plen = len(s3root)

    # option 1) s3root as prefix
    with S3(s3root=s3root) as s3:
        for url, exp in expected.items():
            # s3root should work as a prefix
            s3obj = s3.get(url[plen:])
            assert s3obj.key == url[plen:]
            assert_results([s3obj], {url: exp})
        # A full URL is not a valid key when a root is set.
        with pytest.raises(MetaflowS3URLException):
            s3.get("s3://some/fake/address")

    # option 2) full url as s3root
    for url, exp in expected.items():
        with S3(s3root=url) as s3:
            s3obj = s3.get()
            assert_results([s3obj], {url: exp})

    # option 3) full urls
    with S3() as s3:
        for url, exp in expected.items():
            # without a root, only full s3:// URLs are accepted
            s3obj = s3.get(url)
            assert s3obj.key == url
            assert_results([s3obj], {url: exp})
        # Relative suffixes and malformed URLs must be rejected in every API.
        with pytest.raises(MetaflowS3URLException):
            s3.get("suffix")
        with pytest.raises(MetaflowS3URLException):
            s3.get("s3://nopath")
        with pytest.raises(MetaflowS3URLException):
            s3.get_many(["suffixes"])
        with pytest.raises(MetaflowS3URLException):
            s3.get_recursive(["suffixes"])
        with pytest.raises(MetaflowS3URLException):
            s3.get_all()

    # option 4) 'current' environment (fake a running flow)
    flow = FakeFlow(use_cli=False)
    parsed = urlparse(s3root)
    with pytest.raises(MetaflowS3URLException):
        # current not set yet, so this should fail
        with S3(run=flow):
            pass
    current._set_env(
        FakeFlow(name=flow_name),
        run_id,
        "no_step",
        "no_task",
        "no_origin_run_id",
        "no_ns",
        "no_user",
    )
    with S3(bucket=parsed.netloc, prefix=parsed.path, run=flow) as s3:
        for url, exp in expected.items():
            # keys are now relative to the run-scoped prefix
            name = url.split("/")[-1]
            s3obj = s3.get(name)
            assert s3obj.key == name
            assert_results([s3obj], {url: exp})
        names = [url.split("/")[-1] for url in expected]
        s3objs = s3.get_many(names)
        assert {e.key for e in s3objs} == set(names)
        assert_results(s3objs, expected)
        assert_results(s3.get_all(), expected, info_should_be_empty=True)

    # option 5) run object
    if DO_TEST_RUN:
        # Only works if a metadata service exists with the run in question.
        namespace(None)
        with S3(bucket=parsed.netloc, prefix=parsed.path, run=Run(pathspec)) as s3:
            names = [url.split("/")[-1] for url in expected]
            assert_results(s3.get_many(names), expected)