def test_intro_stat_learning(self, cleanup, signup, testcfg):
    """Round-trip the intro-stat-learning sample files through a pipe.

    The sample directory is first imported into a ``PipeLocal``. Unless the
    test config sets ``local``, the files are then pushed through the first
    API client, pulled back through the second one (``getapi2``) and loaded
    with pandas and dask using the schema stored on the pipe.
    """
    pipe_name = cfg_settings_islr['name']
    expected_files = [
        'Advertising.csv',
        'Advertising2.csv',
        'Auto.csv',
        'Ch10Ex11.csv',
        'College.csv',
        'Credit.csv',
        'Heart.csv',
        'Income1.csv',
        'Income2.csv',
        'LICENSE.md',
        'README.md',
    ]
    local_kwargs = {'profile': cfg_profile, 'filecfg': cfg_cfgfname}

    # Local-only repo: import the sample directory and check what is seen.
    pipelocal = d6tpipe.PipeLocal(pipe_name, **local_kwargs)
    pipelocal.delete_files_local(confirm=False, delete_all=False)
    pipelocal.import_dir('tests/intro-stat-learning/')
    assert pipelocal.scan_local() == expected_files
    # The default files() listing is still empty after an import, while
    # fromdb=False returns the imported names.
    assert pipelocal.files() == []
    assert pipelocal.files(fromdb=False) == expected_files

    assert not pd.read_csv(pipelocal.dirpath / 'Advertising.csv').empty

    run_remote = not testcfg.get('local', False)
    if run_remote:
        # Publish the repo and grant read access.
        api = getapi()
        d6tpipe.upsert_pipe(api, cfg_settings_islr)
        d6tpipe.upsert_permissions(
            api, pipe_name, {"username": '******', "role": "read"}
        )
        pipe_push = d6tpipe.Pipe(api, pipe_name, mode='all')
        pipe_push.delete_files_remote(confirm=False)
        assert pipe_push.scan_remote(cached=False) == []
        assert pipe_push.push() == expected_files

        # A freshly opened local pipe must now expose a non-empty schema.
        pipelocal = d6tpipe.PipeLocal(pipe_name, **local_kwargs)
        assert len(pipelocal.schema) > 0

        # Pull the same files through the second API client.
        pipe_pull = d6tpipe.Pipe(getapi2(), pipe_name)
        pipe_pull.delete_files_local(confirm=False, delete_all=False)
        assert pipe_pull.pull() == expected_files

        pandas_frame = pd.read_csv(
            pipe_pull.dirpath / 'Advertising.csv', **pipe_pull.schema['pandas']
        )
        assert not pandas_frame.empty

        # dask is imported here so it is only needed on the remote path.
        import dask.dataframe as dd

        advertising_paths = pipe_pull.filepaths(include='Advertising*.csv')
        dask_frame = dd.read_csv(advertising_paths, **pipe_pull.schema['dask'])
        assert not dask_frame.compute().empty

        pipe_pull.delete_files_local(confirm=False, delete_all=False)

    # Final cleanup of the local repo.
    pipelocal.delete_files_local(confirm=False, delete_all=True)
def test_ftp(self, cleanup, signup, testcfg):
    """Create the FTP test pipe from a JSON file and push/pull one CSV."""
    api = getapi(testcfg.get('local', False))

    # Quick create: register the pipe from the credentials JSON file.
    d6tpipe.upsert_pipe_json(api, 'tests/.creds-test.json', 'pipe-test-ftp')
    pipe_name = 'test-ftp'

    # The registered settings must carry the expected remote path.
    _, pipe_settings = api.cnxn.pipes._(pipe_name).get()
    assert pipe_settings['options']['remotepath'] == '/utest/'

    # Push/pull round trip with a single generated CSV.
    pipe = getpipe(api, name=pipe_name, mode='all')
    csv_name = 'test.csv'
    frame = pd.DataFrame({'a': range(10)})
    frame.to_csv(pipe.dirpath / csv_name, index=False)

    expected = [csv_name]
    assert pipe.scan_remote(cached=False) == []
    assert pipe.pull() == []
    assert pipe.push_preview() == expected
    assert pipe.push() == expected

    # Clear the scan cache before pulling again.
    pipe._cache_scan.clear()
    assert pipe.pull() == expected

    pipe.delete_files(confirm=False, all_local=True)
    assert pipe.scan_remote(cached=False) == []
def test_d6tfree(self, cleanup, signup, testcfg):
    """Exercise a pipe created from just a name: credentials, sync, roles.

    Does nothing when the test config sets ``local``. Otherwise it checks
    the generated settings (s3 protocol, remote path containing user and
    pipe name), the read/write credentials, a forced credential reset, a
    push/pull round trip, and what the second API client (``getapi2``) may
    do with no permission, with ``read`` and with ``write``.
    """
    local_mode = testcfg.get('local', False)
    if local_mode:
        return

    pipe_name = 'utest-d6tfree'
    api = getapi(local_mode)

    def _credentials(role):
        # Second element of the API reply for the given role.
        reply = api.cnxn.pipes._(pipe_name).credentials.get(
            query_params={'role': role}
        )
        return reply[1]

    # Quick create: only a name is supplied.
    d6tpipe.upsert_pipe(api, {'name': pipe_name})
    _, pipe_settings = api.cnxn.pipes._(pipe_name).get()
    assert cfg_usr in pipe_settings['options']['remotepath']
    assert pipe_name in pipe_settings['options']['remotepath']
    assert pipe_settings['protocol'] == 's3'

    read_creds = _credentials('read')
    write_creds = _credentials('write')
    assert "aws_session_token" in read_creds
    assert "aws_session_token" in write_creds
    assert read_creds['aws_access_key_id'] != write_creds['aws_access_key_id']

    # Forced renewal must hand out a different read access key.
    pipe = getpipe(api, name=pipe_name, mode='all')
    pipe._reset_credentials()
    renewed_read_creds = _credentials('read')
    assert (
        renewed_read_creds['aws_access_key_id']
        != read_creds['aws_access_key_id']
    )

    # Push/pull round trip with a file inside a sub-folder.
    first_file = 'folder/test.csv'
    second_file = 'folder/test2.csv'
    pipe.delete_files_remote(confirm=False)
    frame = pd.DataFrame({'a': range(10)})
    first_path = pipe.dirpath / first_file
    first_path.parent.mkdir(exist_ok=True)
    frame.to_csv(first_path, index=False)
    assert pipe.push() == [first_file]
    pipe._cache_scan.clear()
    assert pipe.pull() == [first_file]

    # Permissions - no access: opening or pulling must fail with a 403.
    api2 = getapi2(local_mode)
    with pytest.raises(APIError, match='403'):
        pipe2 = getpipe(api2, name=pipe_name, mode='all')
        pipe2.pull()

    # Permissions - read: pull works, push is rejected.
    d6tpipe.upsert_permissions(
        api, pipe_name, {"username": cfg_usr2, "role": "read"}
    )
    pipe2 = getpipe(api2, name=pipe_name, mode='all')
    assert pipe2.role == 'read'
    assert pipe2.pull() == [first_file]
    frame.to_csv(pipe2.dirpath / second_file, index=False)
    with pytest.raises(ValueError, match='Read-only'):
        pipe2.push()

    # Permissions - write: both files are reported by push.
    d6tpipe.upsert_permissions(
        api, pipe_name, {"username": cfg_usr2, "role": "write"}
    )
    pipe2 = getpipe(api2, name=pipe_name, mode='all', chk_empty=False)
    assert pipe2.role == 'write'
    assert pipe2.pull() == [first_file]
    assert pipe2.push() == [first_file, second_file]

    # TODO: check that there is no access to parent paths in s3.
    # TODO: file include patterns, e.g. child pipes of 'demo-vendor' with
    #       options {'include': '*daily*.csv'} / {'include': '*monthly*.csv'}.

    # Cleanup: remote first, then everything local.
    pipe.delete_files_remote(confirm=False)
    assert pipe.scan_remote(cached=False) == []
    pipe.delete_files_local(confirm=False, delete_all=True)
def test_pipes_pull(self, cleanup, signup, parentinit, pipeinit, testcfg):
    """Pull the fixture pipe and check listings, checksums and permissions.

    Covers remote scanning (names and ``crc`` attributes), pull and its
    preview, local file listings and paths, the ``PipeLocal`` view of the
    same pipe and, unless the test config sets ``local``, access through
    the second API client before and after a ``read`` permission is granted
    on ``cfg_parent_name``.
    """
    local_mode = testcfg.get('local', False)
    api = getapi(local_mode)
    pipe = getpipe(api)
    assert pipe.name in api.list_pipes()

    expected_crc = [
        '8a9782e9efa8befa9752045ca506a62e',
        '5fe579d6b71031dad399e8d4ea82820b',
        '4c7da169df85253d7ff737dde1e7400b',
        'ca62a122993494e763fd1676cce95e76',
    ]

    # Before pulling: nothing local, everything visible remotely.
    assert pipe.files() == []
    assert pipe.scan_remote() == cfg_filenames_chk
    _, remote_entries = pipe.scan_remote(attributes=True)
    assert _filenames(remote_entries) == cfg_filenames_chk
    assert [entry['crc'] for entry in remote_entries] == expected_crc

    # Pull and verify that the preview drains and local state fills up.
    assert api.list_local_pipes() == []
    assert pipe.pull_preview() == cfg_filenames_chk
    assert pipe.pull() == cfg_filenames_chk
    assert pipe.pull_preview() == []
    assert api.list_local_pipes() == [pipe.name]

    assert pipe.files() == cfg_filenames_chk
    assert pipe.filepaths() == [
        Path(pipe.dirpath) / name for name in pipe.files()
    ]
    assert pipe.filepaths(aspathlib=False) == [
        str(Path(pipe.dirpath) / name) for name in pipe.files()
    ]

    # Re-opened with mode='all', every file shows up in the preview again.
    pipe = getpipe(api, chk_empty=False, mode='all')
    assert pipe.pull_preview() == cfg_filenames_chk

    # PipeLocal view of the same pipe.
    pipelocal = d6tpipe.PipeLocal(
        pipe.name, profile=cfg_profile, filecfg=cfg_cfgfname
    )
    assert pipelocal.files() == cfg_filenames_chk
    assert pipelocal.scan_local() == cfg_filenames_chk
    assert pipelocal.schema == cfg_settings_pipe['schema']

    # Smoke check: the first file must load with the pipe's pandas schema.
    pd.read_csv(pipe.dirpath / cfg_filenames_chk[0], **pipe.schema['pandas'])

    # Permissions
    if not local_mode:
        api2 = getapi2(local_mode)
        # Without any permission, opening or pulling must fail with a 403.
        with pytest.raises(APIError, match='403'):
            pipe2 = getpipe(api2, name=cfg_pipe_name, mode='all')
            pipe2.pull()

        # Read access is granted on cfg_parent_name; cfg_pipe_name must
        # then be pullable through the second client.
        read_grant = {"username": cfg_usr2, "role": "read"}
        _resp, _body = d6tpipe.upsert_permissions(
            api, cfg_parent_name, read_grant
        )
        pipe2 = getpipe(api2, name=cfg_pipe_name, mode='all')
        assert pipe2.pull() == cfg_filenames_chk

    # Cleanup
    pipe.delete_files_local(confirm=False, delete_all=True)