def test_ftp(self, cleanup, signup, testcfg):
    """Check quick-create, remote path and a push/pull round trip for 'test-ftp'.

    The pipe is registered through ``d6tpipe.upsert_pipe_json`` using
    'tests/.creds-test.json' and 'pipe-test-ftp'.  ``cleanup``, ``signup``
    and ``testcfg`` are supplied from outside this block; only ``testcfg``
    is read here (its 'local' entry is forwarded to ``getapi``).
    """
    api = getapi(testcfg.get('local', False))

    # test quick create
    d6tpipe.upsert_pipe_json(api, 'tests/.creds-test.json', 'pipe-test-ftp')
    pipe_name = 'test-ftp'

    # test paths
    _response, pipe_settings = api.cnxn.pipes._(pipe_name).get()
    assert pipe_settings['options']['remotepath'] == '/utest/'

    # test push/pull
    pipe = getpipe(api, name=pipe_name, mode='all')
    csv_name = 'test.csv'
    pd.DataFrame({'a': range(10)}).to_csv(pipe.dirpath / csv_name, index=False)

    # nothing is expected on the remote before the first push
    assert pipe.scan_remote(cached=False) == []
    assert pipe.pull() == []
    assert pipe.push_preview() == [csv_name]
    assert pipe.push() == [csv_name]

    # clear the scan cache before pulling so the pushed file is reported
    pipe._cache_scan.clear()
    assert pipe.pull() == [csv_name]

    pipe.delete_files(confirm=False, all_local=True)
    assert pipe.scan_remote(cached=False) == []
def test_sftp(self, cleanup, signup, testcfg):
    """Check parent/child pipe creation, remote paths and push/pull for SFTP settings.

    Uses the module-level ``cfg_settings_parent_sftp`` and
    ``cfg_settings_pipe_sftp`` dicts (defined outside this block).  Only
    ``testcfg`` is read from the fixtures; its 'local' entry is forwarded
    to ``getapi``.
    """
    pipe_name = cfg_settings_pipe_sftp['name']
    api = getapi(testcfg.get('local', False))

    # test quick create: parent first, then the child pipe
    for pipe_cfg in (cfg_settings_parent_sftp, cfg_settings_pipe_sftp):
        d6tpipe.upsert_pipe(api, pipe_cfg)

    # test paths
    _response, parent_settings = api.cnxn.pipes._(
        cfg_settings_parent_sftp['name']).get()
    assert parent_settings['options']['remotepath'] == '/'

    _response, child_settings = api.cnxn.pipes._(
        cfg_settings_pipe_sftp['name']).get()
    expected_remotepath = cfg_settings_pipe_sftp['options']['dir'] + '/'
    assert child_settings['options']['remotepath'] == expected_remotepath

    # test push/pull
    pipe = getpipe(api, name=pipe_name, mode='all')
    pipe.delete_files_remote(confirm=False)

    csv_name = 'test.csv'
    pd.DataFrame({'a': range(10)}).to_csv(pipe.dirpath / csv_name, index=False)

    assert pipe.scan_remote(cached=False) == []
    assert pipe.pull() == []
    assert pipe.push_preview() == [csv_name]
    assert pipe.push() == [csv_name]

    # clear the scan cache before pulling so the pushed file is reported
    pipe._cache_scan.clear()
    assert pipe.pull() == [csv_name]

    # cleanup
    pipe.delete_files_remote(confirm=False)
    assert pipe.scan_remote(cached=False) == []
    pipe.delete_files_local(confirm=False, delete_all=True)
def test_intro_stat_learning(self, cleanup, signup, testcfg):
    """Import the 'tests/intro-stat-learning/' files locally, then share them remotely.

    The local part always runs.  The remote part (publish with one API
    handle, pull with a second one, read via pandas and dask) runs only
    when ``testcfg`` has no truthy 'local' entry.
    """
    pipe_name = cfg_settings_islr['name']
    expected_files = [
        'Advertising.csv',
        'Advertising2.csv',
        'Auto.csv',
        'Ch10Ex11.csv',
        'College.csv',
        'Credit.csv',
        'Heart.csv',
        'Income1.csv',
        'Income2.csv',
        'LICENSE.md',
        'README.md',
    ]

    # start with local repo
    pipelocal = d6tpipe.PipeLocal(
        pipe_name, profile=cfg_profile, filecfg=cfg_cfgfname)
    pipelocal.delete_files_local(confirm=False, delete_all=False)
    pipelocal.import_dir('tests/intro-stat-learning/')
    assert pipelocal.scan_local() == expected_files
    assert pipelocal.files() == []
    assert pipelocal.files(fromdb=False) == expected_files

    advertising = pd.read_csv(pipelocal.dirpath / 'Advertising.csv')
    assert not advertising.empty

    remote_enabled = not testcfg.get('local', False)
    if remote_enabled:
        # set up public repo
        api = getapi()
        d6tpipe.upsert_pipe(api, cfg_settings_islr)
        d6tpipe.upsert_permissions(
            api, pipe_name, {"username": '******', "role": "read"})

        pipe = d6tpipe.Pipe(api, pipe_name, mode='all')
        pipe.delete_files_remote(confirm=False)
        assert pipe.scan_remote(cached=False) == []
        assert pipe.push() == expected_files

        pipelocal = d6tpipe.PipeLocal(
            pipe_name, profile=cfg_profile, filecfg=cfg_cfgfname)
        assert len(pipelocal.schema) > 0

        # pull the same files through the second API handle
        api2 = getapi2()
        pipe = d6tpipe.Pipe(api2, pipe_name)
        pipe.delete_files_local(confirm=False, delete_all=False)
        assert pipe.pull() == expected_files

        advertising = pd.read_csv(
            pipe.dirpath / 'Advertising.csv', **pipe.schema['pandas'])
        assert not advertising.empty

        import dask.dataframe as dd
        advertising_paths = pipe.filepaths(include='Advertising*.csv')
        ddf = dd.read_csv(advertising_paths, **pipe.schema['dask'])
        assert not ddf.compute().empty

        pipe.delete_files_local(confirm=False, delete_all=False)

    # NOTE(review): the original formatting was flattened; this final cleanup
    # is assumed to run regardless of the branch above - confirm.
    pipelocal.delete_files_local(confirm=False, delete_all=True)
def test_d6tfree(self, cleanup, signup, testcfg):
    """Check credentials, push/pull and per-role permissions on 'utest-d6tfree'.

    Does nothing when ``testcfg`` has a truthy 'local' entry.  Otherwise it
    creates the pipe from a name only, inspects the read/write credentials,
    forces a credential reset, round-trips a file, and then checks what a
    second API handle can do with no access, 'read' and 'write' roles.
    """
    is_local = testcfg.get('local', False)
    if is_local:
        return

    pipe_name = 'utest-d6tfree'
    api = getapi(is_local)

    # test quick create
    d6tpipe.upsert_pipe(api, {'name': pipe_name})
    _response, pipe_settings = api.cnxn.pipes._(pipe_name).get()
    remotepath = pipe_settings['options']['remotepath']
    assert cfg_usr in remotepath
    assert pipe_name in remotepath
    assert pipe_settings['protocol'] == 's3'

    cred_read = api.cnxn.pipes._(pipe_name).credentials.get(
        query_params={'role': 'read'})[1]
    cred_write = api.cnxn.pipes._(pipe_name).credentials.get(
        query_params={'role': 'write'})[1]
    assert "aws_session_token" in cred_read
    assert "aws_session_token" in cred_write
    assert cred_read['aws_access_key_id'] != cred_write['aws_access_key_id']

    # test force renew
    pipe = getpipe(api, name=pipe_name, mode='all')
    pipe._reset_credentials()
    cred_read_renewed = api.cnxn.pipes._(pipe_name).credentials.get(
        query_params={'role': 'read'})[1]
    assert cred_read_renewed['aws_access_key_id'] != cred_read[
        'aws_access_key_id']

    # test push/pull
    first_file = 'folder/test.csv'
    second_file = 'folder/test2.csv'
    pipe.delete_files_remote(confirm=False)
    df = pd.DataFrame({'a': range(10)})
    first_path = pipe.dirpath / first_file
    first_path.parent.mkdir(exist_ok=True)
    df.to_csv(first_path, index=False)
    assert pipe.push() == [first_file]
    pipe._cache_scan.clear()
    assert pipe.pull() == [first_file]

    # permissions - no access
    api2 = getapi2(is_local)
    # NOTE(review): the original formatting was flattened; both statements
    # are assumed to belong to the pytest.raises block - confirm.
    with pytest.raises(APIError, match='403'):
        pipe2 = getpipe(api2, name=pipe_name, mode='all')
        pipe2.pull()

    # permissions - read
    d6tpipe.upsert_permissions(
        api, pipe_name, {"username": cfg_usr2, "role": "read"})
    pipe2 = getpipe(api2, name=pipe_name, mode='all')
    assert pipe2.role == 'read'
    assert pipe2.pull() == [first_file]
    df.to_csv(pipe2.dirpath / second_file, index=False)
    with pytest.raises(ValueError, match='Read-only'):
        pipe2.push()

    # permissions - write
    d6tpipe.upsert_permissions(
        api, pipe_name, {"username": cfg_usr2, "role": "write"})
    pipe2 = getpipe(api2, name=pipe_name, mode='all', chk_empty=False)
    assert pipe2.role == 'write'
    assert pipe2.pull() == [first_file]
    assert pipe2.push() == [first_file, second_file]

    # todo: check don't have access to parent paths in s3

    # todo: file include patterns
    # d6tpipe.upsert_pipe(api,{'name':'demo-vendor-daily','parent':'demo-vendor','options':{'include':'*daily*.csv'}})
    # d6tpipe.upsert_pipe(api,{'name':'demo-vendor-monthly','parent':'demo-vendor','options':{'include':'*monthly*.csv'}})

    # cleanup
    pipe.delete_files_remote(confirm=False)
    assert pipe.scan_remote(cached=False) == []
    pipe.delete_files_local(confirm=False, delete_all=True)
def test_pipes_push(self, cleanup, signup, parentinit, pipeinit, testcfg):
    """Walk through push, file listing, change detection and orphan removal.

    Relies on ``getpipe``, ``cfg_filenames_chk`` and ``PushError`` defined
    outside this block.  Only ``testcfg`` is read from the fixtures; its
    'local' entry is forwarded to ``getapi``.
    """
    api = getapi(testcfg.get('local', False))

    # start from an empty local directory
    pipe = getpipe(api, chk_empty=False)
    pipe.delete_files_local(confirm=False, delete_all=True)
    assert pipe.scan_local() == []

    # push_preview is expected to raise PushError before the first pull
    pipe = getpipe(api)
    with pytest.raises(PushError):
        pipe.push_preview()
    pipe.pull()

    # push works
    csv_name = 'test.csv'
    csv_path = pipe.dirpath / csv_name
    frame = pd.DataFrame({'a': range(10)})
    frame.to_csv(csv_path, index=False)
    assert set(pipe.scan_local()) == set(cfg_filenames_chk + [csv_name])
    assert pipe.files() == cfg_filenames_chk
    assert pipe.push_preview() == [csv_name]
    assert pipe.push() == [csv_name]
    assert pipe.push_preview() == []
    pipe._cache_scan.clear()
    assert pipe.pull_preview() == []

    # doesn't take files not meet pattern
    xlsx_name = 'test.xlsx'
    xlsx_path = pipe.dirpath / xlsx_name
    frame.to_csv(xlsx_path)
    assert pipe.push_preview() == []
    xlsx_path.unlink()

    # todo: push exclude

    # files() works
    assert pipe.files() == cfg_filenames_chk + [csv_name]
    assert pipe.files(include='Machine*.csv') == cfg_filenames_chk
    assert pipe.files(exclude='Machine*.csv') == [csv_name]
    assert pipe.files(sortby='mod')[-1] == csv_name

    # crc works: rewriting the file as read back is not reported as a change,
    # writing it with the index column is
    reloaded = pd.read_csv(csv_path, **pipe.schema['pandas'])
    reloaded.to_csv(csv_path, index=False)
    assert pipe.push_preview() == []
    frame.to_csv(csv_path, index=True)
    assert pipe.push_preview() == [csv_name]

    # files param works
    assert pipe.pull(files=[csv_name]) == [csv_name]
    pipe.delete_files_remote(files=[csv_name], confirm=False)
    assert pipe._pullpush_luigi([csv_name], 'exists') == [False]
    assert pipe.push(files=[csv_name]) == [csv_name]
    assert pipe._pullpush_luigi([csv_name], 'exists') == [True]

    # remove_orphans works: dry run first, then the real removal
    csv_path.unlink()
    pipe._cache_scan.clear()
    for dryrun in (True, False):
        orphans = pipe.remove_orphans(direction='both', dryrun=dryrun)
        assert orphans['remote'] == [csv_name]
    assert pipe._pullpush_luigi(['test.csv'], 'exists') == [False]

    # cleanup
    pipe.delete_files_local(confirm=False, delete_all=True)
    assert pipe.scan_local() == []

# NOTE(review): the stub and trailing triple-quote below are carried over
# verbatim from the end of the original line; confirm the quote's purpose.
# def test_pipes_includeexclude(self, cleanup, parentinit, pipeinit, testcfg):
#     pass '''