def test_sftp(self, cleanup, signup, testcfg):
    """Create the pipes from the sftp settings and round-trip one CSV file.

    Upserts ``cfg_settings_parent_sftp`` and ``cfg_settings_pipe_sftp``,
    checks the ``remotepath`` option the API reports for each, then pushes a
    small CSV, pulls it back and removes remote and local files again.
    """
    pipe_name = cfg_settings_pipe_sftp['name']
    api = getapi(testcfg.get('local', False))

    # quick create: parent settings first, then the pipe under test
    for pipe_settings in (cfg_settings_parent_sftp, cfg_settings_pipe_sftp):
        d6tpipe.upsert_pipe(api, pipe_settings)

    # remote paths as reported by the API
    _, parent_info = api.cnxn.pipes._(cfg_settings_parent_sftp['name']).get()
    assert parent_info['options']['remotepath'] == '/'
    _, child_info = api.cnxn.pipes._(cfg_settings_pipe_sftp['name']).get()
    expected_remote = cfg_settings_pipe_sftp['options']['dir'] + '/'
    assert child_info['options']['remotepath'] == expected_remote

    # push/pull round trip, starting from an emptied remote
    pipe = getpipe(api, name=pipe_name, mode='all')
    pipe.delete_files_remote(confirm=False)

    csv_name = 'test.csv'
    frame = pd.DataFrame({'a': range(10)})
    frame.to_csv(pipe.dirpath / csv_name, index=False)
    assert pipe.scan_remote(cached=False) == []
    assert pipe.pull() == []
    assert pipe.push_preview() == [csv_name]
    assert pipe.push() == [csv_name]
    pipe._cache_scan.clear()  # clear the scan cache before pulling again
    assert pipe.pull() == [csv_name]

    # cleanup: remote must be empty again, then wipe the local copy
    pipe.delete_files_remote(confirm=False)
    assert pipe.scan_remote(cached=False) == []
    pipe.delete_files_local(confirm=False, delete_all=True)
def test_intro_stat_learning(self, cleanup, signup, testcfg):
    """Import the intro-stat-learning files locally, then publish and re-pull them.

    The first half only uses a ``PipeLocal``. The second half runs only when
    ``testcfg`` does not request local mode: it pushes the files through the
    first API account and pulls them with the second one.
    """
    pipe_name = cfg_settings_islr['name']
    expected_files = [
        'Advertising.csv',
        'Advertising2.csv',
        'Auto.csv',
        'Ch10Ex11.csv',
        'College.csv',
        'Credit.csv',
        'Heart.csv',
        'Income1.csv',
        'Income2.csv',
        'LICENSE.md',
        'README.md',
    ]

    # start with local repo
    pipelocal = d6tpipe.PipeLocal(pipe_name, profile=cfg_profile, filecfg=cfg_cfgfname)
    pipelocal.delete_files_local(confirm=False, delete_all=False)
    pipelocal.import_dir('tests/intro-stat-learning/')
    assert pipelocal.scan_local() == expected_files
    assert pipelocal.files() == []
    assert pipelocal.files(fromdb=False) == expected_files

    local_frame = pd.read_csv(pipelocal.dirpath / 'Advertising.csv')
    assert not local_frame.empty

    remote_enabled = not testcfg.get('local', False)
    if remote_enabled:
        # set up public repo
        api = getapi()
        d6tpipe.upsert_pipe(api, cfg_settings_islr)
        read_grant = {"username": '******', "role": "read"}
        d6tpipe.upsert_permissions(api, pipe_name, read_grant)

        pipe = d6tpipe.Pipe(api, pipe_name, mode='all')
        pipe.delete_files_remote(confirm=False)
        assert pipe.scan_remote(cached=False) == []
        assert pipe.push() == expected_files

        # re-open the local pipe and make sure a schema is present
        pipelocal = d6tpipe.PipeLocal(pipe_name, profile=cfg_profile, filecfg=cfg_cfgfname)
        assert len(pipelocal.schema) > 0

        # pull with the second API account
        api2 = getapi2()
        pipe = d6tpipe.Pipe(api2, pipe_name)
        pipe.delete_files_local(confirm=False, delete_all=False)
        assert pipe.pull() == expected_files

        pulled_frame = pd.read_csv(pipe.dirpath / 'Advertising.csv', **pipe.schema['pandas'])
        assert not pulled_frame.empty

        import dask.dataframe as dd
        advertising_paths = pipe.filepaths(include='Advertising*.csv')
        dask_frame = dd.read_csv(advertising_paths, **pipe.schema['dask'])
        assert not dask_frame.compute().empty
        pipe.delete_files_local(confirm=False, delete_all=False)

    # NOTE(review): nesting reconstructed from whitespace-collapsed source;
    # this final cleanup is assumed to run in both modes - confirm.
    pipelocal.delete_files_local(confirm=False, delete_all=True)
def test_pipes_pull(self, cleanup, signup, parentinit, pipeinit, testcfg):
    """Pull the reference files and verify listings, paths, schema and permissions.

    Covers remote scans (names and crc values), pull/pull_preview, the
    local pipe registry, ``files``/``filepaths``, ``PipeLocal`` access and,
    when not in local mode, access for the second API account before and
    after a read grant on the parent pipe.
    """
    api = getapi(testcfg.get('local', False))
    pipe = getpipe(api)
    assert pipe.name in api.list_pipes()

    expected_crc = [
        '8a9782e9efa8befa9752045ca506a62e',
        '5fe579d6b71031dad399e8d4ea82820b',
        '4c7da169df85253d7ff737dde1e7400b',
        'ca62a122993494e763fd1676cce95e76',
    ]

    # nothing is tracked locally yet; the remote scan lists the reference files
    assert pipe.files() == []
    assert pipe.scan_remote() == cfg_filenames_chk
    _, remote_meta = pipe.scan_remote(attributes=True)
    assert _filenames(remote_meta) == cfg_filenames_chk
    assert [entry['crc'] for entry in remote_meta] == expected_crc

    # pull: preview lists everything, afterwards nothing is left to pull
    assert api.list_local_pipes() == []
    assert pipe.pull_preview() == cfg_filenames_chk
    assert pipe.pull() == cfg_filenames_chk
    assert pipe.pull_preview() == []
    assert api.list_local_pipes() == [pipe.name]

    # file listings and path helpers
    assert pipe.files() == cfg_filenames_chk
    assert pipe.filepaths() == [
        Path(pipe.dirpath) / name for name in pipe.files()
    ]
    assert pipe.filepaths(aspathlib=False) == [
        str(Path(pipe.dirpath) / name) for name in pipe.files()
    ]

    pipe = getpipe(api, chk_empty=False, mode='all')
    assert pipe.pull_preview() == cfg_filenames_chk

    # PipeLocal
    pipelocal = d6tpipe.PipeLocal(pipe.name, profile=cfg_profile, filecfg=cfg_cfgfname)
    assert pipelocal.files() == cfg_filenames_chk
    assert pipelocal.scan_local() == cfg_filenames_chk
    assert pipelocal.schema == cfg_settings_pipe['schema']
    # reading the first file with the pipe schema must not raise
    pd.read_csv(pipe.dirpath / cfg_filenames_chk[0], **pipe.schema['pandas'])

    # permissions
    remote_enabled = not testcfg.get('local', False)
    if remote_enabled:
        api2 = getapi2(testcfg.get('local', False))
        with pytest.raises(APIError, match='403'):
            pipe2 = getpipe(api2, name=cfg_pipe_name, mode='all')
            pipe2.pull()

        read_grant = {"username": cfg_usr2, "role": "read"}
        _, _ = d6tpipe.upsert_permissions(api, cfg_parent_name, read_grant)

        pipe2 = getpipe(api2, name=cfg_pipe_name, mode='all')
        assert pipe2.pull() == cfg_filenames_chk

    # cleanup
    pipe.delete_files_local(confirm=False, delete_all=True)
def setup_ftp_base(self, testcfg):
    """Yield a pipe built from the 'pipe-test-ftp' entry of the test credentials file.

    The entry is first upserted as stored, then upserted again with its
    ``options`` replaced by an include pattern (plus ``remotepath`` in local
    mode). Local files of the yielded pipe are deleted after the ``yield``.
    """
    creds_path = 'tests/.creds-test.json'
    creds_key = 'pipe-test-ftp'
    local_mode = testcfg.get('local', False)

    api = getapi(local_mode)
    d6tpipe.upsert_pipe_json(api, creds_path, creds_key)

    pipe_settings = d6tpipe.utils.loadjson(creds_path)[creds_key]
    # the stored entry must carry 'options' (KeyError otherwise); replace them
    pipe_settings.pop('options')
    replacement_options = {'include': 'root*.csv'}
    if local_mode:
        replacement_options['remotepath'] = '/'
    pipe_settings['options'] = replacement_options

    d6tpipe.upsert_pipe(api, pipe_settings)
    ftp_pipe = d6tpipe.Pipe(api, pipe_settings['name'])
    yield ftp_pipe

    # teardown
    ftp_pipe.delete_files_local(confirm=False, delete_all=True)
def setup_sftp_base(self, testcfg):
    """Yield a pipe for the 'testftp' sftp settings defined inline below.

    In local mode a ``remotepath`` of '/' is merged in underneath the
    existing options. Local files of the yielded pipe are deleted after the
    ``yield``.
    """
    local_mode = testcfg.get('local', False)
    api = getapi(local_mode)

    pipe_settings = {
        'name': 'testftp',
        'protocol': 'sftp',
        'location': 'test.rebex.net',
        'credentials': {'username': '******', 'password': '******'},
        'options': {'include': '*.txt'},
    }
    if local_mode:
        # defaults first, so keys already present in 'options' win
        merged_options = {'remotepath': '/'}
        merged_options.update(pipe_settings.get('options', {}))
        pipe_settings['options'] = merged_options

    d6tpipe.upsert_pipe(api, pipe_settings)
    sftp_pipe = d6tpipe.Pipe(api, pipe_settings['name'])
    yield sftp_pipe

    # teardown
    sftp_pipe.delete_files_local(confirm=False, delete_all=True)
def pull(self):
    """Pull ``cfg_pipe_name`` through a local-mode API, yield, then delete its local files."""
    local_api = getapi(local=True)
    pulled_pipe = d6tpipe.pipe.Pipe(local_api, cfg_pipe_name)
    pulled_pipe.pull()

    yield True

    # teardown
    pulled_pipe.delete_files_local(confirm=False, delete_all=True)
def test_d6tfree(self, cleanup, signup, testcfg):
    """Check a pipe created from a name only: storage, credentials, push/pull, roles.

    Does nothing when ``testcfg`` requests local mode. Otherwise it verifies
    the reported remote path and protocol, that read and write credentials
    differ and change after a reset, a push/pull round trip, and what the
    second API account may do with no grant, a read grant and a write grant.
    """
    if testcfg.get('local', False):
        return

    pipe_name = 'utest-d6tfree'
    api = getapi(testcfg.get('local', False))

    def role_credentials(role):
        # second element of the credentials response for the given role
        return api.cnxn.pipes._(pipe_name).credentials.get(
            query_params={'role': role})[1]

    # test quick create
    d6tpipe.upsert_pipe(api, {'name': pipe_name})
    _, pipe_info = api.cnxn.pipes._(pipe_name).get()
    assert cfg_usr in pipe_info['options']['remotepath']
    assert pipe_name in pipe_info['options']['remotepath']
    assert pipe_info['protocol'] == 's3'

    read_creds = role_credentials('read')
    write_creds = role_credentials('write')
    assert "aws_session_token" in read_creds
    assert "aws_session_token" in write_creds
    assert read_creds['aws_access_key_id'] != write_creds['aws_access_key_id']

    # test force renew
    pipe = getpipe(api, name=pipe_name, mode='all')
    pipe._reset_credentials()
    renewed_read_creds = role_credentials('read')
    assert renewed_read_creds['aws_access_key_id'] != read_creds['aws_access_key_id']

    # test push/pull
    first_file = 'folder/test.csv'
    second_file = 'folder/test2.csv'
    pipe.delete_files_remote(confirm=False)

    frame = pd.DataFrame({'a': range(10)})
    first_path = pipe.dirpath / first_file
    first_path.parent.mkdir(exist_ok=True)
    frame.to_csv(pipe.dirpath / first_file, index=False)
    assert pipe.push() == [first_file]
    pipe._cache_scan.clear()
    assert pipe.pull() == [first_file]

    # permissions - no access
    api2 = getapi2(testcfg.get('local', False))
    with pytest.raises(APIError, match='403'):
        pipe2 = getpipe(api2, name=pipe_name, mode='all')
        pipe2.pull()

    # permissions - read
    grant = {"username": cfg_usr2, "role": "read"}
    d6tpipe.upsert_permissions(api, pipe_name, grant)

    pipe2 = getpipe(api2, name=pipe_name, mode='all')
    assert pipe2.role == 'read'
    assert pipe2.pull() == [first_file]

    frame.to_csv(pipe2.dirpath / second_file, index=False)
    with pytest.raises(ValueError, match='Read-only'):
        pipe2.push()

    # permissions - write
    grant = {"username": cfg_usr2, "role": "write"}
    d6tpipe.upsert_permissions(api, pipe_name, grant)

    pipe2 = getpipe(api2, name=pipe_name, mode='all', chk_empty=False)
    assert pipe2.role == 'write'
    assert pipe2.pull() == [first_file]
    assert pipe2.push() == [first_file, second_file]

    # todo: check don't have access to parent paths in s3

    # todo: file include patterns
    # d6tpipe.upsert_pipe(api,{'name':'demo-vendor-daily','parent':'demo-vendor','options':{'include':'*daily*.csv'}})
    # d6tpipe.upsert_pipe(api,{'name':'demo-vendor-monthly','parent':'demo-vendor','options':{'include':'*monthly*.csv'}})

    # cleanup
    pipe.delete_files_remote(confirm=False)
    assert pipe.scan_remote(cached=False) == []
    pipe.delete_files_local(confirm=False, delete_all=True)
def test_pipes_push(self, cleanup, signup, parentinit, pipeinit, testcfg): api = getapi(testcfg.get('local', False)) pipe = getpipe(api, chk_empty=False) pipe.delete_files_local(confirm=False, delete_all=True) assert pipe.scan_local() == [] pipe = getpipe(api) with pytest.raises(PushError): pipe.push_preview() pipe.pull() # push works cfg_copyfile = 'test.csv' df = pd.DataFrame({'a': range(10)}) df.to_csv(pipe.dirpath / cfg_copyfile, index=False) assert set(pipe.scan_local()) == set(cfg_filenames_chk + [cfg_copyfile]) assert pipe.files() == cfg_filenames_chk assert pipe.push_preview() == [cfg_copyfile] assert pipe.push() == [cfg_copyfile] assert pipe.push_preview() == [] pipe._cache_scan.clear() assert pipe.pull_preview() == [] # doesn't take files not meet pattern cfg_copyfile2 = 'test.xlsx' df.to_csv(pipe.dirpath / cfg_copyfile2) assert pipe.push_preview() == [] (pipe.dirpath / cfg_copyfile2).unlink() # todo: push exclude # files() works assert pipe.files() == cfg_filenames_chk + [cfg_copyfile] assert pipe.files(include='Machine*.csv') == cfg_filenames_chk assert pipe.files(exclude='Machine*.csv') == [cfg_copyfile] assert pipe.files(sortby='mod')[-1] == cfg_copyfile # crc works df2 = pd.read_csv(pipe.dirpath / cfg_copyfile, **pipe.schema['pandas']) df2.to_csv(pipe.dirpath / cfg_copyfile, index=False) assert pipe.push_preview() == [] df.to_csv(pipe.dirpath / cfg_copyfile, index=True) assert pipe.push_preview() == [cfg_copyfile] # files param works assert pipe.pull(files=[cfg_copyfile]) == [cfg_copyfile] pipe.delete_files_remote(files=[cfg_copyfile], confirm=False) assert pipe._pullpush_luigi([cfg_copyfile], 'exists') == [False] assert pipe.push(files=[cfg_copyfile]) == [cfg_copyfile] assert pipe._pullpush_luigi([cfg_copyfile], 'exists') == [True] # remove_orphans works (pipe.dirpath / cfg_copyfile).unlink() pipe._cache_scan.clear() assert pipe.remove_orphans(direction='both', dryrun=True)['remote'] == [cfg_copyfile] assert pipe.remove_orphans(direction='both', 
dryrun=False)['remote'] == [cfg_copyfile] assert pipe._pullpush_luigi(['test.csv'], 'exists') == [False] # cleanup pipe.delete_files_local(confirm=False, delete_all=True) assert pipe.scan_local() == [] # def test_pipes_includeexclude(self, cleanup, parentinit, pipeinit, testcfg): # pass '''