def test_parse_path_relative_path_returns_new_path():
    """Tests that a relative path gets transformed
    """
    from dataduct.config import Config
    config = Config()
    config.etl['TEST_PATH'] = '/transform'
    eq_(parse_path('test/path', 'TEST_PATH'), '/transform/test/path')
def test_make_pipeline_url_has_region_correct():
    """Tests that make_pipeline_url makes a correct url with a region
    """
    from dataduct.config import Config
    config = Config()
    config.etl['REGION'] = 'test'
    eq_(
        make_pipeline_url('123'),
        'https://console.aws.amazon.com/datapipeline/?region=test#ExecutionDetailsPlace:pipelineId=123&show=latest'  # noqa
    )
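
# The test above pins the expected console URL format. The sketch below is a
# minimal reconstruction consistent with that expectation; it is illustrative
# only and not necessarily the library's make_pipeline_url implementation.
def _make_pipeline_url_sketch(pipeline_id):
    """Build the AWS console URL for a pipeline id from the configured region"""
    from dataduct.config import Config
    region = Config().etl.get('REGION', '')
    return ('https://console.aws.amazon.com/datapipeline/?region={region}'
            '#ExecutionDetailsPlace:pipelineId={pid}&show=latest').format(
                region=region, pid=pipeline_id)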
def test_parse_path_expands_user():
    """Tests that parse_path expands the user symbol
    """
    from dataduct.config import Config
    config = Config()
    config.etl['TEST_PATH'] = '~/transform'
    eq_(
        parse_path('test/path', 'TEST_PATH'),
        os.path.expanduser('~/transform/test/path'),
    )
def config_singleton_setup(args):
    """Setup the config singleton based on the mode in args

    Note:
        This is the single entry point to the library, so the singleton must
        be instantiated here with the correct state; the __new__ function can
        be used to set the debug_level.

        We import inside the function as the singleton declaration should be
        done here and at no other entry point. The same pattern is followed
        at all the entry point scripts.
    """
    mode = args.mode if hasattr(args, 'mode') else None

    import logging
    logger = logging.getLogger(__name__)

    from dataduct.config import Config
    from dataduct.config import logger_configuration

    config = Config(mode=mode)

    # Set up logging for the package
    logger_configuration()

    if mode is not None:
        logger.warning('Running in %s mode', config.mode)

    return config
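
# Hedged usage sketch: how an entry-point script might call
# config_singleton_setup with an argparse namespace. The '--mode' flag name
# and the helper function below are illustrative, not part of the library.
def _example_entry_point():
    import argparse

    parser = argparse.ArgumentParser(description='dataduct entry point (example)')
    parser.add_argument('--mode', default=None, help='config mode to load')
    args = parser.parse_args()

    # Later Config() calls return the same singleton instance set up here
    return config_singleton_setup(args)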
def test_parse_path_relative_path_no_matching_config_returns_itself():
    """Tests that the original path is returned if no matching transformation
    can be found
    """
    from dataduct.config import Config
    config = Config()
    config.etl.pop('TEST_PATH', None)
    eq_(parse_path('test/path', 'TEST_PATH'), 'test/path')
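
# The three parse_path tests above imply this behaviour: if the config key
# exists, the relative path is joined onto it with '~' expanded; otherwise the
# path is returned unchanged. The sketch below is a reconstruction consistent
# with those tests, not the library source.
def _parse_path_sketch(path, config_key):
    import os
    from dataduct.config import Config

    base_path = Config().etl.get(config_key)
    if base_path is None:
        # No matching transformation configured: return the path as-is
        return path
    return os.path.join(os.path.expanduser(base_path), path)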
def open_sql_shell(database_type, host_alias=None, **kwargs):
    """Opens a sql shell for MySQL or Redshift
    """
    # Late import: the Singleton config must already have been loaded by the
    # dataduct main entry point
    from dataduct.data_access import open_shell
    from dataduct.config import Config
    config = Config()

    if database_type == 'redshift':
        open_shell.open_psql_shell()
    else:
        assert config.mysql.get(host_alias), \
            'host_alias "{}" does not exist in config'.format(host_alias)
        open_shell.open_mysql_shell(sql_creds=config.mysql[host_alias])
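
# Hedged usage sketch for open_sql_shell. The alias 'warehouse_mysql' is a
# placeholder; it must match a key under the mysql section of the loaded config.
def _example_open_shells():
    # Redshift: credentials come from the loaded Config, no alias needed
    open_sql_shell('redshift')
    # MySQL: the alias selects which host's credentials to use
    open_sql_shell('mysql', host_alias='warehouse_mysql')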
def get_hooks(hook_name):
    """Returns the before hook and after hook (in a tuple) for a particular
    hook name
    """
    from dataduct.config import Config
    config = Config()

    if 'HOOKS_BASE_PATH' not in config.etl:
        return default_before_hook, default_after_hook

    hook_file = parse_path(hook_name + '.py', 'HOOKS_BASE_PATH')
    if not os.path.isfile(hook_file):
        return default_before_hook, default_after_hook

    # Delete the previous custom hook, so the imports are not merged
    if 'custom_hook' in sys.modules:
        del sys.modules['custom_hook']

    # Get the hook functions, falling back to the default hooks
    custom_hook = imp.load_source('custom_hook', hook_file)
    before_hook = getattr(custom_hook, 'before_hook', default_before_hook)
    after_hook = getattr(custom_hook, 'after_hook', default_after_hook)

    return before_hook, after_hook
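
# Hedged sketch of what a custom hook module might look like on disk.
# get_hooks loads '<hook_name>.py' from HOOKS_BASE_PATH and picks up functions
# named before_hook and after_hook; either may be omitted, in which case the
# defaults are used. The file name and signatures below are illustrative and
# should be checked against the default hooks shipped with the library.
#
# File: <HOOKS_BASE_PATH>/my_hook.py
#
#     def before_hook(*args, **kwargs):
#         """Runs before the hooked action; returns the (args, kwargs) to use"""
#         return args, kwargs
#
#     def after_hook(result):
#         """Runs after the hooked action with its result"""
#         return result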
""" Shared utility functions """ from boto.datapipeline import regions from boto.datapipeline.layer1 import DataPipelineConnection from time import sleep import dateutil.parser from dataduct.config import Config config = Config() REGION = config.etl.get('REGION', None) DP_ACTUAL_END_TIME = '@actualEndTime' DP_ATTEMPT_COUNT_KEY = '@attemptCount' DP_INSTANCE_ID_KEY = 'id' DP_INSTANCE_STATUS_KEY = '@status' def _update_sleep_time(last_time=None): """Expotentially decay sleep times between calls incase of failures Note: Start time for sleep is 5 and the max is 60 Args: last_time(int): time used in the last iteration Returns: next_time(int): time to sleep in the next iteration of the code