Code example #1
from nose.tools import eq_  # dataduct's tests use nose's eq_
from dataduct.utils.helpers import parse_path  # assumed import path


def test_parse_path_relative_path_returns_new_path():
    """Tests that a relative path is prefixed with the configured base path
    """
    from dataduct.config import Config
    config = Config()
    config.etl['TEST_PATH'] = '/transform'
    eq_(parse_path('test/path', 'TEST_PATH'), '/transform/test/path')
Code example #2
from nose.tools import eq_  # dataduct's tests use nose's eq_
from dataduct.pipeline.utils import make_pipeline_url  # assumed import path


def test_make_pipeline_url_has_region_correct():
    """Tests that make_pipeline_url builds the correct URL when a region
    is configured
    """
    from dataduct.config import Config
    config = Config()
    config.etl['REGION'] = 'test'
    eq_(
        make_pipeline_url('123'),
        'https://console.aws.amazon.com/datapipeline/?region=test#ExecutionDetailsPlace:pipelineId=123&show=latest'  # noqa
    )
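For context, a minimal sketch of make_pipeline_url consistent with this test
(an assumption, not the project's actual implementation) could read the
configured region at call time:

from dataduct.config import Config

def make_pipeline_url(pipeline_id):
    # Sketch only: build the AWS Data Pipeline console URL that the test
    # above asserts against, using the REGION entry from the config
    region = Config().etl.get('REGION', '')
    return ('https://console.aws.amazon.com/datapipeline/'
            '?region={region}'
            '#ExecutionDetailsPlace:pipelineId={pid}&show=latest').format(
                region=region, pid=pipeline_id)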
Code example #3
import os

from nose.tools import eq_  # dataduct's tests use nose's eq_
from dataduct.utils.helpers import parse_path  # assumed import path


def test_parse_path_expands_user():
    """Tests that parse_path expands the user symbol (~)
    """
    from dataduct.config import Config
    config = Config()
    config.etl['TEST_PATH'] = '~/transform'
    eq_(
        parse_path('test/path', 'TEST_PATH'),
        os.path.expanduser('~/transform/test/path'),
    )
Code example #4
def config_singleton_setup(args):
    """Set up the config singleton based on the mode in args

    Note:
        This is the single entry point to the library, so the singleton
        object must be instantiated here with the correct state; the
        __new__ function can be used to set the debug_level.

        We import inside the function because the singleton declaration
        should happen here and at no other entry point. The same pattern
        is followed at all the entry point scripts.
    """
    mode = args.mode if hasattr(args, 'mode') else None

    import logging
    logger = logging.getLogger(__name__)

    from dataduct.config import Config
    from dataduct.config import logger_configuration

    config = Config(mode=mode)

    # Set up logging for the package
    logger_configuration()

    if mode is not None:
        logger.warning('Running in %s mode', config.mode)
    return config
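Since the docstring leans on __new__ to keep Config a singleton, here is a
minimal sketch of that pattern (dataduct's real Config differs in details;
the attributes below are assumptions):

class Config(object):
    _instance = None

    def __new__(cls, mode=None):
        # First call creates and initialises the instance; every later
        # call returns the same object, regardless of arguments
        if cls._instance is None:
            cls._instance = super(Config, cls).__new__(cls)
            cls._instance.mode = mode
            cls._instance.etl = {}
        return cls._instance

a = Config(mode='test')
b = Config()
assert a is b and b.mode == 'test'

This shared state is also why the tests above can mutate config.etl and have
later Config() calls observe the change.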
Code example #5
from nose.tools import eq_  # dataduct's tests use nose's eq_
from dataduct.utils.helpers import parse_path  # assumed import path


def test_parse_path_relative_path_no_matching_config_returns_itself():
    """Tests that the original path is returned when no matching
    configuration entry can be found
    """
    from dataduct.config import Config
    config = Config()
    config.etl.pop('TEST_PATH', None)
    eq_(parse_path('test/path', 'TEST_PATH'), 'test/path')
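Taken together, the parse_path tests in examples #1, #3 and #5 pin down its
contract: prefix a relative path with the configured base path, expand ~,
and return the input unchanged when the config key is absent. A minimal
sketch consistent with those tests (not the library's actual code):

import os

from dataduct.config import Config

def parse_path(path, config_key):
    # Sketch only: no base path configured -> return the path untouched;
    # otherwise join with the base path and expand the user directory
    base = Config().etl.get(config_key)
    if base is None:
        return path
    return os.path.expanduser(os.path.join(base, path))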
Code example #6
def open_sql_shell(database_type, host_alias=None, **kwargs):
    """Opens a SQL shell for MySQL or Redshift
    """

    # Late imports: the Config singleton must already have been set up by
    # the dataduct entry point before these modules are pulled in
    from dataduct.data_access import open_shell
    from dataduct.config import Config
    config = Config()
    if database_type == 'redshift':
        open_shell.open_psql_shell()
    else:
        assert config.mysql.get(host_alias), \
            'host_alias "{}" does not exist in config'.format(host_alias)
        open_shell.open_mysql_shell(sql_creds=config.mysql[host_alias])
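A call site picks the engine and, for MySQL, a host alias to look up in the
config; the alias below is hypothetical and must exist under the mysql
section of the config for the assert to pass:

open_sql_shell('redshift')
open_sql_shell('mysql', host_alias='warehouse')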
Code example #7
File: hook.py  Project: wpromatt/dataduct
import imp
import os
import sys

# parse_path and the default before/after hooks are defined elsewhere in hook.py


def get_hooks(hook_name):
    """Returns the before hook and after hook (as a tuple) for a particular
    hook name
    """
    from dataduct.config import Config
    config = Config()

    if 'HOOKS_BASE_PATH' not in config.etl:
        return default_before_hook, default_after_hook

    hook_file = parse_path(hook_name + '.py', 'HOOKS_BASE_PATH')
    if not os.path.isfile(hook_file):
        return default_before_hook, default_after_hook

    # Delete the previous custom hook, so the imports are not merged.
    if 'custom_hook' in sys.modules:
        del sys.modules['custom_hook']

    # Get the hook functions, falling back to the default hooks
    custom_hook = imp.load_source('custom_hook', hook_file)
    before_hook = getattr(custom_hook, 'before_hook', default_before_hook)
    after_hook = getattr(custom_hook, 'after_hook', default_after_hook)

    return before_hook, after_hook
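get_hooks thus loads <hook_name>.py from HOOKS_BASE_PATH and picks up any
module-level before_hook/after_hook it defines. A hypothetical hook file
(the file name and signatures here are illustrative assumptions):

# HOOKS_BASE_PATH/create_pipeline.py -- hypothetical custom hook module;
# get_hooks falls back to the defaults for any function not defined here
def before_hook(*args, **kwargs):
    # Runs before the hooked function; may adjust its arguments
    print('before: %s %s' % (args, kwargs))
    return args, kwargs

def after_hook(result):
    # Runs after the hooked function; may adjust its return value
    print('after: %s' % (result,))
    return result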
Code example #8
File: utils.py  Project: wpromatt/dataduct
"""
Shared utility functions
"""
from boto.datapipeline import regions
from boto.datapipeline.layer1 import DataPipelineConnection
from time import sleep
import dateutil.parser

from dataduct.config import Config

config = Config()
REGION = config.etl.get('REGION', None)

DP_ACTUAL_END_TIME = '@actualEndTime'
DP_ATTEMPT_COUNT_KEY = '@attemptCount'
DP_INSTANCE_ID_KEY = 'id'
DP_INSTANCE_STATUS_KEY = '@status'


def _update_sleep_time(last_time=None):
    """Exponentially increase the sleep time between calls in case of failures

    Note:
        The starting sleep time is 5 and the max is 60

    Args:
        last_time(int): time used in the last iteration

    Returns:
        next_time(int): time to sleep in the next iteration of the code
    """
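A body matching the docstring (start at 5, exponential growth capped at 60)
might look like the following sketch, an assumption rather than the
project's actual code:

def _update_sleep_time(last_time=None):
    # Sketch: first call sleeps 5s, then double each retry, capped at 60s
    if last_time is None:
        return 5
    return min(last_time * 2, 60)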