Example 1
def main():
    """Entry point: parse config, validate tables, check S3 access, then discover or sync.

    If 'external_id' is present in the config, the files live in another AWS
    account and we must assume a role instead of listing the bucket directly.
    """
    args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS)
    config = args.config

    external_source = False

    # An external_id implies cross-account access; re-parse with the stricter
    # key set required for assuming a role.
    if 'external_id' in config:
        args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS_EXTERNAL_SOURCE)
        config = args.config
        external_source = True

    config['tables'] = validate_table_config(config)

    # If external_id is provided, we are trying to access files in another AWS account, and need to assume the role
    if external_source:
        s3.setup_aws_client(config)
    # Otherwise, confirm that we can access the bucket in our own AWS account
    else:
        try:
            # One page is enough to prove access, hence the immediate break.
            for page in s3.list_files_in_bucket(config['bucket']):
                break
        except Exception as err:  # narrowed from BaseException so Ctrl-C / SystemExit still propagate
            LOGGER.error(err)

    if args.discover:
        do_discover(args.config)
    elif args.properties:
        do_sync(config, args.properties, args.state)
Example 2
 def test_list_files_in_bucket(self):
     """list_files_in_bucket should warn when no file in the bucket matches the prefix."""
     bucket = 'bucket-name'
     # NOTE(review): this fixture is never wired into the mocked paginator — confirm intent.
     page = {'Contents': [{'key': 'value'}]}
     with mock.patch('boto3.client') as m:
         m.get_paginator = Mock()
         with mock.patch('singer.get_logger') as patching:
             # Loop variable renamed from `list`, which shadowed the builtin.
             for matched_file in list_files_in_bucket(bucket, "regex"):
                 patching.assert_called_with('Found no files for bucket "%s" that match prefix "%s"', bucket, "regex")
Example 3
def main():
    """Entry point: parse config, validate tables, verify S3 access, then discover or sync."""
    args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS)
    config = args.config

    config['tables'] = validate_table_config(config)

    # Check that boto can access S3; one page is enough to prove access.
    try:
        for page in s3.list_files_in_bucket(config['bucket']):
            break
    except Exception as err:  # fixed: `except err:` referenced an undefined name (NameError)
        LOGGER.error(err)

    if args.discover:
        do_discover(args.config)
    elif args.properties:
        do_sync(config, args.properties, args.state)
Example 4
def main():
    """Entry point: parse config, validate tables, ensure S3 access (assuming a
    role if direct access fails), then discover or sync.
    """
    args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS)
    config = args.config

    config['tables'] = validate_table_config(config)

    # Probe for direct bucket access; fall back to assuming the configured role.
    try:
        for page in s3.list_files_in_bucket(config['bucket']):
            break
        LOGGER.warning(
            "I have direct access to the bucket without assuming the configured role."
        )
    except Exception:  # narrowed from bare except so KeyboardInterrupt/SystemExit propagate
        s3.setup_aws_client(config)

    if args.discover:
        do_discover(args.config)
    elif args.properties:
        do_sync(config, args.properties, args.state)
def main() -> None:
    """
    Main function
    :return: None
    """
    # We observed data whose field size exceeded the default maximum of
    # 131072. We believe the primary consequence of the following setting
    # is that a malformed, wide CSV would potentially parse into a single
    # large field rather than giving this error, but we also think the
    # chances of that are very small and at any rate the source data would
    # need to be fixed. The other consequence of this could be larger
    # memory consumption but that's acceptable as well.
    csv.field_size_limit(sys.maxsize)

    # Mock out `csv.field_size_limit` since messytables sets it...
    # TODO: replace messytables
    _field_size_limit = csv.field_size_limit
    csv.field_size_limit = lambda size: None

    try:
        args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS)
        config = args.config

        # Reassign the config tables to the validated object
        config['tables'] = CONFIG_CONTRACT(config.get('tables', {}))

        # Probe for direct bucket access; fall back to assuming the configured role.
        try:
            for _ in s3.list_files_in_bucket(config['bucket']):
                break
            LOGGER.warning(
                "I have direct access to the bucket without assuming the configured role."
            )
        except Exception:
            s3.setup_aws_client(config)

        if args.discover:
            do_discover(args.config)
        elif args.properties:
            do_sync(config, args.properties, args.state)
    finally:
        # Restore the real function even if discovery/sync raises, so the
        # process-wide csv module is not left monkey-patched.
        csv.field_size_limit = _field_size_limit