def main():
    """Entry point: parse config, validate table definitions, verify S3
    access (assuming a cross-account role when ``external_id`` is set),
    then run discovery or sync.
    """
    args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS)
    config = args.config
    external_source = False

    if 'external_id' in config:
        # Cross-account access: re-parse with the stricter required keys.
        args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS_EXTERNAL_SOURCE)
        config = args.config
        external_source = True

    config['tables'] = validate_table_config(config)

    # If external_id is provided, we are trying to access files in another
    # AWS account, and need to assume the role
    if external_source:
        s3.setup_aws_client(config)
    # Otherwise, confirm that we can access the bucket in our own AWS account
    else:
        try:
            # Pulling a single page is enough to prove access.
            for page in s3.list_files_in_bucket(config['bucket']):
                break
        except Exception as err:  # was BaseException: don't swallow SystemExit/KeyboardInterrupt
            LOGGER.error(err)

    if args.discover:
        do_discover(args.config)
    elif args.properties:
        do_sync(config, args.properties, args.state)
def test_list_files_in_bucket(self):
    """Verify the logger is invoked with the 'no files found' message for
    every page yielded when listing a bucket with a prefix.
    """
    bucket = 'bucket-name'
    page = {'Contents': [{'key': 'value'}]}  # fixture retained from original test (currently unused)
    with mock.patch('boto3.client') as client_mock:
        client_mock.get_paginator = Mock()
        with mock.patch('singer.get_logger') as logger_mock:
            # `page_result` was previously named `list`, shadowing the builtin.
            for page_result in list_files_in_bucket(bucket, "regex"):
                logger_mock.assert_called_with(
                    'Found no files for bucket "%s" that match prefix "%s"',
                    bucket, "regex")
def main():
    """Entry point: parse config, validate table definitions, confirm S3
    access, then run discovery or sync.
    """
    args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS)
    config = args.config
    config['tables'] = validate_table_config(config)

    # Check that boto can access S3
    try:
        # Pulling a single page is enough to prove access.
        for page in s3.list_files_in_bucket(config['bucket']):
            break
    except Exception as err:  # was `except err:` — NameError at handler time, err was never bound
        LOGGER.error(err)

    if args.discover:
        do_discover(args.config)
    elif args.properties:
        do_sync(config, args.properties, args.state)
def main():
    """Entry point: parse config, validate table definitions, probe for
    direct bucket access (falling back to assuming the configured role),
    then run discovery or sync.
    """
    args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS)
    config = args.config
    config['tables'] = validate_table_config(config)

    try:
        # Pulling a single page is enough to prove direct access.
        for page in s3.list_files_in_bucket(config['bucket']):
            break
        LOGGER.warning(
            "I have direct access to the bucket without assuming the configured role."
        )
    except Exception:  # was bare `except:` — would also trap SystemExit/KeyboardInterrupt
        # Direct access failed; assume the configured role instead.
        s3.setup_aws_client(config)

    if args.discover:
        do_discover(args.config)
    elif args.properties:
        do_sync(config, args.properties, args.state)
def main() -> None:
    """
    Main function

    :return: None
    """
    # We observed data who's field size exceeded the default maximum of
    # 131072. We believe the primary consequence of the following setting
    # is that a malformed, wide CSV would potentially parse into a single
    # large field rather than giving this error, but we also think the
    # chances of that are very small and at any rate the source data would
    # need to be fixed. The other consequence of this could be larger
    # memory consumption but that's acceptable as well.
    csv.field_size_limit(sys.maxsize)

    # Mock out `csv.field_size_limit` since messytables sets it...
    # TODO: replace messytables
    _field_size_limit = csv.field_size_limit
    csv.field_size_limit = lambda size: None

    # try/finally guarantees the monkey-patch is undone even when
    # discovery or sync raises (previously it leaked on any exception).
    try:
        args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS)
        config = args.config

        # Reassign the config tables to the validated object
        config['tables'] = CONFIG_CONTRACT(config.get('tables', {}))

        try:
            # Pulling a single page is enough to prove direct access.
            for _ in s3.list_files_in_bucket(config['bucket']):
                break
            LOGGER.warning(
                "I have direct access to the bucket without assuming the configured role."
            )
        except Exception:
            # Direct access failed; assume the configured role instead.
            s3.setup_aws_client(config)

        if args.discover:
            do_discover(args.config)
        elif args.properties:
            do_sync(config, args.properties, args.state)
    finally:
        csv.field_size_limit = _field_size_limit