def do_sync(args):
    logger.info('Starting sync.')

    meltano_config = load_json_file(args.config)
    bucket_files_definition = meltano_config.get("bucket_files_definition", None)

    if bucket_files_definition:
        # Table definitions live in a separate file referenced by the config.
        if os.path.isfile(bucket_files_definition):
            config = tap_s3_csv.config.load(bucket_files_definition)
        else:
            logger.error("tap_s3_csv: '{}' file not found".format(
                bucket_files_definition))
            exit(1)
    else:
        check_config(CONFIG, REQUIRED_CONFIG_KEYS)
        # Fall back to the module-level CONFIG; without an assignment here,
        # `config` would be unbound in the loop below.
        config = CONFIG

    state = load_state(args.state)

    for table in config['tables']:
        state = sync_table(config, state, table)

    # Mark the run as finished before emitting the final state message.
    state = {'COMPLETED': True}
    singer.write_state(state)

    logger.info('Done syncing.')
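
# A minimal sketch, not part of the tap itself, of how do_sync could be
# invoked from a CLI entry point. The --config/--state flag names follow
# the common Singer tap convention; this tap's real argument parser is
# defined elsewhere and may differ.
def _example_main():
    import argparse
    parser = argparse.ArgumentParser(
        description='illustrative tap-s3-csv entry point')
    parser.add_argument('-c', '--config', required=True,
                        help='path to the tap config JSON file')
    parser.add_argument('-s', '--state',
                        help='path to a state JSON file from a previous run')
    do_sync(parser.parse_args())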
def sync_table_file(config, s3_file, table_spec, schema):
    logger.info('Syncing file "{}".'.format(s3_file))

    bucket = config['bucket']
    table_name = table_spec['name']

    iterator = tap_s3_csv.format_handler.get_row_iterator(
        config, table_spec, s3_file)

    records_synced = 0

    for row in iterator:
        # Annotate each record with its provenance in S3.
        metadata = {
            '_s3_source_bucket': bucket,
            '_s3_source_file': s3_file,
            # records_synced is zero-indexed; +1 for the header row and
            # +1 to convert to 1-based line numbers
            '_s3_source_lineno': records_synced + 2
        }

        try:
            to_write = [{**conversion.convert_row(row, schema), **metadata}]
            singer.write_records(table_name, to_write)
        except BrokenPipeError:
            logger.error(
                f'Pipe to loader broke after {records_synced} records were '
                f'written from {s3_file}: troubled line was {row}'
            )
            raise

        records_synced += 1

    return records_synced
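
# Illustrative only: assuming conversion.convert_row turned the first data
# row of a file into {'id': 1, 'name': 'alpha'}, sync_table_file would emit
# a record shaped like this (the bucket and key names here are made up):
_EXAMPLE_EMITTED_RECORD = {
    'id': 1,                                # converted CSV fields...
    'name': 'alpha',
    '_s3_source_bucket': 'my-bucket',       # config['bucket']
    '_s3_source_file': 'path/to/file.csv',  # the s3_file argument
    '_s3_source_lineno': 2,                 # records_synced 0 + 2: line 1 is the header
}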