def write_file(target_filename, table_spec, schema, max_records=-1):
    """Sync the rows of one source file to the Singer output stream.

    Args:
        target_filename: File name, relative to ``table_spec['path']``.
        table_spec: Table configuration dict; reads ``'path'`` and ``'name'``,
            and optionally ``'invalid_format_action'`` ('fail' [default] or
            'ignore').
        schema: Singer schema passed to ``conversion.convert_row`` for each row.
        max_records: Stop after this many records; any value <= 0 means
            no limit (default -1).

    Returns:
        The number of records written.

    Raises:
        BrokenPipeError: Re-raised if the downstream loader pipe closes
            while writing.
        tap_spreadsheets_anywhere.format_handler.InvalidFormatError: If the
            file cannot be parsed and 'invalid_format_action' is not 'ignore'.
    """
    # NOTE(review): a second, older `write_file` definition exists later in
    # this file and shadows this one at import time — confirm which variant
    # is intended and remove the other.
    LOGGER.info('Syncing file "{}".'.format(target_filename))
    target_uri = resolve_target_uri(table_spec, target_filename)
    records_synced = 0
    try:
        iterator = tap_spreadsheets_anywhere.format_handler.get_row_iterator(table_spec, target_uri)
        for row in iterator:
            metadata = {
                '_smart_source_bucket': table_spec['path'],
                '_smart_source_file': target_filename,
                # index zero, +1 for header row
                '_smart_source_lineno': records_synced + 2
            }
            try:
                record_with_meta = {**conversion.convert_row(row, schema), **metadata}
                singer.write_record(table_spec['name'], record_with_meta)
            except BrokenPipeError as bpe:
                LOGGER.error(
                    f'Pipe to loader broke after {records_synced} records were written from {target_filename}: troubled '
                    f'line was {record_with_meta}')
                raise bpe
            records_synced += 1
            # Honor the record cap only when it is a positive limit.
            if 0 < max_records <= records_synced:
                break
    except tap_spreadsheets_anywhere.format_handler.InvalidFormatError as ife:
        if table_spec.get('invalid_format_action', 'fail').lower() == "ignore":
            # Fix: the original passed `ife` as a positional %-format argument
            # to a message with no placeholder, causing a logging formatting
            # error. Logger.exception() already records the active traceback,
            # so no extra argument is needed.
            LOGGER.exception(f"Ignoring unparseable file: {target_filename}")
        else:
            raise ife
    return records_synced
def write_file(target_filename, table_spec, schema):
    """Stream every row of a single source file to the Singer output.

    The source URI is formed by joining ``table_spec['path']`` and
    *target_filename*. Each row is converted against *schema*, annotated
    with smart-source metadata, and written as a one-record batch.

    Returns the number of records written; re-raises ``BrokenPipeError``
    if the downstream loader disconnects mid-stream.
    """
    LOGGER.info('Syncing file "{}".'.format(target_filename))
    source_uri = table_spec['path'] + '/' + target_filename
    row_iter = tap_spreadsheets_anywhere.format_handler.get_row_iterator(
        table_spec, source_uri)
    written = 0
    for raw_row in row_iter:
        smart_meta = {
            '_smart_source_bucket': table_spec['path'],
            '_smart_source_file': target_filename,
            # index zero, +1 for header row
            '_smart_source_lineno': written + 2,
        }
        try:
            batch = [{**conversion.convert_row(raw_row, schema), **smart_meta}]
            singer.write_records(table_spec['name'], batch)
        except BrokenPipeError as bpe:
            LOGGER.error(
                f'Pipe to loader broke after {written} records were written from {target_filename}: troubled '
                f'line was {batch[0]}')
            raise bpe
        written += 1
    return written