def records_with_state(records, state, stream_mapping, state_cursor_paths) -> Iterable[Tuple[Any, Any, str]]:
    """Yield each record's cursor value alongside the matching cursor value from state.

    NOTE(review): another function named ``records_with_state`` is defined later in
    this file and would replace this one at import time - confirm which is intended.

    :param records: iterable of messages; each must expose ``.record.stream`` and ``.record.data``
    :param state: state object, subscriptable by stream name (or, for the fallback, addressed
        from its root by the configured path)
    :param stream_mapping: maps stream name to the configured stream (``.stream.json_schema``
        and ``.cursor_field`` are read)
    :param state_cursor_paths: maps stream name to the path of the cursor value inside state
    :return: generator of ``(record_value, state_value, stream_name)`` triples. A record is
        skipped when the state entry for its stream is ``None`` or when both lookups
        raise ``KeyError``.
    :raises KeyError: if a record's stream is missing from ``stream_mapping``
    """
    for record in records:
        stream_name = record.record.stream
        stream = stream_mapping[stream_name]
        helper = JsonSchemaHelper(schema=stream.stream.json_schema)
        cursor_field = helper.field(stream.cursor_field)
        record_value = cursor_field.parse(record=record.record.data)
        try:
            stream_state = state[stream_name]
            if stream_state is None:
                # Nothing has been stored for this stream, so there is nothing to pair with.
                continue
            # First attempt: assume the state object is namespaced on stream names.
            state_value = cursor_field.parse(record=stream_state, path=state_cursor_paths[stream_name])
        except KeyError:
            # Any KeyError from the first attempt (the lookups above or the parse call)
            # lands here.
            try:
                # Second attempt: treat the path as absolute within the state file
                # (i.e. bookmarks -> stream_name -> column -> value).
                state_value = cursor_field.parse(record=state, path=state_cursor_paths[stream_name])
            except KeyError:
                continue
        yield record_value, state_value, stream_name
def records_with_state(records, state, stream_mapping, state_cursor_paths) -> Iterable[Tuple[Any, Any]]:
    """Pair every record's cursor value with the cursor value held in state for its stream.

    NOTE(review): an earlier function with this same name exists in the file; this
    definition replaces it at import time - confirm that is intended.

    :param records: iterable of messages exposing ``.record.stream`` and ``.record.data``
    :param state: subscriptable by stream name; the entry is passed to ``get_state_value``
    :param stream_mapping: maps stream name to the configured stream
    :param state_cursor_paths: maps stream name to the cursor path inside that stream's state
    :return: generator of ``(record_value, state_value)`` pairs, one per record
    """
    for message in records:
        name = message.record.stream
        configured_stream = stream_mapping[name]
        schema_helper = JsonSchemaHelper(schema=configured_stream.stream.json_schema)
        # Tuple elements are evaluated left to right: record value first, then state value.
        yield (
            schema_helper.get_cursor_value(
                record=message.record.data,
                cursor_path=configured_stream.cursor_field,
            ),
            schema_helper.get_state_value(
                state=state[name],
                cursor_path=state_cursor_paths[name],
            ),
        )
def primary_keys_by_stream(configured_catalog: ConfiguredAirbyteCatalog) -> Mapping[str, List[CatalogField]]:
    """Collect the primary-key fields of every stream in the catalog.

    :param configured_catalog: catalog whose ``streams`` are iterated
    :return: dict keyed by stream name; each value is the list produced by calling
        ``JsonSchemaHelper.field`` on every entry of the stream's ``primary_key``
        (an empty list when ``primary_key`` is falsy)
    """

    def _pk_fields(configured_stream):
        # One helper per stream, built from that stream's own JSON schema.
        schema_helper = JsonSchemaHelper(schema=configured_stream.stream.json_schema)
        key_paths = configured_stream.primary_key or []
        return [schema_helper.field(key_path) for key_path in key_paths]

    return {entry.stream.name: _pk_fields(entry) for entry in configured_catalog.streams}