예제 #1
0
def records_with_state(records, state, stream_mapping,
                       state_cursor_paths) -> Iterable[Tuple[Any, Any, str]]:
    """Yield each record's cursor value together with the matching cursor value from state.

    The state value is resolved in two attempts: first by treating ``state`` as
    namespaced on stream names, then (if that raises ``KeyError``) by applying
    the configured path to the root of ``state``. A record is skipped when the
    state entry for its stream is ``None`` or when both attempts raise
    ``KeyError``.

    :param records: iterable of messages exposing ``.record.stream`` and ``.record.data``
    :param state: state object, indexed by stream name on the first attempt
    :param stream_mapping: lookup from stream name to an object exposing
        ``.stream.json_schema`` and ``.cursor_field``; this lookup is not guarded,
        so an unknown stream name propagates whatever error the lookup raises
    :param state_cursor_paths: lookup from stream name to the cursor path inside state
    :return: generator of ``(record_value, state_value, stream_name)`` tuples
    """
    for record in records:
        stream_name = record.record.stream
        stream = stream_mapping[stream_name]
        helper = JsonSchemaHelper(schema=stream.stream.json_schema)
        cursor_field = helper.field(stream.cursor_field)
        record_value = cursor_field.parse(record=record.record.data)
        try:
            # An explicit None entry for the stream means there is nothing to compare against.
            if state[stream_name] is None:
                continue

            # First attempt: parse the state value assuming the state object
            # is namespaced on stream names.
            state_value = cursor_field.parse(
                record=state[stream_name],
                path=state_cursor_paths[stream_name])
        except KeyError:
            try:
                # Second attempt: treat the path as absolute within the state
                # file (i.e. bookmarks -> stream_name -> column -> value).
                state_value = cursor_field.parse(
                    record=state, path=state_cursor_paths[stream_name])
            except KeyError:
                # Neither layout matched; skip this record.
                continue
        yield record_value, state_value, stream_name
예제 #2
0
def records_with_state(records, state, stream_mapping, state_cursor_paths) -> Iterable[Tuple[Any, Any]]:
    """Pair every record's cursor value with the cursor value stored in state for its stream.

    :param records: iterable of messages exposing ``.record.stream`` and ``.record.data``
    :param state: object indexed by stream name
    :param stream_mapping: lookup from stream name to an object exposing
        ``.stream.json_schema`` and ``.cursor_field``
    :param state_cursor_paths: lookup from stream name to the cursor path inside state
    :return: generator of ``(record_value, state_value)`` tuples
    """
    for message in records:
        payload = message.record
        name = payload.stream
        configured = stream_mapping[name]
        schema_helper = JsonSchemaHelper(schema=configured.stream.json_schema)

        # Cursor value as it appears in the record itself.
        from_record = schema_helper.get_cursor_value(
            record=payload.data,
            cursor_path=configured.cursor_field,
        )
        # Cursor value as saved in the state entry for the same stream.
        from_state = schema_helper.get_state_value(
            state=state[name],
            cursor_path=state_cursor_paths[name],
        )
        yield from_record, from_state
예제 #3
0
def primary_keys_by_stream(
    configured_catalog: ConfiguredAirbyteCatalog
) -> Mapping[str, List[CatalogField]]:
    """Collect the primary-key fields of every stream in the catalog.

    :param configured_catalog: catalog whose ``streams`` are inspected
    :return: dict keyed by stream name; each value is the list of results of
        ``JsonSchemaHelper.field`` for that stream's primary keys (an empty
        list when the stream has no primary key set)
    """
    fields_by_name = {}
    for configured_stream in configured_catalog.streams:
        schema_helper = JsonSchemaHelper(schema=configured_stream.stream.json_schema)
        # A missing/empty primary key is treated as "no keys".
        key_paths = configured_stream.primary_key or []
        pk_fields = []
        for key_path in key_paths:
            pk_fields.append(schema_helper.field(key_path))
        fields_by_name[configured_stream.stream.name] = pk_fields

    return fields_by_name