Example #1
def json_spec(col_defs: List[Tuple[str, DType]],
              mapping: Dict = None) -> KeyValueSpec:
    """Creates a spec for how to use JSON data when consuming a Kafka stream to a Deephaven table.

    Args:
        col_defs (List[Tuple[str, DType]]): a list of tuples specifying the names and types of the columns to be
            created on the resulting Deephaven table. Each tuple contains two elements: a string for the column
            name and a Deephaven type for the column data type.
        mapping (Dict): a dict mapping JSON fields to column names defined in the col_defs argument. Fields
            starting with a '/' character are interpreted as JSON Pointers (see RFC 6901 for details;
            essentially, nested fields are represented like "/parent/nested"). Fields not starting with a '/'
            character are interpreted as top-level field names. If the mapping argument is omitted or None,
            a 1:1 mapping between JSON fields and Deephaven table column names is assumed.

    Returns:
        a KeyValueSpec

    Raises:
        DHError
    """
    try:
        col_defs = [
            c.j_column_definition for c in _build_column_definitions(col_defs)
        ]
        if mapping is None:
            return KeyValueSpec(j_spec=_JKafkaTools_Consume.jsonSpec(col_defs))
        mapping = j_hashmap(mapping)
        return KeyValueSpec(
            j_spec=_JKafkaTools_Consume.jsonSpec(col_defs, mapping))
    except Exception as e:
        raise DHError(e, "failed to create a Kafka key/value spec") from e
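
A minimal sketch of consuming with this spec, assuming the deephaven kafka_consumer module and its consume() entry point (exact import paths vary across Deephaven releases); the broker address, topic name, and column names are illustrative placeholders:

from deephaven import dtypes, kafka_consumer as kc
from deephaven.stream.kafka.consumer import KeyValueSpec

# Map the nested JSON fields /quote/symbol and /quote/price (JSON Pointer
# syntax per RFC 6901) onto two Deephaven columns.
value_spec = kc.json_spec(
    [("Symbol", dtypes.string), ("Price", dtypes.double)],
    mapping={"/quote/symbol": "Symbol", "/quote/price": "Price"},
)

quotes = kc.consume(
    {"bootstrap.servers": "localhost:9092"},  # placeholder broker address
    "quotes",                                 # hypothetical topic name
    key_spec=KeyValueSpec.IGNORE,             # ignore the message key
    value_spec=value_spec,
)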
Example #2
def json_spec(
    include_columns: List[str] = None,
    exclude_columns: List[str] = None,
    mapping: Dict[str, str] = None,
    nested_delim: str = None,
    output_nulls: bool = False,
    timestamp_field: str = None,
) -> KeyValueSpec:
    """Creates a spec for how to generate JSON data when producing a Kafka stream from a Deephaven table.

    Because JSON is a nested structure, a Deephaven column can be mapped to a top-level JSON field or to a
    field nested inside other JSON objects many levels deep, e.g. X.Y.Z.field. The 'nested_delim' parameter
    controls how a nested JSON field name is delimited in the mapping.

    Args:
        include_columns (List[str]): the list of Deephaven column names to include in the JSON output as fields.
            Default is None, meaning all columns except the ones named in the 'exclude_columns' argument. If not
            None, 'exclude_columns' must be None.
        exclude_columns (List[str]): the list of Deephaven column names to omit from the JSON output as fields.
            Default is None, meaning no column is omitted. If not None, 'include_columns' must be None.
        mapping (Dict[str, str]): a mapping from column names to JSON field names. Any column name implied by
            earlier arguments and not included as a key in the map implies a field of the same name. Default is
            None, meaning all columns are mapped to JSON fields of the same name.
        nested_delim (str): if nested JSON fields are desired, the field separator used in the mapping values,
            or None for no nesting (default). For instance, if a particular column should be mapped to JSON
            field X nested inside field Y, the corresponding field name value for the column key in the mapping
            dict can be the string "Y.X", in which case the value for nested_delim should be "."
        output_nulls (bool): when False (default), do not output a field for null column values.
        timestamp_field (str): the name of an extra timestamp field to be included in the produced Kafka message
            body; it is mostly used for debugging slowdowns. Default is None.

    Returns:
        a KeyValueSpec

    Raises:
        DHError
    """
    try:
        if include_columns is not None and exclude_columns is not None:
            raise ValueError(
                "One of include_columns and exclude_columns must be None.")
        exclude_columns = j_hashset(exclude_columns)
        mapping = j_hashmap(mapping)
        return KeyValueSpec(
            _JKafkaTools_Produce.jsonSpec(
                include_columns,
                exclude_columns,
                mapping,
                nested_delim,
                output_nulls,
                timestamp_field,
            ))
    except Exception as e:
        raise DHError(e, "failed to create a Kafka key/value spec.") from e
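
A producer-side sketch, assuming the deephaven kafka_producer module and an existing Deephaven table named orders with Symbol and Price columns (all names and the broker address are illustrative). It also shows how nested_delim turns the mapping value "quote.px" into a px field nested inside a quote object:

from deephaven import kafka_producer as pk
from deephaven.stream.kafka.producer import KeyValueSpec

stop = pk.produce(
    orders,                                   # assumed existing table
    {"bootstrap.servers": "localhost:9092"},  # placeholder broker address
    "orders.json",                            # hypothetical topic name
    key_spec=KeyValueSpec.IGNORE,
    value_spec=pk.json_spec(
        include_columns=["Symbol", "Price"],
        mapping={"Price": "quote.px"},  # Price lands at {"quote": {"px": ...}}
        nested_delim=".",
    ),
)
# produce() returns a no-argument callable; invoke stop() to end publication.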
Example #3
def _dict_to_j_func(dict_mapping: Dict,
                    mapped_only: bool) -> Callable[[str], str]:
    """Wraps a Python dict as a Java Function<String, String>.

    When mapped_only is False, names absent from the dict map to themselves (identity defaults);
    when True, they map to None.
    """
    java_map = j_hashmap(dict_mapping)
    if not mapped_only:
        return _JPythonTools.functionFromMapWithIdentityDefaults(java_map)
    return _JPythonTools.functionFromMapWithDefault(java_map, None)
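
A tiny illustration of the two modes (hypothetical values; assumes the returned Java Function proxy exposes apply() through jpy):

# With mapped_only=False, names absent from the dict pass through unchanged.
rename = _dict_to_j_func({"px": "Price"}, mapped_only=False)
rename.apply("px")   # -> "Price" (mapped)
rename.apply("qty")  # -> "qty"   (identity default)

# With mapped_only=True, names absent from the dict map to None.
strict = _dict_to_j_func({"px": "Price"}, mapped_only=True)
strict.apply("qty")  # -> None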
Example #4
def avro_spec(
    schema: str,
    schema_version: str = "latest",
    field_to_col_mapping: Dict[str, str] = None,
    timestamp_field: str = None,
    include_only_columns: List[str] = None,
    exclude_columns: List[str] = None,
    publish_schema: bool = False,
    schema_namespace: str = None,
    column_properties: Dict[str, str] = None,
) -> KeyValueSpec:
    """Creates a spec for how to use an Avro schema to produce a Kafka stream from a Deephaven table.

    Args:
        schema (str): the name of a schema registered in a Confluent-compatible Schema Registry Server. The
            associated 'kafka_config' parameter in the call to produce() should include the key
            'schema.registry.url' with the value of the Schema Registry URL for fetching the schema definition.
        schema_version (str): the schema version to fetch from the schema service, default is 'latest'
        field_to_col_mapping (Dict[str, str]): a mapping from Avro field names in the schema to column names in
            the Deephaven table. Any fields in the schema not present in the dict as keys are mapped to columns of the
            same name. The default is None, meaning all schema fields are mapped to columns of the same name.
        timestamp_field (str): the name of an extra timestamp field to be included in the produced Kafka message
            body; it is mostly used for debugging slowdowns. Default is None.
        include_only_columns (List[str]): the list of column names in the source table to include in the
            generated output. Default is None. When not None, the 'exclude_columns' parameter must be None.
        exclude_columns (List[str]): the list of column names to exclude from the generated output (every other
            column will be included). Default is None. When not None, 'include_only_columns' must be None.
        publish_schema (bool): when True, publish the given schema name to the Schema Registry Server, according
            to an Avro schema generated from the table definition, for the columns and fields implied by
            field_to_col_mapping, include_only_columns, and exclude_columns. If a schema_version is provided and
            the version resulting from publishing does not match, an exception is raised. The default is False.
        schema_namespace (str): when 'publish_schema' is True, the namespace for the generated schema to be registered
            in the Schema Registry Server.
        column_properties (Dict[str, str]): when 'publish_schema' is True, specifies per-column properties that
            imply particular Avro type mappings. In particular, a column X of BigDecimal type should specify the
            properties 'X.precision' and 'X.scale'.

    Returns:
        a KeyValueSpec

    Raises:
        DHError
    """
    try:
        field_to_col_mapping = j_hashmap(field_to_col_mapping)
        column_properties = j_properties(column_properties)
        include_only_columns = j_hashset(include_only_columns)
        include_only_columns = _JKafkaTools.predicateFromSet(
            include_only_columns)
        exclude_columns = j_hashset(exclude_columns)
        exclude_columns = _JKafkaTools.predicateFromSet(exclude_columns)

        return KeyValueSpec(
            _JKafkaTools_Produce.avroSpec(
                schema,
                schema_version,
                field_to_col_mapping,
                timestamp_field,
                include_only_columns,
                exclude_columns,
                publish_schema,
                schema_namespace,
                column_properties,
            ))
    except Exception as e:
        raise DHError(e, "failed to create a Kafka key/value spec.") from e
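
A matching producer sketch for the Avro spec; the registry URL, topic, schema name, and namespace are illustrative placeholders, and note that kafka_config carries the 'schema.registry.url' key the docstring calls for:

from deephaven import kafka_producer as pk
from deephaven.stream.kafka.producer import KeyValueSpec

stop = pk.produce(
    trades,  # assumed existing Deephaven table
    {
        "bootstrap.servers": "localhost:9092",           # placeholder broker
        "schema.registry.url": "http://localhost:8081",  # needed to fetch/publish the schema
    },
    "trades.avro",  # hypothetical topic name
    key_spec=KeyValueSpec.IGNORE,
    value_spec=pk.avro_spec(
        "trades_record",                # hypothetical schema name in the registry
        publish_schema=True,            # derive and register a schema from the table
        schema_namespace="io.example",  # illustrative namespace
    ),
)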