Example #1
0
def typing_to_runner_api(type_):
  """Convert a Python type into a ``schema_pb2.FieldType`` proto.

  The conversions are attempted in this order:

  * NamedTuple classes become a ``row_type``. The schema is looked up in
    ``SCHEMA_REGISTRY`` when the class already carries a schema id;
    otherwise a new schema with a fresh UUID is built from the class's
    fields, the id is stored on the class, and the schema is registered.
  * Types present in ``PRIMITIVE_TO_ATOMIC_TYPE`` become an ``atomic_type``.
  * Mapping types become a ``map_type``.
  * Optional types are converted as their inner type and marked nullable.
  * Sequence subclasses become an ``array_type``.
  * Anything else is passed to ``LogicalType.from_typing``; if that raises
    ``ValueError`` the type is represented by the ``PYTHON_ANY_URN``
    logical type.

  Args:
    type_: the Python type (or typing construct) to convert.

  Returns:
    A ``schema_pb2.FieldType`` describing ``type_``.
  """
  if match_is_named_tuple(type_):
    schema = None
    if hasattr(type_, _BEAM_SCHEMA_ID):
      schema = SCHEMA_REGISTRY.get_schema_by_id(getattr(type_, _BEAM_SCHEMA_ID))
    if schema is None:
      # typing.NamedTuple removed `_field_types` in Python 3.9;
      # `__annotations__` holds the same name -> type mapping. Prefer it and
      # only fall back to the legacy attribute when no annotations exist.
      field_types = getattr(type_, '__annotations__', None)
      if not field_types:
        field_types = type_._field_types
      fields = [
          schema_pb2.Field(
              name=name, type=typing_to_runner_api(field_types[name]))
          for name in type_._fields
      ]
      type_id = str(uuid4())
      schema = schema_pb2.Schema(fields=fields, id=type_id)
      # Remember the id on the class so later conversions of the same class
      # take the registry lookup path above.
      setattr(type_, _BEAM_SCHEMA_ID, type_id)
      SCHEMA_REGISTRY.add(type_, schema)

    return schema_pb2.FieldType(row_type=schema_pb2.RowType(schema=schema))

  # All concrete types (other than NamedTuple sub-classes) should map to
  # a supported primitive type.
  elif type_ in PRIMITIVE_TO_ATOMIC_TYPE:
    return schema_pb2.FieldType(atomic_type=PRIMITIVE_TO_ATOMIC_TYPE[type_])

  elif _match_is_exactly_mapping(type_):
    key_type, value_type = map(typing_to_runner_api, _get_args(type_))
    return schema_pb2.FieldType(
        map_type=schema_pb2.MapType(key_type=key_type, value_type=value_type))

  elif _match_is_optional(type_):
    # It's possible that a user passes us Optional[Optional[T]], but in python
    # typing this is indistinguishable from Optional[T] - both resolve to
    # Union[T, None] - so there's no need to check for that case here.
    result = typing_to_runner_api(extract_optional_type(type_))
    result.nullable = True
    return result

  elif _safe_issubclass(type_, Sequence):
    element_type = typing_to_runner_api(_get_args(type_)[0])
    return schema_pb2.FieldType(
        array_type=schema_pb2.ArrayType(element_type=element_type))

  elif _safe_issubclass(type_, Mapping):
    key_type, value_type = map(typing_to_runner_api, _get_args(type_))
    return schema_pb2.FieldType(
        map_type=schema_pb2.MapType(key_type=key_type, value_type=value_type))

  try:
    logical_type = LogicalType.from_typing(type_)
  except ValueError:
    # Unknown type, just treat it like Any
    return schema_pb2.FieldType(
        logical_type=schema_pb2.LogicalType(urn=PYTHON_ANY_URN))
  else:
    # TODO(bhulette): Add support for logical types that require arguments
    return schema_pb2.FieldType(
        logical_type=schema_pb2.LogicalType(
            urn=logical_type.urn(),
            representation=typing_to_runner_api(
                logical_type.representation_type())))
Example #2
0
 def test_python_callable_maps_to_logical_type(self):
     # Checks both directions: PythonCallableWithSource converts to the
     # python_callable logical type (represented as a string), and that
     # proto converts back to PythonCallableWithSource.
     from apache_beam.utils.python_callable import PythonCallableWithSource

     def python_callable_field_type():
         # A fresh proto is built on every call so the two assertions
         # below never share a message instance.
         callable_logical_type = schema_pb2.LogicalType(
             urn=common_urns.python_callable.urn,
             representation=typing_to_runner_api(str))
         return schema_pb2.FieldType(logical_type=callable_logical_type)

     self.assertEqual(
         python_callable_field_type(),
         typing_to_runner_api(PythonCallableWithSource))

     round_tripped = typing_from_runner_api(python_callable_field_type())
     self.assertEqual(round_tripped, PythonCallableWithSource)
Example #3
0
  def test_create_row_coder_from_schema(self):
    # Builds the "person" schema proto by hand, constructs a RowCoder from
    # it, and checks that each entry of self.PEOPLE is unchanged by an
    # encode/decode round trip.
    def atomic(atomic_type, **kwargs):
      return schema_pb2.FieldType(atomic_type=atomic_type, **kwargs)

    def field(name, field_type):
      return schema_pb2.Field(name=name, type=field_type)

    aliases_type = schema_pb2.FieldType(
        array_type=schema_pb2.ArrayType(
            element_type=atomic(schema_pb2.STRING)))

    custom_metadata_type = schema_pb2.FieldType(
        map_type=schema_pb2.MapType(
            key_type=atomic(schema_pb2.STRING),
            value_type=atomic(schema_pb2.INT64)))

    # The micros_instant logical type is represented as a row of two INT64s.
    micros_instant_schema = schema_pb2.Schema(
        id="micros_instant",
        fields=[
            field("seconds", atomic(schema_pb2.INT64)),
            field("micros", atomic(schema_pb2.INT64)),
        ])
    favorite_time_type = schema_pb2.FieldType(
        logical_type=schema_pb2.LogicalType(
            urn="beam:logical_type:micros_instant:v1",
            representation=schema_pb2.FieldType(
                row_type=schema_pb2.RowType(schema=micros_instant_schema))))

    person_schema = schema_pb2.Schema(
        id="person",
        fields=[
            field("name", atomic(schema_pb2.STRING)),
            field("age", atomic(schema_pb2.INT32)),
            field("address", atomic(schema_pb2.STRING, nullable=True)),
            field("aliases", aliases_type),
            field("knows_javascript", atomic(schema_pb2.BOOLEAN)),
            field("payload", atomic(schema_pb2.BYTES, nullable=True)),
            field("custom_metadata", custom_metadata_type),
            field("favorite_time", favorite_time_type),
        ])
    row_coder = RowCoder(person_schema)

    for person in self.PEOPLE:
      round_tripped = row_coder.decode(row_coder.encode(person))
      self.assertEqual(person, round_tripped)
Example #4
0
 def test_unknown_primitive_maps_to_any(self):
     # The test expects np.uint32 to be converted to the nullable
     # "pythonsdk_any" logical type.
     actual = typing_to_runner_api(np.uint32)

     any_logical_type = schema_pb2.LogicalType(
         urn="beam:logical:pythonsdk_any:v1")
     expected = schema_pb2.FieldType(
         logical_type=any_logical_type, nullable=True)

     self.assertEqual(actual, expected)