def to_runner_api_parameter(self, unused_context):
     """Build the URN and configuration payload describing this transform.

     The payload row has three fields: ``constructor`` (from
     ``self._constructor``), ``args`` (a nested row whose fields are named
     ``arg0``, ``arg1``, ... in positional order) and ``kwargs`` (a nested row
     keyed by keyword name). Field types are derived from the runtime values
     via ``instance_to_type`` and ``convert_to_typing_type``.

     Args:
       unused_context: Not used by this implementation.

     Returns:
       A ``(urn, payload)`` tuple: the
       ``PYTHON_FULLY_QUALIFIED_NAMED_TRANSFORM_URN`` constant and an
       ``ExternalConfigurationPayload`` holding the payload schema together
       with the row encoded by ``coders.RowCoder``.
     """
     def _typing_type_of(value):
         # Type hint used for a schema field, inferred from a concrete value.
         return convert_to_typing_type(instance_to_type(value))

     # Positional arguments get synthetic field names: arg0, arg1, ...
     positional = [(f'arg{ix}', arg) for ix, arg in enumerate(self._args)]

     args_schema = named_fields_to_schema(
         [(name, _typing_type_of(arg)) for name, arg in positional])
     kwargs_schema = named_fields_to_schema(
         [(name, _typing_type_of(arg)) for name, arg in self._kwargs.items()])
     payload_schema = named_fields_to_schema({
         'constructor': str,
         'args': args_schema,
         'kwargs': kwargs_schema,
     })

     config_row = Row(
         constructor=self._constructor,
         args=Row(**dict(positional)),
         kwargs=Row(**self._kwargs))
     encoded_row = coders.RowCoder(payload_schema).encode(config_row)

     payload = external_transforms_pb2.ExternalConfigurationPayload(
         schema=payload_schema, payload=encoded_row)
     return (PYTHON_FULLY_QUALIFIED_NAMED_TRANSFORM_URN, payload)
Beispiel #2
0
 def _get_named_tuple_instance(self):
     """Return ``self._values`` as an instance of a schema-derived named tuple.

     Field types come from the annotations on ``self._transform.__init__``;
     annotated parameters that have no entry in ``self._values`` are left out
     of the schema.
     """
     annotations = self._transform.__init__.__annotations__
     typed_fields = []
     for name, hint in annotations.items():
         if name in self._values:
             typed_fields.append((name, convert_to_typing_type(hint)))

     row_class = named_tuple_from_schema(named_fields_to_schema(typed_fields))
     return row_class(**self._values)
Beispiel #3
0
    def _get_schema_proto_and_payload(self, *args, **kwargs):
        named_fields = []
        fields_to_values = OrderedDict()
        next_field_id = 0
        for value in args:
            if value is None:
                raise ValueError(
                    'Received value None. None values are currently not supported'
                )
            named_fields.append(
                ((JavaClassLookupPayloadBuilder.IGNORED_ARG_FORMAT %
                  next_field_id),
                 convert_to_typing_type(instance_to_type(value))))
            fields_to_values[(
                JavaClassLookupPayloadBuilder.IGNORED_ARG_FORMAT %
                next_field_id)] = value
            next_field_id += 1
        for key, value in kwargs.items():
            if not key:
                raise ValueError('Parameter name cannot be empty')
            if value is None:
                raise ValueError(
                    'Received value None for key %s. None values are currently not '
                    'supported' % key)
            named_fields.append(
                (key, convert_to_typing_type(instance_to_type(value))))
            fields_to_values[key] = value

        schema_proto = named_fields_to_schema(named_fields)
        row = named_tuple_from_schema(schema_proto)(**fields_to_values)
        schema = named_tuple_to_schema(type(row))

        payload = RowCoder(schema).encode(row)
        return (schema_proto, payload)
    def _get_named_tuple_instance(self):
        """Return the non-``None`` entries of ``self._values`` as a named tuple.

        Field types are inferred from the runtime values. Entries whose value
        is ``None`` are dropped because no type can be inferred for them.
        """
        # In python 2 named_fields_to_schema will not accept str because its
        # ambiguous. This converts str hints to ByteString recursively so its
        # clear we intend to use BYTES.
        # TODO(BEAM-7372): Remove coercion to ByteString
        def _str_hint_to_bytes(hint):
            if hint == str:
                return ByteString
            if hasattr(hint, '__args__') and hasattr(hint, '__origin__'):
                # Build a fresh parameterized type instead of mutating `hint`.
                return hint.__origin__[tuple(
                    map(_str_hint_to_bytes, hint.__args__))]
            return hint

        # On Python 3 the hints are used unchanged.
        skip_coercion = sys.version_info[0] >= 3

        present = {}
        for name, value in self._values.items():
            if value is not None:
                present[name] = value

        typed_fields = []
        for name, value in present.items():
            hint = convert_to_typing_type(instance_to_type(value))
            if not skip_coercion:
                hint = _str_hint_to_bytes(hint)
            typed_fields.append((name, hint))

        row_class = named_tuple_from_schema(
            named_fields_to_schema(typed_fields))
        return row_class(**present)
Beispiel #5
0
    def _get_named_tuple_instance(self):
        """Return the non-``None`` entries of ``self._values`` as a named tuple.

        Field types are inferred from the runtime values. When ``str`` is not
        ``unicode``, ``str`` hints (including those nested inside
        parameterized hints) are replaced by ``ByteString`` before the schema
        is built.

        Returns:
          An instance of the named tuple type generated from the inferred
          schema, populated with the non-``None`` values.
        """
        # omit fields with value=None since we can't infer their type
        values = {
            key: value
            for key, value in self._values.items() if value is not None
        }

        # TODO(BEAM-7372): Remove coercion to ByteString
        def coerce_str_to_bytes(typ):
            if typ == str:
                return ByteString

            args = getattr(typ, '__args__', None)
            origin = getattr(typ, '__origin__', None)
            if args and origin is not None:
                # Create a new type rather than modifying the existing one:
                # assigning to `typ.__args__` would alter the hint object
                # itself, which may be shared with other users of that type.
                return origin[tuple(map(coerce_str_to_bytes, args))]

            return typ

        # NOTE(review): `unicode` is not defined in this block; presumably it
        # comes from a module-level py2/py3 compatibility import - confirm.
        if str == unicode:
            coerce_str_to_bytes = lambda x: x

        schema = named_fields_to_schema([
            (key,
             coerce_str_to_bytes(
                 convert_to_typing_type(instance_to_type(value))))
            for key, value in values.items()
        ])
        return named_tuple_from_schema(schema)(**values)
Beispiel #6
0
 def _get_named_tuple_instance(self):
     """Return the dataclass ``self._transform`` as a schema-derived named tuple.

     The schema has one field per dataclass field, typed from the field's
     declared type; the values come from ``dataclasses.asdict``.
     """
     import dataclasses

     typed_fields = []
     for field in dataclasses.fields(self._transform):
         typed_fields.append((field.name, convert_to_typing_type(field.type)))

     row_class = named_tuple_from_schema(named_fields_to_schema(typed_fields))
     field_values = dataclasses.asdict(self._transform)
     return row_class(**field_values)
Beispiel #7
0
 def from_type_hint(type_hint, registry):
     """Return a coder for ``type_hint``.

     A ``RowTypeConstraint`` is converted to a schema from its ``_fields``;
     if that conversion raises ``ValueError`` the coder registered for
     ``object`` in ``typecoders.registry`` is returned instead. Any other
     hint is converted with ``named_tuple_to_schema``.

     Args:
       type_hint: The type hint to build a coder for.
       registry: Not used by this implementation.

     Returns:
       A ``RowCoder`` for the derived schema, or the fallback coder described
       above.
     """
     if not isinstance(type_hint, row_type.RowTypeConstraint):
         return RowCoder(named_tuple_to_schema(type_hint))

     try:
         row_schema = named_fields_to_schema(type_hint._fields)
     except ValueError:
         # TODO(BEAM-10570): Consider a pythonsdk logical type.
         return typecoders.registry.get_coder(object)
     return RowCoder(row_schema)
Beispiel #8
0
    def _get_named_tuple_instance(self):
        """Return the non-``None`` entries of ``self._values`` as a named tuple.

        Field types are inferred from the runtime values, so entries whose
        value is ``None`` are dropped (no type can be inferred for them).
        """
        present = {}
        typed_fields = []
        for name, value in self._values.items():
            if value is None:
                continue
            present[name] = value
            typed_fields.append(
                (name, convert_to_typing_type(instance_to_type(value))))

        row_class = named_tuple_from_schema(
            named_fields_to_schema(typed_fields))
        return row_class(**present)
def element_type_from_dataframe(proxy, include_indexes=False):
    # type: (pd.DataFrame, bool) -> type
    """Generate a named-tuple element type from a proxy pandas DataFrame.

    The resulting type has one field per column of ``proxy``, typed from the
    column dtype. With ``include_indexes=True`` the index levels are added as
    leading fields, in level order, ahead of the columns.

    Args:
      proxy: The DataFrame whose columns (and optionally index levels)
        describe the fields.
      include_indexes: Whether to include the index levels as fields.

    Returns:
      The type produced by ``named_tuple_from_schema`` for the derived schema.

    Raises:
      ValueError: With ``include_indexes=True``, if an index level is
        unnamed, shares its name with a later index level, or shares its name
        with a column.
    """
    fields = []
    if include_indexes:
        index_names = list(proxy.index.names)
        for level, index_name in enumerate(index_names):
            if index_name is None:
                raise ValueError(
                    "Encountered an unnamed index. Cannot convert to a "
                    "schema-aware PCollection with include_indexes=True. "
                    "Please name all indexes or consider not including "
                    "indexes.")
            # Only later levels need checking: a clash with an earlier level
            # would already have raised when that level was visited.
            if index_name in index_names[level + 1:]:
                raise ValueError(
                    "Encountered multiple indexes with the name '%s'. "
                    "Cannot convert to a schema-aware PCollection with "
                    "include_indexes=True. Please ensure all indexes have "
                    "unique names or consider not including indexes." %
                    index_name)
            if index_name in proxy.columns:
                raise ValueError(
                    "Encountered an index that has the same name as one "
                    "of the columns, '%s'. Cannot convert to a "
                    "schema-aware PCollection with include_indexes=True. "
                    "Please ensure all indexes have unique names or "
                    "consider not including indexes." % index_name)
            level_dtype = proxy.index.get_level_values(level).dtype
            fields.append((index_name, level_dtype))

    fields.extend(zip(proxy.columns, proxy.dtypes))

    schema = named_fields_to_schema(
        [(name, _dtype_to_fieldtype(dtype)) for name, dtype in fields])
    return named_tuple_from_schema(schema)
Beispiel #10
0
 def from_type_hint(type_hint, registry):
   """Return a ``RowCoder`` for ``type_hint``.

   A ``RowTypeConstraint`` is converted to a schema from its ``_fields``;
   any other hint is converted with ``named_tuple_to_schema``.

   Args:
     type_hint: The type hint to build a coder for.
     registry: Not used by this implementation.
   """
   is_row_constraint = isinstance(type_hint, row_type.RowTypeConstraint)
   row_schema = (
       named_fields_to_schema(type_hint._fields)
       if is_row_constraint else named_tuple_to_schema(type_hint))
   return RowCoder(row_schema)
Beispiel #11
0
def convert_to_typing_type(type_):
    """Convert ``type_`` to a typing-style type.

    A ``RowTypeConstraint`` becomes the named tuple type generated from the
    schema of its ``_fields``; every other input is delegated to
    ``native_type_compatibility.convert_to_typing_type``.
    """
    if not isinstance(type_, row_type.RowTypeConstraint):
        return native_type_compatibility.convert_to_typing_type(type_)

    row_schema = named_fields_to_schema(type_._fields)
    return named_tuple_from_schema(row_schema)