def to_runner_api_parameter(self, unused_context):
    """Return the URN and configuration payload describing this transform.

    The payload is a row with three fields: ``constructor``, ``args`` (a
    nested row whose fields are named ``arg0``, ``arg1``, ...) and
    ``kwargs`` (a nested row keyed by keyword name). Field types are
    inferred from the actual argument values.

    Args:
      unused_context: Ignored.

    Returns:
      A ``(urn, ExternalConfigurationPayload)`` tuple.
    """
    def schema_for(named_values):
        # Infer a schema from (field name, example value) pairs.
        return named_fields_to_schema([
            (name, convert_to_typing_type(instance_to_type(value)))
            for (name, value) in named_values
        ])

    # Positional arguments are given synthetic field names by position.
    positional = [
        (f'arg{ix}', value) for (ix, value) in enumerate(self._args)
    ]
    args_schema = schema_for(positional)
    kwargs_schema = schema_for(self._kwargs.items())

    payload_schema = named_fields_to_schema({
        'constructor': str,
        'args': args_schema,
        'kwargs': kwargs_schema,
    })
    payload_row = Row(
        constructor=self._constructor,
        args=Row(**dict(positional)),
        kwargs=Row(**self._kwargs))
    encoded_payload = coders.RowCoder(payload_schema).encode(payload_row)

    return (
        PYTHON_FULLY_QUALIFIED_NAMED_TRANSFORM_URN,
        external_transforms_pb2.ExternalConfigurationPayload(
            schema=payload_schema, payload=encoded_payload))
Example #2
0
    def _get_schema_proto_and_payload(self, *args, **kwargs):
        """Build a schema proto and an encoded row from the given arguments.

        Positional values get field names generated from
        ``JavaClassLookupPayloadBuilder.IGNORED_ARG_FORMAT`` and their
        position; keyword values keep their own names. Field types are
        inferred from the values.

        Returns:
          A ``(schema_proto, payload)`` tuple, where ``payload`` is the row
          encoded with ``RowCoder``.

        Raises:
          ValueError: If any value is None, or a keyword name is empty.
        """
        named_fields = []
        fields_to_values = OrderedDict()

        for position, value in enumerate(args):
            if value is None:
                raise ValueError(
                    'Received value None. None values are currently not supported'
                )
            field_name = (
                JavaClassLookupPayloadBuilder.IGNORED_ARG_FORMAT % position)
            field_type = convert_to_typing_type(instance_to_type(value))
            named_fields.append((field_name, field_type))
            fields_to_values[field_name] = value

        for key, value in kwargs.items():
            if not key:
                raise ValueError('Parameter name cannot be empty')
            if value is None:
                raise ValueError(
                    'Received value None for key %s. None values are currently not '
                    'supported' % key)
            field_type = convert_to_typing_type(instance_to_type(value))
            named_fields.append((key, field_type))
            fields_to_values[key] = value

        schema_proto = named_fields_to_schema(named_fields)
        row_factory = named_tuple_from_schema(schema_proto)
        row = row_factory(**fields_to_values)

        # The coder is built from the schema of the row's own type.
        row_schema = named_tuple_to_schema(type(row))
        return (schema_proto, RowCoder(row_schema).encode(row))
    def _get_named_tuple_instance(self):
        """Return a named-tuple instance holding the non-None values.

        The named-tuple type is generated from a schema whose field types
        are inferred from the values in ``self._values``.
        """
        # Fields set to None are dropped: no type can be inferred for them.
        present = {}
        for name, val in self._values.items():
            if val is not None:
                present[name] = val

        # On Python 2, named_fields_to_schema rejects str as ambiguous, so str
        # hints are rewritten to ByteString (recursively through parameterized
        # hints) to make the BYTES intent explicit.
        # TODO(BEAM-7372): Remove coercion to ByteString
        def str_hint_to_bytes(hint):
            if hint == str:
                return ByteString
            if hasattr(hint, '__args__') and hasattr(hint, '__origin__'):
                # Build a fresh hint; the incoming one is left unmodified.
                return hint.__origin__[tuple(
                    map(str_hint_to_bytes, hint.__args__))]
            return hint

        # No coercion is applied on Python 3.
        if sys.version_info[0] >= 3:
            coerce = lambda x: x
        else:
            coerce = str_hint_to_bytes

        field_hints = []
        for name, val in present.items():
            inferred = convert_to_typing_type(instance_to_type(val))
            field_hints.append((name, coerce(inferred)))

        schema = named_fields_to_schema(field_hints)
        return named_tuple_from_schema(schema)(**present)
Example #4
0
    def testInstanceToType(self):
        """Check instance_to_type against a table of (expected hint, instance)."""
        class MyClass(object):
            def method(self):
                pass

        dict_cases = [
            (typehints.Dict[str, int], {'a': 1}),
            (typehints.Dict[str, typehints.Union[str, int]],
             {'a': 1, 'b': 'c'}),
            (typehints.Dict[typehints.Any, typehints.Any], {}),
        ]
        set_cases = [
            (typehints.Set[str], {'a'}),
            (typehints.Set[typehints.Union[str, float]], {'a', 0.4}),
            (typehints.Set[typehints.Any], set()),
        ]
        tuple_cases = [
            (typehints.Tuple[int], (1, )),
            (typehints.Tuple[int, int, str], (1, 2, '3')),
            (typehints.Tuple[()], ()),
        ]
        list_cases = [
            (typehints.List[int], [1]),
            (typehints.List[typehints.Union[int, str]], [1, 'a']),
            (typehints.List[typehints.Any], []),
        ]
        # None, classes, instances and (un)bound methods.
        object_cases = [
            (type(None), None),
            (type(MyClass), MyClass),
            (MyClass, MyClass()),
            (type(MyClass.method), MyClass.method),
            (types.MethodType, MyClass().method),
        ]

        all_cases = (
            dict_cases + set_cases + tuple_cases + list_cases + object_cases)
        for expected_type, instance in all_cases:
            inferred = trivial_inference.instance_to_type(instance)
            self.assertEqual(expected_type, inferred, msg=instance)
Example #5
0
    def _get_named_tuple_instance(self):
        """Return a named-tuple instance holding the non-None values.

        The named-tuple type is generated from a schema whose field types
        are inferred from the values in ``self._values``. Unless ``str`` is
        ``unicode``, ``str`` hints are rewritten to ``ByteString`` first.
        """
        # omit fields with value=None since we can't infer their type
        values = {
            key: value
            for key, value in self._values.items() if value is not None
        }

        # TODO(BEAM-7372): Remove coercion to ByteString
        def coerce_str_to_bytes(typ):
            if typ == str:
                return ByteString

            args = getattr(typ, '__args__', None)
            origin = getattr(typ, '__origin__', None)
            if args and origin is not None:
                # Build a new parameterized type instead of assigning to
                # typ.__args__: the hint object passed in may be shared with
                # other code, so it must not be modified in place.
                return origin[tuple(map(coerce_str_to_bytes, args))]

            # Unparameterized hints (no args / no origin) are left untouched.
            return typ

        if str == unicode:
            coerce_str_to_bytes = lambda x: x

        schema = named_fields_to_schema([
            (key,
             coerce_str_to_bytes(
                 convert_to_typing_type(instance_to_type(value))))
            for key, value in values.items()
        ])
        return named_tuple_from_schema(schema)(**values)
Example #6
0
  def expand(self, pcolls):
    """Co-group the inputs and return results under the caller's own tags.

    ``pcolls`` is either a dict of tag -> PCollection or a sequence of
    PCollections. The inputs are re-keyed by string tags where necessary,
    passed through ``_CoGBKImpl``, and then (if the tags were rewritten)
    mapped back to the original dict keys or to a tuple.
    """
    input_is_dict = isinstance(pcolls, dict)

    if not input_is_dict:
      # Tags are tuple indices.
      tags = [str(ix) for ix in range(len(pcolls))]
      pcolls_dict = dict(zip(tags, pcolls))
      restore_tags = lambda vs: tuple(vs[tag] for tag in tags)
    else:
      tags = list(pcolls.keys())
      if all(isinstance(tag, str) and len(tag) < 10 for tag in tags):
        # Small, string tags. Pass them as data.
        pcolls_dict = pcolls
        restore_tags = None
      else:
        # Pass the tags in the restore_tags closure.
        pcolls_dict = {str(ix): pcolls[tag] for (ix, tag) in enumerate(tags)}
        restore_tags = lambda vs: {
            tag: vs[str(ix)]
            for (ix, tag) in enumerate(tags)
        }

    key_value_pairs = [
        typehints.trivial_inference.key_value_types(pcoll.element_type)
        for pcoll in pcolls_dict.values()
    ]
    input_key_types = [key_type for (key_type, _) in key_value_pairs]
    input_value_types = [value_type for (_, value_type) in key_value_pairs]

    output_key_type = typehints.Union[tuple(input_key_types)]
    # TODO: Change List[t] to Iterable[t]
    iterable_input_value_types = tuple(
        typehints.List[t] for t in input_value_types)

    # With no inputs, fall back to Any for the grouped values.
    grouped_value_type = typehints.Dict[
        str, typehints.Union[iterable_input_value_types or [typehints.Any]]]
    grouped = (
        pcolls_dict
        | 'CoGroupByKeyImpl' >>
        _CoGBKImpl(pipeline=self.pipeline).with_output_types(
            typehints.Tuple[output_key_type, grouped_value_type]))

    if not restore_tags:
      return grouped

    if input_is_dict:
      dict_key_type = typehints.Union[tuple(
          trivial_inference.instance_to_type(tag) for tag in tags)]
      restored_value_type = typehints.Dict[
          dict_key_type, typehints.Union[iterable_input_value_types]]
    else:
      restored_value_type = typehints.Tuple[iterable_input_value_types]

    grouped |= 'RestoreTags' >> MapTuple(
        lambda k, vs: (k, restore_tags(vs))).with_output_types(
            typehints.Tuple[output_key_type, restored_value_type])
    return grouped
Example #7
0
    def _get_named_tuple_instance(self):
        """Return a named-tuple instance holding the non-None values.

        The named-tuple type is generated from a schema whose field types
        are inferred from the values in ``self._values``.
        """
        # Fields set to None are dropped: no type can be inferred for them.
        present = {}
        for name, val in self._values.items():
            if val is not None:
                present[name] = val

        field_hints = []
        for name, val in present.items():
            field_hints.append(
                (name, convert_to_typing_type(instance_to_type(val))))

        schema = named_fields_to_schema(field_hints)
        row_type = named_tuple_from_schema(schema)
        return row_type(**present)
Example #8
0
def test_monkey_patch_signature(f, args, kwargs):
    """Check that wrapping ``f`` with ``_wrap_task_call`` keeps its call shape.

    Both the original and the wrapped callable must accept type hints
    inferred from ``args``/``kwargs``, and, where signatures can be
    obtained, the two signatures must be equal.

    Args:
      f: The callable under test.
      args: Example positional arguments for ``f``.
      kwargs: Example keyword arguments for ``f``.

    Raises:
      AssertionError: If the wrapped signature differs from the original.
      Exception: Whatever ``getcallargs_forhints`` raises, re-raised after
        printing which callable failed.
    """
    arg_types = [instance_to_type(v) for v in args]
    kwargs_types = {k: instance_to_type(v) for (k, v) in kwargs.items()}
    f_temp = _wrap_task_call(f)

    for candidate in (f, f_temp):
        try:
            getcallargs_forhints(candidate, *arg_types, **kwargs_types)
        except Exception:
            print("Failed on {} with parameters {}, {}".format(
                candidate, args, kwargs))
            raise

    try:
        expected_signature = inspect.signature(f)
        test_signature = inspect.signature(f_temp)
    except (AttributeError, TypeError, ValueError):
        # inspect.signature is missing on py2.7 (AttributeError) and may
        # reject callables it cannot introspect (TypeError / ValueError);
        # in those cases there is nothing to compare.
        return

    # Kept outside the try block: the original wrapped this assert in
    # `except Exception: pass`, which swallowed the AssertionError and made
    # the comparison unable to fail.
    assert (expected_signature == test_signature
            ), "Failed on {}, signature {} does not match {}".format(
                f, expected_signature, test_signature)
Example #9
0
def infer_element_type(elements):
    """For internal use only; no backwards-compatibility guarantees.

    Infer a single Beam type covering every element of a list.

    Args:
      elements (List[Any]): The elements whose types should be inferred.

    Returns:
      The ``typehints.Union`` of the types inferred for each element.
    """
    per_element_types = list(
        map(trivial_inference.instance_to_type, elements))
    return typehints.Union[per_element_types]
Example #10
0
 def element_type(side_input):
     """Return the element type of a side input, or infer it from a value."""
     if not isinstance(side_input, pvalue.AsSideInput):
         # Plain value: infer its type from the instance itself.
         return instance_to_type(side_input)
     return side_input.element_type
Example #11
0
 def __init__(self, values):
     """Initialize with a schema inferred from the given values.

     Args:
       values: Mapping of field name to value; each field's type is
         inferred from its value.
     """
     inferred_schema = {}
     for name, val in values.items():
         inferred_schema[name] = instance_to_type(val)
     super(ImplicitSchemaPayloadBuilder, self).__init__(
         values, inferred_schema)
Example #12
0
 def element_type(side_input):
   """Return the element type of a side input, or infer it from a value."""
   is_side_input = isinstance(side_input, pvalue.AsSideInput)
   return (
       side_input.element_type
       if is_side_input else instance_to_type(side_input))
Example #13
0
 def element_type(side_input):
   """Return the element type of a view, or infer it from a plain value."""
   if not isinstance(side_input, pvalue.PCollectionView):
     # Plain value: infer its type from the instance itself.
     return instance_to_type(side_input)
   return side_input.element_type
Example #14
0
def get_row_coder(row_instance):
    """Return the registered coder for the type of ``row_instance``.

    The coder is looked up in ``typecoders.registry`` by the type inferred
    from the instance, and is asserted to be a ``row_coder.RowCoder``.
    """
    lookup = typecoders.registry.get_coder
    found = lookup(trivial_inference.instance_to_type(row_instance))
    assert isinstance(found, row_coder.RowCoder)
    return found
Example #15
0
 def infer_output_type(self, unused_input_type):
   """Infer the output type from the values held in ``self.value``.

   Returns ``Any`` when ``self.value`` is empty or otherwise falsy;
   otherwise the ``Union`` of the types inferred for each value.
   """
   if self.value:
     value_types = [trivial_inference.instance_to_type(v) for v in self.value]
     return Union[value_types]
   return Any
Example #16
0
 def element_type(side_input):
     """Return the element type of a view, or infer it from a plain value."""
     is_view = isinstance(side_input, pvalue.PCollectionView)
     return (
         side_input.element_type
         if is_view else instance_to_type(side_input))