def to_runner_api_parameter(self, unused_context):
    """Return the URN and configuration payload describing this transform.

    The payload schema has three fields: ``constructor`` (a string), and
    ``args`` / ``kwargs`` (nested rows). Positional arguments are stored
    under the generated names ``arg0``, ``arg1``, ...; keyword arguments keep
    their own names. Field types are inferred from the stored values.

    Args:
        unused_context: Not used by this method.

    Returns:
        A ``(urn, payload)`` tuple: ``PYTHON_FULLY_QUALIFIED_NAMED_TRANSFORM_URN``
        and an ``ExternalConfigurationPayload`` carrying the schema together
        with the ``RowCoder``-encoded row of values.
    """
    def typing_type_of(value):
        # Infer the value's type hint, then convert it to a typing type.
        return convert_to_typing_type(instance_to_type(value))

    # Positional arguments get synthetic field names based on their index.
    positional = {f'arg{ix}': arg for (ix, arg) in enumerate(self._args)}

    args_schema = named_fields_to_schema(
        [(name, typing_type_of(arg)) for (name, arg) in positional.items()])
    kwargs_schema = named_fields_to_schema(
        [(key, typing_type_of(value))
         for (key, value) in self._kwargs.items()])
    payload_schema = named_fields_to_schema({
        'constructor': str,
        'args': args_schema,
        'kwargs': kwargs_schema,
    })

    config_row = Row(
        constructor=self._constructor,
        args=Row(**positional),
        kwargs=Row(**self._kwargs))
    encoded_row = coders.RowCoder(payload_schema).encode(config_row)

    return (
        PYTHON_FULLY_QUALIFIED_NAMED_TRANSFORM_URN,
        external_transforms_pb2.ExternalConfigurationPayload(
            schema=payload_schema, payload=encoded_row))
def _get_schema_proto_and_payload(self, *args, **kwargs):
    """Build a schema proto and an encoded row payload from the arguments.

    Positional values are stored under names generated from
    ``JavaClassLookupPayloadBuilder.IGNORED_ARG_FORMAT`` and their position;
    keyword values are stored under their own names. Field types are
    inferred from the values themselves.

    Args:
        *args: Positional values; none of them may be ``None``.
        **kwargs: Named values; names must be non-empty and values must not
            be ``None``.

    Returns:
        A ``(schema_proto, payload)`` tuple, where ``payload`` is the row of
        values encoded with a ``RowCoder``.

    Raises:
        ValueError: If any value is ``None`` or a keyword name is empty.
    """
    typed_fields = []
    values_by_field = OrderedDict()

    for position, value in enumerate(args):
        if value is None:
            raise ValueError(
                'Received value None. None values are currently not supported')
        field_name = (
            JavaClassLookupPayloadBuilder.IGNORED_ARG_FORMAT % position)
        typed_fields.append(
            (field_name, convert_to_typing_type(instance_to_type(value))))
        values_by_field[field_name] = value

    for key, value in kwargs.items():
        if not key:
            raise ValueError('Parameter name cannot be empty')
        if value is None:
            raise ValueError(
                'Received value None for key %s. None values are currently not '
                'supported' % key)
        typed_fields.append(
            (key, convert_to_typing_type(instance_to_type(value))))
        values_by_field[key] = value

    schema_proto = named_fields_to_schema(typed_fields)
    row = named_tuple_from_schema(schema_proto)(**values_by_field)
    # The coder is built from the schema of the generated named-tuple type.
    row_schema = named_tuple_to_schema(type(row))
    return (schema_proto, RowCoder(row_schema).encode(row))
def _get_named_tuple_instance(self):
    """Return a named-tuple instance holding the non-``None`` values.

    A schema is derived from the inferred type of each remaining value, a
    named-tuple type is generated from that schema, and it is instantiated
    with the values.
    """
    # Fields whose value is None are dropped: their type cannot be inferred.
    present = {
        name: val
        for name, val in self._values.items() if val is not None
    }

    # On Python 2, named_fields_to_schema rejects `str` because it is
    # ambiguous, so `str` hints are rewritten (recursively) to ByteString to
    # make the BYTES intent explicit. On Python 3 no rewriting is done.
    # TODO(BEAM-7372): Remove coercion to ByteString
    if sys.version_info[0] >= 3:

        def coerce_str_to_bytes(typ):
            return typ
    else:

        def coerce_str_to_bytes(typ):
            if typ == str:
                return ByteString
            if hasattr(typ, '__args__') and hasattr(typ, '__origin__'):
                # Build a new parameterized type; leave the original intact.
                coerced_args = tuple(map(coerce_str_to_bytes, typ.__args__))
                return typ.__origin__[coerced_args]
            return typ

    field_types = [
        (name,
         coerce_str_to_bytes(convert_to_typing_type(instance_to_type(val))))
        for name, val in present.items()
    ]
    row_type = named_tuple_from_schema(named_fields_to_schema(field_types))
    return row_type(**present)
def testInstanceToType(self):
    """Check instance_to_type against a table of (expected type, instance)."""
    class MyClass(object):
        def method(self):
            pass

    # Each entry pairs the expected inferred type with the instance to probe.
    expectations = [
        # Dicts
        (typehints.Dict[str, int], {'a': 1}),
        (typehints.Dict[str, typehints.Union[str, int]], {'a': 1, 'b': 'c'}),
        (typehints.Dict[typehints.Any, typehints.Any], {}),
        # Sets
        (typehints.Set[str], {'a'}),
        (typehints.Set[typehints.Union[str, float]], {'a', 0.4}),
        (typehints.Set[typehints.Any], set()),
        # Tuples
        (typehints.Tuple[int], (1, )),
        (typehints.Tuple[int, int, str], (1, 2, '3')),
        (typehints.Tuple[()], ()),
        # Lists
        (typehints.List[int], [1]),
        (typehints.List[typehints.Union[int, str]], [1, 'a']),
        (typehints.List[typehints.Any], []),
        # None, classes, instances and methods
        (type(None), None),
        (type(MyClass), MyClass),
        (MyClass, MyClass()),
        (type(MyClass.method), MyClass.method),
        (types.MethodType, MyClass().method),
    ]

    for expected, value in expectations:
        inferred = trivial_inference.instance_to_type(value)
        self.assertEqual(expected, inferred, msg=value)
def _get_named_tuple_instance(self):
    """Return a named-tuple instance holding the non-``None`` values.

    A schema is derived from the inferred type of each remaining value
    (with ``str`` hints coerced to ``ByteString`` unless ``str`` is
    ``unicode``), a named-tuple type is generated from that schema, and it
    is instantiated with the values.
    """
    # omit fields with value=None since we can't infer their type
    values = {
        key: value
        for key, value in self._values.items() if value is not None
    }

    # TODO(BEAM-7372): Remove coercion to ByteString
    def coerce_str_to_bytes(typ):
        if typ == str:
            return ByteString
        elif hasattr(typ, '__args__') and hasattr(typ, '__origin__'):
            # Create a new parameterized type rather than assigning to
            # typ.__args__: the hint object may be shared with other users
            # of the same type, and mutating it in place would silently
            # change their hints too.
            typ = typ.__origin__[tuple(
                map(coerce_str_to_bytes, typ.__args__))]
        return typ

    # NOTE(review): `unicode` is presumably provided by a compatibility
    # import elsewhere in the file - confirm. When str is already unicode,
    # no coercion is applied.
    if str == unicode:
        coerce_str_to_bytes = lambda x: x

    schema = named_fields_to_schema([
        (key,
         coerce_str_to_bytes(convert_to_typing_type(instance_to_type(value))))
        for key, value in values.items()
    ])
    return named_tuple_from_schema(schema)(**values)
def expand(self, pcolls):
    """Apply ``_CoGBKImpl`` to the inputs and restore the caller's tags.

    Args:
        pcolls: Either a dict mapping tags to inputs, or a sequence of
            inputs (in which case the tags are the positions). Each input's
            ``element_type`` is passed to ``key_value_types`` to obtain its
            key and value types.

    Returns:
        The result of ``_CoGBKImpl``, followed by a ``RestoreTags`` step
        whenever the tags had to be replaced by stringified indices.
    """
    if isinstance(pcolls, dict):
        tags = list(pcolls.keys())
        tags_are_short_strings = all(
            isinstance(tag, str) and len(tag) < 10 for tag in tags)
        if tags_are_short_strings:
            # Small, string tags. Pass them as data.
            pcolls_dict = pcolls
            restore_tags = None
        else:
            # Replace tags by stringified indices; the real tags are carried
            # in the restore_tags closure.
            pcolls_dict = {
                str(ix): pcolls[tag]
                for (ix, tag) in enumerate(tags)
            }
            restore_tags = lambda vs: {
                tag: vs[str(ix)]
                for (ix, tag) in enumerate(tags)
            }
    else:
        # Tags are tuple indices.
        tags = [str(ix) for ix in range(len(pcolls))]
        pcolls_dict = dict(zip(tags, pcolls))
        restore_tags = lambda vs: tuple(vs[tag] for tag in tags)

    input_key_types = []
    input_value_types = []
    for pcoll in pcolls_dict.values():
        key_type, value_type = typehints.trivial_inference.key_value_types(
            pcoll.element_type)
        input_key_types.append(key_type)
        input_value_types.append(value_type)

    output_key_type = typehints.Union[tuple(input_key_types)]
    # TODO: Change List[t] to Iterable[t]
    iterable_input_value_types = tuple(
        typehints.List[t] for t in input_value_types)
    # With no inputs there are no value types; fall back to Any.
    output_value_type = typehints.Dict[
        str, typehints.Union[iterable_input_value_types or [typehints.Any]]]

    grouped_type = typehints.Tuple[output_key_type, output_value_type]
    cogbk = 'CoGroupByKeyImpl' >> _CoGBKImpl(
        pipeline=self.pipeline).with_output_types(grouped_type)
    result = pcolls_dict | cogbk

    if not restore_tags:
        return result

    if isinstance(pcolls, dict):
        dict_key_type = typehints.Union[tuple(
            trivial_inference.instance_to_type(tag) for tag in tags)]
        output_value_type = typehints.Dict[
            dict_key_type, typehints.Union[iterable_input_value_types]]
    else:
        output_value_type = typehints.Tuple[iterable_input_value_types]

    restored_type = typehints.Tuple[output_key_type, output_value_type]
    result |= 'RestoreTags' >> MapTuple(
        lambda k, vs: (k, restore_tags(vs))).with_output_types(restored_type)
    return result
def _get_named_tuple_instance(self):
    """Return a named-tuple instance holding the non-``None`` values.

    A schema is derived from the inferred type of each remaining value, a
    named-tuple type is generated from that schema, and it is instantiated
    with the values.
    """
    # Fields whose value is None are dropped: their type cannot be inferred.
    present = {
        name: val
        for name, val in self._values.items() if val is not None
    }
    field_types = [
        (name, convert_to_typing_type(instance_to_type(val)))
        for name, val in present.items()
    ]
    row_type = named_tuple_from_schema(named_fields_to_schema(field_types))
    return row_type(**present)
def test_monkey_patch_signature(f, args, kwargs):
    """Check that wrapping ``f`` with ``_wrap_task_call`` keeps its signature.

    Both the original and the wrapped callable must accept the type hints
    inferred from ``args``/``kwargs``, and, where ``inspect.signature`` can
    be computed, the two signatures must be equal.

    Args:
        f: The callable under test.
        args: Positional example values used to infer argument types.
        kwargs: Keyword example values used to infer argument types.
    """
    arg_types = [instance_to_type(v) for v in args]
    kwargs_types = {k: instance_to_type(v) for (k, v) in kwargs.items()}
    f_temp = _wrap_task_call(f)

    # The original is checked first, then the wrapped version.
    for candidate in (f, f_temp):
        try:
            getcallargs_forhints(candidate, *arg_types, **kwargs_types)
        except Exception:
            print(
                "Failed on {} with parameters {}, {}".format(
                    candidate, args, kwargs
                )
            )
            raise

    try:
        expected_signature = inspect.signature(f)
        test_signature = inspect.signature(f_temp)
    except (AttributeError, TypeError, ValueError):
        # inspect.signature does not exist on py2.7 and cannot introspect
        # every callable; in those cases there is nothing to compare.
        return

    # The assertion lives outside the try block so that a mismatch actually
    # fails the test instead of being swallowed as a generic Exception.
    assert (
        expected_signature == test_signature
    ), "Failed on {}, signature {} does not match {}".format(
        f, expected_signature, test_signature
    )
def infer_element_type(elements):
    """For internal use only; no backwards-compatibility guarantees.

    Infer a single Beam type that covers every element of a list.

    Args:
      elements (List[Any]): The elements whose types should be inferred.

    Returns:
      A Beam type encompassing all elements: the union of the types
      inferred for the individual elements.
    """
    inferred_types = list(map(trivial_inference.instance_to_type, elements))
    return typehints.Union[inferred_types]
def element_type(side_input):
    """Return the element type to use for ``side_input``.

    ``pvalue.AsSideInput`` instances report their own ``element_type``; for
    any other value the type is inferred with ``instance_to_type``.
    """
    if not isinstance(side_input, pvalue.AsSideInput):
        return instance_to_type(side_input)
    return side_input.element_type
def __init__(self, values):
    """Initialize the builder with a schema inferred from ``values``.

    Args:
        values: A mapping of field name to value; each field's type is
            inferred from its value with ``instance_to_type``.
    """
    inferred_schema = {
        name: instance_to_type(val)
        for name, val in values.items()
    }
    super(ImplicitSchemaPayloadBuilder, self).__init__(
        values, inferred_schema)
def element_type(side_input):
    """Return the element type to use for ``side_input``.

    A ``pvalue.AsSideInput`` supplies its own ``element_type``; anything
    else has its type inferred via ``instance_to_type``.
    """
    is_declared_side_input = isinstance(side_input, pvalue.AsSideInput)
    return (
        side_input.element_type
        if is_declared_side_input else instance_to_type(side_input))
def element_type(side_input):
    """Return the element type to use for ``side_input``.

    ``pvalue.PCollectionView`` instances report their own ``element_type``;
    for any other value the type is inferred with ``instance_to_type``.
    """
    if not isinstance(side_input, pvalue.PCollectionView):
        return instance_to_type(side_input)
    return side_input.element_type
def get_row_coder(row_instance):
    """Look up the registered coder for ``row_instance``'s inferred type.

    Args:
        row_instance: The value whose type is inferred and used for the
            coder-registry lookup.

    Returns:
        The coder found in ``typecoders.registry``; it is asserted to be a
        ``row_coder.RowCoder``.
    """
    row_type = trivial_inference.instance_to_type(row_instance)
    coder = typecoders.registry.get_coder(row_type)
    assert isinstance(coder, row_coder.RowCoder)
    return coder
def infer_output_type(self, unused_input_type):
    """Infer the output type from the stored values.

    Args:
        unused_input_type: Not used by this method.

    Returns:
        ``Any`` when ``self.value`` is empty (or otherwise falsy); otherwise
        the union of the types inferred for each item of ``self.value``.
    """
    if self.value:
        item_types = [
            trivial_inference.instance_to_type(v) for v in self.value
        ]
        return Union[item_types]
    return Any
def element_type(side_input):
    """Return the element type to use for ``side_input``.

    A ``pvalue.PCollectionView`` supplies its own ``element_type``; anything
    else has its type inferred via ``instance_to_type``.
    """
    is_view = isinstance(side_input, pvalue.PCollectionView)
    return (
        side_input.element_type
        if is_view else instance_to_type(side_input))