def test_deterministic_coder(self):
    """Check what DeterministicFastPrimitivesCoder accepts and rejects.

    Every entry of ``self.test_values_deterministic`` is passed through
    ``self.check_coder`` (all at once, one per 1-tuple, and as one wide
    tuple). Dicts and unfrozen dataclass instances must raise
    ``TypeError`` when handed to the deterministic coder.
    """
    base_coder = coders.FastPrimitivesCoder()
    deterministic_coder = coders.DeterministicFastPrimitivesCoder(
        base_coder, 'step')

    # Known-good values: together, individually wrapped, then as a single
    # tuple with one deterministic component coder per position.
    self.check_coder(deterministic_coder, *self.test_values_deterministic)
    for value in self.test_values_deterministic:
        one_tuple_coder = coders.TupleCoder((deterministic_coder, ))
        self.check_coder(one_tuple_coder, (value, ))
    width = len(self.test_values_deterministic)
    wide_tuple_coder = coders.TupleCoder((deterministic_coder, ) * width)
    self.check_coder(wide_tuple_coder, tuple(self.test_values_deterministic))

    # A dict is rejected whether it is the value itself or nested in a list.
    for rejected in ({}, [1, {}]):
        with self.assertRaises(TypeError):
            self.check_coder(deterministic_coder, rejected)

    # The second tuple component uses the plain coder, so dicts placed
    # there do not trip the deterministic check.
    mixed_coder = coders.TupleCoder((deterministic_coder, base_coder))
    self.check_coder(mixed_coder, (1, {}), ('a', [{}]))

    message = test_message.MessageA(field1='value')
    self.check_coder(deterministic_coder, message)

    # Everything below needs the dataclass fixtures.
    if dataclasses is None:
        return

    self.check_coder(
        deterministic_coder, [FrozenDataClass(1, 2), MyNamedTuple(1, 2)])

    # An UnFrozenDataClass instance must be rejected at the top level and
    # when nested inside an otherwise accepted container. The values are
    # built inside the ``with`` so construction stays under the assertion.
    with self.assertRaises(TypeError):
        unfrozen = UnFrozenDataClass(1, 2)
        self.check_coder(deterministic_coder, unfrozen)
    with self.assertRaises(TypeError):
        frozen_holding_unfrozen = FrozenDataClass(UnFrozenDataClass(1, 2), 3)
        self.check_coder(deterministic_coder, frozen_holding_unfrozen)
    with self.assertRaises(TypeError):
        tuple_holding_unfrozen = MyNamedTuple(UnFrozenDataClass(1, 2), 3)
        self.check_coder(deterministic_coder, tuple_holding_unfrozen)
def test_deterministic_coder(self):
    """Check what DeterministicFastPrimitivesCoder accepts and rejects.

    Every entry of ``self.test_values_deterministic`` is passed through
    ``self.check_coder`` (all at once, one per 1-tuple, and as one wide
    tuple). Further checks cover dicts, a message object, named tuples,
    dataclasses (when available), enums and the ``DefinesGetState`` /
    ``DefinesGetAndSetState`` fixtures; the rejected cases must raise
    ``TypeError``.
    """
    base_coder = coders.FastPrimitivesCoder()
    deterministic_coder = coders.DeterministicFastPrimitivesCoder(
        base_coder, 'step')

    # Known-good values: together, individually wrapped, then as a single
    # tuple with one deterministic component coder per position.
    self.check_coder(deterministic_coder, *self.test_values_deterministic)
    for value in self.test_values_deterministic:
        one_tuple_coder = coders.TupleCoder((deterministic_coder, ))
        self.check_coder(one_tuple_coder, (value, ))
    width = len(self.test_values_deterministic)
    wide_tuple_coder = coders.TupleCoder((deterministic_coder, ) * width)
    self.check_coder(wide_tuple_coder, tuple(self.test_values_deterministic))

    # The empty dict and the int-keyed dict are accepted; the dict mixing
    # int and str keys must raise, bare or nested in a list.
    for accepted_dict in ({}, {2: 'x', 1: 'y'}):
        self.check_coder(deterministic_coder, accepted_dict)
    with self.assertRaises(TypeError):
        self.check_coder(deterministic_coder, {1: 'x', 'y': 2})
    self.check_coder(deterministic_coder, [1, {}])
    with self.assertRaises(TypeError):
        self.check_coder(deterministic_coder, [1, {1: 'x', 'y': 2}])

    mixed_coder = coders.TupleCoder((deterministic_coder, base_coder))
    self.check_coder(mixed_coder, (1, {}), ('a', [{}]))

    message = test_message.MessageA(field1='value')
    self.check_coder(deterministic_coder, message)

    named_tuples = [MyNamedTuple(1, 2), MyTypedNamedTuple(1, 'a')]
    self.check_coder(deterministic_coder, named_tuples)

    if dataclasses is not None:
        self.check_coder(deterministic_coder, FrozenDataClass(1, 2))

        # An UnFrozenDataClass instance must be rejected at the top level
        # and when nested. Values are built inside the ``with`` so that
        # construction stays under the assertion.
        with self.assertRaises(TypeError):
            unfrozen = UnFrozenDataClass(1, 2)
            self.check_coder(deterministic_coder, unfrozen)
        with self.assertRaises(TypeError):
            frozen_holding_unfrozen = FrozenDataClass(
                UnFrozenDataClass(1, 2), 3)
            self.check_coder(deterministic_coder, frozen_holding_unfrozen)
        with self.assertRaises(TypeError):
            tuple_holding_unfrozen = MyNamedTuple(UnFrozenDataClass(1, 2), 3)
            self.check_coder(deterministic_coder, tuple_holding_unfrozen)

    # All members of each enum fixture, checked as a list per enum type.
    for enum_type in (MyEnum, MyIntEnum, MyIntFlag, MyFlag):
        self.check_coder(deterministic_coder, list(enum_type))

    stateful_values = [
        DefinesGetAndSetState(1),
        DefinesGetAndSetState((1, 2, 3)),
    ]
    self.check_coder(deterministic_coder, stateful_values)

    # DefinesGetState is rejected outright; DefinesGetAndSetState is
    # rejected when constructed with the mixed-key dict.
    with self.assertRaises(TypeError):
        self.check_coder(deterministic_coder, DefinesGetState(1))
    with self.assertRaises(TypeError):
        holds_mixed_dict = DefinesGetAndSetState({1: 'x', 'y': 2})
        self.check_coder(deterministic_coder, holds_mixed_dict)
def test_deterministic_coder(self):
    """Check DeterministicFastPrimitivesCoder on a few primitive values.

    A string, an int, a float and a tuple go through ``self.check_coder``;
    dicts must raise ``TypeError`` under the deterministic coder.
    """
    base_coder = coders.FastPrimitivesCoder()
    deterministic_coder = coders.DeterministicFastPrimitivesCoder(
        base_coder, 'step')

    accepted = ('a', 1, 1.5, (1, 2, 3))
    self.check_coder(deterministic_coder, *accepted)

    # A dict is rejected whether it is the value itself or nested in a list.
    for rejected in ({}, [1, {}]):
        with self.assertRaises(TypeError):
            self.check_coder(deterministic_coder, rejected)

    # The second tuple component uses the plain coder, so dicts placed
    # there do not trip the deterministic check.
    mixed_coder = coders.TupleCoder((deterministic_coder, base_coder))
    self.check_coder(mixed_coder, (1, {}), ('a', [{}]))
def test_deterministic_coder(self):
    """Check DeterministicFastPrimitivesCoder on the shared test values.

    Every entry of ``self.test_values_deterministic`` is passed through
    ``self.check_coder`` (all at once, one per 1-tuple, and as one wide
    tuple); dicts must raise ``TypeError`` under the deterministic coder.
    """
    base_coder = coders.FastPrimitivesCoder()
    deterministic_coder = coders.DeterministicFastPrimitivesCoder(
        base_coder, 'step')

    # Known-good values: together, individually wrapped, then as a single
    # tuple with one deterministic component coder per position.
    self.check_coder(deterministic_coder, *self.test_values_deterministic)
    for value in self.test_values_deterministic:
        one_tuple_coder = coders.TupleCoder((deterministic_coder, ))
        self.check_coder(one_tuple_coder, (value, ))
    width = len(self.test_values_deterministic)
    wide_tuple_coder = coders.TupleCoder((deterministic_coder, ) * width)
    self.check_coder(wide_tuple_coder, tuple(self.test_values_deterministic))

    # A dict is rejected whether it is the value itself or nested in a list.
    for rejected in ({}, [1, {}]):
        with self.assertRaises(TypeError):
            self.check_coder(deterministic_coder, rejected)

    # The second tuple component uses the plain coder, so dicts placed
    # there do not trip the deterministic check.
    mixed_coder = coders.TupleCoder((deterministic_coder, base_coder))
    self.check_coder(mixed_coder, (1, {}), ('a', [{}]))
def verify_deterministic(self, key_coder, op_name, silent=True):
    """Return a coder for keys that is deterministic, or fail.

    Args:
      key_coder: coder to check; must provide ``is_deterministic()``.
      op_name: operation name, used in the error text and handed to the
        ``DeterministicFastPrimitivesCoder`` wrapper.
      silent: when false, the problem is also reported through
        ``logging.warning`` before the wrapper is returned.

    Returns:
      ``key_coder`` itself when it reports being deterministic; otherwise,
      for a ``PickleCoder`` or ``FastPrimitivesCoder``, a
      ``DeterministicFastPrimitivesCoder`` built from it.

    Raises:
      ValueError: if ``key_coder`` is not deterministic and is not one of
        the two coder types that get wrapped.
    """
    if key_coder.is_deterministic():
        return key_coder

    message = (
        'The key coder "%s" for %s '
        'is not deterministic. This may result in incorrect '
        'pipeline output. This can be fixed by adding a type '
        'hint to the operation preceding the GroupByKey step, '
        'and for custom key classes, by writing a '
        'deterministic custom Coder. Please see the '
        'documentation for more details.' % (key_coder, op_name))

    # TODO(vikasrk): PickleCoder will eventually be removed once its direct
    # usage is stopped.
    wrappable_types = (coders.PickleCoder, coders.FastPrimitivesCoder)
    if not isinstance(key_coder, wrappable_types):
        raise ValueError(message)

    if not silent:
        logging.warning(message)
    return coders.DeterministicFastPrimitivesCoder(key_coder, op_name)