def test_reject_invalid_tuplelike(self):
    """Constructing a Tuple from 1, [1] or [None] must raise TypeError."""
    for invalid_source in (1, [1], [None]):
        with pytest.raises(TypeError):
            Tuple(invalid_source)
def test_reject_empty_tuplelike(self):
    """Constructing a Tuple from an empty list, dict or Series must raise AssertionError."""
    empty_sources = (
        [],
        {},
        pandas.Series(dtype=pandas.StringDtype()),
    )
    for empty_source in empty_sources:
        with pytest.raises(AssertionError):
            Tuple(empty_source)
def test_tuple_lazy_get_from_arrow(self):
    """
    A Tuple whose field values are callables must compare equal to a Tuple
    holding the values those callables return for each field name, both
    directly and after a deepcopy.
    """

    def resolve_letter(field_name):
        # '1' -> 'a', '3' -> 'c': 1-based offset into the lowercase letters
        return chr(ord('a') - 1 + int(field_name))

    accessor_fields = ('1', '3')
    expected = Tuple({'1': 'a', '3': 'c'})

    lazy = Tuple({name: resolve_letter for name in accessor_fields})
    assert lazy == Tuple({'1': 'a', '3': 'c'})

    # A fresh instance is built before copying - presumably so deepcopy runs
    # on a Tuple that the comparison above has not touched yet.
    lazy = Tuple({name: resolve_letter for name in accessor_fields})
    assert deepcopy(lazy) == expected
def process_data_payload(
        self, from_: ActorVirtualIdentity, payload: DataPayload
) -> Iterator[Union[Tuple, InputExhausted, Marker]]:
    """
    Turn one incoming data payload into a stream of Tuples and markers.

    :param from_: ActorVirtualIdentity, the sender of the payload.
    :param payload: DataPayload, either an InputDataFrame or an EndOfUpstream.
    :return: Iterator of Tuple / InputExhausted / Marker. A SenderChangeMarker
        is emitted first whenever the link for ``from_`` differs from the
        previously recorded one.
    :raises NotImplementedError: if the payload is neither an InputDataFrame
        nor an EndOfUpstream (raised after any SenderChangeMarker is yielded).
    """
    # special case used to yield for source op
    if from_ == BatchToTupleConverter.SOURCE_STARTER:
        yield InputExhausted()
        yield EndOfAllMarker()
        return

    link = self._input_map[from_]
    link_changed = self._current_link is None or self._current_link != link
    if link_changed:
        self._current_link = link
        yield SenderChangeMarker(link)

    if isinstance(payload, InputDataFrame):
        table = payload.frame
        for accessor in ArrowTableTupleProvider(table):
            # every column of this row is served by the same accessor
            yield Tuple(dict.fromkeys(table.column_names, accessor))
    elif isinstance(payload, EndOfUpstream):
        pending_senders = self._upstream_map[link]
        pending_senders.remove(from_)
        if len(pending_senders) == 0:
            # last sender on this link has finished
            del self._upstream_map[link]
            yield InputExhausted()
        if len(self._upstream_map) == 0:
            # no link has any sender left
            yield EndOfAllMarker()
    else:
        raise NotImplementedError()
def data_payload(self):
    """
    Build an OutputDataFrame holding four (Brand, Price) rows, each wrapped
    in a Tuple, together with a string/integer arrow schema.
    """
    cars = pandas.DataFrame(
        {
            'Brand': ['Honda Civic', 'Toyota Corolla', 'Ford Focus', 'Audi A4'],
            'Price': [22000, 25000, 27000, 35000],
        },
        columns=['Brand', 'Price'],
    )
    rows = [Tuple(row) for _index, row in cars.iterrows()]
    schema = to_arrow_schema({'Brand': 'string', 'Price': 'integer'})
    return OutputDataFrame(frame=rows, schema=schema)
def process_tuple_with_udf(self, tuple_: Union[Tuple, InputExhausted], link: LinkIdentity) \
        -> Iterator[Optional[Tuple]]:
    """
    Run the operator on one Tuple/InputExhausted arriving over the given link.

    The link is first translated into an input index; indices are handed out
    in the order in which links are first seen.

    :param tuple_: Union[Tuple, InputExhausted], the current tuple.
    :param link: LinkIdentity, the current link.
    :return: Iterator[Optional[Tuple]], each operator output wrapped in a
        Tuple, with None outputs passed through unchanged.
    """
    # first sighting of this link: give it the next free input index
    if link not in self._input_link_map:
        next_index = len(self._input_links)
        self._input_links.append(link)
        self._input_link_map[link] = next_index

    def wrap(output):
        return None if output is None else Tuple(output)

    input_index = self._input_link_map[link]
    return map(wrap, self._operator.process_tuple(tuple_, input_index))
def mock_tuple(self):
    """Return a Tuple with fields "test-1" = "hello" and "test-2" = 10."""
    fields = {"test-1": "hello", "test-2": 10}
    return Tuple(fields)
def target_tuple(self):
    """Return a Tuple with fields "x" = 1 and "y" = "a"."""
    fields = {"x": 1, "y": "a"}
    return Tuple(fields)
def test_tuple_ne(self, target_tuple):
    """`!=` is falsy for a Tuple against itself and truthy when "y" differs."""
    assert not (target_tuple != target_tuple)

    differs_in_y = Tuple({"x": 1, "y": "b"})
    assert differs_in_y != target_tuple
def test_tuple_eq(self, target_tuple):
    """`==` is truthy for a Tuple against itself and falsy when "x" differs."""
    assert target_tuple == target_tuple

    differs_in_x = Tuple({"x": 2, "y": "a"})
    assert not (differs_in_x == target_tuple)
def test_tuple_from_series(self, target_tuple):
    """A Tuple built from a pandas Series equals the target tuple."""
    series = pandas.Series({"x": 1, "y": "a"})
    assert Tuple(series) == target_tuple
def test_tuple_from_dict(self, target_tuple):
    """A Tuple built from a dict equals the target tuple."""
    mapping = {"x": 1, "y": "a"}
    assert Tuple(mapping) == target_tuple
def test_tuple_from_list(self, target_tuple):
    """A Tuple built from a list of (name, value) pairs equals the target tuple."""
    pairs = [("x", 1), ("y", "a")]
    assert Tuple(pairs) == target_tuple