def _make_join_arg(self) -> pd.Series:
    """
    Build the example series of token spans that the join test cases use
    as their common join argument.
    """
    # (begin, end) pairs handed to TokenSpan together with _TOKENS_ARRAY;
    # the trailing comment on each row is the text that span covers.
    span_bounds = [
        (23, 28),  # Knights of the Round Table
        (17, 19),  # searching for
        (1, 2),  # In
        (1, 2),  # In (second copy)
        (42, 45),  # Lancelot the Brave
    ]
    spans = [TokenSpan(_TOKENS_ARRAY, begin, end) for begin, end in span_bounds]
    return pd.Series(TokenSpanArray._from_sequence(spans))
def test_overlaps_join(self):
    """
    Check overlap_join() with the shared join argument on either side.

    The example spans are joined against the ``match`` column of
    ``_CAPS_WORD`` in both argument orders, and the string rendering of
    each result is compared with the expected two-column table.
    """
    # Reuse the shared fixture instead of rebuilding the same five spans
    # inline, so every join test works from one definition of the input.
    join_arg = self._make_join_arg()

    # NOTE(review): the column padding in the expected tables below was
    # reconstructed assuming pandas' default right-justified rendering of
    # str(DataFrame); confirm against the actual output.

    # Join argument on the left, capitalized-word matches on the right.
    result1 = overlap_join(join_arg, _CAPS_WORD["match"])
    self.assertEqual(
        str(result1),
        textwrap.dedent(
            """\
                                                first                second
            0  [23, 28): 'Knights of the Round Table'   [23, 24): 'Knights'
            1  [23, 28): 'Knights of the Round Table'     [26, 27): 'Round'
            2  [23, 28): 'Knights of the Round Table'     [27, 28): 'Table'
            3                            [1, 2): 'In'          [1, 2): 'In'
            4                            [1, 2): 'In'          [1, 2): 'In'
            5          [42, 45): 'Lancelot the Brave'  [42, 43): 'Lancelot'
            6          [42, 45): 'Lancelot the Brave'     [44, 45): 'Brave'"""
        ),
    )

    # Same join with the arguments swapped.
    result2 = overlap_join(_CAPS_WORD["match"], join_arg)
    self.assertEqual(
        str(result2),
        textwrap.dedent(
            """\
                              first                                  second
            0          [1, 2): 'In'                            [1, 2): 'In'
            1          [1, 2): 'In'                            [1, 2): 'In'
            2   [23, 24): 'Knights'  [23, 28): 'Knights of the Round Table'
            3     [26, 27): 'Round'  [23, 28): 'Knights of the Round Table'
            4     [27, 28): 'Table'  [23, 28): 'Knights of the Round Table'
            5  [42, 43): 'Lancelot'          [42, 45): 'Lancelot the Brave'
            6     [44, 45): 'Brave'          [42, 45): 'Lancelot the Brave'"""
        ),
    )