コード例 #1
0
 def _make_join_arg(self) -> pd.Series:
     """
     Build the example join argument shared by most of the test cases below.

     :returns: A ``pd.Series`` wrapping a ``TokenSpanArray`` of five spans
      over ``_TOKENS_ARRAY``; the span ``(1, 2)`` is deliberately present
      twice.
     """
     # Each pair holds the two positional offsets handed to TokenSpan after
     # the token array; trailing comments give the text each span covers.
     offset_pairs = (
         (23, 28),  # Knights of the Round Table
         (17, 19),  # searching for
         (1, 2),  # In
         (1, 2),  # In (second copy)
         (42, 45),  # Lancelot the Brave
     )
     spans = [
         TokenSpan(_TOKENS_ARRAY, first_offset, second_offset)
         for first_offset, second_offset in offset_pairs
     ]
     span_array = TokenSpanArray._from_sequence(spans)
     return pd.Series(span_array)
コード例 #2
0
    def test_overlaps_join(self):
        """
        Verify ``overlap_join`` in both argument orders by comparing the
        string rendering of each result against a fixed expected table.
        """
        # Offsets of the spans on the "join argument" side; trailing comments
        # give the text each span covers.
        offset_pairs = [
            (23, 28),  # Knights of the Round Table
            (17, 19),  # searching for
            (1, 2),  # In
            (1, 2),  # In (second copy)
            (42, 45),  # Lancelot the Brave
        ]
        span_array = TokenSpanArray._from_sequence(
            [
                TokenSpan(_TOKENS_ARRAY, first_offset, second_offset)
                for first_offset, second_offset in offset_pairs
            ]
        )
        join_arg = pd.Series(span_array)

        # Join argument as the first input, capitalized-word matches second.
        spans_then_caps = overlap_join(join_arg, _CAPS_WORD["match"])
        rendered_spans_then_caps = str(spans_then_caps)
        expected_spans_then_caps = textwrap.dedent(
            """\
                                                first                second
            0  [23, 28): 'Knights of the Round Table'   [23, 24): 'Knights'
            1  [23, 28): 'Knights of the Round Table'     [26, 27): 'Round'
            2  [23, 28): 'Knights of the Round Table'     [27, 28): 'Table'
            3                            [1, 2): 'In'          [1, 2): 'In'
            4                            [1, 2): 'In'          [1, 2): 'In'
            5          [42, 45): 'Lancelot the Brave'  [42, 43): 'Lancelot'
            6          [42, 45): 'Lancelot the Brave'     [44, 45): 'Brave'"""
        )
        self.assertEqual(rendered_spans_then_caps, expected_spans_then_caps)

        # Same inputs with the argument order swapped.
        caps_then_spans = overlap_join(_CAPS_WORD["match"], join_arg)
        rendered_caps_then_spans = str(caps_then_spans)
        expected_caps_then_spans = textwrap.dedent(
            """\
                              first                                  second
            0          [1, 2): 'In'                            [1, 2): 'In'
            1          [1, 2): 'In'                            [1, 2): 'In'
            2   [23, 24): 'Knights'  [23, 28): 'Knights of the Round Table'
            3     [26, 27): 'Round'  [23, 28): 'Knights of the Round Table'
            4     [27, 28): 'Table'  [23, 28): 'Knights of the Round Table'
            5  [42, 43): 'Lancelot'          [42, 45): 'Lancelot the Brave'
            6     [44, 45): 'Brave'          [42, 45): 'Lancelot the Brave'"""
        )
        self.assertEqual(rendered_caps_then_spans, expected_caps_then_spans)