def test_readlines_in_range(self):
        # Exercise VerdictParser._lines_in_range() twice: once on an inline
        # Google-style docstring sample, once on the sample verdict held by
        # self.parser.
        text = """
        Fetches rows from a Bigtable.

        Retrieves rows pertaining to the given keys from the Table instance
        represented by big_table.  Silly things may happen if
        other_silly_variable is not None.

        Args:
          big_table: An open Bigtable Table instance.
          keys: A sequence of strings representing the key of each table row
              to fetch.
          other_silly_variable: Another optional variable, that has a much
              longer name than the other args, and which does nothing.

        Returns:
          A dict mapping keys to the corresponding table row data
          fetched. Each row is represented as a tuple of strings.
     """
        # Raw strings keep "\s" from being read as a string escape.
        rgx_start = re.compile(r"\s*Args:")
        rgx_end = re.compile(r"\s*Returns:")
        parser = VerdictParser(text)
        lines = parser._lines_in_range(rgx_start, rgx_end)
        # "Args:" through the blank line preceding "Returns:" is 7 lines.
        self.assertEqual(7, len(lines))
        # The line matching the end pattern must not be part of the result.
        self.assertFalse(rgx_end.match(lines[-1]))

        rgx_start = VerdictParser.rgx_map[VerdictParser.e_plaintiff]
        rgx_end = VerdictParser.rgx_map[VerdictParser.e_defendant]
        lines = self.parser._lines_in_range(rgx_start, rgx_end)
        # This used to be assertTrue(5, len(lines)), which treats len(lines)
        # as the failure message and can never fail; compare the count.
        self.assertEqual(5, len(lines))
    def test_get_defendant_info(self):
        text = ur"""施汝憬律師
被 上訴 人 幸福空間有限公司
法定代理人 林鎮業
訴訟代理人 陳建瑜律師
      黃國紘律師
上列當事人間侵害商標權有關財產權爭議等事件,上訴人對於中
        """
        parser = VerdictParser(text)
#        pdb.set_trace()
        info = parser._get_info(VerdictParser.e_defendant)
        self.assertEqual(4, len(info))
 def setUp(self):
     # Load verdict_sample.txt from the directory containing this file and
     # build the shared parser from its text.
     if "verdict_text" in self.__dict__:
         # This instance already holds the sample text; nothing to reload.
         return
     self.test_path = os.path.abspath(os.path.join(__file__, ".."))
     sample = os.path.join(self.test_path, "verdict_sample.txt")
     # open() is the documented way to open a file; calling the file()
     # constructor directly is discouraged.
     with open(sample) as _fp:
         self.verdict_text = _fp.read().decode("utf-8")
     self.parser = VerdictParser(self.verdict_text)
 def _set_up_contents(self, file_name):
     # Read `file_name` (relative to self.test_path) as UTF-8, point
     # self.parser at a new VerdictParser built from it, and return the
     # decoded text.
     test_case = os.path.join(self.test_path, file_name)
     # open() is the documented way to open a file; calling the file()
     # constructor directly is discouraged.
     with open(test_case) as _fp:
         contents = _fp.read().decode("utf-8")
     self.parser = VerdictParser(contents)
     return contents
class VerdictParserTest(unittest.TestCase):
    def setUp(self):
        # Load verdict_sample.txt from the directory containing this file and
        # build the shared parser from its text.
        if "verdict_text" in self.__dict__:
            # This instance already holds the sample text; nothing to reload.
            return
        self.test_path = os.path.abspath(os.path.join(__file__, ".."))
        sample = os.path.join(self.test_path, "verdict_sample.txt")
        # open() is the documented way to open a file; calling the file()
        # constructor directly is discouraged.
        with open(sample) as _fp:
            self.verdict_text = _fp.read().decode("utf-8")
        self.parser = VerdictParser(self.verdict_text)

    def test_readlines_in_range(self):
        # Exercise VerdictParser._lines_in_range() twice: once on an inline
        # Google-style docstring sample, once on the sample verdict held by
        # self.parser.
        text = """
        Fetches rows from a Bigtable.

        Retrieves rows pertaining to the given keys from the Table instance
        represented by big_table.  Silly things may happen if
        other_silly_variable is not None.

        Args:
          big_table: An open Bigtable Table instance.
          keys: A sequence of strings representing the key of each table row
              to fetch.
          other_silly_variable: Another optional variable, that has a much
              longer name than the other args, and which does nothing.

        Returns:
          A dict mapping keys to the corresponding table row data
          fetched. Each row is represented as a tuple of strings.
     """
        # Raw strings keep "\s" from being read as a string escape.
        rgx_start = re.compile(r"\s*Args:")
        rgx_end = re.compile(r"\s*Returns:")
        parser = VerdictParser(text)
        lines = parser._lines_in_range(rgx_start, rgx_end)
        # "Args:" through the blank line preceding "Returns:" is 7 lines.
        self.assertEqual(7, len(lines))
        # The line matching the end pattern must not be part of the result.
        self.assertFalse(rgx_end.match(lines[-1]))

        rgx_start = VerdictParser.rgx_map[VerdictParser.e_plaintiff]
        rgx_end = VerdictParser.rgx_map[VerdictParser.e_defendant]
        lines = self.parser._lines_in_range(rgx_start, rgx_end)
        # This used to be assertTrue(5, len(lines)), which treats len(lines)
        # as the failure message and can never fail; compare the count.
        self.assertEqual(5, len(lines))


# This test case passes. It is kept because it exercises the re module with
# UTF-8 text, which I was not familiar with at the time.  One lesson I learned
# is that file encoding matters: if your source file (*.py) is not UTF-8,
# re.match finds no match. To check the file encoding, just use
#
#   $ type your_prog.py
#
#    def test_re_match_utf8(self):
#        line = "被 上訴 人 幸福空間有限公司"
#        rgx = re.compile(r"被\s*上訴\s*人??(?P<name>.+)", re.UNICODE)
#        match = rgx.match(line)
#        self.assertTrue(match != None)
#        name = match.expand(r"\g<name>")
#        print name

    def test_get_defendant_info(self):
        text = ur"""施汝憬律師
被 上訴 人 幸福空間有限公司
法定代理人 林鎮業
訴訟代理人 陳建瑜律師
      黃國紘律師
上列當事人間侵害商標權有關財產權爭議等事件,上訴人對於中
        """
        parser = VerdictParser(text)
#        pdb.set_trace()
        info = parser._get_info(VerdictParser.e_defendant)
        self.assertEqual(4, len(info))

    # @unittest.skip("")
    def test_get_defendant(self):
        # Defendant of the sample verdict: name, representative, and the two
        # lawyers in the order get_lawyers() returns them.
        party = self.parser.get_defendant()
        self.assertEqual(u"幸福空間有限公司", party.get_name())
        self.assertEqual(u"林鎮業", party.get_representative())
        self.assertEqual(2, len(party.get_lawyers()))
        for position, lawyer in enumerate((u"陳建瑜", u"黃國紘")):
            self.assertEqual(lawyer, party.get_lawyers()[position])

    def test_get_plaintiff(self):
        # Plaintiff of the sample verdict: name, representative, and the two
        # lawyers in the order get_lawyers() returns them.
        party = self.parser.get_plaintiff()
        self.assertEqual(u"美商科高國際有限公司台灣分公司", party.get_name())
        self.assertEqual(u"蘇德曼", party.get_representative())
        self.assertEqual(2, len(party.get_lawyers()))
        for position, lawyer in enumerate((u"徐頌雅", u"施汝憬")):
            self.assertEqual(lawyer, party.get_lawyers()[position])

    def test_re(self):
        # Spot-check individual patterns stored in VerdictParser.rgx_map.
        patterns = VerdictParser.rgx_map

        serial_rgx = patterns[VerdictParser.e_serial]
        serial_match = serial_rgx.match(u"【裁判字號】    105,台上,81")
        self.assertEqual(u"105,台上,81",
                         serial_match.expand(r"\g<serial>"))

        plaintiff_rgx = patterns[VerdictParser.e_plaintiff]
        # Full space
        self.assertIsNotNone(plaintiff_rgx.match(u"上 訴 人 美商科高國際"))
        # Half space
        self.assertIsNotNone(plaintiff_rgx.match(u"上  訴  人  美商科高國際"))

        decision_rgx = patterns[VerdictParser.e_decision]
        # Half space
        self.assertIsNotNone(decision_rgx.match(u"    主  文"))

        reason_rgx = patterns[VerdictParser.e_reason]
        # Half space
        self.assertIsNotNone(reason_rgx.match(u"    理  由"))

        date_rgx = patterns[VerdictParser.e_date]
        # Half space
        date_line = u"中    華    民    國  一○五  年    一    月   十四   日"
        self.assertIsNotNone(date_rgx.match(date_line))

        lawyer_rgx = patterns[VerdictParser.e_lawyer]
        # TODO: handle the following case:
        # text = u"訴訟代理人 楊勝斐"
        # match = lawyer_re.match(text)
        # self.assertEqual(u"楊勝斐", match.expand(r"\g<name>"))

        # A party line must not be mistaken for a lawyer line.
        party_line = u"  再 抗告 人 中國國際商業銀行股份有限公司"
        self.assertIsNone(lawyer_rgx.match(party_line))

        representative_rgx = patterns[VerdictParser.e_representative]
        representative_line = u"法 定 代 理 人 邱欽庭"
        self.assertIsNotNone(representative_rgx.match(representative_line))

    def test_get_date(self):
        # get_verdict_date() must unpack into exactly (year, month, day);
        # the sample verdict is expected to give 105, 1, 14.
        year, month, day = self.parser.get_verdict_date()
        for wanted, got in ((105, year), (1, month), (14, day)):
            self.assertEqual(wanted, got)

    def test__get_judge_name(self):
        # _match_judge_name() is expected to return the judge's name with the
        # title and all spacing stripped.
        cases = (
            # trailing half spaces
            (u"                          審判長法官  高  孟  焄  ", u"高孟焄"),
            # trailing full spaces
            (u"                                法官  鄭  雅  萍 ", u"鄭雅萍"),
        )
        for line, wanted in cases:
            self.assertEqual(wanted, _match_judge_name(line))

    def test__get_judge_name2(self):
        # Compare as sets: the order returned by get_judge_names() is not
        # part of what this test checks.
        found = set(self.parser.get_judge_names())
        wanted = {
            u"高孟焄",
            u"鄭雅萍",
            u"鍾任賜",
            u"李錦美",
            u"袁靜文",
        }
        self.assertEqual(wanted, found)

    def test_get_keywords(self):
        # Only a handful of important words are checked; the full keyword
        # list is not pinned down here.
        must_have = (
            u"商標",
            u"公平交易法",
            u"點擊率",
            u"檢索",
            u"言論自由",
            u"廣告",
        )
        extracted = self.parser.get_keywords()
        for word in must_have:
            self.assertIn(word, extracted)

    def test_get_decision(self):
        # The two ruling sentences must appear, newline-separated, somewhere
        # inside the decision text.
        ruling = u"\n".join((u"上訴駁回。", u"第三審訴訟費用由上訴人負擔。"))
        decision = self.parser.get_verdict_decision()
        self.assertIn(ruling, decision)

    def test_get_summary(self):
        # The summary of the sample verdict must match this text exactly.
        wanted = u"侵害商標權有關財產權爭議等"
        self.assertEqual(wanted, self.parser.get_verdict_summary())

    def _set_up_contents(self, file_name):
        # Read `file_name` (relative to self.test_path) as UTF-8, point
        # self.parser at a new VerdictParser built from it, and return the
        # decoded text.
        test_case = os.path.join(self.test_path, file_name)
        # open() is the documented way to open a file; calling the file()
        # constructor directly is discouraged.
        with open(test_case) as _fp:
            contents = _fp.read().decode("utf-8")
        self.parser = VerdictParser(contents)
        return contents

    def test__get_info(self):
        # Run _get_info() against three stored verdict files; each
        # _set_up_contents() call replaces self.parser.
        plaintiff = VerdictParser.e_plaintiff
        defendant = VerdictParser.e_defendant

        # First file: the defendant block is expected to be lines 4..6 of the
        # raw text.
        raw = self._set_up_contents(u"臺灣高等法院-民-100勞再4")
        self.assertEqual(raw.splitlines()[4:7],
                         self.parser._get_info(defendant))

        # Second file: only the size of the plaintiff block is checked.
        self._set_up_contents(u"最高法院-民-93台抗36")
        self.assertEqual(4, len(self.parser._get_info(plaintiff)))

        # Third file: sizes of both blocks.
        self._set_up_contents(u"臺灣高等法院-民-92勞上易25")
        self.assertEqual(6, len(self.parser._get_info(plaintiff)))
        self.assertEqual(3, len(self.parser._get_info(defendant)))