Beispiel #1
0
    def put(self, head_addr_str, tail_rule_str, zipcode):
        """Store ``zipcode`` under the precise key and the gradual keys of an address.

        ``head_addr_str`` is parsed into an ``Address``. Its full flattened
        form is handed to ``self.put_precise`` together with the complete rule
        text (``head_addr_str`` followed by ``tail_rule_str``); every
        contiguous run of its tokens, plus one picked combination, is handed
        to ``self.put_gradual``.
        """
        addr = Address(head_addr_str)

        # Precise entry, keyed by all tokens: (a, b, c)
        whole_addr_key = addr.flat()
        full_rule_str = head_addr_str + tail_rule_str
        self.put_precise(whole_addr_key, full_rule_str, zipcode)

        # Gradual entries, one per contiguous token run:
        # (a, b, c) -> (a,); (a, b); (a, b, c); (b,); (b, c); (c,)
        token_count = len(addr)
        for start in range(token_count):
            # `stop` is exclusive, so it runs one past the last token index.
            for stop in range(start + 1, token_count + 1):
                self.put_gradual(addr.flat(start, stop), zipcode)

        # Extra gradual entry built from picked tokens once there are at
        # least three of them: (a, b, c, d) -> (a, c)
        if token_count >= 3:
            self.put_gradual(addr.pick_to_flat(0, 2), zipcode)
Beispiel #2
0
def test_address_flat():
    """Check ``Address.flat`` with positive, negative and omitted arguments."""
    addr = Address('臺北市大安區市府路1之1號')

    # Each prefix must be reachable both by a positive count and by the
    # matching negative argument (1 <-> -3, 2 <-> -2, 3 <-> -1).
    expected_prefixes = ('臺北市', '臺北市大安區', '臺北市大安區市府路')
    for count, expected in enumerate(expected_prefixes, start=1):
        assert addr.flat(count) == addr.flat(count - 4) == expected

    # Without arguments the whole address is returned.
    assert addr.flat() == '臺北市大安區市府路1之1號'
Beispiel #3
0
def test_address_flat():
    """Verify the strings produced by ``Address.flat`` for several arguments."""
    addr = Address('臺北市大安區市府路1之1號')

    # (positive argument, equivalent negative argument, expected text)
    cases = (
        (1, -3, u'臺北市'),
        (2, -2, u'臺北市大安區'),
        (3, -1, u'臺北市大安區市府路'),
    )
    for from_front, from_back, expected in cases:
        assert addr.flat(from_front) == addr.flat(from_back) == expected

    # No argument: the complete address comes back unchanged.
    assert addr.flat() == u'臺北市大安區市府路1之1號'
Beispiel #4
0
def test_address_flat():
    """Exercise ``Address.flat`` with positive, negative and no arguments."""
    addr = Address("臺北市大安區市府路1之1號")

    # One-token prefix, addressed from either end.
    by_count = addr.flat(1)
    by_negative = addr.flat(-3)
    assert by_count == by_negative == u"臺北市"

    # Two-token prefix.
    by_count = addr.flat(2)
    by_negative = addr.flat(-2)
    assert by_count == by_negative == u"臺北市大安區"

    # Three-token prefix.
    by_count = addr.flat(3)
    by_negative = addr.flat(-1)
    assert by_count == by_negative == u"臺北市大安區市府路"

    # The default call yields the full address.
    everything = addr.flat()
    assert everything == u"臺北市大安區市府路1之1號"
Beispiel #5
0
    def find(self, addr_str):
        """Look up the zipcode for an address string.

        The address is parsed into an ``Address`` and progressively shorter
        token prefixes are tried, longest first. For each prefix, the
        rule/zipcode pairs stored under it are checked with ``Rule.match``;
        if none matches, the gradual zipcode stored under that prefix is used.

        :param addr_str: the address text to resolve.
        :returns: the first zipcode found, or ``u''`` when nothing matches.
        """

        addr = Address(addr_str)
        len_addr_tokens = len(addr.tokens)

        # avoid unnecessary iteration
        # Shrink the starting prefix length until ``addr.parse`` reports
        # (0, 0) for the last token of the prefix.
        # NOTE(review): presumably (0, 0) means "this token carries no
        # number", so numbered trailing tokens are skipped -- confirm against
        # Address.parse. If (0, 0) is never returned, start_len ends at -1 and
        # the loop below does not run.
        start_len = len_addr_tokens
        while start_len >= 0:
            if addr.parse(start_len-1) == (0, 0):
                break
            start_len -= 1

        # Try prefixes of start_len, start_len - 1, ..., 1 tokens.
        for i in range(start_len, 0, -1):

            # Rebinds the parameter name: from here on ``addr_str`` is the
            # flattened prefix, not the caller's original text.
            addr_str = addr.flat(i)

            rzpairs = self.get_rule_str_zipcode_pairs(addr_str)

            # for handling insignificant tokens and redundant unit
            # The order of the conditions matters: the length check guards the
            # ``addr.tokens[2]`` / ``addr.tokens[3]`` accesses.
            # NOTE(review): ``x in u'村里'`` is a substring test, so an empty
            # unit also satisfies it -- confirm that is intended.
            if (
                # It only runs once, and must be the first iteration.
                i == start_len and
                len_addr_tokens >= 4 and
                addr.tokens[2][Address.UNIT] in u'村里' and
                not rzpairs
            ):

                if addr.tokens[3][Address.UNIT] == u'鄰':
                    # delete the insignificant token (whose unit is 鄰)
                    del addr.tokens[3]
                    len_addr_tokens -= 1

                if len_addr_tokens >= 4 and addr.tokens[3][Address.UNIT] == u'號':
                    # empty the redundant unit in the token
                    # (only the name is kept, in the third slot of the tuple)
                    addr.tokens[2] = (u'', u'', addr.tokens[2][Address.NAME], u'')
                else:
                    # delete insignificant token (whose unit is 村 or 里)
                    del addr.tokens[2]

                # Retry the precise lookup with the first three tokens of the
                # modified address. ``addr_str`` is NOT recomputed, so the
                # gradual lookup below still uses the pre-modification prefix.
                rzpairs = self.get_rule_str_zipcode_pairs(addr.flat(3))

            if rzpairs:
                # The first rule that matches the (possibly modified) address
                # decides the result.
                for rule_str, zipcode in rzpairs:
                    if Rule(rule_str).match(addr):
                        return zipcode

            # No rule matched: fall back to the gradual zipcode of this prefix.
            gzipcode = self.get_gradual_zipcode(addr_str)
            if gzipcode:
                return gzipcode

        # Nothing matched at any prefix length.
        return u''
Beispiel #6
0
    def find(self, addr_str):
        """Resolve an address string to a zipcode.

        Parses ``addr_str`` into an ``Address`` and walks its token prefixes
        from longest to shortest. At each prefix the stored rule/zipcode pairs
        are tested with ``Rule.match``; when no rule matches, the gradual
        zipcode stored under that prefix is tried instead.

        :param addr_str: the address text to resolve.
        :returns: the first zipcode found, or ``u''`` if every prefix fails.
        """

        addr = Address(addr_str)
        len_addr_tokens = len(addr.tokens)

        # avoid unnecessary iteration
        # Lower start_len until ``addr.parse`` gives (0, 0) for the token at
        # index start_len - 1.
        # NOTE(review): presumably (0, 0) marks a token without a number --
        # verify in Address.parse. When that never happens start_len becomes
        # -1 and the ``for`` loop below is skipped entirely.
        start_len = len_addr_tokens
        while start_len >= 0:
            if addr.parse(start_len - 1) == (0, 0):
                break
            start_len -= 1

        # Prefix lengths are visited in descending order down to 1.
        for i in range(start_len, 0, -1):

            # The parameter name is reused for the flattened prefix.
            addr_str = addr.flat(i)

            rzpairs = self.get_rule_str_zipcode_pairs(addr_str)

            # for handling insignificant tokens and redundant unit
            # ``len_addr_tokens >= 4`` must be evaluated before the token
            # indexing that follows it; short-circuiting protects the access.
            # NOTE(review): the ``in u'村里'`` test is also true for an empty
            # unit string -- confirm whether that case is wanted.
            if (
                    # It only runs once, and must be the first iteration.
                    i == start_len and len_addr_tokens >= 4
                    and addr.tokens[2][Address.UNIT] in u'村里' and not rzpairs):

                if addr.tokens[3][Address.UNIT] == u'鄰':
                    # delete the insignificant token (whose unit is 鄰)
                    del addr.tokens[3]
                    len_addr_tokens -= 1

                if len_addr_tokens >= 4 and addr.tokens[3][
                        Address.UNIT] == u'號':
                    # empty the redundant unit in the token
                    # (the replacement tuple keeps just the name, third slot)
                    addr.tokens[2] = (u'', u'', addr.tokens[2][Address.NAME],
                                      u'')
                else:
                    # delete insignificant token (whose unit is 村 or 里)
                    del addr.tokens[2]

                # Second precise lookup, keyed by the first three tokens of
                # the modified address. ``addr_str`` keeps its old value, so
                # the gradual lookup further down uses the unmodified prefix.
                rzpairs = self.get_rule_str_zipcode_pairs(addr.flat(3))

            if rzpairs:
                # Return the zipcode of the first rule matching the address.
                for rule_str, zipcode in rzpairs:
                    if Rule(rule_str).match(addr):
                        return zipcode

            # Fallback when no rule matched for this prefix.
            gzipcode = self.get_gradual_zipcode(addr_str)
            if gzipcode:
                return gzipcode

        # Every prefix was exhausted without a result.
        return u''
Beispiel #7
0
    def put(self, head_addr_str, tail_rule_str, zipcode):
        """Record ``zipcode`` for an address under its precise and gradual keys.

        The parsed ``head_addr_str`` provides the keys: its complete flattened
        form goes to ``self.put_precise`` along with the concatenated rule
        text, and each contiguous token run (plus one picked combination) goes
        to ``self.put_gradual``.
        """
        addr = Address(head_addr_str)

        # Precise entry for the complete token sequence: (a, b, c)
        self.put_precise(
            addr.flat(),
            head_addr_str + tail_rule_str,
            zipcode,
        )

        # Contiguous runs, in the order
        # (a, b, c) -> (a,); (a, b); (a, b, c); (b,); (b, c); (c,)
        n_tokens = len(addr)
        spans = [
            (first, last + 1)
            for first in range(n_tokens)
            for last in range(first, n_tokens)
        ]
        for first, stop in spans:
            self.put_gradual(addr.flat(first, stop), zipcode)

        # The picked-token entry needs at least three tokens.
        if n_tokens < 3:
            return

        # (a, b, c, d) -> (a, c)
        self.put_gradual(addr.pick_to_flat(0, 2), zipcode)