Beispiel #1
0
    def put(self, head_addr_str, tail_rule_str, zipcode):
        """Store ``zipcode`` under the precise key and every gradual key of an address.

        head_addr_str -- address text, tokenized through ``Address``
        tail_rule_str -- rule text; appended to ``head_addr_str`` to form the
                         rule string saved with the precise entry
        zipcode       -- value passed on to ``put_precise`` / ``put_gradual``
        """
        parsed = Address(head_addr_str)

        # precise entry: the whole address (a, b, c) -> full rule string
        whole_addr = parsed.flat()
        rule_str = head_addr_str + tail_rule_str
        self.put_precise(whole_addr, rule_str, zipcode)

        # gradual entries, one per contiguous run of tokens:
        # (a, b, c) -> (a,); (a, b); (a, b, c); (b,); (b, c); (c,)
        token_count = len(parsed)
        for first in range(token_count):
            for stop in range(first + 1, token_count + 1):
                self.put_gradual(parsed.flat(first, stop), zipcode)

        # one extra key that skips the second token: (a, b, c, d) -> (a, c)
        if token_count < 3:
            return
        self.put_gradual(parsed.pick_to_flat(0, 2), zipcode)
Beispiel #2
0
    def find(self, addr_str):
        """Return the zipcode found for ``addr_str``, or ``u''`` when none is found.

        The address is tried at decreasing token counts. At each length the
        rule/zipcode pairs stored for that flattened prefix are checked first
        (the zipcode of the first rule that matches the address wins), then
        the gradual zipcode stored for the same prefix.
        """

        addr = Address(addr_str)
        len_addr_tokens = len(addr.tokens)

        # avoid unnecessary iteration
        # Shrink start_len until addr.parse(start_len-1) yields (0, 0) or
        # start_len drops below zero.
        # NOTE(review): presumably (0, 0) means "token carries no number";
        # confirm against Address.parse.
        start_len = len_addr_tokens
        while start_len >= 0:
            if addr.parse(start_len-1) == (0, 0):
                break
            start_len -= 1

        # i runs from start_len down to 1; nothing runs when start_len <= 0.
        for i in range(start_len, 0, -1):

            # The parameter name is reused for the flattened lookup key.
            addr_str = addr.flat(i)

            rzpairs = self.get_rule_str_zipcode_pairs(addr_str)

            # for handling insignificant tokens and redundant unit
            # NOTE(review): `in u'村里'` is a substring test, so a token whose
            # unit is the empty string also satisfies it; confirm that this
            # is intended.
            if (
                # It only runs once, and must be the first iteration.
                i == start_len and
                len_addr_tokens >= 4 and
                addr.tokens[2][Address.UNIT] in u'村里' and
                not rzpairs
            ):

                if addr.tokens[3][Address.UNIT] == u'鄰':
                    # delete the insignificant token (whose unit is 鄰)
                    del addr.tokens[3]
                    len_addr_tokens -= 1

                if len_addr_tokens >= 4 and addr.tokens[3][Address.UNIT] == u'號':
                    # empty the redundant unit in the token
                    addr.tokens[2] = (u'', u'', addr.tokens[2][Address.NAME], u'')
                else:
                    # delete insignificant token (whose unit is 村 or 里)
                    del addr.tokens[2]

                # Retry the precise lookup with the rewritten tokens; addr
                # stays mutated for the rest of this call.
                rzpairs = self.get_rule_str_zipcode_pairs(addr.flat(3))

            if rzpairs:
                for rule_str, zipcode in rzpairs:
                    if Rule(rule_str).match(addr):
                        return zipcode

            # The gradual lookup still uses the key computed at the top of the
            # iteration, i.e. before any token rewriting above.
            gzipcode = self.get_gradual_zipcode(addr_str)
            if gzipcode:
                return gzipcode

        return u''
Beispiel #3
0
def test_address_init_tricky_input():
    """Tokenization of a name without a unit and of a name that embeds a digit."""

    house_no_10 = (u'10', u'', u'', u'號')

    # the trailing name has no unit, yet still forms its own token
    unitless_name = [
        (u'', u'', u'桃園', u'縣'),
        (u'', u'', u'中壢', u'市'),
        (u'', u'', u'普義', u''),
    ]
    assert Address(u'桃園縣中壢市普義').tokens == unitless_name
    assert Address(u'桃園縣中壢市普義10號').tokens == unitless_name + [house_no_10]

    # the digit in front of the unit stays inside the name
    digit_in_name = [
        (u'', u'', u'臺北', u'市'),
        (u'', u'', u'中山', u'區'),
        (u'', u'', u'敬業1', u'路'),
    ]
    assert Address(u'臺北市中山區敬業1路').tokens == digit_in_name
    assert Address(u'臺北市中山區敬業1路10號').tokens == digit_in_name + [house_no_10]
Beispiel #4
0
def test_address_init_tricky_input():
    """Check the tokens produced for unit-less names and digit-bearing names."""

    cases = [
        ('桃園縣中壢市普義', [
            ('', '', '桃園', '縣'),
            ('', '', '中壢', '市'),
            ('', '', '普義', ''),
        ]),
        ('桃園縣中壢市普義10號', [
            ('', '', '桃園', '縣'),
            ('', '', '中壢', '市'),
            ('', '', '普義', ''),
            ('10', '', '', '號'),
        ]),
        ('臺北市中山區敬業1路', [
            ('', '', '臺北', '市'),
            ('', '', '中山', '區'),
            ('', '', '敬業1', '路'),
        ]),
        ('臺北市中山區敬業1路10號', [
            ('', '', '臺北', '市'),
            ('', '', '中山', '區'),
            ('', '', '敬業1', '路'),
            ('10', '', '', '號'),
        ]),
    ]

    for addr_str, expected_tokens in cases:
        assert Address(addr_str).tokens == expected_tokens
Beispiel #5
0
def test_address_repr():
    """``repr(Address)`` gives the expected text and survives an ``eval`` round trip."""

    py2_repr = "Address(u'\\u81fa\\u5317\\u5e02\\u5927\\u5b89\\u5340\\u5e02\\u5e9c\\u8def1\\u865f')"
    py3_repr = "Address('臺北市大安區市府路1號')"
    expected = py2_repr if six.PY2 else py3_repr

    address = Address('臺北市大安區市府路1號')
    assert repr(address) == expected

    # eval only ever receives one of the constant literals defined above
    rebuilt = eval(expected)
    assert repr(rebuilt) == expected
Beispiel #6
0
def test_address_flat():
    """``flat`` with a positive or negative stop yields the same leading text."""

    addr = Address('臺北市大安區市府路1之1號')

    prefixes = [
        (1, -3, '臺北市'),
        (2, -2, '臺北市大安區'),
        (3, -1, '臺北市大安區市府路'),
    ]
    for stop, negative_stop, expected in prefixes:
        assert addr.flat(stop) == addr.flat(negative_stop) == expected

    # without arguments the whole address comes back
    assert addr.flat() == '臺北市大安區市府路1之1號'
Beispiel #7
0
    def put(self, head_addr_str, tail_rule_str, zipcode):
        """Record ``zipcode`` for an address: once precisely, then gradually.

        head_addr_str -- address text, parsed with ``Address``
        tail_rule_str -- rule text, concatenated after ``head_addr_str`` for
                         the precise entry
        zipcode       -- value forwarded to ``put_precise`` / ``put_gradual``
        """
        address = Address(head_addr_str)

        # exact key: all tokens (a, b, c), stored with the full rule string
        self.put_precise(
            address.flat(),
            head_addr_str + tail_rule_str,
            zipcode,
        )

        # gradual keys: (a, b, c) -> (a,); (a, b); (a, b, c); (b,); (b, c); (c,)
        n_tokens = len(address)
        for lo in range(n_tokens):
            hi = lo
            while hi < n_tokens:
                hi += 1
                self.put_gradual(address.flat(lo, hi), zipcode)

        if n_tokens >= 3:
            # additionally (a, b, c, d) -> (a, c)
            skipping_key = address.pick_to_flat(0, 2)
            self.put_gradual(skipping_key, zipcode)
Beispiel #8
0
def test_address_init_normalization():
    """Different spellings, spacing and separators all give the same tokens."""

    expected_tokens = [
        (u'', u'', u'臺北', u'市'),
        (u'', u'', u'大安', u'區'),
        (u'', u'', u'市府', u'路'),
        (u'1', u'之1', u'', u'號'),
    ]

    spellings = (
        u'臺北市大安區市府路1之1號',
        u'台北市大安區市府路1之1號',
        u'臺北市大安區市府路1之1號',
        u'臺北市 大安區 市府路 1 之 1 號',
        u'臺北市,大安區,市府路 1 之 1 號',
        u'臺北市, 大安區, 市府路 1 之 1 號',
        u'臺北市, 大安區, 市府路 1 - 1 號',
    )
    for spelling in spellings:
        assert Address(spelling).tokens == expected_tokens
Beispiel #9
0
def test_address_flat():
    """Positive and negative stops passed to ``flat`` agree on the prefix returned."""

    addr = Address("臺北市大安區市府路1之1號")

    one_token = u"臺北市"
    two_tokens = u"臺北市大安區"
    three_tokens = u"臺北市大安區市府路"
    everything = u"臺北市大安區市府路1之1號"

    assert addr.flat(1) == addr.flat(-3) == one_token
    assert addr.flat(2) == addr.flat(-2) == two_tokens
    assert addr.flat(3) == addr.flat(-1) == three_tokens
    assert addr.flat() == everything
Beispiel #10
0
def test_address_flat():
    """``flat(n)`` and its negative-index twin return the same leading tokens."""

    addr = Address('臺北市大安區市府路1之1號')

    # (stop counted from the front, stop counted from the back, expected text)
    table = (
        (1, -3, u'臺北市'),
        (2, -2, u'臺北市大安區'),
        (3, -1, u'臺北市大安區市府路'),
    )
    for from_front, from_back, text in table:
        assert addr.flat(from_front) == addr.flat(from_back) == text

    assert addr.flat() == u'臺北市大安區市府路1之1號'
Beispiel #11
0
def test_rule_match_gradual_address():
    """Standard rules checked against addresses of increasing length."""

    # standard rule w/ gradual addresses
    shorter_addresses = ('臺北市', '臺北市中正區', '臺北市中正區仁愛路1段')
    full_address = '臺北市中正區仁愛路1段1號'

    # a rule for another street matches none of them
    rule = Rule('臺北市中正區丹陽街全')
    for addr_str in shorter_addresses:
        assert not rule.match(Address(addr_str))
    assert not rule.match(Address(full_address))

    # a rule for the exact number matches only the full address
    rule = Rule('臺北市,中正區,仁愛路1段,    1號')
    for addr_str in shorter_addresses:
        assert not rule.match(Address(addr_str))
    assert rule.match(Address(full_address))
Beispiel #12
0
def test_rule_match():
    """One standard address (number 5) checked against a table of standard rules."""

    # standard address w/ standard rules
    addr = Address('臺北市大安區市府路5號')

    # (rule string, whether the rule is expected to match addr)
    expectations = [
        # 全單雙
        ('臺北市大安區市府路全', True),
        ('臺北市大安區市府路單全', True),
        ('臺北市大安區市府路雙全', False),

        # 以上 & 以下
        ('臺北市大安區市府路6號以上', False),
        ('臺北市大安區市府路6號以下', True),
        ('臺北市大安區市府路5號以上', True),
        ('臺北市大安區市府路5號', True),
        ('臺北市大安區市府路5號以下', True),
        ('臺北市大安區市府路4號以上', True),
        ('臺北市大安區市府路4號以下', False),

        # 至
        ('臺北市大安區市府路1號至4號', False),
        ('臺北市大安區市府路1號至5號', True),
        ('臺北市大安區市府路5號至9號', True),
        ('臺北市大安區市府路6號至9號', False),

        # 附號
        ('臺北市大安區市府路6號及以上附號', False),
        ('臺北市大安區市府路6號含附號以下', True),
        ('臺北市大安區市府路5號及以上附號', True),
        ('臺北市大安區市府路5號含附號', True),
        ('臺北市大安區市府路5附號全', False),
        ('臺北市大安區市府路5號含附號以下', True),
        ('臺北市大安區市府路4號及以上附號', True),
        ('臺北市大安區市府路4號含附號以下', False),

        # 單雙 x 以上, 至, 以下
        ('臺北市大安區市府路單5號以上', True),
        ('臺北市大安區市府路雙5號以上', False),
        ('臺北市大安區市府路單1號至5號', True),
        ('臺北市大安區市府路雙1號至5號', False),
        ('臺北市大安區市府路單5號至9號', True),
        ('臺北市大安區市府路雙5號至9號', False),
        ('臺北市大安區市府路單5號以下', True),
        ('臺北市大安區市府路雙5號以下', False),
    ]

    for rule_str, should_match in expectations:
        assert bool(Rule(rule_str).match(addr)) == should_match
Beispiel #13
0
    def find(self, addr_str):
        """Return the zipcode found for ``addr_str``, or ``u''`` when none is found.

        The address is tried at decreasing token counts. At each length the
        rule/zipcode pairs stored for that flattened prefix are checked first
        (the zipcode of the first rule that matches the address wins), then
        the gradual zipcode stored for the same prefix.
        """

        addr = Address(addr_str)
        len_addr_tokens = len(addr.tokens)

        # avoid unnecessary iteration
        # Shrink start_len until addr.parse(start_len - 1) yields (0, 0) or
        # start_len drops below zero.
        # NOTE(review): presumably (0, 0) means "token carries no number";
        # confirm against Address.parse.
        start_len = len_addr_tokens
        while start_len >= 0:
            if addr.parse(start_len - 1) == (0, 0):
                break
            start_len -= 1

        # i runs from start_len down to 1; nothing runs when start_len <= 0.
        for i in range(start_len, 0, -1):

            # The parameter name is reused for the flattened lookup key.
            addr_str = addr.flat(i)

            rzpairs = self.get_rule_str_zipcode_pairs(addr_str)

            # for handling insignificant tokens and redundant unit
            # NOTE(review): `in u'村里'` is a substring test, so a token whose
            # unit is the empty string also satisfies it; confirm that this
            # is intended.
            if (
                    # It only runs once, and must be the first iteration.
                    i == start_len and len_addr_tokens >= 4
                    and addr.tokens[2][Address.UNIT] in u'村里' and not rzpairs):

                if addr.tokens[3][Address.UNIT] == u'鄰':
                    # delete the insignificant token (whose unit is 鄰)
                    del addr.tokens[3]
                    len_addr_tokens -= 1

                if len_addr_tokens >= 4 and addr.tokens[3][
                        Address.UNIT] == u'號':
                    # empty the redundant unit in the token
                    addr.tokens[2] = (u'', u'', addr.tokens[2][Address.NAME],
                                      u'')
                else:
                    # delete insignificant token (whose unit is 村 or 里)
                    del addr.tokens[2]

                # Retry the precise lookup with the rewritten tokens; addr
                # stays mutated for the rest of this call.
                rzpairs = self.get_rule_str_zipcode_pairs(addr.flat(3))

            if rzpairs:
                for rule_str, zipcode in rzpairs:
                    if Rule(rule_str).match(addr):
                        return zipcode

            # The gradual lookup still uses the key computed at the top of the
            # iteration, i.e. before any token rewriting above.
            gzipcode = self.get_gradual_zipcode(addr_str)
            if gzipcode:
                return gzipcode

        return u''
Beispiel #14
0
    def part(rule_str):
        """Split a rule string into its rule tokens and the remaining address text.

        The string is normalized with ``Address.normalize``, then every
        ``Rule.RULE_TOKEN_RE`` match is removed from it and collected.

        Returns ``(rule_tokens, addr_str)``: the set of collected tokens and
        the text left after the substitution.
        """
        normalized = Address.normalize(rule_str)
        collected = set()

        def extract(found):
            matched = found.group()

            # 連 is stripped from the text but never recorded
            if matched == u'連':
                return u''

            if matched:
                collected.add(matched)

            # 附號全 is recorded and leaves 號 behind in the address text
            return u'號' if matched == u'附號全' else u''

        remaining = Rule.RULE_TOKEN_RE.sub(extract, normalized)
        return (collected, remaining)
Beispiel #15
0
    def part(rule_str):
        """Separate the rule tokens of ``rule_str`` from its address portion.

        Returns a ``(rule_tokens, addr_str)`` pair: ``rule_tokens`` is the set
        of ``Rule.RULE_TOKEN_RE`` matches that were recorded, ``addr_str`` is
        the normalized string with those matches substituted away.
        """
        seen_tokens = set()

        def extract(hit):
            text = hit.group()

            # every non-empty match is recorded, except 連
            if text and text != u'連':
                seen_tokens.add(text)

            # only 附號全 leaves a replacement (號) in the address text
            if text == u'附號全':
                return u'號'
            return u''

        cleaned = Address.normalize(rule_str)
        addr_part = Rule.RULE_TOKEN_RE.sub(extract, cleaned)

        return (seen_tokens, addr_part)
Beispiel #16
0
def test_address_init_subno():
    """A '1之1' house number ends up as '1' and '之1' inside a single token."""

    expected_tokens = [
        ('', '', '臺北', '市'),
        ('', '', '大安', '區'),
        ('', '', '市府', '路'),
        ('1', '之1', '', '號'),
    ]

    first = Address('臺北市大安區市府路1之1號')
    assert first.tokens == expected_tokens

    second = Address('臺北市大安區市府路1之1號')
    assert second.tokens == expected_tokens
Beispiel #17
0
def test_address_init_normalization_chinese_number():
    """``Address.normalize`` output for names and units written with Chinese numerals."""

    # (input, expected normalized text)
    pairs = [
        (u"八德路", u"八德路"),
        (u"三元街", u"三元街"),

        (u"三號", u"3號"),
        (u"十八號", u"18號"),
        (u"三十八號", u"38號"),

        (u"三段", u"3段"),
        (u"十八路", u"18路"),
        (u"三十八街", u"38街"),

        (u"信義路一段", u"信義路1段"),
        (u"敬業一路", u"敬業1路"),
        (u"愛富三街", u"愛富3街"),
    ]

    for raw, normalized in pairs:
        assert Address.normalize(raw) == normalized
Beispiel #18
0
def test_rule_match_subno():
    """Rules involving sub-numbered addresses, checked address by address."""

    # each entry: (rule string, [(address string, expected match), ...])
    scenarios = [
        ('臺北市,中正區,杭州南路1段,   14號含附號', [
            ('臺北市中正區杭州南路1段13號', False),
            ('臺北市中正區杭州南路1段13-1號', False),
            ('臺北市中正區杭州南路1段14號', True),
            ('臺北市中正區杭州南路1段14-1號', True),
            ('臺北市中正區杭州南路1段15號', False),
            ('臺北市中正區杭州南路1段15-1號', False),
        ]),
        ('臺北市,大同區,哈密街,   47附號全', [
            ('臺北市大同區哈密街46號', False),
            ('臺北市大同區哈密街46-1號', False),
            ('臺北市大同區哈密街47號', False),
            ('臺北市大同區哈密街47-1號', True),
            ('臺北市大同區哈密街48號', False),
            ('臺北市大同區哈密街48-1號', False),
        ]),
        ('臺北市,大同區,哈密街,雙  68巷至  70號含附號全', [
            ('臺北市大同區哈密街66號', False),
            ('臺北市大同區哈密街66-1巷', False),
            ('臺北市大同區哈密街67號', False),
            ('臺北市大同區哈密街67-1巷', False),
            ('臺北市大同區哈密街68巷', True),
            ('臺北市大同區哈密街68-1號', True),
            ('臺北市大同區哈密街69號', False),
            ('臺北市大同區哈密街69-1巷', False),
            ('臺北市大同區哈密街70號', True),
            ('臺北市大同區哈密街70-1號', True),
            ('臺北市大同區哈密街71號', False),
            ('臺北市大同區哈密街71-1號', False),
        ]),
        ('桃園縣,中壢市,普義,連  49號含附號以下', [
            ('桃園縣中壢市普義48號', True),
            ('桃園縣中壢市普義48-1號', True),
            ('桃園縣中壢市普義49號', True),
            ('桃園縣中壢市普義49-1號', True),
            ('桃園縣中壢市普義50號', False),
            ('桃園縣中壢市普義50-1號', False),
        ]),
        ('臺中市,西屯區,西屯路3段西平南巷,    2之   3號及以上附號', [
            ('臺中市西屯區西屯路3段西平南巷1號', False),
            ('臺中市西屯區西屯路3段西平南巷1-1號', False),
            ('臺中市西屯區西屯路3段西平南巷2號', False),
            ('臺中市西屯區西屯路3段西平南巷2-2號', False),
            ('臺中市西屯區西屯路3段西平南巷2-3號', True),
            ('臺中市西屯區西屯路3段西平南巷3號', True),
            ('臺中市西屯區西屯路3段西平南巷3-1號', True),
            ('臺中市西屯區西屯路3段西平南巷4號', True),
            ('臺中市西屯區西屯路3段西平南巷4-1號', True),
        ]),
    ]

    for rule_str, checks in scenarios:
        rule = Rule(rule_str)
        for addr_str, should_match in checks:
            assert bool(rule.match(Address(addr_str))) == should_match
Beispiel #19
0
 def __init__(self, addr_str):
     """Tokenize ``addr_str`` and keep the result on ``self.tokens``."""
     tokens = Address.tokenize(addr_str)
     self.tokens = tokens
Beispiel #20
0
 def tokenize(addr_str):
     """Normalize ``addr_str`` and return every ``Address.TOKEN_RE`` match in it."""
     normalized = Address.normalize(addr_str)
     return Address.TOKEN_RE.findall(normalized)
Beispiel #21
0
def test_address_repr():
    """``repr(Address)`` equals the expected text, also after an ``eval`` round trip."""

    expected_repr = "Address(u'\u81fa\u5317\u5e02\u5927\u5b89\u5340\u5e02\u5e9c\u8def1\u865f')"

    original = Address('臺北市大安區市府路1號')
    assert repr(original) == expected_repr

    # eval only ever receives the constant literal defined above
    round_tripped = eval(expected_repr)
    assert repr(round_tripped) == expected_repr
Beispiel #22
0
def test_address_init_normalization_chinese_number():
    """``Address.normalize`` converts Chinese numerals only where expected."""

    # names that merely contain a numeral character come back unchanged
    untouched = (u'八德路', u'三元街')
    for name in untouched:
        assert Address.normalize(name) == name

    converted = (
        # a numeral directly in front of 號
        (u'三號', u'3號'),
        (u'十八號', u'18號'),
        (u'三十八號', u'38號'),
        # a bare numeral in front of 段 / 路 / 街
        (u'三段', u'3段'),
        (u'十八路', u'18路'),
        (u'三十八街', u'38街'),
        # a numeral following a name
        (u'信義路一段', u'信義路1段'),
        (u'敬業一路', u'敬業1路'),
        (u'愛富三街', u'愛富3街'),
    )
    for raw, expected in converted:
        assert Address.normalize(raw) == expected
Beispiel #23
0
 def tokenize(addr_str):
     """Return the ``Address.TOKEN_RE`` matches found in the normalized ``addr_str``."""
     find_tokens = Address.TOKEN_RE.findall
     cleaned = Address.normalize(addr_str)
     return find_tokens(cleaned)
Beispiel #24
0
 def __init__(self, rule_str):
     """Split ``rule_str`` with ``Rule.part`` and initialize the address part.

     The rule tokens are kept on ``self.rule_tokens``; the remaining address
     text is handed to ``Address.__init__``.
     """
     parted = Rule.part(rule_str)
     self.rule_tokens, addr_str = parted
     Address.__init__(self, addr_str)
Beispiel #25
0
def test_rule_match_rule_all():
    """Rules ending in 全, each checked against addresses that hit and miss."""

    # Be careful of the 全! It will bite you!

    # each entry: (rule string, addresses that match, addresses that do not)
    scenarios = [
        (
            '臺北市,中正區,八德路1段,全',
            ['臺北市中正區八德路1段1號', '臺北市中正區八德路1段9號'],
            ['臺北市中正區八德路2段1號', '臺北市中正區八德路2段9號'],
        ),
        (
            '臺北市,中正區,三元街,單全',
            ['臺北市中正區三元街1號'],
            ['臺北市中正區三元街2號', '臺北市中正區大埔街1號'],
        ),
        (
            '臺北市,大同區,哈密街,   45巷全',
            ['臺北市大同區哈密街45巷1號', '臺北市大同區哈密街45巷9號'],
            ['臺北市大同區哈密街46巷1號', '臺北市大同區哈密街46巷9號'],
        ),
    ]

    for rule_str, hits, misses in scenarios:
        rule = Rule(rule_str)
        for addr_str in hits:
            assert rule.match(Address(addr_str))
        for addr_str in misses:
            assert not rule.match(Address(addr_str))
Beispiel #26
0
    #r = Rule('台北市信義區市府路10號以下')
    #print r.tokens

    #a = Address('市府路1號')
    #print a.tokens
    #print r.match(a)

    #a = Address('台北市信義區市府路1號')
    #print a.tokens
    #print r.match(a)

    r = Rule('新北市,中和區,景平路,雙  64號以下')
    print r.tokens

    a = Address('新北市景平路64巷13弄13號')
    print a.tokens
    print r.match(a)

########NEW FILE########
__FILENAME__ = zipcodetw_server
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import zipcodetw
from flask import Flask, render_template, request, jsonify

# Flask application object; the @app.route decorators below register on it.
app = Flask(__name__)


@app.route('/')
Beispiel #27
0
 def __init__(self, rule_str):
     """Build the rule: keep its rule tokens, then initialize it as an ``Address``.

     ``Rule.part`` supplies both the rule tokens and the address text that is
     passed on to ``Address.__init__``.
     """
     rule_tokens, addr_str = Rule.part(rule_str)
     self.rule_tokens = rule_tokens
     Address.__init__(self, addr_str)
Beispiel #28
0
def test_address_init_normalization_chinese_number():
    """Expected ``Address.normalize`` results for text containing Chinese numerals."""

    def normalizes_to(raw, expected):
        return Address.normalize(raw) == expected

    # names containing a numeral character stay as they are
    assert normalizes_to('八德路', '八德路')
    assert normalizes_to('三元街', '三元街')

    # numerals in front of 號
    assert normalizes_to('三號', '3號')
    assert normalizes_to('十八號', '18號')
    assert normalizes_to('三十八號', '38號')

    # bare numerals in front of 段 / 路 / 街
    assert normalizes_to('三段', '3段')
    assert normalizes_to('十八路', '18路')
    assert normalizes_to('三十八街', '38街')

    # numerals that follow a name
    assert normalizes_to('信義路一段', '信義路1段')
    assert normalizes_to('敬業一路', '敬業1路')
    assert normalizes_to('愛富三街', '愛富3街')
Beispiel #29
0
def test_address_init_subno():
    """A '1之1' house number yields the same tokens for u-prefixed and plain literals."""

    expected_tokens = [
        (u'', u'', u'臺北', u'市'),
        (u'', u'', u'大安', u'區'),
        (u'', u'', u'市府', u'路'),
        (u'1', u'之1', u'', u'號'),
    ]

    from_unicode_literal = Address(u'臺北市大安區市府路1之1號')
    assert from_unicode_literal.tokens == expected_tokens

    from_plain_literal = Address('臺北市大安區市府路1之1號')
    assert from_plain_literal.tokens == expected_tokens
Beispiel #30
0
def test_address_init():
    """A plain address tokenizes into the expected sequence of 4-tuples.

    The same address is given once as a u-prefixed literal and once as a
    plain literal; both must produce the same tokens.
    """

    expected_tokens = ((u'', u'', u'臺北', u'市'), (u'', u'', u'大安', u'區'),
                       (u'', u'', u'市府', u'路'), (u'1', u'', u'', u'號'))

    # A list never compares equal to a tuple, and tokens may be a list
    # (Address.tokenize returns the result of TOKEN_RE.findall), so convert
    # to a tuple before comparing. The check then holds for either container.
    assert tuple(Address(u'臺北市大安區市府路1號').tokens) == expected_tokens
    assert tuple(Address('臺北市大安區市府路1號').tokens) == expected_tokens
Beispiel #31
0
 def __init__(self, addr_str):
     """Store the tokens that ``Address.tokenize`` produces for ``addr_str``."""
     parsed_tokens = Address.tokenize(addr_str)
     self.tokens = parsed_tokens