def put(self, head_addr_str, tail_rule_str, zipcode):
    """Index one (address head, rule tail, zipcode) entry.

    The flattened head address is stored through ``put_precise`` together
    with the full rule string (head + tail). Every contiguous run of the
    head's tokens is then stored through ``put_gradual``; when the head has
    at least three tokens, the ``pick_to_flat(0, 2)`` combination is stored
    as well.

    Args:
        head_addr_str: Address part of the entry; parsed with ``Address``.
        tail_rule_str: Rule text appended to ``head_addr_str``.
        zipcode: Zipcode recorded for every key written here.
    """
    address = Address(head_addr_str)

    # (a, b, c)
    precise_key = address.flat()
    rule_str = head_addr_str + tail_rule_str
    self.put_precise(precise_key, rule_str, zipcode)

    # (a, b, c) -> (a,); (a, b); (a, b, c); (b,); (b, c); (c,)
    token_count = len(address)
    for start in range(token_count):
        # ``stop`` is the exclusive end handed to ``flat``.
        for stop in range(start + 1, token_count + 1):
            self.put_gradual(address.flat(start, stop), zipcode)

    if token_count < 3:
        return

    # (a, b, c, d) -> (a, c)
    self.put_gradual(address.pick_to_flat(0, 2), zipcode)
def test_address_flat():
    """Check ``Address.flat`` with positive, negative and no arguments."""
    addr = Address('臺北市大安區市府路1之1號')

    # (positive argument, negative argument asserted to give the same
    # result, expected flattened text)
    expectations = (
        (1, -3, '臺北市'),
        (2, -2, '臺北市大安區'),
        (3, -1, '臺北市大安區市府路'),
    )
    for positive, negative, expected in expectations:
        assert addr.flat(positive) == addr.flat(negative) == expected

    # Without arguments the whole address is returned.
    assert addr.flat() == '臺北市大安區市府路1之1號'
def test_address_flat():
    """Check ``Address.flat`` with positive, negative and no arguments."""
    addr = Address('臺北市大安區市府路1之1號')

    expected_prefixes = [
        u'臺北市',
        u'臺北市大安區',
        u'臺北市大安區市府路',
    ]
    # Pairs checked: (1, -3), (2, -2), (3, -1).
    for count, expected in enumerate(expected_prefixes, 1):
        assert addr.flat(count) == addr.flat(count - 4) == expected

    # Without arguments the whole address is returned.
    assert addr.flat() == u'臺北市大安區市府路1之1號'
def test_address_flat():
    """Check ``Address.flat`` with positive, negative and no arguments."""
    addr = Address("臺北市大安區市府路1之1號")

    from_front = (1, 2, 3)
    from_back = (-3, -2, -1)
    prefixes = (
        u"臺北市",
        u"臺北市大安區",
        u"臺北市大安區市府路",
    )
    # Each positive/negative pair is asserted to produce the same prefix.
    for front, back, prefix in zip(from_front, from_back, prefixes):
        assert addr.flat(front) == addr.flat(back) == prefix

    # Without arguments the whole address is returned.
    assert addr.flat() == u"臺北市大安區市府路1之1號"
def find(self, addr_str):
    """Look up the zipcode for an address string.

    The address is parsed with ``Address`` and progressively shorter token
    prefixes are tried. For each prefix the stored (rule, zipcode) pairs are
    checked first and the first rule that matches the address wins;
    otherwise the gradual zipcode stored for that prefix is returned if it
    is truthy.

    Args:
        addr_str: The address text to look up.

    Returns:
        The matching zipcode, or ``u''`` when no prefix yields one.
    """
    address = Address(addr_str)
    token_count = len(address.tokens)

    # avoid unnecessary iteration: step back until ``parse`` of the
    # preceding index reports (0, 0)
    longest = token_count
    while longest >= 0 and not (address.parse(longest - 1) == (0, 0)):
        longest -= 1

    for prefix_len in range(longest, 0, -1):
        # Computed before any token clean-up below; the gradual lookup at
        # the end of the iteration uses this same string.
        prefix = address.flat(prefix_len)
        candidates = self.get_rule_str_zipcode_pairs(prefix)

        # for handling insignificant tokens and redundant unit
        # It only runs once, and must be the first iteration.
        # NOTE(review): ``in u'村里'`` is a substring test, so an empty unit
        # string would also satisfy it -- confirm that is intended.
        needs_cleanup = (
            prefix_len == longest
            and token_count >= 4
            and address.tokens[2][Address.UNIT] in u'村里'
            and not candidates
        )
        if needs_cleanup:
            if address.tokens[3][Address.UNIT] == u'鄰':
                # delete the insignificant token (whose unit is 鄰)
                del address.tokens[3]
                token_count -= 1

            followed_by_number = (
                token_count >= 4
                and address.tokens[3][Address.UNIT] == u'號'
            )
            if followed_by_number:
                # empty the redundant unit in the token
                kept_name = address.tokens[2][Address.NAME]
                address.tokens[2] = (u'', u'', kept_name, u'')
            else:
                # delete insignificant token (whose unit is 村 or 里)
                del address.tokens[2]

            candidates = self.get_rule_str_zipcode_pairs(address.flat(3))

        if candidates:
            for rule_str, zipcode in candidates:
                if Rule(rule_str).match(address):
                    return zipcode

        gradual = self.get_gradual_zipcode(prefix)
        if gradual:
            return gradual

    return u''
def find(self, addr_str):
    """Return the zipcode for ``addr_str``, or ``u''`` if none is found.

    Token prefixes of the parsed address are tried from longest to
    shortest. At each length the precise (rule, zipcode) pairs are consulted
    first, returning the zipcode of the first rule that matches the address;
    failing that, a truthy gradual zipcode for the prefix is returned.

    Args:
        addr_str: The address text to look up.

    Returns:
        The zipcode found, or ``u''`` when every prefix fails.
    """
    parsed = Address(addr_str)
    n_tokens = len(parsed.tokens)

    # avoid unnecessary iteration
    first_len = n_tokens
    while first_len >= 0:
        if parsed.parse(first_len - 1) == (0, 0):
            break
        first_len -= 1

    for length in range(first_len, 0, -1):
        # Built before the token clean-up below and reused for the gradual
        # lookup at the end of this iteration.
        key = parsed.flat(length)
        pairs = self.get_rule_str_zipcode_pairs(key)

        # for handling insignificant tokens and redundant unit
        # It only runs once, and must be the first iteration.
        # NOTE(review): ``in u'村里'`` is a substring test, so an empty unit
        # string would also satisfy it -- confirm that is intended.
        is_first_pass = length == first_len
        if (is_first_pass
                and n_tokens >= 4
                and parsed.tokens[2][Address.UNIT] in u'村里'
                and not pairs):

            if parsed.tokens[3][Address.UNIT] == u'鄰':
                # delete the insignificant token (whose unit is 鄰)
                del parsed.tokens[3]
                n_tokens -= 1

            if n_tokens >= 4 and parsed.tokens[3][Address.UNIT] == u'號':
                # empty the redundant unit in the token
                name_only = (u'', u'', parsed.tokens[2][Address.NAME], u'')
                parsed.tokens[2] = name_only
            else:
                # delete insignificant token (whose unit is 村 or 里)
                del parsed.tokens[2]

            pairs = self.get_rule_str_zipcode_pairs(parsed.flat(3))

        if pairs:
            for rule_text, code in pairs:
                rule = Rule(rule_text)
                if rule.match(parsed):
                    return code

        fallback = self.get_gradual_zipcode(key)
        if fallback:
            return fallback

    return u''
def put(self, head_addr_str, tail_rule_str, zipcode):
    """Store one entry under its precise key and all of its gradual keys.

    ``put_precise`` receives the flattened head address, the concatenated
    rule string (head + tail) and the zipcode. ``put_gradual`` is then
    called for each contiguous slice of the head's tokens and, when there
    are three or more tokens, once more for ``pick_to_flat(0, 2)``.

    Args:
        head_addr_str: Address part of the entry; parsed with ``Address``.
        tail_rule_str: Rule text appended to ``head_addr_str``.
        zipcode: Zipcode recorded for every key written here.
    """
    parsed = Address(head_addr_str)

    # (a, b, c)
    self.put_precise(
        parsed.flat(),
        head_addr_str + tail_rule_str,
        zipcode,
    )

    # (a, b, c) -> (a,); (a, b); (a, b, c); (b,); (b, c); (c,)
    n_tokens = len(parsed)
    for first in range(n_tokens):
        for last in range(first, n_tokens):
            # ``flat`` takes an exclusive end, hence ``last + 1``.
            gradual_key = parsed.flat(first, last + 1)
            self.put_gradual(gradual_key, zipcode)

    has_skippable_middle = n_tokens >= 3
    if has_skippable_middle:
        # (a, b, c, d) -> (a, c)
        self.put_gradual(parsed.pick_to_flat(0, 2), zipcode)