def setUp(self):
    """Run the base fixture setup, then store the fixture data.

    Every item of ``areas`` is put, every entry of ``excludeWords`` is
    registered in ``ExcludeWordCharCache``, and ``address`` is put last.
    """
    BaseTestCase.setUp(self)
    for fixtureArea in areas:
        fixtureArea.put()
    for excludeWord in excludeWords:
        ExcludeWordCharCache.put(excludeWord)
    address.put()
def _hasAreaName(cls, areaName, string): """ 判断在string中是否包含区域名称 """ position = string.find(areaName) if position >= 0: followString = string[position + len(areaName):] if ExcludeWordCharCache.isStartWith(followString): return cls._hasAreaName(areaName, followString) else: return True
def testLongest(self):
    # u"西街路口" must be reported as starting with an exclude word.
    # NOTE(review): presumably exercises longest-match selection; the word
    # fixture is not visible from here.
    matched = ExcludeWordCharCache.isStartWith(u"西街路口")
    self.assertTrue(matched)
def testOverlap(self):
    # u"中街" must be reported as starting with an exclude word.
    # NOTE(review): presumably covers fixture words that share characters;
    # confirm against the fixture.
    matched = ExcludeWordCharCache.isStartWith(u"中街")
    self.assertTrue(matched)
def testNoOverlap(self):
    # u"西路23号" must be reported as starting with an exclude word even
    # though more text follows the leading characters.
    matched = ExcludeWordCharCache.isStartWith(u"西路23号")
    self.assertTrue(matched)
def testMatchInMiddle(self):
    # u"上中路口" must NOT be reported as starting with an exclude word.
    # NOTE(review): presumably because the word only occurs past the first
    # character; confirm against the fixture.
    matched = ExcludeWordCharCache.isStartWith(u"上中路口")
    self.assertFalse(matched)
def testPartMatch(self):
    # The single character u"中" must NOT be reported as starting with an
    # exclude word.
    matched = ExcludeWordCharCache.isStartWith(u"中")
    self.assertFalse(matched)
def testEmptyReader(self):
    # An empty string must NOT be reported as starting with an exclude word.
    matched = ExcludeWordCharCache.isStartWith(u"")
    self.assertFalse(matched)
def clear(cls):
    """Remove everything: call ``cls.deleteAll()``, then clear the cache.

    The stored records are deleted first and ``ExcludeWordCharCache`` is
    cleared afterwards.
    """
    cls.deleteAll()
    ExcludeWordCharCache.clear()
def put(self):
    """Store this entity and register its word in ``ExcludeWordCharCache``.

    Returns:
        The value returned by ``db.Model.put`` so that callers written as
        ``key = entity.put()`` keep working; the previous override dropped
        it and always returned None.
        NOTE(review): presumably ``db`` is ``google.appengine.ext.db``, in
        which case this is the stored entity's Key - confirm.
    """
    key = db.Model.put(self)
    # Update the cache only after the store call has returned.
    ExcludeWordCharCache.put(self.word)
    return key
def setUp(self):
    """Run the base fixture setup, then load the exclude words.

    Every entry of ``excludeWords`` is registered in
    ``ExcludeWordCharCache``.
    """
    BaseTestCase.setUp(self)
    for excludeWord in excludeWords:
        ExcludeWordCharCache.put(excludeWord)
def testMatch(self):
    # Post a JSON body carrying the word u"南路" through the prepared
    # service, then expect the cache to report u"南路" as a match.
    service = self.prepareService(u'{"word":"南路"}')
    service.post()
    matched = ExcludeWordCharCache.isStartWith(u"南路")
    self.assertTrue(matched)
def parse(cls, address, parent=None):
    """Extract the areas named in the given address.

    The address is scanned from each start offset in turn. The first offset
    that yields at least one acceptable area, and whose match is not
    followed by an exclude word, determines the result.

    Args:
        address: Address text to analyse.
        parent: Optional area. When given, only matches for which
            ``cls._isChild(parent, area)`` is true are kept.

    Returns:
        The list of matched areas, or ``[]`` when no offset produced a
        match. In the latter case an ``Address(name=address)`` is stored and
        handed to ``UnparsedAddressSending.sendMail`` before returning.

    NOTE(review): the recursive child lookup below goes through this same
    method, so a child lookup that finds nothing also stores and mails the
    remaining substring as an unparsed address - confirm this is intended.
    """
    for i in range(len(address)):
        # Get the areas matching at this offset. If parent is given, a
        # matched area must be a subordinate area of parent.
        (areas, depth) = AreaCharCache.getMatchedAreas(address[i:])
        areas = [Area.getByCode(code) for code in areas]
        areas = [ area for area in areas if (not parent) or cls._isChild(parent, area) ]
        logging.debug("got areas[%s] for %s" % (",".join([area.name for area in areas]), address))
        if len(areas) > 0:
            # Offset of the text that follows the matched area name.
            followStart = i + depth
            # If the area is followed by a specific word such as "road",
            # ignore this match: e.g. "Hunan Road" must not be taken for
            # "Hunan Province".
            if ExcludeWordCharCache.isStartWith(address[followStart:]):
                continue
            # If an area has subordinate areas, keep looking for them in the
            # text that follows, and replace the parent areas with the
            # subordinate areas that were found.
            childrenAreas = []
            for area in areas:
                if area.hasChild:
                    children = cls.parse(address[followStart:], area)
                    if len(children) > 0:
                        childrenAreas.extend(children)
            if len(childrenAreas) > 0:
                areas = childrenAreas
            logging.debug("got children areas[%s] for %s" % (",".join([area.name for area in areas]), address))
            # If the result holds both a parent area and one of its
            # subordinate areas, drop the parent area.
            parentAreas = []
            for area in areas:
                parentAreas.extend(cls._getParents(area))
            for area in parentAreas:
                if area in areas:
                    areas.remove(area)
            logging.debug("areas[%s] after removed parents for %s" % (",".join([area.name for area in areas]), address))
            # When several candidates remain, look for their parent areas in
            # the following text; candidates whose parent is found there are
            # taken as the correct result.
            if len(areas) > 1:
                matchedParents = [ area for area in areas if cls._hasParent(area, address[followStart:]) ]
                if len(matchedParents) > 0:
                    areas = matchedParents
            logging.debug("areas[%s] after matched parents for %s" % (",".join([area.name for area in areas]), address))
            return areas
    # No offset produced a result: store the address and send it by mail.
    '''record unparsed address --- BENSON'''
    unparsedAddress = Address(name=address)
    unparsedAddress.put()
    UnparsedAddressSending.sendMail(unparsedAddress)
    return []