def _car_brand(doc, cardict): ''' 车辆品牌规整化 ''' src = doc['car_brand'] title = doc['car_title'] brand = doc['car_brand'] tar = u"其它" gc={"series":"","brand":""} if src: if len(src)>20: src=src[:20] src = ''.join(src.split()) src = cardict.washWord(src) if cardict.has_brand(src): gc['brand'] = src return gc values = cardict.findBrandByPinyin(Chinese.pinyin(src)) if values: gc['brand'] = values[0] return gc fenci = segmentRequest(src) gc = guess_car(fenci,cardict) if gc is not None and gc["brand"]!="": if not cardict.has_brand(gc["brand"]): gc['brand']="" return gc return gc
def _car_series(doc,CarDict,): ''' 车辆系列规整化 ''' src = doc['car_series'] title = doc['car_title'] brand = doc['car_brand'] tar = u"其它" gc={"series":"","brand":""} if src is not None and src!='': src = ''.join(src.split()) if len(src)>20: src=src[:20] src = CarDict.washWord(src) if CarDict.has_series(src): gc["series"]= src gc["brand"]=CarDict.get_brand_by_series(src) return gc srcre=src.replace(doc['car_brand'],"").strip() if CarDict.has_series(srcre): gc["series"] = srcre gc["brand"]=CarDict.get_brand_by_series(srcre) return gc # if CarDict.getCarSpecification()['series_synonyms'].has_key(src): # gc['series'] = CarDict.getCarSpecification()['series_synonyms'][src] # gc["brand"]=CarDict.getCarSpecification()['series'][gc['series']]["brand"] # return gc values=CarDict.findSeriesByPinyin(Chinese.pinyin(src)) if values and len(values)>0: gc["series"] = values[0] gc["brand"]=CarDict.get_brand_by_series(values[0]) return gc fenci=segmentRequest(src) gc = guess_car(fenci,CarDict,doc['car_brand']) if gc is not None and gc["brand"]!="": if not CarDict.has_brand(gc["brand"]): gc['brand']="" return gc