Example #1
0
def _car_brand(doc, cardict):
    '''
    车辆品牌规整化
    '''
    src = doc['car_brand']
    title = doc['car_title']
    brand = doc['car_brand']
    tar = u"其它"
    gc={"series":"","brand":""}
    if src:
        if len(src)>20:
            src=src[:20]
        src = ''.join(src.split())
        src = cardict.washWord(src)

        if cardict.has_brand(src):
            gc['brand'] = src
            return gc

        values = cardict.findBrandByPinyin(Chinese.pinyin(src))
        if values:
            gc['brand'] = values[0]
            return gc
        
        fenci = segmentRequest(src)
        gc = guess_car(fenci,cardict)
        if gc is not None and gc["brand"]!="":  
            if not cardict.has_brand(gc["brand"]):
                gc['brand']=""
        return gc       
             
    return gc
Example #2
0
def _car_series(doc,CarDict,):
    '''
    车辆系列规整化
    '''
    src = doc['car_series']
    title = doc['car_title']
    brand = doc['car_brand']
    tar = u"其它"
    gc={"series":"","brand":""}
    if src is not None and src!='':
        src = ''.join(src.split())
        if len(src)>20:
            src=src[:20]
        src = CarDict.washWord(src)
        if CarDict.has_series(src):
            gc["series"]= src
            gc["brand"]=CarDict.get_brand_by_series(src)
            return gc
        
        srcre=src.replace(doc['car_brand'],"").strip()
        if CarDict.has_series(srcre):
            gc["series"] = srcre
            gc["brand"]=CarDict.get_brand_by_series(srcre)
            return gc

#        if CarDict.getCarSpecification()['series_synonyms'].has_key(src):
#            gc['series'] = CarDict.getCarSpecification()['series_synonyms'][src]
#            gc["brand"]=CarDict.getCarSpecification()['series'][gc['series']]["brand"]
#            return gc
        
        values=CarDict.findSeriesByPinyin(Chinese.pinyin(src))
        if values and len(values)>0:
            gc["series"] = values[0]
            gc["brand"]=CarDict.get_brand_by_series(values[0])
            return gc
        
        fenci=segmentRequest(src)
        gc = guess_car(fenci,CarDict,doc['car_brand'])
        if gc is not None and gc["brand"]!="":  
            if not CarDict.has_brand(gc["brand"]):
                gc['brand']=""
    return gc