Beispiel #1
0
def query(entity, attr):
    soup = To.get_html_baidu("http://baike.baidu.com/item/" + entity)
    basicInfo_block = soup.find(class_='basic-info cmn-clearfix')
    if basicInfo_block == None:
        # print 'info None'
        return attr + "::找不到"
    else:
        info = get_info(basicInfo_block)
        # for i in info:
        #     print i
        #     print info[i]
        # print '-----------'
        if info.has_key(attr.decode('utf8')):
            # print 'has key'+attr.decode('utf8')
            return info[attr.decode('utf8')]
        else:
            # print 'no key 进行同义词判断'
            # 同义词判断
            attr_list = T.load_baikeattr_name(
                os.path.dirname(os.path.split(os.path.realpath(__file__))[0]) +
                '/resources/Attribute_name.txt')
            attr = T.load_synonyms_word_inattr(
                attr,
                os.path.dirname(os.path.split(os.path.realpath(__file__))[0]) +
                '/resources/SynonDic.txt', attr_list)
            if info.has_key(attr.decode('utf8')):
                return info[attr.decode('utf8')]
            else:
                return attr + "::找不到"
Beispiel #2
0
def query(entity,attr):
    soup = To.get_html_baike("http://baike.baidu.com/item/"+entity)
    basicInfo_block = soup.find(class_= 'basic-info cmn-clearfix')
    if not basicInfo_block:
        return '找不到'
    else:
        info  = get_info(basicInfo_block)
        ##print(info)
        if info.get(attr):
            return info[attr].strip()
        else:
            attr_list = T.load_baikeattr_name(os.path.dirname(os.path.split(os.path.realpath(__file__))[0])+'/resources/Attribute_name.txt')
            attr = T.load_synonyms_word_inattr(attr,os.path.dirname(os.path.split(os.path.realpath(__file__))[0])+'/resources/SynonDic.txt',attr_list)
            if info.get(attr):
                return info[attr].strip()
            else:
                return '找不到'