コード例 #1
0
def parse_detail(lianjia_id, html):
    """Build a ``Community`` from the info items found in detail-page HTML.

    Each ``<div class="xiaoquInfoItem">`` is expected to contain exactly two
    ``<span>`` elements: a label followed by its value. Labels listed in the
    mapping below are stored on the returned object under the mapped
    attribute name; any other item is reported with ``print`` and skipped.

    Args:
        lianjia_id: Identifier forwarded to ``Community(lianjia_id=...)``.
        html: Markup to parse (handed to BeautifulSoup's ``html.parser``).

    Returns:
        The ``Community`` instance with every recognised attribute set.
        ``building_num`` and ``house_num`` are passed through ``str2num``;
        the other values are stored as stripped strings.
    """
    community = Community(lianjia_id=lianjia_id)
    # Label text shown on the page -> attribute name on the Community object.
    item_types = {
        '建筑类型': 'building_type',
        '物业费用': 'property_fee',
        '物业公司': 'property_company',
        '开发商': 'develop_company',
        '楼栋总数': 'building_num',
        '房屋总数': 'house_num',
    }
    # Attributes whose raw text is converted with str2num before storing.
    numeric_fields = ('building_num', 'house_num')

    bs = BeautifulSoup(html, 'html.parser')
    # attrs has to be a mapping of attribute name -> required value; a set
    # literal here would not express "class equals xiaoquInfoItem".
    info_items = bs.find_all('div', attrs={'class': 'xiaoquInfoItem'})
    for item in info_items:
        span_list = item.find_all('span')

        pro_name = None
        if len(span_list) == 2:
            # .get() yields None for unknown labels (and for a label span
            # whose .string is None), which routes them to the error branch.
            pro_name = item_types.get(span_list[0].string)
        if pro_name is None:
            print('解析小区基本信息异常,原始html:[%s]' % span_list)
            continue

        value_span = span_list[-1]
        raw_value = value_span.string
        if raw_value is None:
            # .string is None when the span holds nested markup; fall back to
            # the concatenated text instead of failing on None.strip().
            raw_value = value_span.get_text()
        pro_value = raw_value.strip()

        if pro_name in numeric_fields:
            pro_value = str2num(pro_value)
        setattr(community, pro_name, pro_value)
    return community