예제 #1
0
def loadCommonExcel(hospitalALL, path):
    """Load hospitals from the first sheet of an Excel workbook.

    The workbook at ``path`` is opened with ``xlrd`` and its sheet names are
    printed. Row 0 of the first sheet is skipped; every other row becomes a
    ``Hospital`` whose fields are read from fixed columns:

    * column 0 -> ``oldName`` (stripped)
    * column 2 -> ``name`` (stripped)
    * column 3 -> ``area``
    * column 4 -> ``privince``
    * column 5 -> ``city``
    * column 6 -> ``address``
    * column 7 -> ``kind``
    * column 9 -> ``level``

    ``type`` is derived from keywords found in ``name``.

    Args:
        hospitalALL: Container supporting item assignment; each hospital is
            stored under its ``name``.
        path: Path of the workbook to read.
    """
    workbook = xlrd.open_workbook(path)
    sheet_names = workbook.sheet_names()
    print(sheet_names)
    sheet = workbook.sheet_by_name(sheet_names[0])

    # (keyword searched in the name, resulting type); the first match wins.
    type_keywords = (
        ('检验', '检验中心'),
        ('妇幼', '妇幼'),
        ('儿童', '儿童'),
    )

    # Start at 1 so that row 0 is never turned into a record.
    for row_idx in range(1, sheet.nrows):
        record = Hospital()
        record.oldName = sheet.cell(row_idx, 0).value.strip()
        record.name = sheet.cell(row_idx, 2).value.strip()
        record.privince = sheet.cell(row_idx, 4).value
        record.city = sheet.cell(row_idx, 5).value
        # District
        record.area = sheet.cell(row_idx, 3).value
        record.address = sheet.cell(row_idx, 6).value
        record.kind = sheet.cell(row_idx, 7).value
        record.level = sheet.cell(row_idx, 9).value

        for keyword, label in type_keywords:
            if keyword in record.name:
                record.type = label
                break
        else:
            # No keyword matched.
            record.type = '综合'

        hospitalALL[record.name] = record
예제 #2
0
def buildModel(province, allHospital):
    """Scrape the yixue.com hospital list page of ``province``.

    The page URL is built from ``province`` and printed, the page is fetched
    with a browser-like User-Agent header, and one ``Hospital`` is created per
    element matched by ``#mw-content-text ul li b a``. The address and level
    are read from the ``#mw-content-text ul li ul`` element at the same
    position. Each completed hospital is appended to ``allHospital`` and
    printed.

    If an ``IndexError`` occurs while reading an entry, the partially filled
    hospital is printed (but not appended) and processing stops.

    Args:
        province: Province name inserted into the page URL.
        allHospital: List that receives the scraped ``Hospital`` objects.
    """
    url = "http://www.yixue.com/" + province + "医院列表"
    print(url)

    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36'
    }
    # Pass the headers by keyword: the second positional parameter of
    # requests.get() is ``params``, so a positional dict would be sent as a
    # query string instead of as HTTP headers.
    html = requests.get(url, headers=headers).text
    soup = BeautifulSoup(html, "html.parser")

    # Run each selector once instead of once per loop iteration.
    name_links = soup.select('#mw-content-text ul li b a')
    detail_lists = soup.select('#mw-content-text ul li ul')

    hospital = None
    try:
        for i, link in enumerate(name_links):
            hospital = Hospital()
            hospital.name = link.contents[0]
            details = detail_lists[i]
            hospital.address = details.contents[0].contents[1].replace(
                ':', '').replace('\n', '')
            hospital.level = details.contents[2].contents[1].replace(
                ':', '').replace('\n', '')
            hospital.type = '医院'
            hospital.kind = ''
            allHospital.append(hospital)

            hospital.print()
    except IndexError:
        # The page layout did not match for this entry: show what was
        # collected so far and stop.
        if hospital is not None:
            hospital.print()
예제 #3
0
def loadCommonExcel(hospitalALL, path, code, name, addr, kind):
    """Append one ``Hospital`` per row of the workbook's first sheet.

    The workbook at ``path`` is opened with ``xlrd`` and its sheet names are
    printed. Every row (including row 0) is converted; the caller chooses
    which column feeds which field.

    Args:
        hospitalALL: List that receives the created ``Hospital`` objects.
        path: Path of the workbook to read.
        code: Column index of the hospital code, or ``-1`` to leave
            ``code`` unset.
        name: Column index of the hospital name (value is stripped).
        addr: Column index of the address (value is stripped).
        kind: Column index of the kind (value is stripped), or ``-1`` to
            leave ``kind`` unset.
    """
    workbook = xlrd.open_workbook(path)
    sheet_names = workbook.sheet_names()
    print(sheet_names)
    sheet = workbook.sheet_by_name(sheet_names[0])

    # -1 marks an optional column as absent.
    has_code = code != -1
    has_kind = kind != -1

    for row_idx in range(sheet.nrows):
        entry = Hospital()
        if has_code:
            entry.code = sheet.cell(row_idx, code).value
        entry.name = sheet.cell(row_idx, name).value.strip()
        entry.address = sheet.cell(row_idx, addr).value.strip()
        entry.type = '医院'
        if has_kind:
            entry.kind = sheet.cell(row_idx, kind).value.strip()
        hospitalALL.append(entry)
예제 #4
0
def baidu(key, hospitalNew, hospitalBLL):
    """Search Baidu for ``key`` and record the result as a ``Hospital``.

    The result page is fetched and parsed. The name comes from the first
    ``<h3>`` element (with the "_百度地图" suffix removed) and the address from
    the first ``.op-map-singlepoint-info-right`` element. Addresses longer
    than six characters are split with ``cpca.transform``; shorter ones are
    kept as they are. ``type`` is derived from keywords found in the name.

    On success the hospital is appended to ``hospitalNew``. If extracting or
    processing the result raises, a placeholder hospital (``name`` set to
    ``key``, every other field written here set to an empty string) is
    appended to ``hospitalBLL`` instead. Errors raised by the HTTP request
    itself are not caught and propagate to the caller.

    Args:
        key: Search keyword, e.g. "成县妇幼保健院".
        hospitalNew: List receiving successfully resolved hospitals.
        hospitalBLL: List receiving placeholder hospitals for failed lookups.
    """
    from urllib.parse import quote

    # Percent-encode the keyword so that reserved characters such as '&',
    # '#' or '+' cannot alter the query string.
    url = "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&wd=" + quote(key, safe='') + "&rsv_pq=c975914300115949&rsv_t=e7f3%2FJ8sovjmaqT%2B6p6ID4KVYbFRyG9dPQjqKtszA7eNO7jE0ynUBwuzYek&rqlang=cn&rsv_enter=1&rsv_dl=tb&rsv_sug3=3&rsv_sug1=3&rsv_sug7=101&rsv_sug2=0&inputT=2503&rsv_sug4=4616&rsv_sug=2&&usm=3&rsv_idx=2&rsv_page=1"
    # Request headers imitating a desktop browser.
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0"}
    params = {
        'kw': key
    }

    # GET the search page while presenting the browser-like headers.
    response = requests.get(url, headers=headers, params=params)
    response.encoding = "utf-8"  # Decode the page as UTF-8.

    # Debug output: page text, raw bytes, status code and response headers.
    html = response.text
    content = response.content
    print(html)
    print(content)
    print("response.status_code:", response.status_code)  # 200 means success
    print("headers:", response.headers)

    soup = BeautifulSoup(html, "lxml")
    # Result titles are expected in the <h3> elements.
    headings = soup.findAll("h3")
    print(headings)

    hospital = Hospital()
    hospital.onlyCode = key
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt and SystemExit are not swallowed.
    try:
        searchName = headings[0].text.replace("_百度地图", "").strip()
        searchAddress = soup.select(".op-map-singlepoint-info-right")[0].text
        print(searchName, searchAddress)

        hospital.name = searchName
        hospital.address = searchAddress
        # ``code`` keeps the full address text as returned by the search.
        hospital.code = searchAddress
        if len(searchAddress) > 6:
            # Split the address into its parts; columns are read by position.
            df = cpca.transform([searchAddress], cut=False)
            hospital.oldName = df.iat[0, 2]
            hospital.privince = df.iat[0, 0]
            hospital.city = df.iat[0, 1]
            hospital.address = df.iat[0, 3]
        else:
            # Too short to split: keep the address untouched.
            hospital.oldName = ''
            hospital.privince = ''
            hospital.city = ''

        if '检验' in hospital.name:
            hospital.type = '检验中心'
        elif '妇幼' in hospital.name:
            hospital.type = '妇幼'
        elif '儿童' in hospital.name:
            hospital.type = '儿童'
        else:
            hospital.type = '医院'
        hospital.level = ""
        hospitalNew.append(hospital)
    except Exception:
        # Lookup failed: record a placeholder carrying only the keyword.
        hospital.code = ""
        hospital.name = key
        hospital.oldName = ""
        hospital.privince = ""
        hospital.city = ""
        hospital.address = ""
        hospital.type = ""
        hospital.kind = ""
        hospital.level = ""
        hospital.onlyCode = ""
        hospitalBLL.append(hospital)
        print("eerrrorrr")