def get_res(url):
    """Fetch an article listing and record selected image links in the database.

    The JSON response of ``url`` is expected to expose ``data.items``; each
    item's ``context`` HTML is parsed and the ``src`` of every ``<img>`` that
    carries an ``alt`` attribute is inspected.  A row is inserted into
    ``huanjingdatadeal2_copy1`` when the image URL

    * contains ``"documents"``, or
    * contains ``http://www.encollege.cn/imageFile/hjxx/`` and a GET request
      for it does not answer with HTTP 200.

    Args:
        url: Address of the JSON article listing; it is also stored in the
            ``url`` column of every inserted row.
    """
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36",
    }

    res = requests.get(url, headers=headers).json()
    for item in res["data"]["items"]:
        tree = etree.HTML(item["context"])
        image_urls = tree.xpath("//img[@alt]/@src")
        # "created" is parsed as local time and stored as epoch milliseconds.
        created = int(
            time.mktime(time.strptime(item["created"],
                                      '%Y-%m-%d %H:%M:%S'))) * 1000
        for src in image_urls:
            print(src)
            if "documents" in src:
                print(src)
            elif "http://www.encollege.cn/imageFile/hjxx/" in src:
                # Only images that can no longer be downloaded are recorded.
                if requests.get(src, headers=headers).status_code == 200:
                    continue
            else:
                continue
            _insert_huanjing_row(
                (item["title"], url, item["id"], item["programaId"], created,
                 item["proParentID"], src, item["parentName"],
                 item["proName"]))


def _insert_huanjing_row(row):
    """Insert one 9-column row into huanjingdatadeal2_copy1, always releasing the connection."""
    sql = 'insert into huanjingdatadeal2_copy1 (title,url,id_id,programaId,created,proParentID,documentsurl,parentName1,proName2) values (%s,%s,%s,%s,%s,%s,%s,%s,%s)'
    conn = pool.connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute(sql, row)
            conn.commit()
            print("写入成功")
        finally:
            cur.close()
    finally:
        # Hand the connection back to the pool even when the insert fails.
        conn.close()
Beispiel #2
0
def get_road(dataiter):
    """Extract the road name from each row's address and store it in ``road``.

    For every row, column 6 is read as the address and column 0 as the row id.
    When the address contains "区" and one of the suffixes "路", "道" or "街"
    (checked in that priority order, only the first suffix present is used),
    the text between "区" and the suffix is written, with the suffix appended,
    to ``tb_tenxun_copy1.road`` for that id.

    Rows with an empty address, without a matching suffix, or where the
    suffix does not follow "区" are skipped.

    Args:
        dataiter: Iterable of row sequences (at least 7 columns each).
    """
    sql = 'UPDATE tb_tenxun_copy1 SET road=%s WHERE id=%s'
    for row in dataiter:
        row_id = row[0]
        addresses = row[6]
        if not addresses or "区" not in addresses:
            continue
        suffix = next((s for s in ("路", "道", "街") if s in addresses), None)
        if suffix is None:
            continue
        road = re.findall("区(.*?)" + suffix, addresses)
        print(addresses)
        print(road)
        if not road:
            # The suffix occurs only before "区"; there is nothing to store.
            continue
        print(row_id, row[3], row[4])
        conn2 = pool.connection()
        try:
            cur2 = conn2.cursor()
            try:
                # Parameterised query: the address text may contain quotes.
                cur2.execute(sql, ("{}{}".format(road[0], suffix), row_id))
                conn2.commit()
                print("提交成功")
            finally:
                cur2.close()
        finally:
            conn2.close()
def save_sql(data_processing_list, poi_id):
    """Insert scraped places into ``tb_tenxun`` and flag the POI as processed.

    Each item is inserted on its own connection; a failure for one item is
    printed and does not stop the remaining items.  Only when every item was
    inserted without error is ``tenxun_poi.isspider`` set to 1 for ``poi_id``.

    Args:
        data_processing_list: Iterable of dicts providing ``ad_info``
            (``province``, ``district``, ``city``), ``address``, ``title``,
            ``location`` (``lng``, ``lat``) and ``category``.
        poi_id: Id of the ``tenxun_poi`` row to mark as crawled.
    """
    insert_sql = 'insert into tb_tenxun (province,area,town,addresses,unit,longitude,latitude,cType) values (%s,%s,%s,%s,%s,%s,%s,%s)'
    all_saved = True
    for item in data_processing_list:
        try:
            print("===============i===========")
            print(item)
            # NOTE(review): "area" receives the district and "town" the city,
            # exactly as before - confirm this column mapping is intended.
            params = (item["ad_info"]["province"],
                      item["ad_info"]["district"], item["ad_info"]["city"],
                      item["address"], item["title"],
                      item["location"]["lng"], item["location"]["lat"],
                      item["category"])
            conn = pool.connection()
            try:
                cur = conn.cursor()
                try:
                    # Parameterised so quotes in scraped text cannot break
                    # (or inject into) the statement.
                    cur.execute(insert_sql, params)
                    conn.commit()
                    print("插入成功")
                finally:
                    cur.close()
            finally:
                conn.close()
        except Exception as e:
            # Best effort: report the failure and keep going with the rest.
            all_saved = False
            print(e)
            print("提交失败")

    if not all_saved:
        print("不修改此条数据")
        return

    conn = pool.connection()
    try:
        cur = conn.cursor()
        try:
            cur.execute('UPDATE tenxun_poi SET isspider=1 WHERE id=%s',
                        (poi_id, ))
            conn.commit()
            print("修改成功")
        finally:
            cur.close()
    finally:
        conn.close()
    print("改写id成功")
Beispiel #4
0
def get_hao_l(dataiter):
    """Extract the house number ("号") from each address and store it.

    Column 0 of a row is its id and column 6 its address.  Two address shapes
    are handled:

    * a single number - the address contains "号" and none of "-", "一",
      "、", "~": the digits before "号" are stored as ``"<n>号"``;
    * a range - the address contains "-": ``"<a>-<b>号"`` is stored when the
      pattern matches, the address contains none of "N", "L", "G", "F", and
      ``a < b``.

    The value is written to ``tb_tenxun_copy1.doorplate``.  Rows with an empty
    address or no match are skipped.

    Args:
        dataiter: Iterable of row sequences (at least 7 columns each).
    """
    for row in dataiter:
        row_id = row[0]
        addresses = row[6]
        if not addresses:
            continue
        if "号" in addresses and not any(
                mark in addresses for mark in ("-", "一", "、", "~")):
            doorplate = re.findall(r"上海市.*?(\d+)号", addresses)
            if doorplate:
                _set_doorplate(row_id, "{}号".format(doorplate[0]))
        elif "-" in addresses:
            doorplate = re.findall(r"上海市.*?(\d+)-(\d+)号", addresses)
            if not doorplate or any(
                    flag in addresses for flag in ("N", "L", "G", "F")):
                continue
            low, high = int(doorplate[0][0]), int(doorplate[0][1])
            if low < high:
                print(low)
                print(high)
                _set_doorplate(row_id, "{}-{}号".format(low, high))
                print(addresses)
                print(doorplate)


def _set_doorplate(row_id, doorplate):
    """Write ``doorplate`` for ``row_id`` in tb_tenxun_copy1, always releasing the connection."""
    conn2 = pool.connection()
    try:
        cur2 = conn2.cursor()
        try:
            cur2.execute(
                'UPDATE tb_tenxun_copy1 SET doorplate=%s WHERE id=%s',
                (doorplate, row_id))
            conn2.commit()
        finally:
            cur2.close()
    finally:
        conn2.close()
def select_poi_sql():
    """Return an iterator over the ``(id, poi)`` rows not yet crawled.

    Runs ``select id,poi from tenxun_poi where isspider=0`` on a pooled
    connection.

    Returns:
        An iterator over the fetched rows, or ``None`` when the query (or
        acquiring the connection) failed; the error is printed in that case.
    """
    conn = None
    cur = None
    try:
        conn = pool.connection()
        cur = conn.cursor()
        cur.execute("select id,poi from tenxun_poi where isspider=0")
        return iter(cur.fetchall())
    except Exception as e:
        print(e)
        return None
    finally:
        # Either handle may be missing if the failure happened before it was
        # created; closing unconditionally would mask the original error.
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()
Beispiel #6
0
def get_lane(dataiter):
    """Extract the lane number ("弄") from each address and store it.

    Column 0 of a row is its id and column 6 its address.  When the address
    contains "弄" preceded by digits, ``"<digits>弄"`` (first match) is written
    to ``tb_tenxun_copy1.lane`` for that id.  Rows with an empty address or
    without a match are skipped.

    Args:
        dataiter: Iterable of row sequences (at least 7 columns each).
    """
    sql = 'UPDATE tb_tenxun_copy1 SET lane=%s WHERE id=%s'
    for row in dataiter:
        row_id = row[0]
        addresses = row[6]
        if not addresses or "弄" not in addresses:
            continue
        lane = re.findall(r".*?(\d+)弄", addresses)
        print(addresses)
        print(lane)
        if not lane:
            continue
        print(row_id, row[3], row[4])
        conn2 = pool.connection()
        try:
            cur2 = conn2.cursor()
            try:
                cur2.execute(sql, ("{}弄".format(lane[0]), row_id))
                conn2.commit()
                print("提交成功")
            finally:
                cur2.close()
        finally:
            conn2.close()
Beispiel #7
0
def get_address(dataiter):
    """Compose and store a full address for every row that has road and doorplate.

    For each row, column 5 is read as the road and column 8 as the doorplate.
    When both are truthy, the concatenation of column 3, the road, column 6
    and the doorplate is written to ``tb_tenxun_copy1.address`` for the id in
    column 0.  Every row is printed as it is visited.

    Args:
        dataiter: Iterable of row sequences (at least 9 columns each).
    """
    sql = 'UPDATE tb_tenxun_copy1 SET address=%s WHERE id=%s'
    for row in dataiter:
        row_id = row[0]
        road = row[5]
        addresses = row[6]
        doorplate = row[8]
        print(row)
        if not (road and doorplate):
            continue
        # NOTE(review): column 3 is presumably the district/area - confirm
        # against the table schema.
        full_address = "{}{}{}{}".format(row[3], road, addresses, doorplate)
        conn2 = pool.connection()
        try:
            cur2 = conn2.cursor()
            try:
                cur2.execute(sql, (full_address, row_id))
                conn2.commit()
                print("提交成功")
            finally:
                cur2.close()
        finally:
            conn2.close()
Beispiel #8
0
def get_village(dataiter):
    """Extract the town name ("镇") from column 16 of each row and store it.

    When column 16 contains "镇", the text before the first "镇" is written,
    with "镇" appended, to ``tb_tenxun_copy1.village`` for the id in column 0.
    Rows whose column 16 is empty/NULL or does not contain "镇" are skipped.

    Args:
        dataiter: Iterable of row sequences (at least 17 columns each).
    """
    sql = 'UPDATE tb_tenxun_copy1 SET village=%s WHERE id=%s'
    for row in dataiter:
        row_id = row[0]
        road = row[16]
        # A NULL column arrives as None; ``"镇" in None`` would raise.
        if not road or "镇" not in road:
            continue
        village = re.findall("(.*?)镇", road)
        print(road)
        print(village)
        if not village:
            continue
        conn2 = pool.connection()
        try:
            cur2 = conn2.cursor()
            try:
                cur2.execute(sql, ("{}镇".format(village[0]), row_id))
                conn2.commit()
                print("提交成功")
            finally:
                cur2.close()
        finally:
            conn2.close()
Beispiel #9
0
def get_res_and_save(parms_list):
    """Query the article API once per parameter set and store document image links.

    For every entry of ``parms_list`` the function sleeps 3 seconds, requests
    ``https://www.encollege.cn/gwapi/article/find`` with the entry as query
    parameters, and walks ``data.items`` of the JSON response.  Each item's
    ``context`` HTML is parsed; every ``<img>`` with an ``alt`` attribute whose
    ``src`` contains ``/documents/<number>/<number>`` produces one row in
    ``hjxx_home_copy1``.  Images without "documents" in their URL, or whose
    URL lacks the two numeric path segments, are ignored.

    Args:
        parms_list: Iterable of query-parameter mappings accepted by
            ``requests.get(..., params=...)``.
    """
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36",
    }
    url = "https://www.encollege.cn/gwapi/article/find"
    sql = 'insert into hjxx_home_copy1 (title,url,id_id,programaId,created,proParentID,documentsurl,parentName1,proName2,firstnum,secondnum,timeunix,timeunixlocal,createdlocal) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
    for params in parms_list:
        # Throttle requests to the remote API.
        time.sleep(3)
        res = requests.get(url, headers=headers, params=params).json()
        for item in res["data"]["items"]:
            tree = etree.HTML(str(item["context"]))
            image_urls = tree.xpath("//img[@alt]/@src")
            createdlocal = item["created"]
            # "created" is parsed as local time and stored as epoch ms.
            created = int(
                time.mktime(time.strptime(createdlocal,
                                          '%Y-%m-%d %H:%M:%S'))) * 1000
            for src in image_urls:
                print(src)
                if "documents" not in src:
                    continue
                ids = re.search(r"/documents/(\d+)/(\d+)", src)
                if ids is None:
                    # "documents" appears in the URL but not in the expected
                    # /documents/<n>/<n> form; nothing to record.
                    continue
                firstnum, secondnum = ids.groups()
                print(src)
                if "t=" in src:
                    # The "t" query value is a millisecond timestamp.
                    tunix = src.split("t=")[1]
                    tunixlocal = time.strftime(
                        "%Y-%m-%d %H:%M:%S",
                        time.localtime(int(int(tunix) / 1000)))
                else:
                    tunix = None
                    tunixlocal = None
                conn = pool.connection()
                try:
                    cur = conn.cursor()
                    try:
                        cur.execute(
                            sql,
                            (item["title"], url, item["id"],
                             item["programaId"], created,
                             item["proParentID"], src, item["parentName"],
                             item["proName"], firstnum, secondnum, tunix,
                             tunixlocal, createdlocal))
                        conn.commit()
                        print("写入成功")
                    finally:
                        cur.close()
                finally:
                    # Return the connection to the pool even on failure.
                    conn.close()
Beispiel #10
0
        print(i)
        if road and doorplate:
            conn2 = pool.connection()
            cur2 = conn2.cursor()
            # print(id, i[3],i[4])
            sql1 = 'UPDATE tb_tenxun_copy1 SET address="%s" WHERE id="%s"'
            cur2.execute(sql1 %
                         ("{}{}{}{}".format(i[3], road, i[6], doorplate), id))
            conn2.commit()
            print("提交成功")
            # print(i[0],i[3],i[1])
            cur2.close()
            conn2.close()


# Load every row of tb_tenxun_copy1 once at module level; the result is kept
# in the module-level names ``data`` / ``dataiter``.
conn = pool.connection()  # From now on, call connection() whenever a database connection is needed
cur1 = conn.cursor()
SQL = 'select * from tb_tenxun_copy1'
cur1.execute(SQL)
data = cur1.fetchall()
# ``dataiter`` is just another name for the fetched rows, not a separate iterator.
dataiter = data
cur1.close()
conn.close()
# Extract the house number ("号")
# get_hao_l(dataiter)
# Extract the road ("路")
# get_road(dataiter)
# Extract the lane ("弄")
# get_lane(dataiter)
# Extract the town ("镇")
# get_village(dataiter)
Beispiel #11
0
import requests
from lxml import etree
from tools.sqlconn import pool
# Scrape the POI category names from the Tencent LBS appendix page and store
# each one as a row of tenxun_poi.
url = "https://lbs.qq.com/service/webService/webServiceGuide/webServiceAppendix"
headers = {
    "user-agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36"
}
res = requests.get(url, headers=headers)

etreepoi = etree.HTML(res.text)
# Text of the third cell of every table row in the article body.
poilist = etreepoi.xpath(
    '//*[@id="__layout"]/div/div[2]/div/div[2]/div/div/article/div/div/table/tbody/tr/td[3]/text()'
)
print(poilist)
# Parameterised statement: the scraped text may contain quotes.
sql1 = 'insert into tenxun_poi (poi) values (%s)'
for i in poilist:
    conn2 = pool.connection()
    try:
        cur2 = conn2.cursor()
        try:
            # str() turns lxml's string subclass into a plain str for the driver.
            cur2.execute(sql1, (str(i), ))
            conn2.commit()
            print("提交成功")
        finally:
            cur2.close()
    finally:
        # Return the connection to the pool even when the insert fails.
        conn2.close()