Beispiel #1
0
    "请输入爬取场景类型编号(回车查询全部):\n['0-汽车服务', '1-汽车销售', '2-汽车维修', '3-摩托车服务', '4-餐饮服务', '5-购物服务', \n "
    "'6-生活服务', '7-体育休闲服务', '8-医疗保健服务', '9-住宿服务', '10-风景名胜', '11-商务住宅', '12-政府机构及社会团体', \n "
    "'13-科教文化服务', '14-交通设施服务', '15-金融保险服务', '16-公司企业', '17-道路附属设施', '18-公共设施', '19-地名地址信息']\n"
)

# Resolve the selected city name and its ID from the index the user entered.
city, city_id = city_list[int(cityIndex)], city_id_list[int(cityIndex)]

# An empty type index means "all POI types": no extra SQL filter is added.
if typeIndex == '':
    poi_type = "全部"
    sqlparam = ""
else:
    poi_type = poi_type_list[int(typeIndex)]
    sqlparam = " and ptype = '" + poi_type + "'"
print("".join(["查询", city, "的", poi_type, "数据"]))

# Open the Oracle connection used for every query below.
ora = cxOracle(dbname, dbpass, dbaddr)

# Announce the clean-up of rows that were crawled for IDs recorded in the
# exception table. The DELETE statements themselves are currently disabled
# (commented out), so only the message is printed.
print("删除异常表中的ID爬取的数据")
#ora.Exec("DELETE PY_POI_POINT WHERE CITY='"+city_id+"' "+sqlparam+" AND ID IN(SELECT ID FROM PY_POI_EXCEPTION WHERE CITY ='"+city_id+"' "+sqlparam+") ")
#ora.Exec("DELETE PY_POI_EXCEPTION WHERE CITY ='"+city_id+"' "+sqlparam+" ")

# Select the POIs of this city (and type, if one was chosen) that have no
# rows in PY_POI_POINT yet and have not been marked as processed (ISSEL),
# ordered by grid ID.
rs = ora.Query("".join([
    "SELECT city, gridid, ptype, id, name FROM PY_POI WHERE CITY ='",
    city_id,
    "' ",
    sqlparam,
    " AND ID NOT IN (SELECT ID FROM PY_POI_POINT WHERE CITY ='",
    city_id,
    "' ",
    sqlparam,
    ") AND ISSEL IS NULL ORDER BY GRIDID ASC ",
]))

# Fetch the first proxy address from the proxy pool.
proxy_ip = get_proxy()
Beispiel #2
0
def _require_proxy():
    """Return a proxy from the pool, failing loudly when none is available.

    Raises:
        NameError: if ``get_proxy()`` returns ``None``. The exception type is
            kept unchanged so callers that already catch it keep working.
    """
    proxy_ip = get_proxy()
    if proxy_ip is None:
        raise NameError('代理池返回ip为空')
    return proxy_ip


def spider(startIndex, endIndex):
    """Crawl boundary points for one page of POIs and store them in Oracle.

    Selects the PY_POI rows of the module-level ``city_id`` / ``poi_type``
    that have no PY_POI_POINT rows yet and are either unprocessed
    (``ISSEL IS NULL``) or flagged ``ISBONS = '1'``, ordered by GRIDID and
    limited to row numbers ``startIndex``..``endIndex``. For each row the
    boundary is fetched with ``getBounById2``; when the reported area and
    the area computed by ``ComputeArea`` differ by less than 2000, one
    ``py_poi_point`` row is inserted per boundary point. The POI is then
    marked as processed in PY_POI.

    Args:
        startIndex: First row number of the page (inclusive). May be a
            string or an int; it is converted with ``str()``.
        endIndex: Last row number of the page (inclusive). May be a string
            or an int; it is converted with ``str()``.

    Raises:
        NameError: if the proxy pool returns ``None``.
    """
    # Accept ints as well as strings: the values are spliced into SQL text
    # and into the final log message.
    startIndex = str(startIndex)
    endIndex = str(endIndex)

    # NOTE(review): all SQL below is built by string concatenation. The
    # values come from module configuration and database rows, but bind
    # variables would be safer -- confirm what the cxOracle wrapper supports.
    sqlparam = " and ptype = '" + poi_type + "'"
    sqlstart = " and tt.rowno >=" + startIndex
    sqlend = " and rownum <=" + endIndex

    print("查询" + city + "的" + poi_type + "数据")

    # Open the Oracle connection used for all statements of this page.
    ora = cxOracle(dbname, dbpass, dbaddr)

    # Page through the pending POIs: the inner query caps rownum at
    # endIndex, the outer query drops rows before startIndex.
    rs = ora.Query(
        "select tt.* from ("
        "select t.*,rownum as rowno from("
        "SELECT city, gridid, ptype, id, name FROM PY_POI WHERE CITY ='" +
        city_id + "' " + sqlparam + " "
        "AND ID NOT IN (SELECT ID FROM PY_POI_POINT WHERE CITY ='" + city_id +
        "' " + sqlparam + ") AND (ISSEL IS NULL or ISBONS ='1')"
        "ORDER BY GRIDID ASC)t where 1=1 " + sqlend + ")tt where 1=1 " +
        sqlstart)

    proxy_ip = _require_proxy()

    for row in rs:
        grid_id = str(row[1])
        poi_id = str(row[3])
        print("GRID=" + grid_id + ",ID=" + poi_id)

        # Retry until the boundary has been fetched and passes the area
        # check.
        # NOTE(review): there is no retry limit. A POI whose areas never
        # agree, or whose fetch always fails, loops forever (and adds one
        # PY_POI_EXCEPTION row per failure). Also, a failure part-way
        # through the inserts is retried from the start, which may
        # duplicate points -- confirm whether a cap/rollback is wanted.
        while True:
            datas = ""
            area = 0
            try:
                bouns = getBounById2(poi_id, proxy_ip)
                print("bouns=" + str(bouns))

                # Materialise the usable entries once so both passes below
                # see the same data. Each entry is indexed as
                # [0]=lon, [1]=lat, [2]=area.
                points = [bound for bound in bouns if bound is not None]
                for bound in points:
                    area = bound[2]
                    datas += str(bound[0]) + "," + str(bound[1]) + ";"
                print("area=" + str(area) + ",datas=" + str(datas))

                # Strip the trailing ';' and compute the area only once.
                computed_area = ComputeArea(datas[:-1])
                print("ComputeArea=" + str(computed_area))

                if abs(area - computed_area) < 2000:
                    for bound in points:
                        lon = bound[0]
                        lat = bound[1]
                        # Judging by its name this converts GCJ-02 to
                        # WGS-84 coordinates; both pairs are stored.
                        lon1, lat1 = gcj02towgs84(float(lon), float(lat))
                        sql2 = (
                            "insert into py_poi_point values('" + city_id +
                            "','" + grid_id + "','" + poi_type + "','" +
                            poi_id + "'," + str(lon) + "," + str(lat) + "," +
                            str(lon1) + "," + str(lat1) + ")")
                        ora.Exec(sql2)
                    break

                # Areas disagree: pause 1-3 seconds, then fetch again.
                time.sleep(random.randint(1, 3))
            except Exception as exc:
                # Single quotes would break the SQL literal below, so they
                # are replaced with backticks before logging the failure.
                message = str(exc).replace('\'', '`')
                print(message)
                sql3 = (
                    "insert into PY_POI_EXCEPTION values('" + city_id +
                    "','" + grid_id + "','" + poi_type + "','" + poi_id +
                    "','" + message + "',sysdate)")
                ora.Exec(sql3)
                # Switch to a fresh proxy before retrying.
                proxy_ip = _require_proxy()

        # Mark the POI as processed; ISBONS records whether a non-zero
        # area (i.e. a boundary) was found.
        isbons = '0' if area == 0 else '1'
        sql4 = ("UPDATE PY_POI SET ISSEL = '1',ISBONS = '" + isbons +
                "' WHERE ID = '" + poi_id + "' ")
        ora.Exec(sql4)
        time.sleep(random.randint(1, 3))

    print(city + ":" + poi_type + "的" + startIndex + "-" + endIndex +
          "的数据爬取完成")