Beispiel #1
0
def spider(startIndex, endIndex, *, area_tolerance=2000):
    """Fetch boundary points for a row-number slice of PY_POI and store them.

    Selects the POIs of the module-level ``city_id`` / ``poi_type`` that have
    no rows in PY_POI_POINT yet, restricted to the row-number window
    ``startIndex``..``endIndex``.  For every selected POI the boundary is
    requested through ``getBounById2``; once the area reported with the
    boundary agrees with ``ComputeArea`` of the outline, each point is
    converted with ``gcj02towgs84`` and inserted into ``py_poi_point``.
    Finally the POI row is flagged as processed in PY_POI.

    Args:
        startIndex: Lower bound applied as ``tt.rowno >=``.  Strings and
            integers are both accepted.
        endIndex: Upper bound applied as ``rownum <=``.  Strings and
            integers are both accepted.
        area_tolerance: Largest absolute difference between the reported
            area and the computed area for a boundary to be accepted.

    Raises:
        NameError: If ``get_proxy()`` returns ``None``.

    NOTE: the retry loop has no upper bound; a POI whose areas never agree
    is retried indefinitely.
    NOTE(review): every statement is assembled by string concatenation.
    Switching to bind variables needs the ``cxOracle`` wrapper API, which is
    not visible from here.
    """
    # Coerce once so integer arguments no longer fail on concatenation.
    start_text = str(startIndex)
    end_text = str(endIndex)

    sqlparam = " and ptype = '" + poi_type + "'"
    sqlstart = " and tt.rowno >=" + start_text
    sqlend = " and rownum <=" + end_text

    print("查询" + city + "的" + poi_type + "数据")

    # Query the grid1600 data for the given city
    ora = cxOracle(dbname, dbpass, dbaddr)

    # Query the grid table
    rs = ora.Query(
        "select tt.* from ("
        "select t.*,rownum as rowno from("
        "SELECT city, gridid, ptype, id, name FROM PY_POI WHERE CITY ='" +
        city_id + "' " + sqlparam + " "
        "AND ID NOT IN (SELECT ID FROM PY_POI_POINT WHERE CITY ='" + city_id +
        "' " + sqlparam + ") AND (ISSEL IS NULL or ISBONS ='1')"
        "ORDER BY GRIDID ASC)t where 1=1 " + sqlend + ")tt where 1=1 " +
        sqlstart)

    proxy_ip = _require_proxy()

    for row in rs:
        print("GRID=" + str(row[1]) + ",ID=" + str(row[3]))
        grid_id = str(row[1])
        poi_id = str(row[3])
        while True:  # keep fetching until the boundary passes the area check
            area = 0
            try:
                bouns = getBounById2(poi_id, proxy_ip)
                print("bouns=" + str(bouns))
                # Drop missing entries once; both passes below use this list.
                points = [bound for bound in bouns if bound is not None]
                pieces = []
                for bound in points:
                    area = bound[2]  # the last entry's area is the one compared
                    pieces.append(str(bound[0]) + "," + str(bound[1]) + ";")
                datas = "".join(pieces)
                print("area=" + str(area) + ",datas=" + str(datas))
                # Strip the trailing ';' and compute the outline area once.
                computed_area = ComputeArea(datas[:-1])
                print("ComputeArea=" + str(computed_area))
                if abs(area - computed_area) < area_tolerance:
                    for bound in points:
                        lon = bound[0]
                        lat = bound[1]
                        lon1, lat1 = gcj02towgs84(float(lon), float(lat))
                        # Stores both the original and the converted pair.
                        sql2 = ("insert into py_poi_point values('" + city_id +
                                "','" + grid_id + "','" + poi_type + "','" +
                                poi_id + "'," + str(lon) + "," + str(lat) +
                                "," + str(lon1) + "," + str(lat1) + ")")
                        ora.Exec(sql2)
                    break
                # Areas disagree: wait a little and fetch again.
                time.sleep(random.randint(1, 3))
            except Exception as exc:
                # Single quotes would terminate the SQL literal below.
                message = str(exc).replace('\'', '`')
                print(message)
                sql3 = ("insert into PY_POI_EXCEPTION values('" + city_id +
                        "','" + grid_id + "','" + poi_type + "','" + poi_id +
                        "','" + message + "',sysdate)")
                ora.Exec(sql3)
                # Switch to another proxy IP before retrying
                proxy_ip = _require_proxy()
        # Mark the POI as queried; ISBONS is '0' when the accepted area is 0
        isbons = "0" if area == 0 else "1"
        sql4 = ("UPDATE PY_POI SET ISSEL = '1',ISBONS = '" + isbons +
                "' WHERE ID = '" + poi_id + "' ")
        ora.Exec(sql4)
        time.sleep(random.randint(1, 3))
    print(city + ":" + poi_type + "的" + start_text + "-" + end_text +
          "的数据爬取完成")


def _require_proxy():
    """Return a proxy from ``get_proxy()``; raise NameError when it is None."""
    proxy_ip = get_proxy()
    if proxy_ip is None:
        raise NameError('代理池返回ip为空')
    return proxy_ip
Beispiel #2
0
    "ORDER BY GRIDID ASC ")

# Obtain a proxy up front; abort when get_proxy() returns None.
proxy_ip = get_proxy()
if proxy_ip == None:
    raise NameError('代理池返回ip为空')

# `rs` and `ora` are defined outside this excerpt; each row is indexed
# positionally (f[1] and f[3] are used below).
for f in rs:
    print("GRID=" + str(f[1]) + ",ID=" + str(f[3]))
    OBJECTID = str(f[1])
    id = str(f[3])
    # NOTE(review): this overwrites the row's id with a fixed literal, so every
    # iteration requests the same id -- looks like a leftover debugging
    # override; confirm before running against real data.
    id = "B0FFGFHRBZ"
    while True:  # keep fetching data in a while loop
        datas = ""
        area = 0
        try:
            bouns = getBounById2(id, proxy_ip)
            # First pass: build "lon,lat;" pairs and remember the area carried
            # by the last non-None entry.
            for bound in bouns:
                if bound != None:
                    lon = bound[0]
                    lat = bound[1]
                    area = bound[2]
                    datas += str(lon) + "," + str(lat) + ";"
            # datas[:-1] drops the trailing ';'. Points are stored only when
            # the reported and computed areas differ by less than 1.
            if abs(area - ComputeArea(datas[:-1])) < 1:
                for bound in bouns:
                    if bound != None:
                        lon = bound[0]
                        lat = bound[1]
                        # Only the converted pair is inserted here.
                        lon, lat = gcj02towgs84(float(lon), float(lat))
                        sql2 = "insert into py_poi_point values('" + city_id + "','" + OBJECTID + "','" + poi_type + "','" + id + "'," + str(
                            lon) + "," + str(lat) + ")"
                        ora.Exec(sql2)
Beispiel #3
0
 # Excerpt from inside an enclosing block that is not visible here; `pois`,
 # `i`, `id`, `name`, `center_x`, `center_y` and the p*/city*/ad* fields are
 # set before this point.
 address = pois[i]['address']
 # Any zero-length value is replaced by an empty string
 # (presumably the field can arrive as an empty list -- TODO confirm).
 if len(address) == 0:
     address = ""
 type = pois[i]['type']
 # Insert the data
 center_x, center_y = gcj02towgs84(float(center_x),
                                   float(center_y))
 sql = "insert into py_poi values('" + city_id + "','" + OBJECTID + "','" + poi_type + "','" + id + "','" + name + "'," + str(
     center_x
 ) + "," + str(
     center_y
 ) + ",'" + pname + "','" + pcode + "','" + cityname + "','" + citycode + "','" + adname + "','" + adcode + "','" + address + "','" + type + "') "
 ora.Exec(sql)
 # Look up the boundary and insert it
 try:
     bouns = getBounById2(id)
     for bound in bouns:
         if bound != None:
             lon = bound[0]
             lat = bound[1]
             # Only the converted pair is inserted.
             lon, lat = gcj02towgs84(float(lon), float(lat))
             sql2 = "insert into py_poi_point values('" + city_id + "','" + OBJECTID + "','" + poi_type + "','" + id + "'," + str(
                 lon) + "," + str(lat) + ")"
             ora.Exec(sql2)
 # NOTE(review): the bare except catches every exception and repeats the
 # same lookup once; the remainder of this handler lies outside the excerpt.
 except:
     bouns = getBounById2(id)
     for bound in bouns:
         if bound != None:
             lon = bound[0]
             lat = bound[1]
             lon, lat = gcj02towgs84(float(lon), float(lat))