# NOTE(review): the line below is a whitespace-collapsed fragment. As written, Python treats
# everything after the first '#' as a comment, so only the first assignment is live code.
# Visible intent, in order:
#   1. Decode the payload by calling the JS routine "b.decode" (compiled from `jsstr` via execjs),
#      parse the result with json.loads and pass it to analysis_json().
#   2. Print a completion message for `city` and sleep 2 seconds; on any Exception print a
#      failure message for `city` and sleep 2 seconds (the exception `e` itself is not reported).
#   3. Module-level setup: an MSSQL handle `ms`, a pymongo client, and `db` bound to its
#      OriginalData database.
#   4. main(): prints the start time and fetches the JS source with get_js().
# NOTE(review): the enclosing function/`try:` header and the remainder of main() are outside this view.
# NOTE(review): database hosts and credentials are hard-coded here -- consider moving them to configuration.
json_source = execjs.compile(jsstr).call("b.decode", json_source) #print(json.loads(json_source)) json_source = json.loads(json_source) analysis_json(json_source) print("完成解析:" + city) time.sleep(2) #sleep N seconds except Exception as e: print("抓取异常:" + city) time.sleep(2) #sleep N seconds #MS Sql Server connection string ms = MSSQL(host="172.16.12.35", user="******", pwd="sa", db="SmallIsBeautiful_2017-03-15") #MongoDB database connection client = pymongo.MongoClient('172.16.21.232', 27017) db = client.OriginalData #main program def main(): now = datetime.datetime.now() print("spider_demo10_www.zq12369.com 开始时间:" + now.strftime('%Y-%m-%d %H:%M:%S')) jsstr = get_js()
# NOTE(review): the line below is a whitespace-collapsed fragment; it is not valid Python as written.
# Visible intent, in order:
#   1. Read the stripped text of table cells td[2]..td[7] into column_2..column_7
#      (column_0/column_1 are assigned outside this view); column_3 additionally has spaces,
#      newlines and '?' removed.
#   2. Build an INSERT into Space0011A with %-formatting and run it through ms.ExecNonQuery
#      as UTF-8 bytes.
#   3. Module-level setup: an MSSQL handle `ms`.
#   4. main(): prints the start time, deletes every row from Space0011A, downloads DOWNLOAD_URL,
#      takes the page count from parse_html(), then for each page POST-downloads page `num + 1`
#      and parses it inside a try block.
# NOTE(review): the SQL is assembled from scraped text with '%s' substitution -- a quote in any
#   cell breaks the statement; use parameter binding if the MSSQL wrapper offers it (not visible here).
# NOTE(review): the bare `except:` also traps KeyboardInterrupt/SystemExit; its handler body is
#   outside this view.
# NOTE(review): the progress message prints the 0-based `num` while the request uses `num + 1`.
column_2 = td[2].getText().strip() column_3 = td[3].getText().strip().replace(' ','').replace('\n','').replace('?','') #data is irregular: spaces in the middle, contains '?' etc. column_4 = td[4].getText().strip() column_5 = td[5].getText().strip() column_6 = td[6].getText().strip() column_7 = td[7].getText().strip() #print(column_0 + "|"+column_1 + "|"+column_2 + "|"+column_3 + "|"+column_4 + "|"+column_5 + "|"+column_6 + "|"+column_7) sql = "insert into Space0011A values ('%s','%s','%s','%s','%s','%s','%s','%s')" % (column_0,column_1,column_2,column_3,column_4,column_5,column_6,column_7) #sql = "insert into Space0011A values ('{0}', '{1}', '{2}', '{3}', '{4}', '{5}', '{6}', '{7}')".format(column_0,column_1,column_2,column_3,column_4,column_5,column_6,column_7) ms.ExecNonQuery(sql.encode('utf-8')) pass #MS Sql Server connection string ms = MSSQL(host=".",user="******",pwd="sa",db="SmallIsBeautiful") def main(): now = datetime.datetime.now() print("开始时间:" + now.strftime('%Y-%m-%d %H:%M:%S')) sql = "delete from Space0011A" #clear historical data ms.ExecNonQuery(sql.encode('utf-8')) html = download_page(DOWNLOAD_URL) allPageNum = parse_html(html) for num in range(int(allPageNum)): print("准备抓取页码:" + str(num)) try: html = download_page_post(DOWNLOAD_URL,num + 1) parse_html_post(html) except: #Exception as e:
# NOTE(review): the line below is a whitespace-collapsed fragment; it is not valid Python as written.
# Visible intent, in order:
#   1. Execute the previously built `sql` via ms.ExecNonQuery and print an AQI progress line
#      containing `province`, `city` and `forecasttime` (all assigned outside this view).
#   2. Build a `cookies` dict from the `raw_cookies` string by splitting it into pieces and
#      splitting each piece on the first '='.
#   3. Define `proxies` (an HTTP proxy address for https traffic), the MSSQL handle `ms`,
#      a pymongo client and `db` bound to its OriginalData database.
#   4. main(): prints the current time as %H%M%S%f, prints the start time, and begins a
#      `json_provices` list of province records (code/name/url); the list continues past this view.
# NOTE(review): `raw_cookies` separates pairs with '; ' and contains no ':' character, so
#   split(':') yields a single piece and `cookies` ends up with one key ('UM_distinctid') whose
#   value is the rest of the string. Splitting on ';' and stripping each piece looks like the intent.
# NOTE(review): whether the cookie loop sits at module level or inside the preceding function
#   cannot be determined from the collapsed text.
# NOTE(review): database hosts and credentials are hard-coded here -- consider moving them to configuration.
ms.ExecNonQuery(sql.encode('utf-8')) print('【AQI】:' + province + " " + city + " " + forecasttime) cookies = {} raw_cookies = 'UM_distinctid=15b8f2f73e810e-0568aad45294d8-5d4e211f-232800-15b8f2f73e91da; UM_distinctid=15baed9d9bd446-082e358903e5e6-5d4e211f-232800-15baed9d9be4f9; followcity=54511%2C58367%2C59493%2C57516%2C58321%2C57679%2C58847; CNZZDATA1254743953=1454190746-1492752603-%7C1496822745; JSESSIONID=AE68987A917F5EB0ADFF073481F60470' for line in raw_cookies.split(':'): key, value = line.split('=', 1) cookies[key] = value proxies = {"https": "http://41.118.132.69:4433"} #MS Sql Server connection string ms = MSSQL(host=".", user="******", pwd="sa", db="SmallIsBeautiful_2017-03-15") #MongoDB database connection client = pymongo.MongoClient('172.16.21.232', 27017) db = client.OriginalData def main(): now = datetime.datetime.now() print(now.strftime('%H%M%S%f')) print("开始时间:" + now.strftime('%Y-%m-%d %H:%M:%S')) json_provices = [{ "code": "ABJ", "name": "北京市", "url": "/publish/forecast/ABJ.html"
# NOTE(review): the line below is a whitespace-collapsed fragment; it is not valid Python as written.
# Visible intent, in order:
#   1. When the preceding count query returned 0 (isRepeat[0][0] == 0) and value[2] is not the
#      string '0', insert a row into T_EnvQuality_AirCityDayData with the day taken from `c_time`,
#      region code '140200' and AQI value[2].
#   2. In the `else` branch, count rows in T_EnvQuality_AirStationDayData for the same day and
#      FK_AirID = point[3]; when none exist and value[2] is not '0', insert a station row that also
#      carries point[3] (FK_AirID) and point[1] (FK_StationCode).
#   3. Module-level setup: an MSSQL handle `ms`.
#   4. main(): prints the start time, then runs start_hour(now) and start_day(now); any Exception
#      is reduced to a fixed message.
# NOTE(review): which `if` the `else:` pairs with cannot be determined from the collapsed text.
# NOTE(review): all SQL here is assembled with '%s' substitution; use parameter binding if the
#   MSSQL wrapper offers it (not visible here).
# NOTE(review): main() discards the caught exception `e`, so failures leave no diagnostic detail.
if isRepeat[0][0] == 0 and value[2] != '0': sql = "insert into T_EnvQuality_AirCityDayData (MonitorTime,FK_RegionCode,AQI) values ('%s','140200','%s') " %(c_time.strftime('%Y/%m/%d'), value[2]) ms.ExecNonQuery(sql.encode('utf-8')) #print("城市AQI日数据插入成功,时间:" + c_time.strftime('%Y/%m/%d') + " 站点:" + point[2] + " AQI:" + value[2]) else: sql = "select count(PK_ID) from T_EnvQuality_AirStationDayData where MonitorTime='%s' and FK_AirID='%s' " %(c_time.strftime('%Y/%m/%d'), point[3]) isRepeat = ms.ExecQuery(sql.encode('utf-8')) if isRepeat[0][0] == 0 and value[2] != '0': sql = "insert into T_EnvQuality_AirStationDayData (MonitorTime,FK_RegionCode,AQI,FK_AirID,FK_StationCode) values ('%s','140200','%s','%s','%s') " %(c_time.strftime('%Y/%m/%d'), value[2], point[3], point[1]) ms.ExecNonQuery(sql.encode('utf-8')) #print("站点AQI日数据插入成功,时间:" + c_time.strftime('%Y/%m/%d') + " 站点:" + point[2] + " AQI:" + value[2]) #MS Sql Server connection string ms = MSSQL(host="172.16.12.20",user="******",pwd="sa",db="DB_DC_DaTongV1") #main program def main(): now = datetime.datetime.now() print("开始时间:" + now.strftime('%Y-%m-%d %H:%M:%S')) try: start_hour(now) start_day(now) except Exception as e: print("出现异常")