json_source = execjs.compile(jsstr).call("b.decode", json_source)
        #print(json.loads(json_source))
        json_source = json.loads(json_source)

        analysis_json(json_source)

        print("完成解析:" + city)
        time.sleep(2)  #休息N秒
    except Exception as e:
        print("抓取异常:" + city)
        time.sleep(2)  #休息N秒


# MS SQL Server connection.
# NOTE(review): host and credentials are hard-coded literals in this file.
ms = MSSQL(
    host="172.16.12.35",
    user="******",
    pwd="sa",
    db="SmallIsBeautiful_2017-03-15",
)

# MongoDB connection; `db` is the "OriginalData" database on that server.
client = pymongo.MongoClient(host='172.16.21.232', port=27017)
db = client["OriginalData"]


#主程序
def main():
    now = datetime.datetime.now()
    print("spider_demo10_www.zq12369.com 开始时间:" +
          now.strftime('%Y-%m-%d %H:%M:%S'))

    jsstr = get_js()
Ejemplo n.º 2
0
            column_2 = td[2].getText().strip()
            column_3 = td[3].getText().strip().replace(' ','').replace('\n','').replace('?','') #数据不规范,中间空格,包含?等
            column_4 = td[4].getText().strip()
            column_5 = td[5].getText().strip()
            column_6 = td[6].getText().strip()
            column_7 = td[7].getText().strip()
            #print(column_0 + "|"+column_1 + "|"+column_2 + "|"+column_3 + "|"+column_4 + "|"+column_5 + "|"+column_6 + "|"+column_7)

            sql = "insert into Space0011A values ('%s','%s','%s','%s','%s','%s','%s','%s')"  % (column_0,column_1,column_2,column_3,column_4,column_5,column_6,column_7)
            
            #sql = "insert into Space0011A values ('{0}', '{1}', '{2}', '{3}', '{4}', '{5}', '{6}', '{7}')".format(column_0,column_1,column_2,column_3,column_4,column_5,column_6,column_7)
            ms.ExecNonQuery(sql.encode('utf-8'))
    pass

# MS SQL Server connection (host "." as given to the MSSQL helper).
# NOTE(review): credentials are hard-coded literals in this file.
ms = MSSQL(
    host=".",
    user="******",
    pwd="sa",
    db="SmallIsBeautiful",
)

def main():
    """Clear table Space0011A, then fetch and parse every result page.

    Steps, in order:
      1. Print the start time.
      2. Delete all rows from Space0011A through the module-level ``ms``.
      3. Download DOWNLOAD_URL once and hand it to ``parse_html``; its
         return value is converted with ``int`` and used as the page count.
      4. For each page, call ``download_page_post(DOWNLOAD_URL, page)`` with
         1-based page numbers and pass the result to ``parse_html_post``
         (presumably the function that stores the rows -- confirm there).

    A failure on one page is reported and the loop moves on to the next
    page; it never aborts the run.
    """
    now = datetime.datetime.now()
    print("开始时间:" + now.strftime('%Y-%m-%d %H:%M:%S'))

    sql = "delete from Space0011A"  # clear rows left over from earlier runs
    ms.ExecNonQuery(sql.encode('utf-8'))
    html = download_page(DOWNLOAD_URL)
    allPageNum = parse_html(html)
    for num in range(int(allPageNum)):
        # `num` is 0-based here; the request below uses num + 1.
        print("准备抓取页码:" + str(num))
        try:
            html = download_page_post(DOWNLOAD_URL, num + 1)
            parse_html_post(html)
        except Exception as e:
            # The previous handler re-executed `sql` (still the DELETE
            # statement, which wiped rows already stored for earlier pages)
            # and printed names that are not defined in this function.
            # Report the failing page and keep going instead.
            print("抓取异常:" + str(num) + " " + str(e))


# Cookie name -> value mapping parsed from the raw header string below.
cookies = {}

raw_cookies = 'UM_distinctid=15b8f2f73e810e-0568aad45294d8-5d4e211f-232800-15b8f2f73e91da; UM_distinctid=15baed9d9bd446-082e358903e5e6-5d4e211f-232800-15baed9d9be4f9; followcity=54511%2C58367%2C59493%2C57516%2C58321%2C57679%2C58847; CNZZDATA1254743953=1454190746-1492752603-%7C1496822745; JSESSIONID=AE68987A917F5EB0ADFF073481F60470'

# Pairs in the header are separated by ';' followed by a space. The string
# contains no ':', so splitting on ':' produced one bogus entry holding the
# whole header. Split on ';' and trim the surrounding whitespace instead.
# When a name repeats (UM_distinctid), the last occurrence wins.
for line in raw_cookies.split(';'):
    line = line.strip()
    if '=' not in line:
        # Skip empty or malformed segments rather than failing on unpack.
        continue
    key, value = line.split('=', 1)
    cookies[key] = value

# Proxy mapping: URL scheme -> proxy address.
proxies = {"https": "http://41.118.132.69:4433"}

# MS SQL Server connection (host "." as given to the MSSQL helper).
# NOTE(review): credentials are hard-coded literals in this file.
ms = MSSQL(
    host=".",
    user="******",
    pwd="sa",
    db="SmallIsBeautiful_2017-03-15",
)

# MongoDB connection; `db` is the "OriginalData" database on that server.
client = pymongo.MongoClient(host='172.16.21.232', port=27017)
db = client["OriginalData"]


def main():
    now = datetime.datetime.now()
    print(now.strftime('%H%M%S%f'))
    print("开始时间:" + now.strftime('%Y-%m-%d %H:%M:%S'))

    json_provices = [{
        "code": "ABJ",
        "name": "北京市",
        "url": "/publish/forecast/ABJ.html"
Ejemplo n.º 4
0
                if isRepeat[0][0] == 0 and value[2] != '0':
                    sql = "insert into T_EnvQuality_AirCityDayData (MonitorTime,FK_RegionCode,AQI) values ('%s','140200','%s') " %(c_time.strftime('%Y/%m/%d'), value[2])
                    ms.ExecNonQuery(sql.encode('utf-8'))
                    #print("城市AQI日数据插入成功,时间:" + c_time.strftime('%Y/%m/%d') + " 站点:" + point[2] + " AQI:" + value[2])
            else:
                sql = "select count(PK_ID) from T_EnvQuality_AirStationDayData where MonitorTime='%s' and FK_AirID='%s' " %(c_time.strftime('%Y/%m/%d'), point[3])
                isRepeat = ms.ExecQuery(sql.encode('utf-8'))
                if isRepeat[0][0] == 0 and value[2] != '0':
                    sql = "insert into T_EnvQuality_AirStationDayData (MonitorTime,FK_RegionCode,AQI,FK_AirID,FK_StationCode) values ('%s','140200','%s','%s','%s') " %(c_time.strftime('%Y/%m/%d'), value[2], point[3], point[1])
                    ms.ExecNonQuery(sql.encode('utf-8'))
                    #print("站点AQI日数据插入成功,时间:" + c_time.strftime('%Y/%m/%d') + " 站点:" + point[2] + " AQI:" + value[2])
            


# MS SQL Server connection.
# NOTE(review): host and credentials are hard-coded literals in this file.
ms = MSSQL(
    host="172.16.12.20",
    user="******",
    pwd="sa",
    db="DB_DC_DaTongV1",
)


# Main program
def main():
    """Print the start time, then call ``start_hour`` and ``start_day``.

    Both helpers receive the same ``datetime`` captured at entry. Any
    ``Exception`` raised by either one is caught here so the function never
    propagates it. The fixed notice is still printed, and the traceback is
    written as well so the cause of the failure is not lost. If
    ``start_hour`` fails, ``start_day`` is not attempted.
    """
    # Local import keeps this block self-contained without touching the
    # file's import section.
    import traceback

    now = datetime.datetime.now()
    print("开始时间:" + now.strftime('%Y-%m-%d %H:%M:%S'))

    try:
        start_hour(now)
        start_day(now)
    except Exception:
        print("出现异常")
        # Previously the exception object was bound but never reported.
        traceback.print_exc()