def url_usr_agent(fromsta, tosta, date):
    """Query kyfw.12306.cn for trains between two stations with a custom User-Agent.

    Installs a urllib2 opener built around a proxy handler, requests the
    ticket query URL for the given stations and date, and collects one
    record per returned train.

    Args:
        fromsta: Departure station name; must be a key of the mapping
            returned by ``getstaname2code()``.
        tosta: Arrival station name; same requirement as ``fromsta``.
        date: Date string interpolated into the ``queryDate`` URL parameter
            (e.g. ``'2016-02-10'``).

    Returns:
        A ``DoubleLink`` holding one dict per train, or ``-1`` when the
        response body is ``"-1"`` or the decoded JSON lacks the
        ``data``/``datas`` keys.

    Raises:
        KeyError: If a station name is missing from the name-to-code mapping
            or a train entry lacks one of the expected fields.
    """
    # proxy = {'http': '27.24.158.155:84'}
    proxy = {'http': '110.4.24.173:80'}
    # install_opener() replaces urllib2's module-wide default opener, so this
    # affects every later urllib2.urlopen() call in the process.
    # NOTE(review): only the 'http' scheme is mapped while the query URL is
    # https, so this proxy is presumably not applied to the request - confirm.
    proxy_support = urllib2.ProxyHandler(proxy)
    opener = urllib2.build_opener(proxy_support)
    urllib2.install_opener(opener)

    i_headers = {
        'User-Agent':
        'Mozilla/5.0(Windows NT 6.1) AppleWebKit/537.36(KHTML,like Geko) Chrome/31.0.1650.48'
    }
    staname2code = getstaname2code()
    web = (
        u"https://kyfw.12306.cn/otn/lcxxcx/query?purpose_codes=ADULT&queryDate=%s&from_station=%s&to_station=%s"
        % (date, staname2code[fromsta], staname2code[tosta]))
    req = urllib2.Request(web, headers=i_headers)

    response = urllib2.urlopen(req)
    try:
        result = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    if result == "-1":
        print("there isn't any train from %s to %s on %s" % (fromsta, tosta, date))
        return -1
    try:
        data = json.loads(result)["data"]["datas"]
    except KeyError:
        print("there isn't any train from %s to %s on %s" % (fromsta, tosta, date))
        return -1

    recordlist = DoubleLink()
    for ticketInfo in data:
        # Keys are copied through unchanged except zy_num -> ydz_num and
        # ze_num -> edz_num.
        record = {"station_train_code": ticketInfo["station_train_code"],
                  "start_station_name": ticketInfo["start_station_name"],
                  "to_station_name": ticketInfo["to_station_name"],
                  "start_time": ticketInfo["start_time"],
                  "arrive_time": ticketInfo["arrive_time"],
                  "swz_num": ticketInfo["swz_num"],
                  "ydz_num": ticketInfo["zy_num"],
                  "edz_num": ticketInfo["ze_num"],
                  "rw_num": ticketInfo["rw_num"],
                  "yw_num": ticketInfo["yw_num"],
                  "yz_num": ticketInfo["yz_num"],
                  "wz_num": ticketInfo["wz_num"],
                  "canWebBuy": ticketInfo["canWebBuy"]}
        if recordlist.getlength() == 0:
            # initlist() takes a list of records, so the first one is wrapped.
            recordlist.initlist([record])
        else:
            recordlist.append(record)
    return recordlist
def spidereasyone(fromsta, tosta, date):
    """Query kyfw.12306.cn for trains between two stations and dump the result.

    Requests the ticket query URL for the given stations and date, builds one
    record per returned train, and writes the records to
    ``./data/sta2stare`` (one ``str()`` of each stored item per line,
    followed by ``str()`` of the whole list).

    Args:
        fromsta: Departure station name; must be a key of the mapping
            returned by ``getstaname2code()``.
        tosta: Arrival station name; same requirement as ``fromsta``.
        date: Date string interpolated into the ``queryDate`` URL parameter
            (e.g. ``'2016-02-10'``).

    Returns:
        A ``DoubleLink`` holding one dict per train, or ``-1`` when the
        response body is ``"-1"`` or the decoded JSON lacks the
        ``data``/``datas`` keys. The ``-1`` sentinel matches
        ``url_usr_agent`` and the ``recordlist == -1`` check done by the
        driver script; the output file is not touched in that case.

    Raises:
        KeyError: If a station name is missing from the name-to-code mapping
            or a train entry lacks one of the expected fields.
    """
    staname2code = getstaname2code()
    requeststr = (
        u"https://kyfw.12306.cn/otn/lcxxcx/query?purpose_codes=ADULT&queryDate=%s&from_station=%s&to_station=%s"
        % (date, staname2code[fromsta], staname2code[tosta]))
    request = urllib2.Request(requeststr)

    print("attempt to get infromation form kyfw.12306.cn")
    response = urllib2.urlopen(request)
    print("inquiry successly")
    try:
        result = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    if result == "-1":
        print("there isn't any train from %s to %s on %s" % (fromsta, tosta, date))
        return -1
    try:
        data = json.loads(result)["data"]["datas"]
    except KeyError:
        print("there isn't any train from %s to %s on %s" % (fromsta, tosta, date))
        return -1

    recordlist = DoubleLink()
    # The with-block guarantees the dump file is closed even when a train
    # entry is malformed and record construction raises.
    with open("./data/sta2stare", "w") as sta2stare:
        for i, ticketInfo in enumerate(data):
            # Keys are copied through unchanged except zy_num -> ydz_num and
            # ze_num -> edz_num.
            record = {"station_train_code": ticketInfo["station_train_code"],
                      "start_station_name": ticketInfo["start_station_name"],
                      "to_station_name": ticketInfo["to_station_name"],
                      "start_time": ticketInfo["start_time"],
                      "arrive_time": ticketInfo["arrive_time"],
                      "swz_num": ticketInfo["swz_num"],
                      "ydz_num": ticketInfo["zy_num"],
                      "edz_num": ticketInfo["ze_num"],
                      "rw_num": ticketInfo["rw_num"],
                      "yw_num": ticketInfo["yw_num"],
                      "yz_num": ticketInfo["yz_num"],
                      "wz_num": ticketInfo["wz_num"],
                      "canWebBuy": ticketInfo["canWebBuy"]}
            if recordlist.getlength() == 0:
                # initlist() takes a list of records, so the first one is wrapped.
                recordlist.initlist([record])
            else:
                recordlist.append(record)
            # Write back what the container actually stored at position i.
            sta2stare.write(str(recordlist.getitem(i)))
            sta2stare.write('\n')
        sta2stare.write(str(recordlist))
    return recordlist
def spidereasyone(fromsta, tosta, date):
    """Query kyfw.12306.cn for trains between two stations and dump the result.

    Requests the ticket query URL for the given stations and date, builds one
    record per returned train, and writes the records to
    ``./data/sta2stare`` (one ``str()`` of each stored item per line,
    followed by ``str()`` of the whole list).

    Args:
        fromsta: Departure station name; must be a key of the mapping
            returned by ``getstaname2code()``.
        tosta: Arrival station name; same requirement as ``fromsta``.
        date: Date string interpolated into the ``queryDate`` URL parameter
            (e.g. ``'2016-02-10'``).

    Returns:
        A ``DoubleLink`` holding one dict per train, or ``-1`` when the
        response body is ``"-1"`` or the decoded JSON lacks the
        ``data``/``datas`` keys. The ``-1`` sentinel matches
        ``url_usr_agent`` and the ``recordlist == -1`` check done by the
        driver script; the output file is not touched in that case.

    Raises:
        KeyError: If a station name is missing from the name-to-code mapping
            or a train entry lacks one of the expected fields.
    """
    staname2code = getstaname2code()
    requeststr = (
        u"https://kyfw.12306.cn/otn/lcxxcx/query?purpose_codes=ADULT&queryDate=%s&from_station=%s&to_station=%s"
        % (date, staname2code[fromsta], staname2code[tosta]))
    request = urllib2.Request(requeststr)

    print("attempt to get infromation form kyfw.12306.cn")
    response = urllib2.urlopen(request)
    print("inquiry successly")
    try:
        result = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    if result == "-1":
        print("there isn't any train from %s to %s on %s" % (fromsta, tosta, date))
        return -1
    try:
        data = json.loads(result)["data"]["datas"]
    except KeyError:
        print("there isn't any train from %s to %s on %s" % (fromsta, tosta, date))
        return -1

    recordlist = DoubleLink()
    # The with-block guarantees the dump file is closed even when a train
    # entry is malformed and record construction raises.
    with open("./data/sta2stare", "w") as sta2stare:
        for i, ticketInfo in enumerate(data):
            # Keys are copied through unchanged except zy_num -> ydz_num and
            # ze_num -> edz_num.
            record = {"station_train_code": ticketInfo["station_train_code"],
                      "start_station_name": ticketInfo["start_station_name"],
                      "to_station_name": ticketInfo["to_station_name"],
                      "start_time": ticketInfo["start_time"],
                      "arrive_time": ticketInfo["arrive_time"],
                      "swz_num": ticketInfo["swz_num"],
                      "ydz_num": ticketInfo["zy_num"],
                      "edz_num": ticketInfo["ze_num"],
                      "rw_num": ticketInfo["rw_num"],
                      "yw_num": ticketInfo["yw_num"],
                      "yz_num": ticketInfo["yz_num"],
                      "wz_num": ticketInfo["wz_num"],
                      "canWebBuy": ticketInfo["canWebBuy"]}
            if recordlist.getlength() == 0:
                # initlist() takes a list of records, so the first one is wrapped.
                recordlist.initlist([record])
            else:
                recordlist.append(record)
            # Write back what the container actually stored at position i.
            sta2stare.write(str(recordlist.getitem(i)))
            sta2stare.write('\n')
        sta2stare.write(str(recordlist))
    return recordlist
def url_usr_agent(fromsta, tosta, date):
    """Query kyfw.12306.cn for trains between two stations with a custom User-Agent.

    Installs a urllib2 opener built around a proxy handler, requests the
    ticket query URL for the given stations and date, and collects one
    record per returned train.

    Args:
        fromsta: Departure station name; must be a key of the mapping
            returned by ``getstaname2code()``.
        tosta: Arrival station name; same requirement as ``fromsta``.
        date: Date string interpolated into the ``queryDate`` URL parameter
            (e.g. ``'2016-02-10'``).

    Returns:
        A ``DoubleLink`` holding one dict per train, or ``-1`` when the
        response body is ``"-1"`` or the decoded JSON lacks the
        ``data``/``datas`` keys.

    Raises:
        KeyError: If a station name is missing from the name-to-code mapping
            or a train entry lacks one of the expected fields.
    """
    # proxy = {'http': '27.24.158.155:84'}
    proxy = {'http': '110.4.24.173:80'}
    # install_opener() replaces urllib2's module-wide default opener, so this
    # affects every later urllib2.urlopen() call in the process.
    # NOTE(review): only the 'http' scheme is mapped while the query URL is
    # https, so this proxy is presumably not applied to the request - confirm.
    proxy_support = urllib2.ProxyHandler(proxy)
    opener = urllib2.build_opener(proxy_support)
    urllib2.install_opener(opener)

    i_headers = {'User-Agent': 'Mozilla/5.0(Windows NT 6.1) AppleWebKit/537.36(KHTML,like Geko) Chrome/31.0.1650.48'}
    staname2code = getstaname2code()
    web = (
        u"https://kyfw.12306.cn/otn/lcxxcx/query?purpose_codes=ADULT&queryDate=%s&from_station=%s&to_station=%s"
        % (date, staname2code[fromsta], staname2code[tosta]))
    req = urllib2.Request(web, headers=i_headers)

    response = urllib2.urlopen(req)
    try:
        result = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    if result == "-1":
        print("there isn't any train from %s to %s on %s" % (fromsta, tosta, date))
        return -1
    try:
        data = json.loads(result)["data"]["datas"]
    except KeyError:
        print("there isn't any train from %s to %s on %s" % (fromsta, tosta, date))
        return -1

    recordlist = DoubleLink()
    for ticketInfo in data:
        # Keys are copied through unchanged except zy_num -> ydz_num and
        # ze_num -> edz_num.
        record = {"station_train_code": ticketInfo["station_train_code"],
                  "start_station_name": ticketInfo["start_station_name"],
                  "to_station_name": ticketInfo["to_station_name"],
                  "start_time": ticketInfo["start_time"],
                  "arrive_time": ticketInfo["arrive_time"],
                  "swz_num": ticketInfo["swz_num"],
                  "ydz_num": ticketInfo["zy_num"],
                  "edz_num": ticketInfo["ze_num"],
                  "rw_num": ticketInfo["rw_num"],
                  "yw_num": ticketInfo["yw_num"],
                  "yz_num": ticketInfo["yz_num"],
                  "wz_num": ticketInfo["wz_num"],
                  "canWebBuy": ticketInfo["canWebBuy"]}
        if recordlist.getlength() == 0:
            # initlist() takes a list of records, so the first one is wrapped.
            recordlist.initlist([record])
        else:
            recordlist.append(record)
    return recordlist
#/usr/bin/env python #-*-coding:utf8-*- import urllib2 import urllib import json import sys import spiderpractice sys.path.append("data") from staname2code import getstaname2code staname2code = getstaname2code() #print staname2code stacode = staname2code.keys() date = u"2016-02-11" sta2stalist = [] for fromsta in stacode: for tosta in stacode: if fromsta != tosta: tmp = [fromsta, tosta] sta2stalist.append(tmp) print len(sta2stalist) for j in range(1): print j + 1, "/", len(sta2stalist) #recordlist = spiderpractice.spidereasyone(sta2stalist[j][0],sta2stalist[j][1],date) recordlist = spiderpractice.spidereasyone(u"合肥", u"淮南", date) print recordlist if recordlist == -1: continue else: sta2stalistfile = open("./database/%s" % (sta2stalist[j]), "w")
"arrive_time":ticketInfo["arrive_time"],\ "swz_num":ticketInfo["swz_num"],\ "ydz_num":ticketInfo["zy_num"],\ "edz_num":ticketInfo["ze_num"],\ "rw_num":ticketInfo["rw_num"],\ "yw_num":ticketInfo["yw_num"],\ "yz_num":ticketInfo["yz_num"],\ "wz_num":ticketInfo["wz_num"],\ "canWebBuy":ticketInfo["canWebBuy"]}]#,can't omit if recordlist.getlength() == 0: recordlist.initlist(temp) else: recordlist.append(temp[0]) return recordlist staname2code = getstaname2code() #print staname2code stacode = staname2code.keys() date = u"2016-02-11" sta2stalist=[] for fromsta in stacode: for tosta in stacode: if fromsta != tosta: tmp = [fromsta,tosta] sta2stalist.append(tmp) #print len(sta2stalist) #for j in range(len(sta2stalist)): for j in range(400000,len(sta2stalist)): num = open("numofscan","r+") num.write(str(j)) num.close