Code Example #1
# Crawl the city pairs in a randomized order
import copy
import random
import time

from mainspider_post import getdata, message
order = {}
tool = {}
orderlist = []
rlist = []
data = []
oneline = {}
city = ['北京', '上海', '广州']

for i in city:
    for j in city:
        if i != j:  # compare string values with !=, not "is not"
            for date in range(20200706, 20200715):  # 2020-07-06 through 2020-07-14
                order['city1'] = i
                order['city2'] = j
                order['date'] = str(date)
                tool = copy.copy(order)
                orderlist.append(tool)  # build up the initial request pool
random.shuffle(orderlist)  # shuffle so the requests are not issued in a predictable order

for od in orderlist:
    print(od.values())
    data = list(message(getdata(od['city1'], od['city2'], str(od['date']))))
    for i in data:
        if i['Airline'] not in rlist:
            rlist.append(i['Airline'])  # record each airline the first time it appears
    time.sleep(random.random() * 30)  # long, randomized sleep; overly regular timing gets the crawler banned
print(rlist)

# ['南方航空', '东方航空', '上海航空', '海南航空', '中国国航', '吉祥航空', '金鹏航空', '春秋航空', '厦门航空', '中国联合航空', '天津航空']
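All of these examples call getdata() and message() from mainspider_post, which is not shown on this page (and note that examples #3 and #4 pass extra arguments to message()). The following is only a minimal sketch of what that interface might look like; the endpoint URL, JSON field names, and the single-argument message() signature are assumptions, not the project's actual code.

# Hypothetical sketch of the mainspider_post interface the examples assume.
# The URL and response field names below are placeholders, not the real API.
import requests

SEARCH_URL = "https://example.com/flight/search"  # placeholder endpoint


def getdata(dcity, acity, date):
    """POST a one-way flight search: departure city, arrival city, yyyymmdd date."""
    payload = {"dcity": dcity, "acity": acity, "date": date}
    resp = requests.post(SEARCH_URL, json=payload, timeout=10)
    resp.raise_for_status()
    return resp.json()


def message(raw):
    """Yield one dict per flight with the keys the callers read."""
    for item in raw.get("routeList", []):
        yield {"Airline": item.get("airlineName"), "Price": item.get("price")}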
Code Example #2
from mainspider_post import getdata, message
import time
import random

# A helper routine that determines which airlines appear within the crawl range
airline = []
data = []
city = ['北京', '上海', '广州']
for i in city:
    for j in city:
        if i != j:  # compare string values with !=, not "is not"
            for date in range(20200706, 20200720):
                print(i, j)
                data = data + list(message(getdata(i, j, str(date))))
                time.sleep(random.random() * 30)  # long, randomized sleep; overly regular timing gets the crawler banned
for i in data:
    if i['AirLine'] not in airline:
        airline.append(i['AirLine'])
print(airline)
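As a side note, the dedup step above can be written more compactly while keeping first-seen order; this assumes the same data list and 'AirLine' key used in this example.

# Compact, order-preserving alternative to the membership-check loop above
airline = list(dict.fromkeys(row['AirLine'] for row in data))
print(airline)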
Code Example #3
File: flightsteward.py  Project: FBIdd/FlightSteward
import mainspider_post
from crawler_donghang import donghangcrawler
from crawler_jixiang import jixiangcrawler
from crawler_lianhang import lianhangcrawler
from multiprocessing import Process

if __name__ == "__main__":
    dcity = input("请选择出发城市(北京、上海、广州): ")
    acity = input("请选择到达城市(北京、上海、广州): ")
    date = input("请输入日期('20200706'): ")
    airline = mainspider_post.message(
        mainspider_post.getdata(dcity, acity, date), dcity, acity, date)
    print(airline)
    #  Get the (already deduplicated) airline list, then scrape the Ctrip ticket data
    #  Define the crawler processes
    csvlist = []
    donghangPro = Process(target=donghangcrawler, args=(dcity, acity, date))
    jixiangPro = Process(target=jixiangcrawler, args=(dcity, acity, date))
    lianhangPro = Process(target=lianhangcrawler, args=(dcity, acity, date))
    for i in airline:
        if i == '东方航空':
            print('定位到东方航空')
            donghangPro.start()
            donghangPro.join()  # blocks here until the 东方航空 crawler finishes before the next airline is checked
            csvlist.append("donghang.csv")  # append the filename itself, not a nested list
        elif i == '吉祥航空':
            print('定位到吉祥航空')
            jixiangPro.start()
            csvlist.append("jixiang.csv")
            print("吉祥航空爬虫开始运行")
        elif i == '中国联合航空':
Code Example #4
import mainspider_post
from crawler_donghang import donghangcrawler
from crawler_jixiang import jixiangcrawler
from crawler_lianhang import lianhangcrawler
from multiprocessing import Process

dcity = input("请选择出发城市(北京、上海、广州): ")
acity = input("请选择到达城市(北京、上海、广州): ")
date = input("请输入日期('20200706'): ")
airline = mainspider_post.message(mainspider_post.getdata(dcity, acity, date), dcity, acity, date)
print(airline)
#  Get the (already deduplicated) airline list, then scrape the Ctrip ticket data
#  Define the crawler processes
donghangPro = Process(target=donghangcrawler, args=(dcity, acity, date))
jixiangPro = Process(target=jixiangcrawler, args=(dcity, acity, date))
lianhangPro = Process(target=lianhangcrawler, args=(dcity, acity, date))
csvlist = []
for i in airline:
    if i == '东方航空':  # compare string values with ==, not "is"
        donghangPro.start()
        csvlist.append("donghang.csv")  # append the filename itself, not a nested list
    elif i == '吉祥航空':
        jixiangPro.start()
        csvlist.append("jixiang.csv")
        print("吉祥航空爬虫开始运行")
    elif i == '中国联合航空':
        lianhangPro.start()
        csvlist.append("lianghang.csv")
        print("中国联合航空爬虫开始运行")
for i in airline:  # main process blocks here waiting for the child crawlers to finish
    if i == '东方航空':
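The excerpt is cut off here. As a design note, the dispatch-then-wait pattern in examples #3 and #4 can be expressed without one branch per airline by mapping airline names to crawler functions, starting every matching process first, and joining them all afterwards. The sketch below reuses the crawler functions and CSV filenames from the listing above purely for illustration; it is not code from the project.

# Sketch of the same start-all / join-all dispatch pattern (illustrative only)
from multiprocessing import Process

from crawler_donghang import donghangcrawler
from crawler_jixiang import jixiangcrawler
from crawler_lianhang import lianhangcrawler

CRAWLERS = {
    '东方航空': (donghangcrawler, "donghang.csv"),
    '吉祥航空': (jixiangcrawler, "jixiang.csv"),
    '中国联合航空': (lianhangcrawler, "lianghang.csv"),
}


def run_crawlers(airline, dcity, acity, date):
    procs, csvlist = [], []
    for name in airline:
        if name in CRAWLERS:
            target, csvfile = CRAWLERS[name]
            p = Process(target=target, args=(dcity, acity, date))
            p.start()        # launch each matching crawler in its own process
            procs.append(p)
            csvlist.append(csvfile)
    for p in procs:
        p.join()             # block the main process until every crawler has finished
    return csvlist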