コード例 #1
0
ファイル: qunarCity.py プロジェクト: OCsource/qunar_crawler
def getScenery(city_name):
    """Crawl the scenery, strategy and food pages of one city in parallel.

    The city is looked up through ``DB.operateDB().searchCity``. If the city
    table is empty, the city list is first crawled via ``getCityURL`` and the
    lookup is retried. When the city is known and has a city number, three
    crawler jobs (scenery, strategy, food) are submitted to a thread pool and
    this function blocks until all of them have finished.

    Args:
        city_name: City name as stored in the city table; it is also
            converted to pinyin to build the page URLs.

    Returns:
        None. An unknown city is reported on stdout; a city without a city
        number is reported through the logger.
    """
    operate = DB.operateDB()
    result = operate.searchCity(city_name)
    if operate.countCity() and not result:
        print('没有这个城市!')
        return
    if not result:
        # The city table is empty: populate it, then look the city up again.
        url = 'http://travel.qunar.com/place'
        getCityURL(url)
        result = operate.searchCity(city_name)
        if not result:
            # Still unknown after the refresh; indexing result[0] would
            # raise IndexError, so report it the same way as above.
            print('没有这个城市!')
            return

    city_id = result[0][0]
    city_number = result[0][2]
    if not city_number:
        logUtil.getLogger(1).error(city_name + ':没有该旅游城市信息')
        return

    # Normalise once so every URL is built the same way, whether the
    # database hands back the number as text or as an integer.
    city_number = str(city_number)
    cityPinYin = "".join(Pinyin().get_pinyin(city_name).split('-'))

    # Scenery page, strategy (travel guide) list page, food page.
    scnery_website = ('https://travel.qunar.com/p-cs' + city_number + '-'
                      + cityPinYin + '-jingdian')
    strategy_website = ('https://travel.qunar.com/travelbook/list/22-'
                        + cityPinYin + '-' + city_number + '/hot_heat/1.htm')
    cate_website = ('https://travel.qunar.com/p-cs' + city_number + '-'
                    + cityPinYin + '-meishi?page=1')

    # The context manager shuts the pool down when the jobs are done, so no
    # idle worker threads are left behind after this call returns.
    with ThreadPoolExecutor(3) as pools:
        # Each crawler is called with ONE positional argument: a
        # (url, identifier) tuple, exactly as before.
        all_pools = [
            pools.submit(qunarScenery.getScenery, (scnery_website, city_id)),
            pools.submit(qunarStrategy.getStrategy,
                         (strategy_website, city_name)),
            pools.submit(qunarCate.getCate, (cate_website, city_id)),
        ]
        wait(all_pools, return_when=ALL_COMPLETED)
コード例 #2
0
ファイル: DB.py プロジェクト: OCsource/qunar_crawler
 def __init__(self, *, db_name='qunar', user='******', password='******',
              host='localhost', charset='utf8'):
     """Store the database connection settings and create a logger.

     Every parameter is keyword-only and defaults to the value that used to
     be hard-coded, so constructing the object with no arguments behaves
     exactly as before. Passing explicit values lets callers point the
     object at a different database without editing this file.

     Args:
         db_name: Name of the database.
         user: Database user name.
         password: Password for ``user``.
         host: Database server host.
         charset: Stored as the private ``__char`` attribute; presumably
             the connection character set -- confirm where it is consumed.
     """
     self.__dbName = db_name
     self.__user = user
     self.__password = password
     self.__host = host
     self.__char = charset
     # NOTE(review): the meaning of selector 0 is defined in logUtil.
     self.logger = logUtil.getLogger(0)
コード例 #3
0
import requests,random,time,re
from bs4 import BeautifulSoup
from python0_1.qunar_crawler.saveToDB import DB
from python0_1.qunar_crawler.common import headMsg
from python0_1.qunar_crawler.utils import logUtil

# Candidate User-Agent strings from headMsg.UA; getStrategyContent picks one
# at random for the request headers.
User_Agent = headMsg.UA
# Module-level logger. NOTE(review): the meaning of selector 1 is defined in
# logUtil -- confirm.
logger = logUtil.getLogger(1)

# 将攻略内容爬取
# 参数:相应的攻略url,攻略编号
def getStrategyContent(url,strategy_number):
    operate = DB.operateDB()
    Hostreferer = {
        'User-Agent': random.choice(User_Agent),
        'Referer': 'https://www.qunar.com'
    }
    html = requests.get(url=url,headers=Hostreferer,timeout=5).text
    soup = BeautifulSoup(html, "html.parser")
    try:
        # print(url)
        # 爬取每个攻略页面的内容
        box = soup.find('div', class_='container main-container')
        strategy_att = box.find('ul', class_='foreword_list')
        playTime = strategy_att.find('li', class_='f_item howlong')
        playTime = playTime.find('span', class_='data') if playTime != None else None
        playTime = playTime.string if playTime != None else 0
        cost = strategy_att.find('li', class_='f_item howmuch')
        cost = cost.find('span', class_='data').string if cost != None else 0
        theme = strategy_att.find('li', class_='f_item how')
        theme = theme.find('span', class_='data') if theme != None else None