Code example #1
import sys, os
sys.path.append(os.path.dirname(__file__) + os.sep + '../')

from datetime import datetime
import time
from import_data_to_mysql import con_db
from common_tool import get_response, redis_return_operation, get_log
from setting import db_setting
from import_data_to_redis import RedisCache_checkAPI

"""
Crawler for the League of Legends (英雄联盟) official site
"""

match_yxlmgw_log = get_log('match_yxlmgw')

team_list = ['RNG', 'ES', 'EDG', 'LGD', 'IG', 'BLG', 'TES', 'SN', 'WE',
            'OMG', 'DMO', 'LNG', 'JDG', 'FPX', 'RW', 'VG', 'V5']

#   LoL official-site URLs; request headers ---> headers_yxlmgw
# Normally the schedule page uses url_finish_1: shows the 2 most recent finished
# matches; url_matching: in progress (a status of '-1' means no match is live);
# url_unfinish: not yet started

#  url_finish_2, url_finish_3: matches finished within the past week
#  url_unfinish_2, url_unfinish_3: matches scheduled within the coming week

url_finish_1 = 'https://apps.game.qq.com/lol/match/apis/searchBMatchInfo_bak.php?p8=5&p1=134&p4=3&p2=%C8%AB%B2%BF&p9=&p10=&p6=2&p11=&p12=&page=1&pagesize=2&_='
url_finish_2 = 'https://apps.game.qq.com/lol/match/apis/searchBMatchInfo_bak.php?p8=5&p1=134&p4=&p2=%C8%AB%B2%BF&p9=&p10=&p6=2&p11={0}&p12=&page=1&pagesize=8&_='
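
# A minimal sketch (an assumption, not code from the original module) of how these
# templates are completed: p11 appears to take a date filter, and the trailing `_`
# takes a millisecond cache-busting timestamp:
def build_schedule_url(template, date_str=''):
    # Fill the p11 slot if the template has one, then append the ms timestamp.
    stamp = str(int(time.time() * 1000))
    return (template.format(date_str) if '{0}' in template else template) + stamp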


Code example #2
import sys, os
sys.path.append(os.path.dirname(__file__) + os.sep + '../')

from datetime import datetime
from common_tool import get_response, get_log, redis_check
from import_data_to_mysql import con_db
from import_data_to_redis import RedisCache_checkAPI
from setting import db_setting

"""
League of Legends odds crawler for the Leijingji (雷竞技) betting site
url: https://www.ray83.com/match/37198305
"""

leijingji_log = get_log('leijingji')
# Crawler flow:
# First load the two schedule-page URLs: start_url, second_url
# start_url = 'https://incpgameinfo.esportsworldlink.com/v2/match?page=1&match_type=2',
# second_url = 'https://incpgameinfo.esportsworldlink.com/v2/match?page=2&match_type=2'
# Take the match ids from start_url and second_url and splice them into the detail URLs
# Each detail URL yields the corresponding odds URL: https://incpgameinfo.esportsworldlink.com/v2/odds?match_id=37219633

# Today
start_url = 'https://incpgameinfo.esportsworldlink.com/v2/match?page=1&match_type=2'

# Rolling (in-play) markets
gunpan_url1 = 'https://incpgameinfo.esportsworldlink.com/v2/match?page=1&match_type=1'
gunpan_url2 = 'https://incpgameinfo.esportsworldlink.com/v2/match?page=1&match_type=0'
gunpan_url3 = 'https://incpgameinfo.esportsworldlink.com/v2/match?page=2&match_type=0'
gunpan_urls = [gunpan_url1, gunpan_url2, gunpan_url3]
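
# A minimal sketch of the id-splicing step described in the flow comments above
# (assumptions: the 'data' / 'match_id' JSON field names and the headers dict are
# not shown in this excerpt):
odds_url_tpl = 'https://incpgameinfo.esportsworldlink.com/v2/odds?match_id={0}'

def collect_odds_urls(list_url, headers):
    # Fetch one schedule page and splice each match id into an odds URL.
    matches = get_response(list_url, headers)['data']
    return [odds_url_tpl.format(m['match_id']) for m in matches]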
Code example #3
"""

start_url = 'https://www.scoregg.com/services/api_url.php'

header = {
    'user-agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
    ' Chrome/84.0.4147.89 Safari/537.36'
}

league_exclude = [
    '2020 LCK夏季升降级赛', '2019KeSPA杯', '2019拉斯维加斯全明星', 'LPL公开训练赛', '2017 KPL秋季赛'
]
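# Role glosses for the position_dict below: 上单 = top, 打野 = jungle, 中单 = mid,
# ADC, 辅助 = support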
position_dict = {'上单': 1, '打野': 2, '中单': 3, 'ADC': 4, '辅助': 5, 'None': 6}

lol_player_log = get_log('lol_player')

# form_data for requesting the LoL league ids  url: https://www.scoregg.com/services/api_url.php
form_data_yxlm = {
    'api_path': '/services/match/web_tournament_group_list.php',
    'method': 'GET',
    'platform': 'web',
    'api_version': '9.9.9',
    'language_id': 1,
    'gameID': 1,
    'type': 'all',
    'page': 1,
    'limit': 18,
    'year': ''
}
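
# A minimal usage sketch (an assumption: post_response is the project's shared POST
# helper, as used in the league-board example below; this excerpt's imports are
# truncated):
def list_tournament_ids():
    resp = post_response(start_url, form_data_yxlm, header)
    return [item['tournamentID'] for item in resp['data']['list']]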
Code example #4
# Knockout stage
url_knockout1 = 'https://app.tga.qq.com/openapi/tgabank/getSchedules?appid=10005&sign=K8tjxlHDt7HHFSJTlxxZW4A%2BalA%3D&begin_time=1596067200&end_time=1596153600&seasonid=KCC2020S'
url_knockout2 = 'https://app.tga.qq.com/openapi/tgabank/getSchedules?appid=10005&sign=K8tjxlHDt7HHFSJTlxxZW4A%2BalA%3D&begin_time=1596153600&end_time=1596240000&seasonid=KCC2020S'
url_knockout3 = 'https://app.tga.qq.com/openapi/tgabank/getSchedules?appid=10005&sign=K8tjxlHDt7HHFSJTlxxZW4A%2BalA%3D&begin_time=1596240000&end_time=1596326400&seasonid=KCC2020S'
url_knockout4 = 'https://app.tga.qq.com/openapi/tgabank/getSchedules?appid=10005&sign=K8tjxlHDt7HHFSJTlxxZW4A%2BalA%3D&begin_time=1596326400&end_time=1596412800&seasonid=KCC2020S'
url_knockout5 = 'https://app.tga.qq.com/openapi/tgabank/getSchedules?appid=10005&sign=K8tjxlHDt7HHFSJTlxxZW4A%2BalA%3D&begin_time=1596844800&end_time=1596931200&seasonid=KCC2020S'
url_knockout6 = 'https://app.tga.qq.com/openapi/tgabank/getSchedules?appid=10005&sign=K8tjxlHDt7HHFSJTlxxZW4A%2BalA%3D&begin_time=1596931200&end_time=1597017600&seasonid=KCC2020S'
url_knockout7 = 'https://app.tga.qq.com/openapi/tgabank/getSchedules?appid=10005&sign=K8tjxlHDt7HHFSJTlxxZW4A%2BalA%3D&begin_time=1597536000&end_time=1597622400&seasonid=KCC2020S'

url_knockout_list = [
    url_knockout1, url_knockout2, url_knockout3, url_knockout4, url_knockout5,
    url_knockout6, url_knockout7
]
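
# The seven URLs above differ only in their one-day begin_time/end_time windows
# (end_time = begin_time + 86400); an equivalent construction, for reference:
_knockout_days = [1596067200, 1596153600, 1596240000, 1596326400,
                  1596844800, 1596931200, 1597536000]
_knockout_tpl = ('https://app.tga.qq.com/openapi/tgabank/getSchedules?appid=10005'
                 '&sign=K8tjxlHDt7HHFSJTlxxZW4A%2BalA%3D'
                 '&begin_time={0}&end_time={1}&seasonid=KCC2020S')
assert url_knockout_list == [_knockout_tpl.format(t, t + 86400) for t in _knockout_days]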

match_wzry_log = get_log('match_wzry')
redis = RedisCache_checkAPI()
db = con_db(db_setting['host'], db_setting['user'], db_setting['password'],
            db_setting['db'])


def parse_wzry(url, headers, propertys, db):
    try:
        responses = get_response(url, headers)
        results = responses['data']
        # print(len(results), results)
        game_name = '王者荣耀'
        source_from = '王者荣耀官网'  # crawler source site
        types = 2
        for result in results:
            # print('schedule record 1:', type(result), result)
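            # ... per-result parsing is truncated in this excerpt ...
            pass
    except Exception as e:
        # Hedged completion (an assumption): the original except clause is not
        # shown, so log the error via the module's logger.
        match_wzry_log.error(e)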
Code example #5
"""
News crawler for the five major European football leagues.
Scraping rule: from each league's start_url, find the article_id and splice it into
the news-detail-page URL; fields are extracted with XPath.
"""

redis = RedisCache_urldistict()
db = con_db(db_sport_setting['host'], db_sport_setting['user'], db_sport_setting['password'], db_sport_setting['db'])

England_url = 'http://www.ppsport.com/premierleague'
Spain_url = 'http://www.ppsport.com/laliga'
German_url = 'http://www.ppsport.com/bundesliga'
Italy_url = 'http://www.ppsport.com/seriea'
France_url = 'http://www.ppsport.com/ligue1'

information_pptv_log = get_log('information_pptv')

# League affiliation for each of the five start URLs (英超 = Premier League,
# 西甲 = La Liga, 德甲 = Bundesliga, 意甲 = Serie A, 法甲 = Ligue 1)
start_url_O = {
    England_url:'英超联赛', Spain_url:'西甲联赛', German_url:'德甲联赛', Italy_url:'意甲联赛', France_url:'法甲联赛'
}

# XPath rules for extracting article ids from each league's page:
# the Premier League page has two content blocks, the other leagues have three
# (the three-block XPath rules currently look identical across leagues).
# Extracted ids look like '/article/news/1042662.html' and are spliced directly
# into the news-detail-page URL.
league_xpath_rule = {
    England_url: {
        '/html/body/div[1]/div[4]/div[2]/div[3]/dl[1]/dd/a/@href',
        '/html/body/div[1]/div[4]/div[6]/div[2]/div/div/div/div/a/@href'
    },
    Spain_url:{
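        # XPath rules for the remaining leagues are truncated in this excerpt; per
        # the note above, the non-Premier-League pages share one three-block rule set.
    }
}

# A minimal sketch of the XPath extraction step described in the docstring
# (assumptions: plain requests is used because the module's fetch helper is not
# shown here, and the article base URL is inferred from the start URLs above):
import requests
from lxml import etree

def extract_article_urls(league_url):
    html = etree.HTML(requests.get(league_url, timeout=10).text)
    paths = []
    for rule in league_xpath_rule[league_url]:
        paths.extend(html.xpath(rule))  # e.g. '/article/news/1042662.html'
    return ['http://www.ppsport.com' + p for p in paths]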
Code example #6
form_data has two main variable parameters: tournament_id (league id) and page (page number)

"""

start_url = 'https://www.scoregg.com/services/api_url.php'

header = {
    'user-agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
    ' Chrome/84.0.4147.89 Safari/537.36'
}

league_exclude = [
    '2020 LCK夏季升降级赛', '2019KeSPA杯', '2019拉斯维加斯全明星', 'LPL公开训练赛', '2017 KPL秋季赛'
]
lol_team_log = get_log('lol_team_log')

# form_data for requesting the LoL league ids  url: https://www.scoregg.com/services/api_url.php
form_data_yxlm = {
    'api_path': '/services/match/web_tournament_group_list.php',
    'method': 'GET',
    'platform': 'web',
    'api_version': '9.9.9',
    'language_id': 1,
    'gameID': 1,
    'type': 'all',
    'page': 1,
    'limit': 18,
    'year': ''
}
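
# A minimal pagination sketch over the two variable parameters named in the
# docstring (an assumption: post_response is the project's shared POST helper):
def iter_league_pages(max_page=3):
    for page in range(1, max_page + 1):
        form = dict(form_data_yxlm, page=page)
        yield post_response(start_url, form, header)['data']['list']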
Code example #7
today = datetime.now()
week_day = today.weekday()
today_str = today.strftime('%Y-%m-%d 00:00:00')
str_today = datetime.strptime(today_str, '%Y-%m-%d %H:%M:%S')
today_stamp = str_today.timestamp()
# 00:00:00 timestamp of this week's Monday
monday_stamp = int(today_stamp - 86400 * week_day)
# 00:00:00 timestamp of last Sunday
last_weekstamp = int(monday_stamp - 86400)
# 00:00:00 timestamp of next Monday
next_weekstamp = int(monday_stamp + 86400 * 7)

time_list = [monday_stamp, next_weekstamp]

match_wanplus_log = get_log('match_wanplus')

redis = RedisCache_checkAPI()
db = con_db(db_setting['host'], db_setting['user'], db_setting['password'],
            db_setting['db'])


def parse_wanplus(url, data, db, headers):
    try:
        responses = post_response(url, data, headers)
        results = responses['data']['scheduleList']
        game_name = '英雄联盟'
        source_from = 'wanplus'  # crawler source site
        types = 1
        for key_list, result in results.items():
            date_time = result['time']
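            # ... per-match parsing is truncated in this excerpt ...
    except Exception as e:
        # Hedged completion (an assumption): the original except clause is not
        # shown, so log the error via the module's logger.
        match_wanplus_log.error(e)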
Code example #8
import sys, os
sys.path.append(os.path.dirname(__file__) + os.sep + '../')

import requests
from lxml import etree
from import_data_to_mysql import con_db
from setting import db_sport_setting
from common_tool import get_log
"""
Odds crawler for the China sports lottery site (竞彩网)
"""

db = con_db(db_sport_setting['host'], db_sport_setting['user'],
            db_sport_setting['password'], db_sport_setting['db'])

sport_bet_log = get_log('sport_bet')

bet_list = {'u-cir': 2, 'u-dan': 1, 'u-kong': 0}

headers_bet = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/84.0.4147.125 Safari/537.36'
}

start_url = 'https://info.sporttery.cn/football/match_list.php'

match_betdetail = 'https://www.lottery.gov.cn/football/match_hhad.jspx?mid='

response = requests.get(url=start_url, headers=headers_bet)
response = response.content.decode('gb2312')
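
# A minimal sketch of the next step implied by the lxml import above (the real
# XPath rule is truncated in this excerpt, so a placeholder rule is shown):
html = etree.HTML(response)
hrefs = html.xpath('//a/@href')  # placeholder XPath, not the original rule
detail_urls = [match_betdetail + href.split('mid=')[-1]
               for href in hrefs if 'mid=' in href]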
Code example #9
match_url_pre = 'https://img1.famulei.com/tr_round/{0}.json?_={1}'
# Team-ranking URL
type_url_pre = 'https://img1.famulei.com/match/teamrank/{0}.json?_={1}'

# Timestamp for the current moment
now_time_match = datetime.now()
timestamps_match = int(now_time_match.timestamp() * 1000)
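# e.g. type_url_pre.format(some_id, timestamps_match) yields a concrete ranking URL;
# which id fills the first slot is not shown in this excerpt.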

source = 'score'

# Mapping: site team name -> group name
team_type_name = {}
# Mapping: backend-returned team name -> group name
realteam_type_name = {}

league_board_log = get_log('league_board')
db = con_db(db_setting['host'], db_setting['user'], db_setting['password'],
            db_setting['db'])
redis = RedisCache_checkAPI()


def parse(form_data_yxlm, types):
    game_name = '英雄联盟' if types == 1 else '王者荣耀'
    league_id = 0
    try:
        responses = post_response(start_url, form_data_yxlm, header)
        responses = responses['data']['list']
        # print('raw data:', responses)
        for response in responses:
            # get the league id
            tournamentID = response['tournamentID']
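            # ... per-league parsing is truncated in this excerpt ...
    except Exception as e:
        # Hedged completion (an assumption): the original except clause is not
        # shown, so log the error via the module's logger.
        league_board_log.error(e)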
Code example #10
import json
from datetime import datetime, timedelta
from common_tool import redis_check, get_response_proxy, get_log
from import_data_to_mysql import con_db
from import_data_to_redis import RedisCache_checkAPI
from setting import db_setting
"""
Match-detail crawler for the Shangniu esports site (尚牛电竞) (data unstable; deprecated)
"""

# Create the database connection object
db = con_db(db_setting['host'], db_setting['user'], db_setting['password'],
            db_setting['db'])
# Create the redis client object
redis = RedisCache_checkAPI()
detail_log = get_log('match_detail')

# List of LPL teams
LPL_list = [
    'RNG', 'ES', 'EDG', 'LGD', 'IG', 'BLG', 'TES', 'SN', 'WE', 'OMG', 'DMO',
    'LNG', 'JDG', 'FPX', 'RW', 'VG', 'V5'
]

now_date = datetime.now()
now_stamp = int(now_date.timestamp())
# print(now_stamp)
cookie_message = 'UM_distinctid=172d9950ded60f-00916514fef24f-4353761-e1000-172d9950deea7b; ' \
                 'Hm_lvt_c95eb6bfdfb2628993e507a9f5e0ea01=1594349716,1594629849,1594689821,1594950270; ' \
                 'CNZZDATA1278221275=1183247664-1592785074-%7C1594948928; ' \
                 'Hm_lpvt_c95eb6bfdfb2628993e507a9f5e0ea01={}'.format(now_stamp)
headers = {
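    # Hedged completion: the remaining header fields are truncated in this excerpt.
    # cookie_message above is evidently built for the Cookie header; the UA string
    # is copied from this module's sibling crawlers as an assumption.
    'Cookie': cookie_message,
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36',
}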
Code example #11
from import_data_to_redis import RedisCache_checkAPI
from datetime import datetime, timedelta
from setting import db_setting
import json


"""
League of Legends match-detail crawler.
Scraped from the score site  # start_url: https://www.scoregg.com/schedule
Flow:
From start_url take yesterday's and today's matches and filter by schedule type to
pick the events to scrape; each event's `result` field records the games played and
each game carries a resultID.
Splice the resultID into the per-game detail-data URL:
'https://img1.famulei.com/match/result/21952.json?_=1596076077396'
then parse the returned data and write it to the database.
"""

match_detail_score_log = get_log('match_detail_score')

# Leagues currently needed
tournamentID = {
    '170': '2020 LCS夏季赛',
    '171': '2020 LEC夏季赛',
    '172': '2020 LPL夏季赛',
    '173': '2020 LCK夏季赛',
    '174': '2020 LDL夏季赛'
}

# Dictionary mapping each side's hero pick slots to position indexes
position_dict = {
    'blue_hero_a_name':1, 'blue_hero_b_name':2, 'blue_hero_c_name':3, 'blue_hero_d_name':4, 'blue_hero_e_name':5,
    'red_hero_a_name':1, 'red_hero_b_name':2, 'red_hero_c_name':3, 'red_hero_d_name':4, 'red_hero_e_name':5
}
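
# A minimal sketch of the URL splice described in the docstring (the millisecond
# timestamp is the same cache-busting `_` parameter used elsewhere in this project):
import time

def result_detail_url(result_id):
    return 'https://img1.famulei.com/match/result/{0}.json?_={1}'.format(
        result_id, int(time.time() * 1000))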
Code example #12
Hero leaderboards (League of Legends, Honor of Kings).
Scraping rule:
Each league has a tournament_id; request it via POST to https://www.scoregg.com/services/api_url.php
form_data has two main variable parameters: tournament_id (league id) and page (page number)
"""

start_url = 'https://www.scoregg.com/services/api_url.php'

header = {
    'user-agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
    ' Chrome/84.0.4147.89 Safari/537.36'
}

league_exclude = ['2020 LCK夏季升降级赛', '2019KeSPA杯', '2019拉斯维加斯全明星', 'LPL公开训练赛', '2017 KPL秋季赛']

lol_heros_log = get_log('lol_heros')

# form_data for requesting the LoL league ids  url: https://www.scoregg.com/services/api_url.php
form_data_yxlm = {
    'api_path': '/services/match/web_tournament_group_list.php',
    'method': 'GET',
    'platform': 'web',
    'api_version': '9.9.9',
    'language_id': 1,
    'gameID': 1,
    'type': 'all',
    'page': 1,
    'limit': 18,
    'year': ''
}
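
# A minimal sketch of the per-league request the docstring describes (assumptions:
# post_response is the project's shared POST helper, and reusing form_data_yxlm with
# tournament_id/page overridden follows the docstring's two variable parameters; the
# hero-stats api_path may differ and is not shown in this excerpt):
def fetch_hero_page(tournament_id, page):
    form = dict(form_data_yxlm, tournament_id=tournament_id, page=page)
    return post_response(start_url, form, header)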