def parse(url, data, headers):
    """Crawl per-game details for every wanted match in the schedule feed.

    Posts ``data`` to ``url`` and walks the ``data.list`` mapping of the
    response (date key -> day schedule). Only dates found in the module-level
    ``date_list`` and league keys found in the module-level ``tournamentID``
    mapping are processed. For each match that carries a non-empty ``result``
    list, ``detail_parse`` is called once per game, with a 1-based game index.

    Args:
        url: Endpoint handed to ``post_response``.
        data: Form payload handed to ``post_response``.
        headers: HTTP headers for ``post_response``; also forwarded to
            ``detail_parse``.

    Returns:
        None. Any exception is logged to ``match_detail_score_log`` (with
        traceback) and swallowed.
    """
    try:
        types = 1
        game_name = '英雄联盟'
        schedule_by_date = post_response(url, data, headers)['data']['list']
        for date_key, day_schedule in schedule_by_date.items():
            # Skip dates outside the wanted window and empty payloads.
            if date_key not in date_list or not day_schedule:
                continue
            for league_key, league_block in day_schedule['info'].items():
                # Skip leagues we do not track.
                if league_key not in tournamentID:
                    continue
                league_name = tournamentID[league_key]
                for match in league_block['list']:
                    # Matches without recorded games have nothing to crawl;
                    # check this first so they cannot fail on time parsing.
                    games = match.get('result')
                    if not games:
                        continue
                    # The site gives "2020-07-30" and "17:00"; convert to a
                    # 10-digit epoch timestamp (naive datetime, local time).
                    start_time_str = '{} {}:00'.format(
                        match['start_date'], match['start_time'])
                    start_time = int(datetime.strptime(
                        start_time_str, '%Y-%m-%d %H:%M:%S').timestamp())
                    # The position inside the list is used as the game index.
                    for index_num, game in enumerate(games, start=1):
                        # Per-game id; detail_parse receives it alongside the
                        # detail URL built from it.
                        result_id = game['resultID']
                        detail_urls = detail_url.format(
                            result_id, now_date_stamp)
                        detail_parse(detail_urls, result_id, types, index_num,
                                     game_name, league_name, start_time,
                                     headers)
    except Exception as e:
        match_detail_score_log.error('数据抓取异常')
        match_detail_score_log.error(e, exc_info=True)
def parse(types):
    """Scrape per-league player rankings and upsert them into the stats tables.

    For every league returned by the tournament-list request (except those in
    ``league_exclude``) the league, team and player names are resolved through
    ``league_check`` / ``team_check`` / ``player_check``. A response code of
    600 is treated as a successful match; any other code makes the unresolved
    name get recorded in ``black_list`` through ``api_return_200``. Resolved
    players are written with ``db.update_insert`` into
    ``game_lol_player_league_stats`` when ``types == 1`` and into
    ``game_kog_player_league_stats`` otherwise.

    Args:
        types: Game selector. ``1`` picks ``form_data_yxlm`` and the LoL
            table; any other value picks ``form_data_wzry`` and the KoG table.

    Returns:
        None. Exceptions are logged to ``lol_player_log`` and swallowed.
    """
    # NOTE(review): every value is interpolated straight into the SQL text;
    # a quote inside a scraped name would break the statement.
    try:
        tournament_form = form_data_yxlm if types == 1 else form_data_wzry
        tournament_payload = post_response(start_url, tournament_form, header)
        for league in tournament_payload['data']['list']:
            tournament_id = league['tournamentID']
            source_league_name = league['name']
            # Leagues on the exclusion list are skipped outright.
            if source_league_name in league_exclude:
                continue

            # Ask the backend for the canonical league name / id.
            league_result = league_check(source_league_name, types)
            league_name = league_result['result']['league_name']
            league_id = league_result['result']['league_id']
            if league_result['code'] != 600:
                # Unknown league: record it in the blacklist.
                sql_blacklist = "select id from black_list where league_name = '{}';".format(
                    league_name)
                sql_add_blacklist = (
                    "insert into black_list set league_name = '{}', source_from = 1, "
                    "judge_position=1000;"
                ).format(league_name)
                api_return_200(sql_blacklist, sql_add_blacklist, db)
                continue

            # The ranking endpoint is paged; pages 1 through 4 are requested.
            form_data['tournament_id'] = tournament_id
            for page in range(1, 5):
                form_data['page'] = page
                page_payload = post_response(start_url, form_data, header)
                if not page_payload:
                    continue
                for row in page_payload['data']['data']['list']:
                    team_name = row['team_name']
                    # Ask the backend for the canonical team name / id.
                    team_result = team_check(team_name, types)
                    if not team_result or 'result' not in team_result:
                        continue
                    team_name = team_result['result']['team_name']
                    team_id = team_result['result']['team_id']
                    if team_result['code'] != 600:
                        # Unknown team: record it in the blacklist.
                        sql_blacklist = "select id from black_list where team_name = '{}';".format(
                            team_name)
                        sql_add_blacklist = (
                            "insert into black_list set league_name = '{0}',team_name = '{1}', "
                            "source_from = 1, judge_position=0100;"
                        ).format(league_name, team_name)
                        api_return_200(sql_blacklist, sql_add_blacklist, db)
                        continue

                    nick_name = row['player_name']
                    # Resolve the backend player id from the nickname.
                    player_result = player_check(nick_name, types)
                    if player_result['code'] != 600:
                        # Unknown player: record it in the blacklist.
                        sql_blacklist = "select id from black_list where player_name ='{}';".format(
                            nick_name)
                        sql_add_blacklist = (
                            "insert into black_list set league_name = '{0}',team_name = '{1}', player_name ='{2}', "
                            "source_from = 1, judge_position=0010;"
                        ).format(league_name, team_name, nick_name)
                        api_return_200(sql_blacklist, sql_add_blacklist, db)
                        continue

                    player_id = player_result['result']['player_id']
                    kda = row['KDA']
                    mvp_count = row['MVP']
                    play_count = row['PLAYS_TIMES']
                    win_count = row['win']
                    lose_count = row['los']
                    offered_rate = row['OFFERED_RATE']
                    kill_count = row['total_kills']
                    kill_average = row['AVERAGE_KILLS']
                    assist_count = row['total_assists']
                    assist_average = row['AVERAGE_ASSISTS']
                    death_count = row['total_deaths']
                    death_average = row['AVERAGE_DEATHS']
                    economic_minute = row['MINUTE_ECONOMIC']
                    hit_minute = row['MINUTE_HITS']
                    damage_deal_minute = row['MINUTE_DAMAGEDEALT']
                    damage_deal_rate = row['DAMAGEDEALT_RATE']
                    damage_taken_minute = row['MINUTE_DAMAGETAKEN']
                    damage_taken_rate = row['DAMAGETAKEN_RATE']
                    wards_killed_minute = row['MINUTE_WARDKILLED']
                    wards_placed_minute = row['MINUTE_WARDSPLACED']
                    # The site has no per-game last-hit figure; store 0.
                    last_hit_per_game = 0
                    # It is unclear whether the site's total_kills /
                    # total_deaths / total_assists are totals or per-game
                    # maxima; they are stored in the "most_*" columns as-is.
                    most_kill_per_games = row['total_kills']
                    most_death_per_games = row['total_deaths']
                    most_assist_per_games = row['total_assists']
                    # Fall back to the default avatar when the site has none.
                    avatar = row['player_image'] or default_avatar
                    position = position_dict[row['position']]

                    if types == 1:
                        # League of Legends table.
                        sql_teamrank = (
                            "INSERT INTO `game_lol_player_league_stats` (player_id, league_id, kda, "
                            "mvp_count, play_count, win_count, offered_rate, kill_count, kill_average, assist_count,"
                            " assist_average, death_count, death_average, economic_minute, hit_minute, damage_deal_minute,"
                            "damage_deal_rate, damage_taken_minute, damage_taken_rate, last_hit_per_game, "
                            "most_kill_per_games, most_death_per_games, most_assist_per_games, team_id, nick_name, avatar,"
                            " position) VALUES({0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, "
                            "{12}, {13}, {14}, {15}, {16}, {17}, {18}, {19}, {20}, {21}, {22}, {23}, '{24}', '{25}', {26}) "
                            "ON DUPLICATE KEY UPDATE "
                            "player_id={0}, league_id={1}, kda={2}, mvp_count={3}, play_count={4}, win_count={5}, "
                            "offered_rate={6}, kill_count={7}, kill_average={8}, assist_count={9}, assist_average={10}, "
                            "death_count={11}, death_average={12}, economic_minute={13}, hit_minute={14}, "
                            "damage_deal_minute={15}, damage_deal_rate={16}, damage_taken_minute={17}, "
                            "damage_taken_rate={18}, last_hit_per_game={19}, most_kill_per_games={20}, "
                            "most_death_per_games={21}, most_assist_per_games={22}, team_id={23}, nick_name='{24}', avatar='{25}', "
                            "position={26};"
                        ).format(
                            player_id, league_id, kda, mvp_count, play_count,
                            win_count, offered_rate, kill_count, kill_average,
                            assist_count, assist_average, death_count,
                            death_average, economic_minute, hit_minute,
                            damage_deal_minute, damage_deal_rate,
                            damage_taken_minute, damage_taken_rate,
                            last_hit_per_game, most_kill_per_games,
                            most_death_per_games, most_assist_per_games,
                            team_id, nick_name, avatar, position)
                    else:
                        # King of Glory table.
                        sql_teamrank = (
                            "INSERT INTO `game_kog_player_league_stats` (player_id, league_id, win_count, "
                            "lose_count, play_count, mvp_count, kda, kill_count, kill_average, assist_count,"
                            " assist_average, death_count, death_average, offered_rate, economic_minute, hit_minute,"
                            "wards_placed_minute, wards_killed_minute, damage_deal_rate, damage_deal_minute, damage_taken_minute, "
                            "damage_taken_rate, type, team_id, nick_name, avatar, position) VALUES({0}, "
                            "{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}, {13}, {14}, {15}, {16},"
                            " {17}, {18}, {19}, {20}, {21}, {22}, {23}, '{24}', '{25}', {26}) "
                            "ON DUPLICATE KEY UPDATE "
                            "player_id={0}, league_id={1}, win_count={2}, lose_count={3}, play_count={4}, mvp_count={5}, "
                            "kda={6}, kill_count={7}, kill_average={8}, assist_count={9}, assist_average={10}, "
                            "death_count={11}, death_average={12}, offered_rate={13}, economic_minute={14}, "
                            "hit_minute={15}, wards_placed_minute={16}, wards_killed_minute={17}, "
                            "damage_deal_rate={18}, damage_deal_minute={19}, damage_taken_minute={20}, "
                            "damage_taken_rate={21}, type={22}, team_id={23}, nick_name='{24}', avatar='{25}',"
                            "position={26};"
                        ).format(
                            player_id, league_id, win_count, lose_count,
                            play_count, mvp_count, kda, kill_count,
                            kill_average, assist_count, assist_average,
                            death_count, death_average, offered_rate,
                            economic_minute, hit_minute, wards_placed_minute,
                            wards_killed_minute, damage_deal_rate,
                            damage_deal_minute, damage_taken_minute,
                            damage_taken_rate, types, team_id, nick_name,
                            avatar, position)
                    db.update_insert(sql_teamrank)
    except Exception as e:
        lol_player_log.error(e, exc_info=True)
def parse(types):
    """Scrape each ranked player's hot-hero list and hand it to ``parse_detail``.

    For every league returned by the tournament-list request (except those in
    ``league_exclude``) that ``league_check`` resolves with code 600, the
    first two ranking pages are fetched. For each player row the hot-heroes
    payload is requested, the backend player id is looked up in redis (key
    ``score+player+<source_player_id>``) or, on a miss, through
    ``player_check``; a freshly resolved id is cached for 86400 seconds.
    Leagues and players that do not resolve with code 600 are recorded in
    ``black_list`` through ``api_return_200``.

    Args:
        types: Game selector. ``1`` picks ``form_data_yxlm``; any other value
            picks ``form_data_wzry``.

    Returns:
        None. Exceptions are not caught here and propagate to the caller.
    """
    # NOTE(review): names are interpolated straight into the SQL text; a
    # quote inside a scraped name would break the statement.
    source = 'score'
    tournament_form = form_data_yxlm if types == 1 else form_data_wzry
    tournament_payload = post_response(start_url, tournament_form, header)
    for league in tournament_payload['data']['list']:
        tournament_id = league['tournamentID']
        source_league_name = league['name']
        # Leagues on the exclusion list are skipped outright.
        if source_league_name in league_exclude:
            continue

        # Ask the backend for the canonical league name.
        league_result = league_check(source_league_name, types)
        league_name = league_result['result']['league_name']
        if league_result['code'] != 600:
            # Unknown league: record it in the blacklist.
            sql_blacklist = "select id from black_list where league_name = '{}';".format(
                league_name)
            sql_add_blacklist = (
                "insert into black_list set league_name = '{}', source_from = 1, "
                "judge_position=1000;"
            ).format(league_name)
            api_return_200(sql_blacklist, sql_add_blacklist, db)
            continue

        # The ranking endpoint is paged; pages 1 and 2 are requested.
        form_data['tournament_id'] = tournament_id
        for page in (1, 2):
            form_data['page'] = page
            page_payload = post_response(start_url, form_data, header)
            # Same empty-response guard the sibling ranking scrapers use.
            if not page_payload:
                continue
            for row in page_payload['data']['data']['list']:
                player_name = row['player_name']
                source_player_id = row['player_id']
                # The form expects playerID as a string ('year' is optional).
                form_data_hotheroes['playerID'] = '{}'.format(source_player_id)
                detail_payload = post_response(
                    start_url, form_data_hotheroes, header)
                hot_heroes = detail_payload['data']['data']['hot_heroes']

                # Redis layout: "<source>+player+<source_player_id>" ->
                # player_id, e.g. 'score+player+8377' -> '123'. format() is
                # used so a non-string source id cannot raise TypeError.
                key_player = '{}+player+{}'.format(source, source_player_id)
                player_id = redis.get_data(key_player)
                if not player_id:
                    # Cache miss: resolve through the backend.
                    player_result = player_check(player_name, types)
                    if player_result['code'] != 600:
                        # Unknown player: record it in the blacklist.
                        sql_blacklist = "select id from black_list where player_name ='{}';".format(
                            player_name)
                        sql_add_blacklist = (
                            "insert into black_list set league_name = '{0}',player_name ='{1}', "
                            "source_from = 1, judge_position=0010;"
                        ).format(league_name, player_name)
                        api_return_200(sql_blacklist, sql_add_blacklist, db)
                        continue
                    player_id = player_result['result']['player_id']
                    redis.set_data(key_player, 86400, player_id)
                parse_detail(hot_heroes, league_name, source, types, player_id)
def parse(url, data, headers):
    """Sync non-LPL match schedules from the schedule feed into the local table.

    Posts ``data`` to ``url`` and walks ``data.list`` -> ``info`` -> ``list``.
    For every match whose ``tournament_name`` does not contain ``'LPL'`` a
    ``check_match`` key is built from league name, both team short names and
    the start time. If ``check_local`` finds an existing row, it is updated
    through ``db.update_by_id``; otherwise ``api_check`` is consulted and a
    code-200 answer is forwarded to ``API_return_200``.

    Progress is reported with ``print``.

    Args:
        url: Endpoint handed to ``post_response``.
        data: Form payload handed to ``post_response``.
        headers: HTTP headers handed to ``post_response``.

    Returns:
        None. Exceptions are not caught here and propagate to the caller.
    """
    types = 1
    game_name = '英雄联盟'
    source_from = 'score'  # crawler source site
    results = post_response(url, data, headers)
    results = results['data']['list']
    print(len(results), type(results), results)
    for day in results.values():
        for league_block in day['info'].values():
            for match in league_block['list']:
                league_sourcename = match['tournament_name']
                # LPL schedules are skipped to avoid clashing with the
                # official-site crawler.
                if 'LPL' in league_sourcename:
                    continue
                team_a_sourcename = match['team_a_short_name']
                team_b_sourcename = match['team_b_short_name']
                status = match['status']
                bo = match['game_count']
                team_a_score = match['team_a_win']
                team_b_score = match['team_b_win']
                # A winner is only declared once the match status is '2'.
                if team_a_score > team_b_score and status == '2':
                    win_team = 'A'
                elif team_a_score < team_b_score and status == '2':
                    win_team = 'B'
                else:
                    win_team = None
                propertys = match['round_name']
                # "2020-06-29" + "04:00" -> "2020-06-29 04:00:00"
                start_time = match['start_date'] + ' ' + match['start_time'] + ':00'
                # check_match is the plain concatenation of the four source
                # fields and identifies the match locally.
                check_match = (league_sourcename + team_a_sourcename
                               + team_b_sourcename + start_time)
                print('check_match:', check_match)
                # 10-digit epoch timestamp (naive datetime, local time).
                time_datetime = datetime.strptime(start_time, '%Y-%m-%d %H:%M:00')
                time_stamp = int(time_datetime.timestamp())
                # Look the match up locally first; the check API is only
                # called when there is no local record.
                status_check = check_local(db, check_match)
                print('本地访问是否有记录:', status_check)
                if status_check is None:
                    check_result = api_check(game_name, league_sourcename,
                                             team_a_sourcename,
                                             team_b_sourcename)
                    print('检测接口返回:', check_result)
                    # On code 600, check_result['result'] is documented by the
                    # original author as holding league_id, team_a_id,
                    # team_b_id, league_name, team_a_name, team_b_name.
                    print('检测api结果:', check_result)
                    if check_result['code'] == 600:
                        insert_argument = {
                            'type': types,
                            'status': status,
                            'bo': bo,
                            'team_a_score': team_a_score,
                            'team_b_score': team_b_score,
                            'check_match': check_match,
                            'win_team': win_team,
                            'propertys': propertys,
                            'source_from': source_from,
                        }
                        # NOTE(review): the insert call below appears to be
                        # commented out in the source, so a code-600 match is
                        # currently not written anywhere - confirm intent.
                        # API_return_600(db, check_result, time_stamp, insert_argument)
                    elif check_result['code'] == 200:
                        # Code 200: add the unknown names to 'api_check_200'
                        # so the backend can complete them (only entries whose
                        # returned id is 0 are added).
                        API_return_200(db, check_result)
                else:
                    # A local record exists: update it in place. Writing
                    # check_match again is harmless.
                    print('本地已有数据就直接更新 ')
                    db.update_by_id(types, status, bo, team_a_score,
                                    team_b_score, win_team, check_match,
                                    propertys, source_from, start_time,
                                    status_check)
                    print('本地已有数据就直接更新完成')
def parse_wanplus(url, data, db, headers):
    """Sync LCK / LCS / LEC / LDL schedules from the wanplus feed.

    Posts ``data`` to ``url`` and walks ``data.scheduleList``. For every
    match whose ``ename`` contains one of the four league tags, the start
    time, status and winner are derived and the match is handed to
    ``redis_return_operation`` together with the module-level ``redis``.

    Status values produced: ``'1'`` live (or reported over but with too few
    games played), ``'0'`` not started, ``'2'`` finished.

    Args:
        url: Endpoint handed to ``post_response``.
        data: Form payload handed to ``post_response``.
        db: Database handle forwarded to ``redis_return_operation``.
        headers: HTTP headers handed to ``post_response``.

    Returns:
        None. Exceptions are logged to ``match_wanplus_log`` and swallowed.
    """
    wanted_leagues = ('LCK', 'LCS', 'LEC', 'LDL')
    try:
        responses = post_response(url, data, headers)
        schedule = responses['data']['scheduleList']
        game_name = '英雄联盟'
        source_from = 'wanplus'  # crawler source site
        types = 1
        for day in schedule.values():
            date_time = day['time']
            matches = day['list']
            # Some days carry a bool instead of a match list; skip those.
            if isinstance(matches, bool):
                continue
            for match in matches:
                league_sourcename = match['ename']
                # Only the LCK, LCS, LEC and LDL leagues are crawled.
                if not any(tag in league_sourcename for tag in wanted_leagues):
                    continue
                team_a_sourcename = match['oneseedname']
                team_b_sourcename = match['twoseedname']
                source_matchId = match['scheduleid']
                # 'starttime' is a clock time like '17:00'; its offset in
                # seconds is added to the day's 'time' value to form the
                # start_time stored in the table.
                hours, minutes = match['starttime'].split(':')[:2]
                start_time = str(int(hours) * 3600 + int(minutes) * 60 + date_time)
                bo = match['bonum']
                team_a_score = match['onewin']
                team_b_score = match['twowin']
                score_a = int(team_a_score)
                score_b = int(team_b_score)
                # 'live' means in progress, 'isover' means finished. A match
                # reported as over is only accepted as finished when enough
                # games have been played, since the site can flag it early.
                # NOTE(review): the threshold compares the score *sum* with
                # bo / 2 - confirm this is the intended completion rule.
                if match['live']:
                    status = '1'
                elif not match['isover']:
                    status = '0'
                elif score_a + score_b >= (bo / 2):
                    status = '2'
                else:
                    status = '1'
                if status == '2' and score_a > score_b:
                    win_team = 'A'
                elif status == '2' and score_a < score_b:
                    win_team = 'B'
                else:
                    win_team = None
                propertys = match['groupname']
                redis_return_operation(
                    redis, game_name, db, source_from, league_sourcename,
                    source_matchId, team_a_sourcename, team_b_sourcename,
                    start_time, types, team_a_score, team_b_score, status,
                    bo, win_team, propertys)
    except Exception as e:
        match_wanplus_log.error(e, exc_info=True)
def parse(types):
    """Scrape per-league team rankings and upsert them into the stats tables.

    For every league returned by the tournament-list request (except those in
    ``league_exclude``) the league and team names are resolved through
    ``league_check`` / ``team_check``. A response code of 600 is treated as a
    successful match; any other code makes the unresolved name get recorded
    in ``black_list`` through ``api_return_200``. Resolved teams are written
    with ``db.update_insert`` into ``game_lol_team_league_stats`` when
    ``types == 1`` and into ``game_kog_team_league_stats`` otherwise.

    Rows with an empty team name, and rows for which ``team_check`` returns
    nothing or a payload without ``result``, are skipped.

    Args:
        types: Game selector. ``1`` picks ``form_data_yxlm`` and the LoL
            table; any other value picks ``form_data_wzry`` and the KoG table.

    Returns:
        None. Exceptions are logged to ``lol_team_log`` and swallowed.
    """
    # NOTE(review): every value is interpolated straight into the SQL text;
    # a quote inside a scraped name would break the statement.
    try:
        tournament_form = form_data_yxlm if types == 1 else form_data_wzry
        tournament_payload = post_response(start_url, tournament_form, header)
        for league in tournament_payload['data']['list']:
            tournament_id = league['tournamentID']
            source_league_name = league['name']
            # Leagues on the exclusion list are skipped outright.
            if source_league_name in league_exclude:
                continue

            # Ask the backend for the canonical league name / id.
            league_result = league_check(source_league_name, types)
            league_name = league_result['result']['league_name']
            league_id = league_result['result']['league_id']
            if league_result['code'] != 600:
                # Unknown league: record it in the blacklist.
                sql_blacklist = "select id from black_list where league_name = '{}';".format(
                    league_name)
                sql_add_blacklist = (
                    "insert into black_list set league_name = '{}', source_from = 1, "
                    "judge_position=1000;"
                ).format(league_name)
                api_return_200(sql_blacklist, sql_add_blacklist, db)
                continue

            # The ranking endpoint is paged; pages 1 through 3 are requested.
            form_data['tournament_id'] = tournament_id
            for page in range(1, 4):
                form_data['page'] = page
                page_payload = post_response(start_url, form_data, header)
                if not page_payload:
                    continue
                for row in page_payload['data']['data']['list']:
                    team_name = row['team_name']
                    # The site lists ranking rows with an empty team; skip.
                    if not team_name:
                        continue
                    # Ask the backend for the canonical team name / id. A
                    # missing or malformed answer skips this row only instead
                    # of aborting the whole crawl.
                    team_result = team_check(team_name, types)
                    if not team_result or 'result' not in team_result:
                        continue
                    team_name = team_result['result']['team_name']
                    if team_result['code'] != 600:
                        # Unknown team: record it in the blacklist.
                        sql_blacklist = "select id from black_list where team_name = '{}';".format(
                            team_name)
                        sql_add_blacklist = (
                            "insert into black_list set league_name = '{0}',team_name = '{1}', "
                            "source_from = 1, judge_position=0100;"
                        ).format(league_name, team_name)
                        api_return_200(sql_blacklist, sql_add_blacklist, db)
                        continue

                    team_id = team_result['result']['team_id']
                    win_count = row['win']
                    lost_count = row['los']
                    play_count = row['MACTH_TIMES']
                    time_average = row['AVERAGE_TIME']
                    # Average game length arrives as "H:M:S"; store it as a
                    # number of seconds (0 when the site gives no value).
                    if time_average:
                        hours, minutes, seconds = time_average.split(':')[:3]
                        time_averages = (int(hours) * 3600
                                         + int(minutes) * 60 + int(seconds))
                    else:
                        time_averages = 0
                    first_blood_rate = row['FIRSTBLOODKILL']
                    small_dragon_rate = row['SMALLDRAGON_RATE']
                    small_dragon_average = row['AVERAGE_SMALLDRAGON']
                    big_dragon_rate = row['BIGDRAGON_RATE']
                    big_dragon_average = row['AVERAGE_BIGDRAGON']
                    tower_success_average = row['AVERAGE_TOWER_SUCCESS']
                    tower_fail_average = row['AVERAGE_TOWER_FAIL']
                    kda = row['KDA']
                    kill_average = row['AVERAGE_KILLS']
                    death_average = row['AVERAGE_DEATHS']
                    assist_average = row['AVERAGE_ASSISTS']
                    economic_average = row['AVERAGE_MONEY']
                    economic_minute = row['MINUTE_MONEY']
                    hit_minute = row['MINUTE_HITS']
                    wards_placed_minute = row['MINUTE_WARDSPLACED']
                    wards_killed_minute = row['MINUTE_WARDSKILLED']
                    damage_average = row['AVERAGE_CHAMPIONS']
                    damage_minute = row['MINUTE_OUTPUT']
                    score = row['f_score']
                    win_rate = row['VICTORY_RATE']
                    # The site has no first-tower rate; store 0.
                    first_tower_rate = 0

                    if types == 1:
                        # League of Legends table.
                        sql_teamrank = (
                            "INSERT INTO `game_lol_team_league_stats` (team_id, league_id, play_count, win_rate,"
                            " time_average, death_average, kill_average, economic_minute, first_blood_rate, tower_fail_average,"
                            " tower_success_average, kda, damage_average, big_dragon_rate, big_dragon_average, small_dragon_rate,"
                            "small_dragon_average, first_tower_rate, damage_minute, hit_minute, economic_average, type, "
                            "wards_placed_minute, wards_killed_minute, assist_average) VALUES({0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, "
                            "{8}, {9}, {10}, {11}, {12}, {13}, {14}, {15}, {16}, {17}, {18}, {19}, {20}, {21}, {22}, {23}, {24}) "
                            "ON DUPLICATE KEY UPDATE "
                            "team_id={0}, league_id={1}, play_count={2}, win_rate={3}, time_average={4}, death_average={5}, "
                            "kill_average={6}, economic_minute={7}, first_blood_rate={8}, tower_fail_average={9}, "
                            "tower_success_average={10}, kda={11}, damage_average={12}, big_dragon_rate={13}, big_dragon_average={14}, "
                            "small_dragon_rate={15}, small_dragon_average={16}, first_tower_rate={17}, damage_minute={18}, "
                            "hit_minute={19}, economic_average={20}, type={21}, wards_placed_minute={22}, wards_killed_minute={23}, "
                            "assist_average={24};"
                        ).format(
                            team_id, league_id, play_count, win_rate,
                            time_averages, death_average, kill_average,
                            economic_minute, first_blood_rate,
                            tower_fail_average, tower_success_average, kda,
                            damage_average, big_dragon_rate,
                            big_dragon_average, small_dragon_rate,
                            small_dragon_average, first_tower_rate,
                            damage_minute, hit_minute, economic_average,
                            types, wards_placed_minute, wards_killed_minute,
                            assist_average)
                    else:
                        # King of Glory table.
                        sql_teamrank = (
                            "INSERT INTO `game_kog_team_league_stats` (team_id, league_id, win_count, lost_count, "
                            "play_count, time_average, first_blood_rate, small_dragon_rate, small_dragon_average, "
                            "big_dragon_rate, big_dragon_average, tower_success_average, tower_fail_average, kda, "
                            "kill_average, death_average, assist_average, economic_average, economic_minute, hit_minute, "
                            "wards_placed_minute, wards_killed_minute, damage_average, damage_minute, win_rate, score"
                            ") VALUES({0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}, {13}, {14}, {15},"
                            "{16}, {17}, {18}, {19}, {20}, {21}, {22}, {23}, {24}, {25})"
                            "ON DUPLICATE KEY UPDATE "
                            "team_id={0}, league_id={1}, win_count={2}, lost_count={3}, play_count={4}, time_average={5}, "
                            "first_blood_rate={6}, small_dragon_rate={7}, small_dragon_average={8}, big_dragon_rate={9}, big_dragon_average={10}, "
                            "tower_success_average={11}, tower_fail_average={12}, kda={13}, kill_average={14}, "
                            "death_average={15}, assist_average={16}, economic_average={17}, economic_minute={18},"
                            "hit_minute={19}, wards_placed_minute={20}, wards_killed_minute={21}, damage_average={22},"
                            " damage_minute={23}, win_rate={24}, score={25};"
                        ).format(
                            team_id, league_id, win_count, lost_count,
                            play_count, time_averages, first_blood_rate,
                            small_dragon_rate, small_dragon_average,
                            big_dragon_rate, big_dragon_average,
                            tower_success_average, tower_fail_average, kda,
                            kill_average, death_average, assist_average,
                            economic_average, economic_minute, hit_minute,
                            wards_placed_minute, wards_killed_minute,
                            damage_average, damage_minute, win_rate, score)
                    db.update_insert(sql_teamrank)
    except Exception as e:
        lol_team_log.error(e, exc_info=True)
def parse(form_data_yxlm, types):
    """Build the league standings board from match results and upsert it.

    For every league returned by the tournament-list request (except those in
    ``league_unknow``):

    1. The team -> group mapping is fetched and stored in the module-level
       ``team_type_name`` (teams without a group get '积分榜').
    2. The list of league stages is fetched. For each stage, every schedule
       list (one per ``round_son`` entry, or a single ``p_<roundID>`` list
       when ``round_son`` is empty) is fetched and each match is passed to
       ``parse_detail``, which accumulates into the per-stage win / lose /
       score dictionaries. A truthy return value of ``parse_detail`` becomes
       the current ``league_id``.
    3. One row per team key in the win dictionary is upserted into
       ``game_league_board`` through ``db.update_insert``.

    Args:
        form_data_yxlm: Form payload for the tournament-list request.
        types: Game selector; ``1`` labels the game '英雄联盟', any other
            value '王者荣耀'. Also stored in the ``type`` column.

    Returns:
        None. Exceptions are logged to ``league_board_log`` and swallowed.
    """
    # NOTE(review): `timestamps_match` and `realteam_type_name` are not
    # defined in this function; presumably module-level names - verify they
    # are not typos for `timestamps` / `team_type_name`.
    # NOTE(review): `league_id` is never reset between leagues, so a league
    # whose matches yield no id reuses the previous league's id - confirm.
    game_name = '英雄联盟' if types == 1 else '王者荣耀'
    league_id = 0
    try:
        tournament_payload = post_response(start_url, form_data_yxlm, header)
        for league in tournament_payload['data']['list']:
            tournament_id = league['tournamentID']
            source_league_name = league['name']
            # Leagues on the unknown list are skipped outright.
            if source_league_name in league_unknow:
                continue

            # 13-digit (millisecond) timestamp used in the request URLs.
            timestamps = int(datetime.now().timestamp() * 1000)

            # Collect each team's group name first (the group does not
            # change between league stages).
            type_url = type_url_pre.format(tournament_id, timestamps)
            for team_entry in get_response(type_url, header)['data']:
                source_team_name = team_entry['team_name']
                group_name = team_entry['group_name']
                # Teams without an assigned group are shown under '积分榜'.
                team_type_name[source_team_name] = group_name if group_name else '积分榜'

            # Then walk the league stages to reach the detailed schedules.
            rank_url = rank_url_pre.format(tournament_id, timestamps)
            for stage_entry in get_response(rank_url, header):
                stage = stage_entry['name']
                round_son = stage_entry['round_son']
                round_id = stage_entry['roundID']
                # Per-stage tallies filled in by parse_detail.
                team_win_count = {}
                team_lose_count = {}
                team_score_count = {}

                # A stage either has sub-rounds (e.g. one per week), each
                # with its own schedule id, or a single schedule addressed
                # as 'p_<roundID>'.
                if round_son:
                    schedule_ids = (child['id'] for child in round_son)
                else:
                    schedule_ids = ['p_{}'.format(round_id)]
                for schedule_id in schedule_ids:
                    match_url = match_url_pre.format(schedule_id, timestamps_match)
                    for match_detail in get_response(match_url, header):
                        result_detail = parse_detail(
                            match_detail, game_name, source_league_name,
                            team_win_count, team_lose_count, team_score_count)
                        if result_detail:
                            league_id = result_detail

                # The stage is fully tallied: upsert one row per team.
                # Keys have the form '<team_name>+<team_id>'; the three
                # dictionaries are expected to share the same keys.
                for key, win_count in team_win_count.items():
                    key_parts = key.split('+')
                    team_name = key_parts[0]
                    team_id = key_parts[1]
                    lost_count = team_lose_count[key]
                    score = team_score_count[key]
                    # Look the team's group up in the group mapping.
                    type_name = realteam_type_name[team_name]
                    sql_rank = (
                        "INSERT INTO `game_league_board` (league_id, team_id, win_count, lost_count, score, type_name, stage,"
                        " type, team_name) VALUES('{0}', '{1}', {2}, {3}, {4}, '{5}', '{6}', {7}, '{8}') "
                        " ON DUPLICATE KEY UPDATE "
                        "league_id='{0}', team_id='{1}', win_count={2}, lost_count={3}, score={4}, type_name='{5}', "
                        "stage='{6}', type={7}, team_name='{8}';"
                    ).format(league_id, team_id, win_count, lost_count, score,
                             type_name, stage, types, team_name)
                    print('更新或插入排行表:', sql_rank)
                    db.update_insert(sql_rank)
                    print('更新完成')
    except Exception as e:
        league_board_log.error('每组队局之前的异常')
        league_board_log.error(e, exc_info=True)
def parse(types):
    """Scrape per-league hero rankings and upsert them into the stats tables.

    For every league returned by the tournament-list request (except those in
    ``league_exclude``) the league and hero names are resolved through
    ``league_check`` / ``hero_check``. A response code of 600 is treated as a
    successful match; any other code makes the unresolved name get recorded
    in ``black_list`` through ``api_return_200``. Resolved heroes are written
    with ``db.update_insert`` into ``game_lol_heroes_league_stats`` when
    ``types == 1`` and into ``game_kog_heroes_league_stats`` otherwise.

    Args:
        types: Game selector. ``1`` picks ``form_data_yxlm`` and the LoL
            table; any other value picks ``form_data_wzry`` and the KoG table.

    Returns:
        None. Exceptions are logged to ``lol_heros_log`` and swallowed.
    """
    # NOTE(review): every value is interpolated straight into the SQL text;
    # a quote inside a scraped hero name would break the statement.
    try:
        tournament_form = form_data_yxlm if types == 1 else form_data_wzry
        tournament_payload = post_response(start_url, tournament_form, header)
        for league in tournament_payload['data']['list']:
            tournament_id = league['tournamentID']
            source_league_name = league['name']
            # Leagues on the exclusion list are skipped outright.
            if source_league_name in league_exclude:
                continue

            # Ask the backend for the canonical league name / id.
            league_result = league_check(source_league_name, types)
            league_name = league_result['result']['league_name']
            league_id = league_result['result']['league_id']
            if league_result['code'] != 600:
                # Unknown league: record it in the blacklist.
                sql_blacklist = "select id from black_list where league_name = '{}';".format(league_name)
                sql_add_blacklist = (
                    "insert into black_list set league_name = '{}', source_from = 1, "
                    "judge_position=1000;"
                ).format(league_name)
                api_return_200(sql_blacklist, sql_add_blacklist, db)
                continue

            # The ranking endpoint is paged; pages 1 through 5 are requested.
            form_data['tournament_id'] = tournament_id
            for page in range(1, 6):
                form_data['page'] = page
                page_payload = post_response(start_url, form_data, header)
                if not page_payload:
                    continue
                for row in page_payload['data']['data']['list']:
                    hero_avatar = row['hero_image']
                    hero_name = row['hero_name']
                    # Resolve the backend hero id from the hero name.
                    hero_result = hero_check(hero_name, types)
                    if hero_result['code'] != 600:
                        # Unknown hero: record it in the blacklist.
                        sql_blacklist = "select id from black_list where hero_name = '{}';".format(hero_name)
                        sql_add_blacklist = (
                            "insert into black_list set league_name = '{0}',hero_name = '{1}', "
                            "source_from = 1, judge_position=0001;"
                        ).format(league_name, hero_name)
                        api_return_200(sql_blacklist, sql_add_blacklist, db)
                        continue

                    hero_id = hero_result['result']['hero_id']
                    assist_average = row['AVERAGE_ASSISTS']
                    death_average = row['AVERAGE_DEATHS']
                    kill_average = row['AVERAGE_KILLS']
                    kda_average = row['KDA']
                    pick_rate = row['APPEAR']
                    ban_rate = row['PROHIBIT']
                    win_rate = row['VICTORY_RATE']
                    pick_count = row['appear_count']
                    ban_count = row['prohibit_count']
                    win_count = row['victory_count']
                    position = row['position_name']

                    if types == 1:
                        # League of Legends table.
                        sql_herorank = (
                            "INSERT INTO `game_lol_heroes_league_stats` (hero_id, hero_avatar, hero_name, "
                            "assist_average, death_average, kill_average, kda_average, pick_rate, ban_rate, "
                            "win_rate, pick_count, ban_count, win_count, position, league_id) VALUES({0}, '{1}', "
                            "'{2}', {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}, '{13}', {14}) "
                            "ON DUPLICATE KEY UPDATE "
                            "hero_id={0}, hero_avatar='{1}', hero_name='{2}', assist_average={3},death_average={4},"
                            " kill_average={5}, kda_average={6}, pick_rate={7}, ban_rate={8}, win_rate={9}, pick_count={10},"
                            "ban_count={11}, win_count={12}, position='{13}', league_id={14};"
                        ).format(
                            hero_id, hero_avatar, hero_name, assist_average,
                            death_average, kill_average, kda_average,
                            pick_rate, ban_rate, win_rate, pick_count,
                            ban_count, win_count, position, league_id)
                    else:
                        # King of Glory table (pick rate is stored as
                        # show_rate there).
                        sql_herorank = (
                            "INSERT INTO `game_kog_heroes_league_stats` (hero_id, hero_avatar, hero_name, "
                            "assist_average, death_average, kill_average, kda_average, show_rate, ban_rate, "
                            "win_rate, pick_count, ban_count, win_count, league_id, position) VALUES({0}, "
                            "'{1}', '{2}', {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}, {13}, '{14}') "
                            "ON DUPLICATE KEY UPDATE "
                            "hero_id={0}, hero_avatar='{1}', hero_name='{2}', assist_average={3},death_average={4},"
                            " kill_average={5}, kda_average={6}, show_rate={7}, ban_rate={8}, win_rate={9}, pick_count={10},"
                            "ban_count={11}, win_count={12}, league_id={13}, position='{14}';"
                        ).format(
                            hero_id, hero_avatar, hero_name, assist_average,
                            death_average, kill_average, kda_average,
                            pick_rate, ban_rate, win_rate, pick_count,
                            ban_count, win_count, league_id, position)
                    db.update_insert(sql_herorank)
    except Exception as e:
        lol_heros_log.error(e, exc_info=True)