def _riot_game_rows(game, location, champ_dict, rune_dict, player_data_keys,
                    player_positions):
    """Build the ten ``{{RunesLine2019}}`` rows for one Riot match JSON.

    Args:
        game: Decoded match JSON; ``gameVersion``, ``participantIdentities``
            and ``participants`` are read from it.
        location: Match identifier taken from the match-history URL; written
            into each row as ``mh=``.
        champ_dict: Maps ``championId`` values to display names.
        rune_dict: Maps rune / style ids to display names.
        player_data_keys: Keys read, in order, from each participant's
            ``stats`` mapping.
        player_positions: Role names, indexed by ``participant index % 5``.

    Returns:
        A list of ten wikitext rows, one per participant.

    Raises:
        KeyError, IndexError: If the payload lacks an expected field or has
            fewer than ten participants. Nothing is returned in that case, so
            the caller never stores a partial block.
    """
    patch = '.'.join(game['gameVersion'].split('.')[:2])
    rows = []
    for j in range(10):
        summoner = game['participantIdentities'][j]['player']['summonerName']
        # Names look like "<team tag> <player>"; without a space there is no
        # team tag and the whole string is kept as the player name.
        tagged = re.match('^(.+?) (.*)', summoner)
        if tagged:
            player_team, player_name = tagged[1], tagged[2]
        else:
            player_team, player_name = '', summoner
        participant = game['participants'][j]
        champion_id = participant['championId']
        fields = [
            player_name,
            player_team,
            champ_dict.get(champion_id, str(champion_id)),
            player_positions[j % 5],
        ]
        for key in player_data_keys:
            rune_key = participant['stats'][key]
            fields.append(rune_dict.get(rune_key, rune_key))
        # str() each field: an id missing from the lookup tables falls back to
        # the raw (possibly integer) id, which str.join would reject.
        rows.append('{{RunesLine2019|' + '|'.join(str(x) for x in fields) +
                    '|patch=' + patch + '|mh=' + location + '}}')
    return rows


def scrape(esc: EsportsClient, events, force):
    """Rebuild the ``<event>/Runes`` wiki pages from Riot match-history JSON.

    For every page name in ``events`` the games with a non-QQ match history
    are looked up through a cargo query. Each game occupies ten rows on the
    runes page. Rows already present for a game are reused unless ``force`` is
    set; otherwise the match JSON is fetched from the endpoint read from
    ``mh_riot_endpoint.txt``. Games that cannot be fetched or parsed are left
    out of the page and listed on the error page instead.

    Args:
        esc: Client providing ``client.api``, ``client.pages`` and ``save``.
        events: Iterable of data page names used as ``MSG._pageName``.
        force: When truthy, ignore rows already on the page and re-query
            every game.
    """
    player_data_keys = [
        "perkPrimaryStyle", "perkSubStyle", "perk0", "perk1", "perk2",
        "perk3", "perk4", "perk5", "statPerk0", "statPerk1", "statPerk2",
    ]
    player_positions = ['Top', 'Jungle', 'Mid', 'ADC', 'Support']
    rune_dict = get_rune_dict()
    champ_dict = get_champ_dict()
    print_if_not_silent(events)
    with open('mh_riot_endpoint.txt') as f:
        mh_riot_endpoint = f.read().strip()
    mh_riot_token = get_id_token()

    # One session for the whole run, closed on exit (the previous code opened
    # a new, never-closed session for every game).
    with requests.Session() as session:
        for page_to_query in events:
            print_if_not_silent(page_to_query)
            result = esc.client.api(
                'cargoquery',
                format="json",
                limit='max',
                tables="MatchScheduleGame=MSG,MatchSchedule=MS",
                fields="MSG.OverviewPage,MSG.MatchHistory",
                where=(r'MSG._pageName="%s" AND MSG.MatchHistory IS NOT NULL'
                       r' AND NOT MSG.MatchHistory RLIKE ".*(lpl|lol)\.qq\.com.*"')
                % page_to_query,
                join_on="MSG.MatchId=MS.MatchId",
                order_by="MS.N_Page,MS.N_MatchInPage, MSG.N_GameInMatch")
            if not result['cargoquery']:
                continue

            event = result['cargoquery'][0]['title']['OverviewPage']
            suffix = ''
            page_start = page_to_query.replace('Data:', '')
            if page_start != event:
                suffix = page_start.replace(event, '')
            errors_http = []
            errors_key = []
            page_name = event + '/Runes' + suffix
            page = esc.client.pages[page_name]
            text = page.text()
            if 'RunesQueryTournament' in text:
                continue

            text_tbl = []
            if text != "" and len(text.split('\n')) > 1:
                # Existing page: the first two lines are the intro, the rest
                # are the cached rows (ten per game).
                text_tbl = text.split('\n')
                intro = text_tbl.pop(0) + '\n' + text_tbl.pop(0)
            else:
                overview_text = esc.client.pages[event].text()
                tabs = overview_text.split('\n')[0]
                intro = tabs + '\n{{RunesStart2019}}'
            lines = [intro]

            for i, cargo_game in enumerate(result['cargoquery']):
                # NOTE(review): SOURCE had replace('&', '&'), a no-op;
                # presumably the stored URL carries HTML-escaped ampersands
                # ('&amp;') - confirm against the cargo output.
                mh = cargo_game['title']['MatchHistory'].replace('&amp;', '&')
                print_if_not_silent(mh)
                found = re.match(r'.*details/([^&]*)', mh)
                if found is None or not found[1]:
                    # No usable match id in the URL: report it rather than
                    # crash the whole run.
                    errors_key.append(mh)
                    continue
                location = found[1]
                start = 10 * i

                # Reuse cached rows only when a complete ten-row block is
                # available, so a truncated page cannot raise IndexError.
                if (not force and len(text_tbl) >= start + 10
                        and location in text_tbl[start]):
                    lines.extend(text_tbl[start:start + 10])
                    continue

                print_if_not_silent('Querying match %s' % mh)
                json_loc = mh_riot_endpoint + location
                try:
                    response = session.get(
                        json_loc, cookies={'id_token': mh_riot_token})
                    response.raise_for_status()
                    game = response.json()
                except (requests.exceptions.RequestException, ValueError):
                    # requests raises its own exception types (never
                    # urllib.error.HTTPError); ValueError covers an
                    # undecodable body on older requests versions.
                    errors_http.append(mh)
                    continue
                try:
                    rows = _riot_game_rows(game, location, champ_dict,
                                           rune_dict, player_data_keys,
                                           player_positions)
                except (KeyError, IndexError):
                    errors_key.append(mh)
                    continue
                lines.extend(rows)
                # Keep the cached rows of later games lined up with their
                # index by opening a ten-row gap where this game now sits.
                text_tbl[start:start] = [''] * 10

            lines.append('{{RunesEnd}}')
            new_text = '\n'.join(lines)
            if new_text != text and len(lines) > 3:
                print_if_not_silent('Saving page %s...' % page_name)
                esc.save(page, new_text,
                         summary='Automatically updating Runes (python)')
            else:
                print_if_not_silent(
                    'Skipping page %s, no changes' % page_name)

            error_text = ''.join(
                ' <br>\n' + page_to_query + ': ' + e + ' (HTTP)'
                for e in errors_http)
            error_text += ''.join('\n' + e + ' (Key)' for e in errors_key)
            if error_text != '':
                error_page = esc.client.pages[
                    'User:RheingoldRiver/Rune Errors']
                esc.save(error_page, error_text,
                         summary='Reporting a Rune Error')
def _lpl_fetch_json(url):
    """Fetch ``url`` with urllib and return the decoded JSON body."""
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read().decode())


def _lpl_game_rows(game, rune_data, has_runes, location, champ_dict,
                   rune_dict, team_keys, player_positions):
    """Build the ten ``{{RunesLineLPL2019}}`` rows for one QQ game.

    Args:
        game: Decoded per-game JSON; ``msg.battleInfo.BattleData`` is parsed
            from it and the whole object is passed to ``get_team_names``.
        rune_data: Per-champion rune payloads keyed by integer hero id
            (empty when ``has_runes`` is false).
        has_runes: Whether rune data was available for this game.
        location: Series id (``bmid``) written into each row as ``mh=``.
        champ_dict: Maps champion ids to display names.
        rune_dict: Maps rune ids to display names; ``rune_dict['trees']``
            maps a rune id to its tree name.
        team_keys: Team side keys handed to the player/team helpers.
        player_positions: Role names, indexed by ``player index % 5``.

    Returns:
        A list of ten wikitext rows.
    """
    # Patch is left blank: no endpoint used here is known to provide it.
    patch = ''
    teamnames = get_team_names(game)
    game_data = json.loads(game['msg']['battleInfo']['BattleData'])
    rows = []
    for j in range(10):
        player_data = get_player_data(game_data, team_keys, j)
        player_champion_n = int(player_data['hero'])
        player_team = get_this_teamname(teamnames, team_keys, j)
        player_name = player_data['name'].replace(player_team, '')
        fields = [
            player_name,
            player_team,
            champ_dict.get(player_champion_n, str(player_champion_n)),
            player_positions[j % 5],
        ]
        if has_runes:
            # Deep copy: the stat-rune loop below decrements 'runes_num_' in
            # place and must not touch the shared payload.
            player_runes = copy.deepcopy(
                rune_data[player_champion_n]['runes_info_']['runes_list_'])
            # Primary tree comes from the first rune.
            fields.append(rune_dict['trees'][player_runes[0]['runes_id_']])
            this_rune_id = ''
            for _ in range(5):
                this_rune_id = player_runes.pop(0)['runes_id_']
                fields.append(rune_dict.get(this_rune_id, this_rune_id))
            # The tree of the fifth rune is placed right after the primary
            # tree (index 5 of the row's fields).
            fields.insert(5, rune_dict['trees'][int(this_rune_id)])
            # Remaining entries are emitted 'runes_num_' times each.
            while player_runes:
                head = player_runes[0]
                # '<= 1' rather than '== 1': a count of 0 or less used to
                # decrement forever and hang the run.
                if head['runes_num_'] <= 1:
                    player_runes.pop(0)
                else:
                    head['runes_num_'] -= 1
                fields.append(
                    rune_dict.get(head['runes_id_'], head['runes_id_']))
        # str() each field so an id missing from the lookup tables (kept as
        # the raw id) cannot make join raise.
        rows.append('{{RunesLineLPL2019|' + '|'.join(str(x) for x in fields) +
                    '|patch=' + patch + '|mh=' + location + '}}')
    return rows


def scrapeLPL(esc: EsportsClient, events, force):
    """Rebuild the ``<event>/Runes`` wiki pages from QQ (LPL) match data.

    For every page name in ``events`` the series whose match history points
    at ``(lpl|lol).qq.com`` are looked up through a cargo query. One match
    history URL (``bmid``) covers a whole series; every game of the series
    occupies ten rows on the runes page. Rows already present for a series
    are reused unless ``force`` is set; otherwise the series is fetched
    through the endpoints listed, one per line, in ``mh_qq_endpoint.txt``.
    A series is written only if all of its games were built successfully.

    Args:
        esc: Client providing ``client.api``, ``client.pages`` and ``save``.
        events: Iterable of data page names used as ``MSG._pageName``.
        force: When truthy, ignore rows already on the page and re-query
            every series.
    """
    player_positions = ['Top', 'Jungle', 'Mid', 'ADC', 'Support']
    team_keys = ['left', 'right']
    # Upper bound on cached games reused per series.
    # NOTE(review): assumes no series exceeds five games - confirm. The old
    # code stopped at three, so longer cached series lost their tail.
    max_games_per_series = 5
    rune_dict = get_rune_dict()
    champ_dict = get_champ_dict()
    with open('mh_qq_endpoint.txt') as f:
        mh_qq_endpoint = [line.strip() for line in f.readlines()]

    for page_to_query in events:
        print_if_not_silent(page_to_query)
        result = esc.client.api(
            'cargoquery',
            format="json",
            limit='max',
            tables="MatchScheduleGame=MSG,MatchSchedule=MS",
            fields="MSG.OverviewPage,MSG.MatchHistory",
            where=(r'MSG._pageName="%s" AND MSG.MatchHistory IS NOT NULL'
                   r' AND MSG.MatchHistory RLIKE ".*(lpl|lol)\.qq\.com.*"')
            % page_to_query,
            join_on="MSG.MatchId=MS.MatchId",
            order_by="MS.N_Page,MS.N_MatchInPage, MSG.N_GameInMatch",
            group_by='MSG.MatchHistory')
        if not result['cargoquery']:
            continue

        event = result['cargoquery'][0]['title']['OverviewPage']
        suffix = ''
        page_start = page_to_query.replace('Data:', '')
        if page_start != event:
            suffix = page_start.replace(event, '')
        errors_http = []
        errors_key = []
        page_name = event + '/Runes' + suffix
        page = esc.client.pages[page_name]
        text = page.text()
        if 'RunesQueryTournament' in text:
            continue

        text_tbl = []
        if text != "" and len(text.split('\n')) > 1:
            # Existing page: the first two lines are the intro, the rest are
            # the cached rows (ten per game).
            text_tbl = text.split('\n')
            intro = text_tbl.pop(0) + '\n' + text_tbl.pop(0)
        else:
            overview_text = esc.client.pages[event].text()
            tabs = overview_text.split('\n')[0]
            intro = tabs + '\n{{RunesStart2019}}'
        lines = [intro]

        # Index, in games, of the next ten-row block in text_tbl.
        counter = 0
        for cargo_game in result['cargoquery']:
            # NOTE(review): SOURCE had replace('&', '&'), a no-op; presumably
            # the stored URL carries HTML-escaped ampersands ('&amp;') -
            # confirm against the cargo output.
            mh = cargo_game['title']['MatchHistory'].replace('&amp;', '&')
            print_if_not_silent(mh)
            found = re.match(r'.*bmid=([0-9]*)', mh)
            if found is None or not found[1]:
                # No usable series id in the URL: report it rather than
                # crash the whole run.
                errors_key.append(mh)
                continue
            location = found[1]

            # Reuse every consecutive cached block that belongs to this
            # series; only complete ten-row blocks count as cached.
            reused = 0
            while (not force and reused < max_games_per_series
                   and len(text_tbl) >= 10 * counter + 10
                   and location in text_tbl[10 * counter]):
                label = '' if reused == 0 else ' (%d)' % (reused + 1)
                print_if_not_silent('Skipping %s%s' % (location, label))
                lines.extend(text_tbl[10 * counter:10 * counter + 10])
                counter += 1
                reused += 1
            if reused:
                continue

            print_if_not_silent('Querying match %s' % mh)
            json_loc = mh_qq_endpoint[0] + location
            print_if_not_silent(json_loc)
            # Rows are collected per series and committed only on success,
            # so a failure part-way never leaves a partial block behind.
            series_rows = []
            games_built = 0
            try:
                series = _lpl_fetch_json(json_loc)
                for series_game in series['msg']:
                    json_loc_2 = mh_qq_endpoint[1] + str(
                        series_game['sMatchId'])
                    print_if_not_silent(json_loc_2)
                    game = _lpl_fetch_json(json_loc_2)
                    area_id = game['msg']['sMatchInfo']['AreaId']
                    battle_id = game['msg']['battleInfo']['BattleId']
                    json_loc_3 = (mh_qq_endpoint[2] + str(battle_id) +
                                  mh_qq_endpoint[3] + str(area_id))
                    print_if_not_silent(json_loc_3)
                    # The 'msg' field is itself a JSON-encoded string.
                    world_lookup = json.loads(
                        _lpl_fetch_json(json_loc_3)['msg'])
                    has_runes = world_lookup['battle_count_'] != 0
                    rune_data = {}
                    if has_runes:
                        world_id = world_lookup['battle_list_'][0]['world_']
                        json_loc_4 = (mh_qq_endpoint[4] + str(world_id) +
                                      mh_qq_endpoint[5] + str(battle_id))
                        print_if_not_silent(json_loc_4)
                        rune_payload = json.loads(
                            _lpl_fetch_json(json_loc_4)['msg'])
                        # Index by hero id so players can be matched by the
                        # champion they played; a hero missing here surfaces
                        # as a KeyError when its row is built.
                        for hero in rune_payload['hero_list_'][:10]:
                            rune_data[int(hero['hero_id_'])] = hero.copy()
                    series_rows.extend(_lpl_game_rows(
                        game, rune_data, has_runes, location, champ_dict,
                        rune_dict, team_keys, player_positions))
                    games_built += 1
            except urllib.error.HTTPError:
                errors_http.append(mh)
            except KeyError:
                errors_key.append(mh)
            except Exception as e:
                # Best-effort: any other failure skips the series and is
                # only printed.
                print_if_not_silent(e)
            else:
                # Open a gap in the cached rows at the position the lookups
                # use (10 * counter) so later series stay aligned.
                text_tbl[10 * counter:10 * counter] = [''] * len(series_rows)
                lines.extend(series_rows)
                counter += games_built

        lines.append('{{RunesEnd}}')
        new_text = '\n'.join(lines)
        if new_text != text and len(lines) > 3:
            print_if_not_silent('Saving page %s...' % page_name)
            esc.save(page, new_text,
                     summary='Automatically updating Runes (python)')
        else:
            print_if_not_silent('Skipping page %s, no changes' % page_name)

        error_text = ''.join(
            ' <br>\n' + page_to_query + ': ' + e + ' (HTTP)'
            for e in errors_http)
        error_text += ''.join('\n' + e + ' (Key)' for e in errors_key)
        if error_text != '':
            error_page = esc.client.pages['User:RheingoldRiver/Rune Errors']
            esc.save(error_page, error_text,
                     summary='Reporting a Rune Error')