def save_heroes(db, url):
    """Crawl heroes from the specified URL and save them to the database.

    :param db: database connection
    :param url: target URL
    """
    cur = db.cursor()
    data = match_api.crawl(url)
    for hero in data["result"]["heroes"]:
        # bind values instead of concatenating them into the SQL string, so
        # names containing quotes cannot break or inject into the statement
        print('Saving hero', hero["id"], hero["name"])
        cur.execute("INSERT OR IGNORE INTO heroes (id, name) VALUES (?, ?)",
                    (hero["id"], hero["name"]))
    db.commit()
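
# A minimal usage sketch for save_heroes, assuming a local SQLite database and
# the Steam Web API GetHeroes endpoint; the schema and the API key placeholder
# below are illustrative assumptions, not part of this module.
def example_save_heroes():
    import sqlite3

    db = sqlite3.connect('dota.db')
    # hypothetical schema matching the columns save_heroes inserts
    db.execute("CREATE TABLE IF NOT EXISTS heroes "
               "(id INTEGER PRIMARY KEY, name TEXT)")
    heroes_url = ('https://api.steampowered.com/IEconDOTA2_570/GetHeroes/v1/'
                  '?key=YOUR_API_KEY')
    save_heroes(db, heroes_url)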
def update_matches(db, urls):
    """Crawl match data and update the matches, heroes and bridge tables.

    :param db: database connection
    :param urls: crawling URLs of matches
    """
    cur = db.cursor()
    for idx, url in enumerate(urls):
        # crawl match data
        print('Crawling data from', url, str(idx + 1) + '/' + str(len(urls)))
        data = match_api.crawl(url)

        # filter important information
        valid = is_match_valid(data)
        match_id = data["result"]["match_id"]

        # mark the match as processed and flag invalid ones
        print('Updating match', match_id, 'valid:', valid)
        cur.execute("UPDATE matches SET processed = 1, invalid = ? WHERE id = ?",
                    (int(not valid), match_id))
        db.commit()

        # ignore invalid matches
        if valid:
            has_radiant_won = data["result"]["radiant_win"]
            radiant_heroes, dire_heroes = filter_heroes(data)

            # save per-hero results for this match in the bridge table
            rows = [(hero_id, match_id, int(has_radiant_won))
                    for hero_id in radiant_heroes]
            rows += [(hero_id, match_id, int(not has_radiant_won))
                     for hero_id in dire_heroes]
            cur.executemany("INSERT INTO heroes_matches (heroId, matchId, hasWon) "
                            "VALUES (?, ?, ?)", rows)
            db.commit()
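
# Possible sketches of the two helpers update_matches relies on, which are not
# defined in this section. The Dire/Radiant split follows the documented
# GetMatchDetails convention (bit 7 of player_slot marks a Dire player); the
# validity criteria below are assumptions and may differ from the real check.
def is_match_valid(data):
    # illustrative check: a full 5v5 match with no leavers
    players = data["result"].get("players", [])
    return len(players) == 10 and all(p.get("leaver_status", 0) == 0
                                      for p in players)


def filter_heroes(data):
    # split hero ids by side: bit 7 (value 128) of player_slot is set for Dire
    radiant_heroes, dire_heroes = [], []
    for player in data["result"]["players"]:
        if player["player_slot"] & 128:
            dire_heroes.append(player["hero_id"])
        else:
            radiant_heroes.append(player["hero_id"])
    return radiant_heroes, dire_heroes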
def crawl_matches(url):
    """Crawl ids of all accessible matches from the Steam Web API.

    :param url: match history base URL
    :return: set of match ids
    """
    last_match_id = 0
    match_ids = set()

    # crawling loop
    while True:
        # add the starting match id to the URL on every iteration but the first
        if last_match_id == 0:
            crawling_url = url
        else:
            crawling_url = url + '&start_at_match_id=' + str(last_match_id)

        # crawl
        print('Crawling data from', crawling_url)
        data = match_api.crawl(crawling_url)
        matches = data["result"]["matches"]
        print(len(matches), matches)

        # stop crawling if there are no results
        if not matches:
            break

        # parse match ids
        previous_count = len(match_ids)
        for m_data in matches:
            last_match_id = m_data["match_id"]
            match_ids.add(last_match_id)

        # pages fetched with start_at_match_id include the boundary match, so
        # a page that adds no new ids means the oldest match has been reached;
        # without this check the loop would re-fetch the last page forever
        if len(match_ids) == previous_count:
            break

    return match_ids
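
# A sketch of how the pieces might fit together, assuming the GetMatchHistory
# and GetMatchDetails endpoints and a simple SQLite schema; the table layouts
# and the API key placeholder are illustrative assumptions.
if __name__ == '__main__':
    import sqlite3

    db = sqlite3.connect('dota.db')
    db.execute("CREATE TABLE IF NOT EXISTS matches "
               "(id INTEGER PRIMARY KEY, processed INTEGER, invalid INTEGER)")
    db.execute("CREATE TABLE IF NOT EXISTS heroes_matches "
               "(heroId INTEGER, matchId INTEGER, hasWon INTEGER)")

    history_url = ('https://api.steampowered.com/IDOTA2Match_570/'
                   'GetMatchHistory/v1/?key=YOUR_API_KEY&matches_requested=100')
    details_url = ('https://api.steampowered.com/IDOTA2Match_570/'
                   'GetMatchDetails/v1/?key=YOUR_API_KEY&match_id=')

    # discover match ids, queue them, then fetch and store the full details
    match_ids = crawl_matches(history_url)
    db.executemany("INSERT OR IGNORE INTO matches (id, processed, invalid) "
                   "VALUES (?, 0, 0)", [(mid,) for mid in match_ids])
    db.commit()
    update_matches(db, [details_url + str(mid) for mid in match_ids])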