Example #1
def save_heroes(db, url):
    """Crawl heroes from the specified URL and save them to the database.

    :param db: database connection
    :param url: target URL
    """
    cur = db.cursor()
    data = match_api.crawl(url)

    # use a parameterized query: concatenating values breaks on hero names
    # containing quotes (e.g. "Nature's Prophet") and invites SQL injection
    query = "INSERT OR IGNORE INTO heroes (id, name) VALUES (?, ?);"

    for hero in data["result"]["heroes"]:
        print(query, (hero["id"], hero["name"]))
        cur.execute(query, (hero["id"], hero["name"]))

    db.commit()
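
For context, a minimal usage sketch. It assumes an SQLite database (the INSERT OR IGNORE syntax suggests sqlite3), a one-column-per-field heroes table, and a placeholder heroes endpoint URL and API key:

import sqlite3

# placeholder endpoint; the real URL and API key are project-specific
HEROES_URL = 'https://api.steampowered.com/IEconDOTA2_570/GetHeroes/v1/?key=YOUR_KEY'

db = sqlite3.connect('dota.db')
db.execute('CREATE TABLE IF NOT EXISTS heroes (id INTEGER PRIMARY KEY, name TEXT);')
save_heroes(db, HEROES_URL)
db.close()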
Example #2
def update_matches(db, urls):
    """Crawl match data and properly update matches, heroes and bridge tables.

    :param db: database connection
    :param urls: crawling URLs of matches
    """
    cur = db.cursor()

    for idx, url in enumerate(urls, start=1):
        # crawl match data, counting progress from 1 rather than 0
        print('Crawling data from', url, str(idx) + '/' + str(len(urls)))
        data = match_api.crawl(url)

        # filter important information
        valid = is_match_valid(data)
        match_id = data["result"]["match_id"]

        # mark the match as processed, flagging invalid ones
        query = "UPDATE matches SET processed = 1, invalid = ? WHERE id = ?;"
        print(query, (int(not valid), match_id))
        cur.execute(query, (int(not valid), match_id))
        db.commit()

        # ignore invalid matches
        if valid:
            has_radiant_won = data["result"]["radiant_win"]
            radiant_heroes, dire_heroes = filter_heroes(data)

            # save per-hero results in the bridge table: a hero has won
            # if its side matches the winning side
            query = "INSERT INTO heroes_matches (heroId, matchId, hasWon) VALUES (?, ?, ?);"
            rows = [(hero_id, match_id, int(has_radiant_won))
                    for hero_id in radiant_heroes]
            rows += [(hero_id, match_id, int(not has_radiant_won))
                     for hero_id in dire_heroes]

            print(query, rows)
            cur.executemany(query, rows)
            db.commit()
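
For reference, a sketch of the schema these queries seem to assume. The table and column names (matches, heroes_matches, processed, invalid, heroId, matchId, hasWon) come from the queries above; the types, defaults, and foreign keys are guesses:

import sqlite3

db = sqlite3.connect('dota.db')
db.executescript("""
CREATE TABLE IF NOT EXISTS matches (
    id INTEGER PRIMARY KEY,
    processed INTEGER DEFAULT 0,
    invalid INTEGER DEFAULT 0
);
CREATE TABLE IF NOT EXISTS heroes_matches (
    heroId INTEGER REFERENCES heroes (id),
    matchId INTEGER REFERENCES matches (id),
    hasWon INTEGER
);
""")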
Example #3
def crawl_matches(url):
    """Crawl data of all accessible matches from Steam Web API.

    :param url: match history base URL
    :return: set of match ids
    """
    last_match_id = 0
    match_ids = set()

    # crawling loop
    while True:

        # add the starting match id to the URL on every iteration after the first
        if last_match_id == 0:
            crawling_url = url
        else:
            crawling_url = url + '&start_at_match_id=' + str(last_match_id)

        # crawl
        print('Crawling data from', crawling_url)
        data = match_api.crawl(crawling_url)
        matches = data["result"]["matches"]
        print(len(matches), matches)

        # stop crawling once a page yields no results
        if not matches:
            break

        # parse match ids, remembering how many we had before this page
        seen = len(match_ids)
        for m_data in matches:
            last_match_id = m_data["match_id"]
            match_ids.add(last_match_id)

        # guard against an endless loop in case the API re-includes
        # the match passed as start_at_match_id
        if len(match_ids) == seen:
            break

    return match_ids
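
Finally, a sketch of how the three functions might be wired together. The GetMatchHistory and GetMatchDetails URLs and the API key are placeholders, and is_match_valid and filter_heroes are assumed to be defined alongside update_matches:

import sqlite3

# placeholder URLs; real requests need a valid Steam Web API key
HISTORY_URL = 'https://api.steampowered.com/IDOTA2Match_570/GetMatchHistory/v1/?key=YOUR_KEY'
DETAILS_URL = 'https://api.steampowered.com/IDOTA2Match_570/GetMatchDetails/v1/?key=YOUR_KEY&match_id='

db = sqlite3.connect('dota.db')

# register crawled ids so update_matches can flag them later
match_ids = crawl_matches(HISTORY_URL)
db.executemany('INSERT OR IGNORE INTO matches (id) VALUES (?);',
               [(match_id,) for match_id in match_ids])
db.commit()

update_matches(db, [DETAILS_URL + str(match_id) for match_id in match_ids])
db.close()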