Esempio n. 1
0
def get_missing_reviews(games):
    """Fetch and store Steam reviews for every game whose reviews file is
    not yet on disk, visiting games in descending score-rank order.

    games: sequence of game objects exposing .score_rank and .app_id.
    Side effects: writes data/reviews/<app_id>.json per missing game and
    logs progress to stdout.
    """
    # list(...) so np.array gets a sequence; a bare map object would
    # produce a useless 0-d object array on Python 3.
    score_ranks = np.array([g.score_rank for g in games])
    order = np.argsort(score_ranks)
    # reversed(argsort) => highest score_rank first.
    for i in reversed(order):
        app_id = games[i].app_id
        reviews_file = data_file("reviews", "%d.json" % app_id)
        if not os.path.exists(reviews_file):
            print(datetime.now().time(), "Getting reviews for", app_id)
            reviews = Review.get_reviews_from_steam(app_id)
            # BUG FIX: reviews_file is already a full path built via
            # data_file() above; wrapping it in data_file() again produced
            # a bogus nested path, so the existence check on the next run
            # never saw the file that was written.
            with open(reviews_file, "w") as f:
                # Materialize to a list: json can't serialize a map object.
                json.dump([r.to_json() for r in reviews], f, indent=2)
        else:
            print(datetime.now().time(), "Reviews for", app_id, "already exist!")
Esempio n. 2
0
def refresh_tags_table(games):
    """Rebuild the tag list from the given games and persist it.

    games: iterable of game objects (see compute_reverse_index).
    Side effects: batch-saves tags to the Tag table and dumps any tags
    that failed to save into data/failed_tags.json for later inspection.
    Returns: the full list of Tag objects built from the games.
    """
    tag_reverse_index = compute_reverse_index(games)
    tags = create_tag_list(tag_reverse_index)
    failed = Tag.batch_save(tags)
    with open(data_file("failed_tags.json"), "w") as f:
        # List comprehension instead of map(): a map object is not
        # JSON-serializable on Python 3.
        json.dump([g.to_json() for g in failed], f,
                  default=lambda o: o.__dict__, indent=2)
    return tags
Esempio n. 3
0
def get_and_zip_the_universe():
    """Download every stored review blob from S3 and write them all into a
    single gzipped JSON file keyed by app id.

    Side effects: reads every object in the Review bucket and writes
    data/reviews.gzip. No return value.
    """
    # I know I can do Review.get_from_s3 here, but this should be less clunky
    reviews = dict()
    for compressed_review in Review.bucket.objects.all():
        app_id = int(compressed_review.key)
        reviews[app_id] = json.loads(decompress(compressed_review.get()["Body"].read()))
    # BUG FIX: mode "w" on gzip.open is *binary*; json.dump writes str and
    # raises TypeError on Python 3. "wt" opens a text-mode wrapper.
    with gzip.open(data_file("reviews.gzip"), "wt") as f:
        json.dump(reviews, f)
Esempio n. 4
0
def load_compressed_matrix(filename=data_file("compressed_matrix.npy")):
    """Load the matrix saved by save_compressed_matrix().

    Returns: (app_ids, matrix) — column 0 of the stored array cast to
    int64, and the remaining columns as the feature matrix.
    """
    with open(filename, "rb") as f:
        arr = np.load(f)
    # np.int64 instead of np.int: the np.int alias was removed in
    # NumPy 1.24 (it was just the builtin int).
    return arr[:, 0].astype(np.int64), arr[:, 1:]
Esempio n. 5
0
def save_compressed_matrix(app_ids,
                           compressed_matrix,
                           filename=data_file("compressed_matrix.npy")):
    """Persist app ids alongside their compressed feature rows.

    The ids become column 0 of the saved array, so that
    load_compressed_matrix() can split them back out later.
    """
    stacked = np.column_stack((app_ids, compressed_matrix))
    with open(filename, "wb") as out:
        np.save(out, stacked)
Esempio n. 6
0
                matches = reviews_re.match(num_reviews)
                if score in userscore_to_digit and matches is not None:
                    self.userscore = userscore_to_digit[score]
                    num_reviews, = matches.groups()
                    self.num_reviews = int(num_reviews.replace(",", ""))
                    print("Succesfully updated userscore for", self.app_id)
                    return

        # This is just so that we don"t retry any games that can"t be scored (maybe because they
        # haven"t come out yet) automatically.
        print("Could not update userscore for", self.app_id)
        self.userscore = -2
        self.num_reviews = -2


# Local cache of the SteamSpy "request=all" API response, so repeated runs
# don't re-hit the API (see iter_all_games below).
STEAMSPY_GAMES_JSON = data_file("steamspy_games.json")


def iter_all_games():
    """Yield a Game for every entry in the SteamSpy catalogue.

    Uses the on-disk cache at STEAMSPY_GAMES_JSON when present; otherwise
    fetches the full catalogue from the SteamSpy API and caches it.
    Yields: Game objects built via Game.from_steampspy_json.
    """
    if os.path.exists(STEAMSPY_GAMES_JSON):
        with open(STEAMSPY_GAMES_JSON) as f:
            games_json = json.load(f)
    else:
        games_json = requests.get(
            "http://steamspy.com/api.php?request=all").json()
        with open(STEAMSPY_GAMES_JSON, "w") as f:
            json.dump(games_json, f, default=lambda o: o.__dict__, indent=2)
    # .items() instead of .iteritems(): iteritems() does not exist on
    # Python 3 and items() is equivalent here on Python 2 as well.
    for app_id, game in games_json.items():
        # "999999" appears to be a sentinel/placeholder entry in the
        # SteamSpy dump — skipped deliberately.
        if app_id == "999999":
            continue
        yield Game.from_steampspy_json(game)
Esempio n. 7
0
def refresh_games_table():
    """Fetch the full SteamSpy catalogue and persist it to the Game table.

    Side effects: batch-saves all games and dumps those that failed to
    save into data/failed_games.json for later inspection.
    Returns: the full list of Game objects fetched.
    """
    games = list(iter_all_games())
    failed = Game.batch_save(games)
    with open(data_file("failed_games.json"), "w") as f:
        # List comprehension instead of map(): a map object is not
        # JSON-serializable on Python 3.
        json.dump([g.to_json() for g in failed], f,
                  default=lambda o: o.__dict__, indent=2)
    return games
Esempio n. 8
0
        Tag.table.put_item(Item=self.to_dynamo_json())


def compute_reverse_index(games=None):
    """Build a reverse index from normalized tag name to the set of app
    ids that carry that tag.

    games: iterable of game objects with .tags (list of str) and .app_id;
           when None, every game is fetched via Game.get_all().
    Returns: defaultdict(set) mapping lowercased, stripped tag name to the
             set of int app ids tagged with it.
    """
    if games is None:
        games = Game.get_all()
    tag_reverse_index = defaultdict(set)
    for game in games:
        # No len() guard needed: iterating an empty tag list is a no-op.
        for tag_name in game.tags:
            tag_reverse_index[tag_name.lower().strip()].add(int(game.app_id))
    return tag_reverse_index


# Local cache of the SteamSpy tags table scraped from steamspy.com/tag/,
# stored as CSV so repeated runs don't re-scrape the page.
STEAMSPY_TAG_CSV = data_file("steamspy_tags.csv")


def create_tag_list(tag_reverse_index):
    if not os.path.exists(STEAMSPY_TAG_CSV):
        page = requests.get("http://steamspy.com/tag/").text
        soup = BS(page, "lxml")
        table = soup.find("table", id="gamesbygenre")
        table_head = table.find("thead")
        table_body = table.find("tbody")
        with open(STEAMSPY_TAG_CSV, "w") as f:
            writer = csv.writer(f)
            # write CSV header, just because
            writer.writerow(list(table_head.stripped_strings))
            writer.writerows(
                list(row.stripped_strings)
from __future__ import print_function

import json
import numpy as np

from app.models import Game
from app.models.tag import compute_reverse_index
from app.utils import data_file

if __name__ == '__main__':
    # Build a binary document-tag matrix: one row per game, where column 0
    # holds the app id and column j+1 is 1 iff the game carries tag
    # tag_ids[j]. The matrix is saved for downstream processing.
    games = list(Game.get_all())
    reverse_index = compute_reverse_index(games)
    # +1 column to hold the app id alongside the tag indicator columns.
    # np.int64 instead of np.int: the np.int alias was removed in
    # NumPy 1.24.
    doc_tag_matrix = np.zeros((len(games), len(reverse_index) + 1),
                              dtype=np.int64)
    app_ids = sorted(game.app_id for game in games)
    tag_ids = sorted(reverse_index.keys())
    doc_tag_matrix[:, 0] = np.array(app_ids)

    # range() instead of xrange(): xrange does not exist on Python 3.
    for app_index in range(len(games)):
        for tag_index in range(len(tag_ids)):
            if app_ids[app_index] in reverse_index[tag_ids[tag_index]]:
                doc_tag_matrix[app_index, tag_index + 1] = 1

    with open(data_file("doc_tag_matrix.npy"), "wb") as f:
        np.save(f, doc_tag_matrix)