def get_missing_reviews(games):
    # Fetch reviews for any game that doesn't already have a local JSON file,
    # walking games in descending score-rank order.
    score_ranks = np.array([g.score_rank for g in games])
    order = np.argsort(score_ranks)
    for i in reversed(order):
        app_id = games[i].app_id
        reviews_file = data_file("reviews", "%d.json" % app_id)
        if not os.path.exists(reviews_file):
            print(datetime.now().time(), "Getting reviews for", app_id)
            reviews = Review.get_reviews_from_steam(app_id)
            with open(reviews_file, "w") as f:
                json.dump([r.to_json() for r in reviews], f, indent=2)
        else:
            print(datetime.now().time(), "Reviews for", app_id, "already exist!")
def refresh_tags_table(games):
    tag_reverse_index = compute_reverse_index(games)
    tags = create_tag_list(tag_reverse_index)
    failed = Tag.batch_save(tags)
    with open(data_file("failed_tags.json"), "w") as f:
        json.dump([tag.to_json() for tag in failed], f,
                  default=lambda o: o.__dict__, indent=2)
    return tags
def get_and_zip_the_universe():
    # I know I can do Review.get_from_s3 here, but this should be less clunky
    reviews = dict()
    for compressed_review in Review.bucket.objects.all():
        app_id = int(compressed_review.key)
        reviews[app_id] = json.loads(
            decompress(compressed_review.get()["Body"].read()))
    with gzip.open(data_file("reviews.gzip"), "w") as f:
        json.dump(reviews, f)
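# Minimal companion sketch (not part of the original module, the name is made up) for
# reading back the archive written by get_and_zip_the_universe(); assumes the same
# {app_id: [review json, ...]} layout and converts the string keys json.load produces
# back to ints.
def load_the_universe(filename=data_file("reviews.gzip")):
    with gzip.open(filename) as f:
        return {int(app_id): review_list
                for app_id, review_list in json.load(f).items()}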
def load_compressed_matrix(filename=data_file("compressed_matrix.npy")):
    with open(filename, "rb") as f:
        arr = np.load(f)
    # Column 0 holds the app ids; the remaining columns are the compressed matrix.
    return arr[:, 0].astype(np.int), arr[:, 1:]
def save_compressed_matrix(app_ids, compressed_matrix,
                           filename=data_file("compressed_matrix.npy")):
    with open(filename, "wb") as f:
        np.save(f, np.column_stack((app_ids, compressed_matrix)))
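# Round-trip sketch for the two helpers above; the shapes and values are illustrative
# assumptions, not taken from the original code. Column-stacking keeps each app id next
# to its row, and load_compressed_matrix() splits them apart again.
def _compressed_matrix_roundtrip_example():
    app_ids = np.array([10, 20, 30])
    compressed_matrix = np.random.rand(3, 50)
    save_compressed_matrix(app_ids, compressed_matrix)
    loaded_ids, loaded_matrix = load_compressed_matrix()
    assert (loaded_ids == app_ids).all()
    assert loaded_matrix.shape == compressed_matrix.shape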
        matches = reviews_re.match(num_reviews)
        if score in userscore_to_digit and matches is not None:
            self.userscore = userscore_to_digit[score]
            num_reviews, = matches.groups()
            self.num_reviews = int(num_reviews.replace(",", ""))
            print("Successfully updated userscore for", self.app_id)
            return
        # This is just so that we don't automatically retry any games that can't be scored
        # (maybe because they haven't come out yet).
        print("Could not update userscore for", self.app_id)
        self.userscore = -2
        self.num_reviews = -2


STEAMSPY_GAMES_JSON = data_file("steamspy_games.json")


def iter_all_games():
    # Use the cached SteamSpy dump if it exists; otherwise fetch it and cache it.
    if os.path.exists(STEAMSPY_GAMES_JSON):
        with open(STEAMSPY_GAMES_JSON) as f:
            games_json = json.load(f)
    else:
        games_json = requests.get(
            "http://steamspy.com/api.php?request=all").json()
        with open(STEAMSPY_GAMES_JSON, "w") as f:
            json.dump(games_json, f, default=lambda o: o.__dict__, indent=2)
    for app_id, game in games_json.iteritems():
        # Skip SteamSpy's placeholder entry.
        if app_id == "999999":
            continue
        yield Game.from_steampspy_json(game)
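# Usage sketch (assumed, not part of the original module): repeated runs read the cached
# steamspy_games.json from disk instead of calling the SteamSpy API again.
#
#     for game in iter_all_games():
#         print(game.app_id)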
def refresh_games_table():
    games = list(iter_all_games())
    failed = Game.batch_save(games)
    with open(data_file("failed_games.json"), "w") as f:
        json.dump([g.to_json() for g in failed], f,
                  default=lambda o: o.__dict__, indent=2)
    return games
        Tag.table.put_item(Item=self.to_dynamo_json())


def compute_reverse_index(games=None):
    # Map each normalised tag name to the set of app ids that carry it.
    if games is None:
        games = Game.get_all()
    tag_reverse_index = defaultdict(set)
    for game in games:
        if len(game.tags) > 0:
            for tag_name in game.tags:
                tag_reverse_index[tag_name.lower().strip()].add(
                    int(game.app_id))
    return tag_reverse_index


STEAMSPY_TAG_CSV = data_file("steamspy_tags.csv")


def create_tag_list(tag_reverse_index):
    if not os.path.exists(STEAMSPY_TAG_CSV):
        page = requests.get("http://steamspy.com/tag/").text
        soup = BS(page, "lxml")
        table = soup.find("table", id="gamesbygenre")
        table_head = table.find("thead")
        table_body = table.find("tbody")
        with open(STEAMSPY_TAG_CSV, "w") as f:
            writer = csv.writer(f)
            # write CSV header, just because
            writer.writerow(list(table_head.stripped_strings))
            writer.writerows(
                list(row.stripped_strings) for row in table_body.find_all("tr"))
from __future__ import print_function

import json

import numpy as np

from app.models import Game
from app.models.tag import compute_reverse_index
from app.utils import data_file

if __name__ == '__main__':
    games = list(Game.get_all())
    reverse_index = compute_reverse_index(games)
    # Column 0 holds the app id; the remaining columns are one per tag.
    doc_tag_matrix = np.zeros((len(games), len(reverse_index) + 1), dtype=np.int)
    app_ids = sorted([game.app_id for game in games])
    tag_ids = sorted(reverse_index.keys())
    doc_tag_matrix[:, 0] = np.array(app_ids)
    for app_index in xrange(len(games)):
        for tag_index in xrange(len(reverse_index)):
            if app_ids[app_index] in reverse_index[tag_ids[tag_index]]:
                doc_tag_matrix[app_index, tag_index + 1] = 1
    with open(data_file("doc_tag_matrix.npy"), "wb") as f:
        np.save(f, doc_tag_matrix)
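# Sketch (assumed, not part of the original script) of how the saved matrix splits back
# into app ids and the binary document-tag matrix; the tag columns line up with
# sorted(reverse_index.keys()).
#
#     with open(data_file("doc_tag_matrix.npy"), "rb") as f:
#         doc_tag_matrix = np.load(f)
#     app_ids = doc_tag_matrix[:, 0]
#     tag_matrix = doc_tag_matrix[:, 1:]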