def reap_orphans_for_date(reaping_date):
    inventory = get_reaping_inventory_for_date(reaping_date)

    for hour, hour_inventory in inventory.items():
        reaped_orphan_count = 0
        for minute, minute_inventory in hour_inventory.items():
            for shortid, keys in minute_inventory.items():
                descriptor = keys.get("descriptor")
                if is_safe_to_reap(shortid, keys):
                    log.debug("Reaping Descriptor: %r", descriptor)
                    aws.S3.delete_object(
                        Bucket=settings.S3_RAW_LOG_UPLOAD_BUCKET,
                        Key=keys["descriptor"]
                    )
                    reaped_orphan_count += 1
                else:
                    log.debug("Skipping: %r (Unsafe to reap)", descriptor)

        log.info(
            "A total of %s descriptors reaped for hour: %s" % (
                str(reaped_orphan_count), str(hour)
            )
        )

        # Report count of orphans to Influx
        fields = {"count": reaped_orphan_count}
        influx_metric(
            "orphan_descriptors_reaped",
            fields=fields,
            timestamp=reaping_date,
            hour=hour
        )
def delete(self):
    # We only perform delete on NEW raw uploads because when we get to this point we have
    # a copy of the log and descriptor attached to the UploadEvent
    if self.state == RawUploadState.NEW:
        log.debug("Deleting files from S3")
        aws.S3.delete_object(Bucket=self.bucket, Key=self.log_key)
        aws.S3.delete_object(Bucket=self.bucket, Key=self.descriptor_key)
def reap_orphans_for_date(reaping_date):
    inventory = get_reaping_inventory_for_date(reaping_date)

    for hour, hour_inventory in inventory.items():
        reaped_orphan_count = 0
        for minute, minute_inventory in hour_inventory.items():
            for shortid, keys in minute_inventory.items():
                if is_safe_to_reap(shortid, keys):
                    log.debug("Reaping Descriptor: %r", keys["descriptor"])
                    aws.S3.delete_object(
                        Bucket=settings.S3_RAW_LOG_UPLOAD_BUCKET,
                        Key=keys["descriptor"]
                    )
                    reaped_orphan_count += 1
                else:
                    log.debug("Skipping: %r (Unsafe To Reap)", keys["descriptor"])

        log.info(
            "A total of %s descriptors reaped for hour: %s" % (
                str(reaped_orphan_count), str(hour)
            )
        )

        # Report count of orphans to Influx
        fields = {
            "count": reaped_orphan_count
        }
        influx_metric(
            "orphan_descriptors_reaped",
            fields=fields,
            timestamp=reaping_date,
            hour=hour
        )
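# --- Usage sketch (not from the source): reap_orphans_for_date() above is parameterized
# by a date, so a daily scheduled job could invoke it for the previous day once that
# day's uploads have had a chance to be processed. The scheduling mechanism and the
# helper name below are assumptions for illustration only.
from datetime import date, timedelta


def reap_orphans_daily():
    # Reap orphaned descriptors for yesterday's inventory.
    yesterday = date.today() - timedelta(days=1)
    reap_orphans_for_date(yesterday)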
def attempt_request_triggered_query_execution(parameterized_query, run_local=False, priority=None):
    do_personal = settings.REDSHIFT_TRIGGER_PERSONALIZED_DATA_REFRESHES_FROM_QUERY_REQUESTS

    if run_local or settings.REDSHIFT_TRIGGER_CACHE_REFRESHES_FROM_QUERY_REQUESTS:
        execute_query(parameterized_query, run_local, priority)
    elif do_personal and parameterized_query.is_personalized:
        execute_query(parameterized_query, run_local, priority)
    else:
        log.debug("Triggering query from web app is disabled")
def prepare_upload_event_log_location(self, bucket, key):
    self._upload_event_log_bucket = bucket
    self._upload_event_log_key = key

    if key != self.log_key:
        copy_source = "%s/%s" % (self.bucket, self.log_key)
        log.debug("Copying power.log %r to %r:%r" % (copy_source, bucket, key))
        aws.S3.copy_object(Bucket=bucket, Key=key, CopySource=copy_source)

    self._upload_event_location_populated = True
def do_process_upload_event(upload_event):
    meta = json.loads(upload_event.metadata)

    # Parse the UploadEvent's file
    parser = parse_upload_event(upload_event, meta)
    # Validate the resulting object and metadata
    entity_tree, exporter = validate_parser(parser, meta)

    # Create/Update the global game object and its players
    global_game, global_game_created = find_or_create_global_game(entity_tree, meta)
    players = update_global_players(global_game, entity_tree, meta, upload_event)

    # Create/Update the replay object itself
    replay, game_replay_created = find_or_create_replay(
        parser, entity_tree, meta, upload_event, global_game, players
    )

    can_attempt_redshift_load = False

    if global_game.loaded_into_redshift is None:
        log.debug("Global game has not been loaded into redshift.")
        # Attempt to claim the advisory_lock, if successful:
        can_attempt_redshift_load = global_game.acquire_redshift_lock()
    else:
        log.debug("Global game has already been loaded into Redshift")

    # Defer flushing the exporter until after the UploadEvent is set to SUCCESS
    # So that the player can start watching their replay sooner
    def do_flush_exporter():
        # Only if we were able to claim the advisory lock do we proceed here.
        if can_attempt_redshift_load:
            log.debug("Redshift lock acquired. Will attempt to flush to redshift")

            if should_load_into_redshift(upload_event, global_game):
                with influx_timer("generate_redshift_game_info_duration"):
                    game_info = get_game_info(global_game, replay)
                exporter.set_game_info(game_info)

                try:
                    with influx_timer("flush_exporter_to_firehose_duration"):
                        flush_exporter_to_firehose(exporter)
                except:
                    raise
                else:
                    global_game.loaded_into_redshift = timezone.now()
                    global_game.save()
                    # Okay to release the advisory lock once loaded_into_redshift is set
                    # It will also be released automatically when the lambda exits.
                    global_game.release_redshift_lock()
        else:
            log.debug("Did not acquire redshift lock. Will not flush to redshift")

    return replay, do_flush_exporter
def get_opponent_revealed_deck(entity_tree, friendly_player_id, game_type):
    for player in entity_tree.players:
        if player.player_id != friendly_player_id:
            decklist = [c.card_id for c in player.initial_deck if c.card_id]

            deck, created = Deck.objects.get_or_create_from_id_list(
                decklist,
                hero_id=player._hero.card_id,
                game_type=game_type,
                classify_into_archetype=True
            )
            log.debug("Opponent revealed deck %i (created=%r)", deck.id, created)
            return deck
def find_or_create_global_game(entity_tree, meta):
    ladder_season = meta.get("ladder_season")
    if not ladder_season:
        ladder_season = guess_ladder_season(meta["end_time"])

    game_type = meta.get("game_type", 0)
    if game_type == 7:
        # the enum used to be wrong...
        game_type = int(BnetGameType.BGT_CASUAL_STANDARD)

    defaults = {
        "game_handle": meta.get("game_handle"),
        "server_address": meta.get("server_ip"),
        "server_port": meta.get("server_port"),
        "server_version": meta.get("server_version"),
        "game_type": game_type,
        "format": meta.get("format", 0),
        "build": meta["build"],
        "match_start": meta["start_time"],
        "match_end": meta["end_time"],
        "brawl_season": meta.get("brawl_season", 0),
        "ladder_season": ladder_season,
        "scenario_id": meta.get("scenario_id"),
        "num_entities": len(entity_tree.entities),
        "num_turns": entity_tree.tags.get(GameTag.TURN),
        "tainted_decks": False,
    }

    if eligible_for_unification(meta):
        # If the globalgame is eligible for unification, generate a digest
        # and get_or_create the object
        players = entity_tree.players
        lo1, lo2 = players[0].account_lo, players[1].account_lo
        digest = generate_globalgame_digest(meta, lo1, lo2)
        log.debug("GlobalGame digest is %r" % (digest))
        global_game, created = GlobalGame.objects.get_or_create(
            digest=digest, defaults=defaults
        )
    else:
        global_game = GlobalGame.objects.create(digest=None, **defaults)
        created = True

    log.debug("Prepared GlobalGame(id=%r), created=%r", global_game.id, created)

    return global_game, created
def classify_deck(
    unclassified_deck, player_class=CardClass.INVALID, format=FormatType.FT_UNKNOWN
):
    """
    Return an Archetype or None

    Classification proceeds in two steps:

    1) First, a set of explicit rules is executed; if the deck matches any of these
    rules, the Archetype is automatically assigned (Not Yet Implemented).

    2) Second, if no Archetype was discovered, then an Archetype is assigned by
    determining the minimum edit distance to an existing Archetype. However, if the
    deck is not within at least 5 cards of an Archetype, then no Archetype will be
    assigned.
    """
    log.debug("Classifying Deck With Cards: %r", unclassified_deck)
    candidates = Archetype.objects.archetypes_for_class(player_class, format)
    distances = []

    # On average we see 14 cards from the opponent's deck
    # 30 cards: we accept 6 divergent cards, distance is: 12
    # 20 cards: we accept 4 divergent cards, distance is: 8 + 5 (unrevealed) = 13
    # 10 cards: we accept 2 divergent cards, distance is: 4 + 10 (unrevealed) = 14
    # 5 cards: we accept 0 divergent cards, distance is: 12.5 (unrevealed) = 12.5
    CUTOFF_DISTANCE = 14

    for archetype, canonical_decks in candidates.items():
        for canonical_deck in canonical_decks:
            dist = edit_distance(canonical_deck, unclassified_deck)
            log.debug("Archetype: %s, Distance: %s" % (archetype.name, str(dist)))
            if dist <= CUTOFF_DISTANCE:
                distances.append((archetype, dist))

    if distances:
        return sorted(distances, key=lambda t: t[1])[0][0]
    else:
        return None
def classify_deck(deck, player_class=CardClass.INVALID, format=FormatType.FT_UNKNOWN):
    """
    Return an Archetype or None

    Classification proceeds in two steps:

    1) First, a set of explicit rules is executed; if the deck matches any of these
    rules, the Archetype is automatically assigned (Not Yet Implemented).

    2) Second, if no Archetype was discovered, then an Archetype is assigned by
    determining the minimum edit distance to an existing Archetype. However, if the
    deck is not within at least 5 cards of an Archetype, then no Archetype will be
    assigned.
    """
    log.debug("Classifying Deck With Cards: %r" % (deck))
    candidates = Archetype.objects.archetypes_for_class(player_class, format)
    distances = []

    # On average we see 14 cards from the opponent's deck
    # 30 cards: we accept 6 divergent cards, distance is: 12
    # 20 cards: we accept 4 divergent cards, distance is: 8 + 5 (unrevealed) = 13
    # 10 cards: we accept 2 divergent cards, distance is: 4 + 10 (unrevealed) = 14
    # 5 cards: we accept 0 divergent cards, distance is: 12.5 (unrevealed) = 12.5
    CUTOFF_DISTANCE = 14

    for archetype, canonical_decks in candidates.items():
        for canonical_deck in canonical_decks:
            dist = edit_distance(canonical_deck, deck)
            log.debug("Archetype: %s, Distance: %s" % (archetype.name, str(dist)))
            if dist <= CUTOFF_DISTANCE:
                distances.append((archetype, dist))

    if distances:
        return sorted(distances, key=lambda t: t[1])[0][0]
    else:
        return None
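# --- Illustrative helper (assumption): classify_deck() relies on an edit_distance()
# between a canonical deck and a (possibly partial) observed deck, which is not shown
# in this section. The sketch below is one reading that reproduces the arithmetic in
# the docstring comments above (2 points per divergent card, 0.5 points per unrevealed
# card of a 30-card deck); the project's actual edit_distance may differ.
from collections import Counter


def edit_distance_sketch(canonical_deck, observed_deck):
    canonical = Counter(canonical_deck)
    observed = Counter(observed_deck)
    # Observed cards that do not fit the canonical list cost 2 points each
    divergent = sum((observed - canonical).values())
    # Cards we simply have not seen yet (partial decklist) cost 0.5 points each
    unrevealed = max(0, sum(canonical.values()) - sum(observed.values()))
    return 2 * divergent + 0.5 * unrevealed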
def do_flush_exporter():
    # Only if we were able to claim the advisory lock do we proceed here.
    if can_attempt_redshift_load:
        log.debug("Redshift lock acquired. Will attempt to flush to redshift")

        if should_load_into_redshift(upload_event, global_game):
            with influx_timer("generate_redshift_game_info_duration"):
                game_info = get_game_info(global_game, replay)
            exporter.set_game_info(game_info)

            try:
                with influx_timer("flush_exporter_to_firehose_duration"):
                    flush_failures_report = flush_exporter_to_firehose(
                        exporter,
                        records_to_flush=get_records_to_flush()
                    )
                    for target_table, errors in flush_failures_report.items():
                        for error in errors:
                            influx_metric(
                                "firehose_flush_failure",
                                {
                                    "stream_name": error["stream_name"],
                                    "error_code": error["error_code"],
                                    "error_message": error["error_message"],
                                    "count": 1
                                },
                                target_table=target_table
                            )
            except Exception:
                raise
            else:
                global_game.loaded_into_redshift = timezone.now()
                global_game.save()
                # Okay to release the advisory lock once loaded_into_redshift is set
                # It will also be released automatically when the lambda exits.
                global_game.release_redshift_lock()
    else:
        log.debug("Did not acquire redshift lock. Will not flush to redshift")
def do_flush_exporter():
    # Only if we were able to claim the advisory lock do we proceed here.
    if can_attempt_redshift_load:
        log.debug("Redshift lock acquired. Will attempt to flush to redshift")

        if should_load_into_redshift(upload_event, global_game):
            with influx_timer("generate_redshift_game_info_duration"):
                game_info = get_game_info(global_game, replay)
            exporter.set_game_info(game_info)

            try:
                with influx_timer("flush_exporter_to_firehose_duration"):
                    flush_exporter_to_firehose(exporter)
            except:
                raise
            else:
                global_game.loaded_into_redshift = timezone.now()
                global_game.save()
                # Okay to release the advisory lock once loaded_into_redshift is set
                # It will also be released automatically when the lambda exits.
                global_game.release_redshift_lock()
    else:
        log.debug("Did not acquire redshift lock. Will not flush to redshift")
def find_or_create_global_game(entity_tree, meta):
    ladder_season = meta.get("ladder_season")
    if not ladder_season:
        ladder_season = guess_ladder_season(meta["end_time"])

    defaults = {
        "game_handle": meta.get("game_handle"),
        "server_address": meta.get("server_ip"),
        "server_port": meta.get("server_port"),
        "server_version": meta.get("server_version"),
        "game_type": meta.get("game_type", 0),
        "format": meta.get("format", 0),
        "build": meta["build"],
        "match_start": meta["start_time"],
        "match_end": meta["end_time"],
        "brawl_season": meta.get("brawl_season", 0),
        "ladder_season": ladder_season,
        "scenario_id": meta.get("scenario_id"),
        "num_entities": len(entity_tree.entities),
        "num_turns": entity_tree.tags.get(GameTag.TURN),
    }

    if eligible_for_unification(meta):
        # If the globalgame is eligible for unification, generate a digest
        # and get_or_create the object
        players = entity_tree.players
        lo1, lo2 = players[0].account_lo, players[1].account_lo
        digest = generate_globalgame_digest(meta, lo1, lo2)
        log.debug("GlobalGame digest is %r" % (digest))
        global_game, created = GlobalGame.objects.get_or_create(digest=digest, defaults=defaults)
    else:
        global_game = GlobalGame.objects.create(digest=None, **defaults)
        created = True

    log.debug("Prepared GlobalGame(id=%r), created=%r", global_game.id, created)

    return global_game, created
def update_global_players(global_game, entity_tree, meta):
    # Fill the player metadata and objects
    players = {}

    for player in entity_tree.players:
        player_meta = meta.get("player%i" % (player.player_id), {})
        decklist = player_meta.get("deck")
        if not decklist:
            decklist = [c.card_id for c in player.initial_deck if c.card_id]

        name, real_name = get_player_names(player)
        player_hero_id = player._hero.card_id

        deck, created = Deck.objects.get_or_create_from_id_list(
            decklist,
            hero_id=player_hero_id,
            game_type=global_game.game_type,
            classify_into_archetype=True
        )
        log.debug("Prepared deck %i (created=%r)", deck.id, created)

        common = {
            "game": global_game,
            "player_id": player.player_id,
        }
        defaults = {
            "account_hi": player.account_hi,
            "account_lo": player.account_lo,
            "is_first": player.tags.get(GameTag.FIRST_PLAYER, False),
            "is_ai": player.is_ai,
            "hero_id": player_hero_id,
            "hero_premium": player._hero.tags.get(GameTag.PREMIUM, False),
            "final_state": player.tags.get(GameTag.PLAYSTATE, 0),
            "deck_list": deck,
        }

        update = {
            "name": name,
            "real_name": real_name,
            "rank": player_meta.get("rank"),
            "legend_rank": player_meta.get("legend_rank"),
            "stars": player_meta.get("stars"),
            "wins": player_meta.get("wins"),
            "losses": player_meta.get("losses"),
            "deck_id": player_meta.get("deck_id") or None,
            "cardback_id": player_meta.get("cardback"),
        }

        defaults.update(update)
        game_player, created = GlobalGamePlayer.objects.get_or_create(defaults=defaults, **common)
        log.debug("Prepared player %r (%i) (created=%r)", game_player, game_player.id, created)

        if not created:
            # Go through the update dict and update values on the player
            # This gets us extra data we might not have had when the player was first created
            updated = False
            for k, v in update.items():
                if v and getattr(game_player, k) != v:
                    setattr(game_player, k, v)
                    updated = True

            # Skip updating the deck if we already have a bigger one
            # TODO: We should make deck_list nullable and only create it here
            if len(decklist) > game_player.deck_list.size:
                # XXX: Maybe we should also check friendly_player_id for good measure
                game_player.deck_list = deck
                updated = True

            if updated:
                log.debug("Saving updated player to the database.")
                game_player.save()

        players[player.player_id] = game_player

    return players
def find_or_create_replay(parser, entity_tree, meta, upload_event, global_game, players):
    client_handle = meta.get("client_handle") or None
    existing_replay = upload_event.game
    shortid = existing_replay.shortid if existing_replay else upload_event.shortid
    replay_xml_path = _generate_upload_path(global_game.match_start, shortid)
    log.debug("Will save replay %r to %r", shortid, replay_xml_path)

    # The user that owns the replay
    user = upload_event.token.user if upload_event.token else None
    friendly_player = players[meta["friendly_player"]]
    opponent_revealed_deck = get_opponent_revealed_deck(
        entity_tree,
        friendly_player.player_id,
        global_game.game_type
    )
    hsreplay_doc = create_hsreplay_document(parser, entity_tree, meta, global_game)

    common = {
        "global_game": global_game,
        "client_handle": client_handle,
        "spectator_mode": meta.get("spectator_mode", False),
        "reconnecting": meta["reconnecting"],
        "friendly_player_id": friendly_player.player_id,
    }
    defaults = {
        "shortid": shortid,
        "aurora_password": meta.get("aurora_password", ""),
        "spectator_password": meta.get("spectator_password", ""),
        "resumable": meta.get("resumable"),
        "build": meta["build"],
        "upload_token": upload_event.token,
        "won": friendly_player.won,
        "replay_xml": replay_xml_path,
        "hsreplay_version": hsreplay_version,
        "hslog_version": hslog_version,
        "opponent_revealed_deck": opponent_revealed_deck,
    }

    # Create and save hsreplay.xml file
    # Noop in the database, as it should already be set before the initial save()
    xml_file = save_hsreplay_document(hsreplay_doc, shortid, existing_replay)
    influx_metric("replay_xml_num_bytes", {"size": xml_file.size})

    if existing_replay:
        log.debug("Found existing replay %r", existing_replay.shortid)

        # Clean up existing replay file
        filename = existing_replay.replay_xml.name
        if filename and filename != replay_xml_path and default_storage.exists(filename):
            # ... but only if it's not the same path as the new one (it'll get overwritten)
            log.debug("Deleting %r", filename)
            default_storage.delete(filename)

        # Now update all the fields
        defaults.update(common)
        for k, v in defaults.items():
            setattr(existing_replay, k, v)

        # Save the replay file
        existing_replay.replay_xml.save("hsreplay.xml", xml_file, save=False)

        # Finally, save to the db and exit early with created=False
        existing_replay.save()
        return existing_replay, False

    # No existing replay, so we assign a default user/visibility to the replay
    # (eg. we never update those fields on existing replays)
    # We also prepare a webhook for triggering, if there's one.
    if user:
        defaults["user"] = user
        defaults["visibility"] = user.default_replay_visibility

    if client_handle:
        # Get or create a replay object based on our defaults
        replay, created = GameReplay.objects.get_or_create(defaults=defaults, **common)
        log.debug("Replay %r has created=%r, client_handle=%r", replay.id, created, client_handle)
    else:
        # The client_handle is the minimum we require to update an existing replay.
        # If we don't have it, we won't try deduplication, we instead get_or_create by shortid.
        defaults.update(common)
        replay, created = GameReplay.objects.get_or_create(defaults=defaults, shortid=shortid)
        log.debug("Replay %r has created=%r (no client_handle)", replay.id, created)

    # Save the replay file
    replay.replay_xml.save("hsreplay.xml", xml_file, save=False)

    if replay.shortid != upload_event.shortid:
        # We must ensure an alias for this upload_event.shortid is recorded
        # We use get or create in case this is not the first time processing this replay
        ReplayAlias.objects.get_or_create(replay=replay, shortid=upload_event.shortid)

    if user:
        user.trigger_webhooks(replay)

    return replay, created
def find_or_create_replay(parser, entity_tree, meta, upload_event, global_game, players):
    client_handle = meta.get("client_handle") or None
    existing_replay = upload_event.game
    shortid = existing_replay.shortid if existing_replay else upload_event.shortid
    replay_xml_path = _generate_upload_path(global_game.match_start, shortid)
    log.debug("Will save replay %r to %r", shortid, replay_xml_path)

    # The user that owns the replay
    user = upload_event.token.user if upload_event.token else None
    friendly_player = players[meta["friendly_player"]]
    hsreplay_doc = create_hsreplay_document(parser, entity_tree, meta, global_game)

    common = {
        "global_game": global_game,
        "client_handle": client_handle,
        "spectator_mode": meta.get("spectator_mode", False),
        "reconnecting": meta["reconnecting"],
        "friendly_player_id": friendly_player.player_id,
    }
    defaults = {
        "shortid": shortid,
        "aurora_password": meta.get("aurora_password", ""),
        "spectator_password": meta.get("spectator_password", ""),
        "resumable": meta.get("resumable"),
        "build": meta["build"],
        "upload_token": upload_event.token,
        "won": friendly_player.won,
        "replay_xml": replay_xml_path,
        "hsreplay_version": hsreplay_doc.version,
    }

    # Create and save hsreplay.xml file
    # Noop in the database, as it should already be set before the initial save()
    xml_file = save_hsreplay_document(hsreplay_doc, shortid, existing_replay)
    influx_metric("replay_xml_num_bytes", {"size": xml_file.size})

    if existing_replay:
        log.debug("Found existing replay %r", existing_replay.shortid)

        # Clean up existing replay file
        filename = existing_replay.replay_xml.name
        if filename and filename != replay_xml_path and default_storage.exists(filename):
            # ... but only if it's not the same path as the new one (it'll get overwritten)
            log.debug("Deleting %r", filename)
            default_storage.delete(filename)

        # Now update all the fields
        defaults.update(common)
        for k, v in defaults.items():
            setattr(existing_replay, k, v)

        # Save the replay file
        existing_replay.replay_xml.save("hsreplay.xml", xml_file, save=False)

        # Finally, save to the db and exit early with created=False
        existing_replay.save()
        return existing_replay, False

    # No existing replay, so we assign a default user/visibility to the replay
    # (eg. we never update those fields on existing replays)
    if user:
        defaults["user"] = user
        defaults["visibility"] = user.default_replay_visibility

    if client_handle:
        # Get or create a replay object based on our defaults
        replay, created = GameReplay.objects.get_or_create(defaults=defaults, **common)
        log.debug("Replay %r has created=%r, client_handle=%r", replay.id, created, client_handle)
    else:
        # The client_handle is the minimum we require to update an existing replay.
        # If we don't have it, we won't try deduplication, we instead get_or_create by shortid.
        defaults.update(common)
        replay, created = GameReplay.objects.get_or_create(defaults=defaults, shortid=shortid)
        log.debug("Replay %r has created=%r (no client_handle)", replay.id, created)

    # Save the replay file
    replay.replay_xml.save("hsreplay.xml", xml_file, save=False)

    return replay, created
def update_global_players(global_game, entity_tree, meta):
    # Fill the player metadata and objects
    players = {}

    for player in entity_tree.players:
        player_meta = meta.get("player%i" % (player.player_id), {})
        decklist = player_meta.get("deck")
        if not decklist:
            decklist = [c.card_id for c in player.initial_deck if c.card_id]

        name, real_name = get_player_names(player)

        deck, created = Deck.objects.get_or_create_from_id_list(decklist)
        log.debug("Prepared deck %i (created=%r)", deck.id, created)

        common = {
            "game": global_game,
            "player_id": player.player_id,
        }
        defaults = {
            "account_hi": player.account_hi,
            "account_lo": player.account_lo,
            "is_first": player.tags.get(GameTag.FIRST_PLAYER, False),
            "is_ai": player.is_ai,
            "hero_id": player._hero.card_id,
            "hero_premium": player._hero.tags.get(GameTag.PREMIUM, False),
            "final_state": player.tags.get(GameTag.PLAYSTATE, 0),
            "deck_list": deck,
        }

        update = {
            "name": name,
            "real_name": real_name,
            "rank": player_meta.get("rank"),
            "legend_rank": player_meta.get("legend_rank"),
            "stars": player_meta.get("stars"),
            "wins": player_meta.get("wins"),
            "losses": player_meta.get("losses"),
            "deck_id": player_meta.get("deck_id") or None,
            "cardback_id": player_meta.get("cardback"),
        }

        defaults.update(update)
        game_player, created = GlobalGamePlayer.objects.get_or_create(defaults=defaults, **common)
        log.debug("Prepared player %r (%i) (created=%r)", game_player, game_player.id, created)

        if not created:
            # Go through the update dict and update values on the player
            # This gets us extra data we might not have had when the player was first created
            updated = False
            for k, v in update.items():
                if v and getattr(game_player, k) != v:
                    setattr(game_player, k, v)
                    updated = True

            # Skip updating the deck if we already have a bigger one
            # TODO: We should make deck_list nullable and only create it here
            if len(decklist) > game_player.deck_list.size():
                # XXX: Maybe we should also check friendly_player_id for good measure
                game_player.deck_list = deck
                updated = True

            if updated:
                log.debug("Saving updated player to the database.")
                game_player.save()

        players[player.player_id] = game_player

    return players
def update_global_players(global_game, entity_tree, meta, upload_event, exporter):
    # Fill the player metadata and objects
    players = {}
    played_cards = exporter.export_played_cards()

    is_spectated_replay = meta.get("spectator_mode", False)
    is_dungeon_run = meta.get("scenario_id", 0) == 2663

    for player in entity_tree.players:
        is_friendly_player = player.player_id == meta["friendly_player"]
        player_meta = meta.get("player%i" % (player.player_id), {})

        decklist_from_meta = player_meta.get("deck")
        decklist_from_replay = [
            c.initial_card_id for c in player.initial_deck if c.card_id
        ]

        meta_decklist_is_superset = _is_decklist_superset(
            decklist_from_meta, decklist_from_replay
        )

        # We disregard the meta decklist if it's not matching the replay decklist
        # We always want to use it in dungeon run though, since the initial deck is garbage
        disregard_meta = not meta_decklist_is_superset and (
            not is_dungeon_run or not is_friendly_player
        )

        if not decklist_from_meta or is_spectated_replay or disregard_meta:
            # Spectated replays never know more than is in the replay data
            # But may have erroneous data from the spectator's client's memory
            # Read from before they entered the spectated game
            decklist = decklist_from_replay
        else:
            decklist = decklist_from_meta

        name, real_name = get_player_names(player)
        player_hero_id = player._hero.card_id

        try:
            deck, _ = Deck.objects.get_or_create_from_id_list(
                decklist,
                hero_id=player_hero_id,
                game_type=global_game.game_type,
                classify_archetype=True
            )
            log.debug("Prepared deck %i (created=%r)", deck.id, _)
        except IntegrityError as e:
            # This will happen if cards in the deck are not in the DB
            # For example, during a patch release
            influx_metric(
                "replay_deck_create_failure",
                {
                    "count": 1,
                    "build": meta["build"],
                    "global_game_id": global_game.id,
                    "server_ip": meta.get("server_ip", ""),
                    "upload_ip": upload_event.upload_ip,
                    "error": str(e),
                }
            )
            log.exception("Could not create deck for player %r", player)
            global_game.tainted_decks = True
            # Replace with an empty deck
            deck, _ = Deck.objects.get_or_create_from_id_list([])

        capture_played_card_stats(
            global_game,
            [c.dbf_id for c in played_cards[player.player_id]],
            is_friendly_player
        )

        eligible_formats = [FormatType.FT_STANDARD, FormatType.FT_WILD]
        is_eligible_format = global_game.format in eligible_formats

        deck_prediction_enabled = getattr(settings, "FULL_DECK_PREDICTION_ENABLED", True)
        if deck_prediction_enabled and is_eligible_format and settings.ENV_AWS:
            try:
                player_class = Deck.objects._convert_hero_id_to_player_class(player_hero_id)
                tree = deck_prediction_tree(player_class, global_game.format)
                played_cards_for_player = played_cards[player.player_id]

                # 5 played cards partitions a 14 day window into buckets of ~ 500 or less
                # We can search through ~ 2,000 decks in 100ms so that gives us plenty of headroom
                min_played_cards = tree.max_depth - 1

                # We can control via settings the minimum number of cards we need
                # To know about in the deck list before we attempt to guess the full deck
                min_observed_cards = settings.DECK_PREDICTION_MINIMUM_CARDS

                played_card_dbfs = [c.dbf_id for c in played_cards_for_player][:min_played_cards]
                played_card_names = [c.name for c in played_cards_for_player][:min_played_cards]

                if deck.size is not None:
                    deck_size = deck.size
                else:
                    deck_size = sum(i.count for i in deck.includes.all())

                has_enough_observed_cards = deck_size >= min_observed_cards
                has_enough_played_cards = len(played_card_dbfs) >= min_played_cards

                if deck_size == 30:
                    # deck_id == proxy_deck_id for complete decks
                    tree.observe(deck.id, deck.dbf_map(), played_card_dbfs)
                    deck.guessed_full_deck = deck
                    deck.save()
                elif has_enough_observed_cards and has_enough_played_cards:
                    res = tree.lookup(
                        deck.dbf_map(),
                        played_card_dbfs,
                    )
                    predicted_deck_id = res.predicted_deck_id

                    fields = {
                        "actual_deck_id": deck.id,
                        "deck_size": deck_size,
                        "game_id": global_game.id,
                        "sequence": "->".join("[%s]" % c for c in played_card_names),
                        "predicted_deck_id": res.predicted_deck_id,
                        "match_attempts": res.match_attempts,
                        "tie": res.tie
                    }

                    if settings.DETAILED_PREDICTION_METRICS:
                        fields["actual_deck"] = repr(deck)
                        if res.predicted_deck_id:
                            predicted_deck = Deck.objects.get(id=res.predicted_deck_id)
                            fields["predicted_deck"] = repr(predicted_deck)

                    if res.node:
                        fields["depth"] = res.node.depth

                        if settings.DETAILED_PREDICTION_METRICS:
                            node_labels = []
                            for path_dbf_id in res.path():
                                if path_dbf_id == "ROOT":
                                    path_str = path_dbf_id
                                else:
                                    path_card = Card.objects.get(dbf_id=path_dbf_id)
                                    path_str = path_card.name
                                node_labels.append("[%s]" % path_str)
                            fields["node"] = "->".join(node_labels)

                            popularity = res.popularity_distribution.popularity(
                                res.predicted_deck_id
                            )
                            fields["predicted_deck_popularity"] = popularity

                            deck_count = res.popularity_distribution.size()
                            fields["distribution_deck_count"] = deck_count

                            observation_count = res.popularity_distribution.observations()
                            fields["distribution_observation_count"] = observation_count

                    tree_depth = res.node.depth if res.node else None
                    influx_metric(
                        "deck_prediction",
                        fields,
                        missing_cards=30 - deck_size,
                        player_class=CardClass(int(player_class)).name,
                        format=FormatType(int(global_game.format)).name,
                        tree_depth=tree_depth,
                        made_prediction=predicted_deck_id is not None
                    )

                    if predicted_deck_id:
                        deck.guessed_full_deck = Deck.objects.get(id=predicted_deck_id)
                        deck.save()
            except Exception as e:
                error_handler(e)

        # Create the BlizzardAccount first
        defaults = {
            "region": BnetRegion.from_account_hi(player.account_hi),
            "battletag": name,
        }

        if not is_spectated_replay and not player.is_ai and is_friendly_player:
            user = upload_event.token.user if upload_event.token else None
            if user and not user.is_fake:
                # and user.battletag and user.battletag.startswith(player.name):
                defaults["user"] = user

        blizzard_account, created = BlizzardAccount.objects.get_or_create(
            account_hi=player.account_hi,
            account_lo=player.account_lo,
            defaults=defaults
        )
        if not created and not blizzard_account.user and "user" in defaults:
            # Set BlizzardAccount.user if it's an available claim for the user
            influx_metric(
                "pegasus_account_claimed",
                {
                    "count": 1,
                    "account": str(blizzard_account.id),
                    "region": str(blizzard_account.region),
                    "account_lo": str(blizzard_account.account_lo),
                    "game": str(global_game.id)
                }
            )
            blizzard_account.user = defaults["user"]
            blizzard_account.save()

        log.debug("Prepared BlizzardAccount %r", blizzard_account)

        # Now create the GlobalGamePlayer object
        common = {
            "game": global_game,
            "player_id": player.player_id,
        }
        defaults = {
            "is_first": player.tags.get(GameTag.FIRST_PLAYER, False),
            "is_ai": player.is_ai,
            "hero_id": player_hero_id,
            "hero_premium": player._hero.tags.get(GameTag.PREMIUM, False),
            "final_state": player.tags.get(GameTag.PLAYSTATE, 0),
            "extra_turns": player.tags.get(GameTag.EXTRA_TURNS_TAKEN_THIS_GAME, 0),
            "deck_list": deck,
        }

        update = {
            "name": name,
            "real_name": real_name,
            "pegasus_account": blizzard_account,
            "rank": player_meta.get("rank"),
            "legend_rank": player_meta.get("legend_rank"),
            "stars": player_meta.get("stars"),
            "wins": player_meta.get("wins"),
            "losses": player_meta.get("losses"),
            "deck_id": player_meta.get("deck_id") or None,
            "cardback_id": player_meta.get("cardback"),
        }

        defaults.update(update)
        game_player, created = GlobalGamePlayer.objects.get_or_create(defaults=defaults, **common)
        log.debug("Prepared player %r (%i) (created=%r)", game_player, game_player.id, created)

        if not created:
            # Go through the update dict and update values on the player
            # This gets us extra data we might not have had when the player was first created
            updated = False
            for k, v in update.items():
                if v and getattr(game_player, k) != v:
                    setattr(game_player, k, v)
                    updated = True

            # Skip updating the deck if we already have a bigger one
            # TODO: We should make deck_list nullable and only create it here
            if game_player.deck_list.size is None or len(decklist) > game_player.deck_list.size:
                # XXX: Maybe we should also check friendly_player_id for good measure
                game_player.deck_list = deck
                updated = True

            if updated:
                log.debug("Saving updated player to the database.")
                game_player.save()

        players[player.player_id] = game_player

    return players
def update_global_players(global_game, entity_tree, meta, upload_event):
    # Fill the player metadata and objects
    players = {}

    for player in entity_tree.players:
        player_meta = meta.get("player%i" % (player.player_id), {})
        is_spectated_replay = meta.get("spectator_mode", False)
        is_friendly_player = player.player_id == meta["friendly_player"]

        decklist_from_meta = player_meta.get("deck")
        decklist_from_replay = [c.card_id for c in player.initial_deck if c.card_id]

        meta_decklist_is_superset = _is_decklist_superset(
            decklist_from_meta, decklist_from_replay
        )

        if not decklist_from_meta or is_spectated_replay or not meta_decklist_is_superset:
            # Spectated replays never know more than is in the replay data
            # But may have erroneous data from the spectator's client's memory
            # Read from before they entered the spectated game
            decklist = decklist_from_replay
        else:
            decklist = decklist_from_meta

        name, real_name = get_player_names(player)
        player_hero_id = player._hero.card_id

        try:
            deck, _ = Deck.objects.get_or_create_from_id_list(
                decklist,
                hero_id=player_hero_id,
                game_type=global_game.game_type,
                classify_into_archetype=True
            )
            log.debug("Prepared deck %i (created=%r)", deck.id, _)
        except IntegrityError as e:
            # This will happen if cards in the deck are not in the DB
            # For example, during a patch release
            influx_metric("replay_deck_create_failure", {"global_game_id": global_game.id})
            log.exception("Could not create deck for player %r", player)
            global_game.tainted_decks = True
            # Replace with an empty deck
            deck, _ = Deck.objects.get_or_create_from_id_list([])

        # Create the PegasusAccount first
        defaults = {
            "region": BnetRegion.from_account_hi(player.account_hi),
            "battletag": name,
        }

        if not is_spectated_replay and not player.is_ai and is_friendly_player:
            user = upload_event.token.user if upload_event.token else None
            if user and not user.is_fake:
                # and user.battletag and user.battletag.startswith(player.name):
                defaults["user"] = user

        pegasus_account, created = PegasusAccount.objects.get_or_create(
            account_hi=player.account_hi,
            account_lo=player.account_lo,
            defaults=defaults
        )
        if not created and not pegasus_account.user and "user" in defaults:
            # Set PegasusAccount.user if it's an available claim for the user
            influx_metric("pegasus_account_claimed", {"count": 1})
            pegasus_account.user = defaults["user"]
            pegasus_account.save()

        log.debug("Prepared PegasusAccount %r", pegasus_account)

        # Now create the GlobalGamePlayer object
        common = {
            "game": global_game,
            "player_id": player.player_id,
        }
        defaults = {
            "account_hi": player.account_hi,
            "account_lo": player.account_lo,
            "is_first": player.tags.get(GameTag.FIRST_PLAYER, False),
            "is_ai": player.is_ai,
            "hero_id": player_hero_id,
            "hero_premium": player._hero.tags.get(GameTag.PREMIUM, False),
            "final_state": player.tags.get(GameTag.PLAYSTATE, 0),
            "deck_list": deck,
        }

        update = {
            "name": name,
            "real_name": real_name,
            "pegasus_account": pegasus_account,
            "rank": player_meta.get("rank"),
            "legend_rank": player_meta.get("legend_rank"),
            "stars": player_meta.get("stars"),
            "wins": player_meta.get("wins"),
            "losses": player_meta.get("losses"),
            "deck_id": player_meta.get("deck_id") or None,
            "cardback_id": player_meta.get("cardback"),
        }

        defaults.update(update)
        game_player, created = GlobalGamePlayer.objects.get_or_create(defaults=defaults, **common)
        log.debug("Prepared player %r (%i) (created=%r)", game_player, game_player.id, created)

        if not created:
            # Go through the update dict and update values on the player
            # This gets us extra data we might not have had when the player was first created
            updated = False
            for k, v in update.items():
                if v and getattr(game_player, k) != v:
                    setattr(game_player, k, v)
                    updated = True

            # Skip updating the deck if we already have a bigger one
            # TODO: We should make deck_list nullable and only create it here
            if len(decklist) > game_player.deck_list.size:
                # XXX: Maybe we should also check friendly_player_id for good measure
                game_player.deck_list = deck
                updated = True

            if updated:
                log.debug("Saving updated player to the database.")
                game_player.save()

        players[player.player_id] = game_player

    return players
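# --- Illustrative helper (assumption): the update_global_players() versions above call
# _is_decklist_superset(decklist_from_meta, decklist_from_replay), which is not included
# in this section. The sketch below assumes "superset" means the metadata decklist
# contains at least every card id observed in the replay's initial deck; the real
# helper may be implemented differently.
from collections import Counter


def _is_decklist_superset_sketch(meta_decklist, replay_decklist):
    # An absent or empty meta decklist can never be a superset of the observed cards.
    if not meta_decklist:
        return False
    claimed = Counter(meta_decklist)
    observed = Counter(replay_decklist)
    return all(claimed[card_id] >= count for card_id, count in observed.items())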
def find_or_create_replay(parser, entity_tree, meta, upload_event, global_game, players):
    client_handle = meta.get("client_handle") or None
    existing_replay = upload_event.game
    shortid = existing_replay.shortid if existing_replay else upload_event.shortid
    replay_xml_path = _generate_upload_path(global_game.match_start, shortid)
    log.debug("Will save replay %r to %r", shortid, replay_xml_path)

    # The user that owns the replay
    user = upload_event.token.user if upload_event.token else None
    friendly_player = players[meta["friendly_player"]]
    opponent_revealed_deck = get_opponent_revealed_deck(
        entity_tree,
        friendly_player.player_id,
        global_game.game_type
    )
    hsreplay_doc = create_hsreplay_document(parser, entity_tree, meta, global_game)

    common = {
        "global_game": global_game,
        "client_handle": client_handle,
        "spectator_mode": meta.get("spectator_mode", False),
        "reconnecting": meta["reconnecting"],
        "friendly_player_id": friendly_player.player_id,
    }
    defaults = {
        "shortid": shortid,
        "aurora_password": meta.get("aurora_password", ""),
        "spectator_password": meta.get("spectator_password", ""),
        "resumable": meta.get("resumable"),
        "build": meta["build"],
        "upload_token": upload_event.token,
        "won": friendly_player.won,
        "replay_xml": replay_xml_path,
        "hsreplay_version": hsreplay_version,
        "hslog_version": hslog_version,
        "upload_ip": upload_event.upload_ip,
        "user_agent": upload_event.user_agent,
        "opponent_revealed_deck": opponent_revealed_deck,
    }

    # Create and save hsreplay.xml file
    # Noop in the database, as it should already be set before the initial save()
    xml_file = save_hsreplay_document(hsreplay_doc, shortid, existing_replay)
    influx_metric("replay_xml_num_bytes", {"size": xml_file.size})

    if existing_replay:
        log.debug("Found existing replay %r", existing_replay.shortid)

        # Clean up existing replay file
        filename = existing_replay.replay_xml.name
        if filename and filename != replay_xml_path and default_storage.exists(filename):
            # ... but only if it's not the same path as the new one (it'll get overwritten)
            log.debug("Deleting %r", filename)
            default_storage.delete(filename)

        # Now update all the fields
        defaults.update(common)
        for k, v in defaults.items():
            setattr(existing_replay, k, v)

        # Save the replay file
        existing_replay.replay_xml.save("hsreplay.xml", xml_file, save=False)

        # Finally, save to the db and exit early with created=False
        existing_replay.save()
        return existing_replay, False

    # No existing replay, so we assign a default user/visibility to the replay
    # (eg. we never update those fields on existing replays)
    # We also prepare a webhook for triggering, if there's one.
    if user:
        defaults["user"] = user
        defaults["visibility"] = user.default_replay_visibility

    if client_handle:
        # Get or create a replay object based on our defaults
        replay, created = GameReplay.objects.get_or_create(defaults=defaults, **common)
        log.debug("Replay %r has created=%r, client_handle=%r", replay.id, created, client_handle)
    else:
        # The client_handle is the minimum we require to update an existing replay.
        # If we don't have it, we won't try deduplication, we instead get_or_create by shortid.
        defaults.update(common)
        replay, created = GameReplay.objects.get_or_create(defaults=defaults, shortid=shortid)
        log.debug("Replay %r has created=%r (no client_handle)", replay.id, created)

        if not created:
            # This can only happen if there is an inconsistency between UploadEvent.game
            # and the processing run.
            # For example, the processing crashed before UploadEvent.save(), or there are
            # multiple processing calls before UploadEvent.game is saved.
            msg = "Replay %r already exists. Try reprocessing (again)." % (shortid)
            raise ReplayAlreadyExists(msg, replay)

    # Save the replay file
    replay.replay_xml.save("hsreplay.xml", xml_file, save=False)

    if replay.shortid != upload_event.shortid:
        # We must ensure an alias for this upload_event.shortid is recorded
        # We use get or create in case this is not the first time processing this replay
        ReplayAlias.objects.get_or_create(replay=replay, shortid=upload_event.shortid)

    if user:
        # Re-query the replay object for the webhook trigger
        user.trigger_webhooks(GameReplay.objects.get(id=replay.id))

    return replay, created
def do_process_upload_event(upload_event):
    meta = json.loads(upload_event.metadata)

    # Hack until we do something better
    # We need the correct tz, but here it's stored as UTC because it goes through DRF
    # https://github.com/encode/django-rest-framework/commit/7d6d043531
    if upload_event.descriptor_data:
        descriptor_data = json.loads(upload_event.descriptor_data)
        meta["match_start"] = descriptor_data["upload_metadata"]["match_start"]

    # Parse the UploadEvent's file
    parser = parse_upload_event(upload_event, meta)
    # Validate the resulting object and metadata
    entity_tree, exporter = validate_parser(parser, meta)

    # Create/Update the global game object and its players
    global_game, global_game_created = find_or_create_global_game(entity_tree, meta)
    players = update_global_players(global_game, entity_tree, meta, upload_event, exporter)

    # Create/Update the replay object itself
    replay, game_replay_created = find_or_create_replay(
        parser, entity_tree, meta, upload_event, global_game, players
    )

    update_player_class_distribution(replay)

    can_attempt_redshift_load = False

    if global_game.loaded_into_redshift is None:
        log.debug("Global game has not been loaded into redshift.")
        # Attempt to claim the advisory_lock, if successful:
        can_attempt_redshift_load = global_game.acquire_redshift_lock()
    else:
        log.debug("Global game has already been loaded into Redshift")

    # Defer flushing the exporter until after the UploadEvent is set to SUCCESS
    # So that the player can start watching their replay sooner
    def do_flush_exporter():
        # Only if we were able to claim the advisory lock do we proceed here.
        if can_attempt_redshift_load:
            log.debug("Redshift lock acquired. Will attempt to flush to redshift")

            if should_load_into_redshift(upload_event, global_game):
                with influx_timer("generate_redshift_game_info_duration"):
                    game_info = get_game_info(global_game, replay)
                exporter.set_game_info(game_info)

                try:
                    with influx_timer("flush_exporter_to_firehose_duration"):
                        flush_failures_report = flush_exporter_to_firehose(
                            exporter,
                            records_to_flush=get_records_to_flush()
                        )
                        for target_table, errors in flush_failures_report.items():
                            for error in errors:
                                influx_metric(
                                    "firehose_flush_failure",
                                    {
                                        "stream_name": error["stream_name"],
                                        "error_code": error["error_code"],
                                        "error_message": error["error_message"],
                                        "count": 1
                                    },
                                    target_table=target_table
                                )
                except Exception:
                    raise
                else:
                    global_game.loaded_into_redshift = timezone.now()
                    global_game.save()
                    # Okay to release the advisory lock once loaded_into_redshift is set
                    # It will also be released automatically when the lambda exits.
                    global_game.release_redshift_lock()
        else:
            log.debug("Did not acquire redshift lock. Will not flush to redshift")

    return replay, do_flush_exporter
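# --- Caller sketch (assumption): the comments in do_process_upload_event() defer the
# exporter flush "until after the UploadEvent is set to SUCCESS" so the player can view
# their replay sooner. A caller honouring that contract might look like the sketch
# below; the status field/enum name and the exact bookkeeping are assumptions, not the
# project's actual entry point.
def process_and_flush(upload_event):
    replay, do_flush_exporter = do_process_upload_event(upload_event)

    # Mark the upload successful first so the replay becomes visible to the player...
    upload_event.game = replay
    upload_event.status = UploadEventStatus.SUCCESS  # assumed enum/field name
    upload_event.save()

    # ...then perform the deferred Redshift/Firehose flush.
    do_flush_exporter()
    return replay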