    def handle(self, *args, **options):
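        """Feed recent deck observations into the per-class deck prediction trees."""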
        conn = redshift.get_new_redshift_connection()

        lookback_val = options["look_back"]
        if lookback_val:
            lookback = int(lookback_val[0])
        else:
            lookback = 14

        end_ts = date.today() - timedelta(days=1)
        start_ts = end_ts - timedelta(days=lookback)

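        # When a Redis host is supplied, batch all writes through a
        # non-transactional pipeline rather than issuing them individually.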
        redis_host = options["redis_host"]
        if redis_host:
            redis_client = redis.StrictRedis(host=redis_host[0])
            pipeline = redis_client.pipeline(transaction=False)
        else:
            pipeline = None

        params = {"start_date": start_ts, "end_date": end_ts}
        compiled_statement = REDSHIFT_QUERY.params(params).compile(bind=conn)
        query_start = time.time()
        for row in conn.execute(compiled_statement):
            as_of = row["match_start"]
            deck_id = row["deck_id"]
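            # deck_list is a JSON-encoded list of [dbf_id, count] pairs.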
            dbf_map = {
                dbf_id: count
                for dbf_id, count in json.loads(row["deck_list"])
            }
            player_class = CardClass(row["player_class"])
            # game_type 2 maps to Standard here; everything else falls back to Wild.
            format = (
                FormatType.FT_STANDARD if row["game_type"] == 2
                else FormatType.FT_WILD
            )
            played_cards = json.loads(row["played_cards"])

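            # Observe the deck into the prediction tree for this class/format,
            # keyed by the first (max_depth - 1) cards that were played.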
            tree = deck_prediction_tree(player_class,
                                        format,
                                        redis_client=pipeline)
            min_played_cards = tree.max_depth - 1
            played_card_dbfs = played_cards[:min_played_cards]
            deck_size = sum(dbf_map.values())

            if deck_size == 30:
                tree.observe(deck_id, dbf_map, played_card_dbfs, as_of=as_of)

            # Flush the pipeline periodically so buffered commands don't
            # accumulate unbounded. Guard against the no-Redis case, where
            # pipeline is None.
            if pipeline is not None and len(pipeline) >= 8000:
                pipeline.execute()

        # Flush any commands still buffered after the last row.
        if pipeline is not None and len(pipeline):
            pipeline.execute()

        query_end = time.time()
        duration_seconds = round(query_end - query_start)
        self.stdout.write("Took: %i seconds" % duration_seconds)

    def handle(self, *args, **options):
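        """Reclassify recent decks against the current archetype signature weights."""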
        conn = redshift.get_new_redshift_connection()
        is_dry_run = options["dry_run"]
        verbosity = options["verbosity"]

        end_ts = date.today()
        start_ts = end_ts - timedelta(days=options["lookback"])

        params = {"start_date": start_ts, "end_date": end_ts}
        compiled_statement = REDSHIFT_QUERY.params(params).compile(bind=conn)

        for card_class in CardClass:
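            # CardClass values 2 through 10 are the nine playable classes
            # (DRUID through WARRIOR).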
            if 2 <= card_class <= 10:
                for a in Archetype.objects.live().filter(
                        player_class=card_class):
                    self.archetype_map[a.id] = a

                # Standard Signature Weights
                standard_weight_values = ClusterSnapshot.objects.get_signature_weights(
                    FormatType.FT_STANDARD, card_class)
                if len(standard_weight_values):
                    self.signature_weights[FormatType.FT_STANDARD][
                        card_class] = standard_weight_values

                # Wild Signature Weights
                wild_weight_values = ClusterSnapshot.objects.get_signature_weights(
                    FormatType.FT_WILD, card_class)
                if len(wild_weight_values):
                    self.signature_weights[FormatType.FT_WILD][
                        card_class] = wild_weight_values

        result_set = list(conn.execute(compiled_statement))
        total_rows = len(result_set)
        self.stdout.write("%i decks to update" % (total_rows))
        if is_dry_run:
            self.stdout.write("Dry run, will not flush to databases")

        for counter, row in enumerate(result_set):
            deck_id = row["deck_id"]
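            # Periodically flush the buffered database and Firehose updates.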
            if not is_dry_run and counter % 100000 == 0:
                self.flush_db_buffer()
                self.flush_firehose_buffer()

            if deck_id is None:
                self.stderr.write("Got deck_id %r ... skipping" % (deck_id))
                continue

            current_archetype_id = row["archetype_id"]
            player_class = CardClass(row["player_class"])
            if player_class == CardClass.NEUTRAL:
                # Most likely noise
                self.stderr.write("Found and skipping NEUTRAL data: %r" %
                                  (row))
                continue
            format = (
                FormatType.FT_STANDARD if row["game_type"] == 2
                else FormatType.FT_WILD
            )

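            # deck_list is a JSON-encoded list of [dbf_id, count] pairs.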
            dbf_map = {
                dbf_id: count
                for dbf_id, count in json.loads(row["deck_list"])
            }
            if player_class not in self.signature_weights[format]:
                raise RuntimeError(
                    "%r not found for %r. Are signatures present?" %
                    (player_class, format))

            if self.signature_weights[format][player_class]:
                new_archetype_id = classify_deck(
                    dbf_map, self.signature_weights[format][player_class])

                if new_archetype_id == current_archetype_id:
                    if verbosity > 1:
                        self.stdout.write("Deck %r - Nothing to do." %
                                          (deck_id))
                    continue

                current_name = self.get_archetype_name(current_archetype_id)
                new_name = self.get_archetype_name(new_archetype_id)

                pct_complete = str(math.floor(100.0 * counter / total_rows))

                self.stdout.write(
                    "\t[%s%%] Reclassifying deck %r: %s => %s\n" %
                    (pct_complete, deck_id, current_name, new_name))

                if not is_dry_run:
                    self.buffer_archetype_update(deck_id, new_archetype_id)

        if not is_dry_run:
            self.flush_db_buffer()
            self.flush_firehose_buffer()
        else:
            self.stdout.write("Dry run complete")
    def handle(self, *args, **options):
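        """Export labeled deck observations for each day in a date range to CSV."""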
        if options["resume"]:
            if not os.path.isfile(options["out"]):
                self.stdout.write(
                    "File does not exist, unable to resume. Aborting.")
                return
        else:
            if os.path.isfile(options["out"]) and not options["noinput"]:
                msg = "File already exists. Overwrite?"
                if input("%s [y/N] " % msg).lower() != "y":
                    self.stdout.write("Aborting.")
                    return

        from_ = options["from"]
        to = options["to"]
        # The bounds are compared as strings before parsing; this assumes
        # ISO-8601 dates (YYYY-MM-DD), where lexicographic order matches
        # chronological order.
        if from_ > to:
            to, from_ = from_, to

        one_day = timedelta(days=1)
        start_date = parse(from_).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.utc)
        end_date = parse(to).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.utc)
        self.stdout.write("Gathering data from %s to %s..." % (
            start_date.date(),
            end_date.date(),
        ))

        conn = get_new_redshift_connection()

        fieldnames = [
            "game_date",
            "format",
            "player_class",
            "decklist",
            "observed_decklist",
            "play_sequence",
            "label",
        ]
        rows = 0
        # Append when resuming so previously written rows are preserved;
        # opening with "w" would truncate the file and defeat the resume.
        mode = "a" if options["resume"] else "w"
        with open(options["out"], mode, newline="") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            if not options["resume"]:
                writer.writeheader()

            current_date = start_date
            archetype_ids = set()
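            # Walk the range one day at a time, labeling each day's games via
            # the snapshot that was live on that day.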
            while current_date <= end_date:
                self.stdout.write("Gathering decks from %s..." %
                                  current_date.date())

                # For each day, grab the most recent snapshot promoted on or
                # before that day. While this ignores any earlier snapshots
                # from the same day, the deck composition is not expected to
                # change significantly between snapshots.
                snapshot = ClusterSetSnapshot.objects.prefetch_related(
                    "classclustersnapshot_set").filter(
                        promoted_on__lte=current_date).order_by(
                            "-promoted_on").first()

                if snapshot:
                    deck_ids, archetype_by_deck_id = _get_deck_ids_from_snapshot(
                        snapshot)
                    self.stdout.write("Got %d decks" % len(deck_ids))

                    # Grab the instance rows from Redshift, based on the cluster deck ids
                    self.stdout.write("Gathering games from %s..." %
                                      current_date.date())
                    params = {
                        "min_date": current_date,
                        "max_date": current_date,
                        "deck_ids": deck_ids,
                    }
                    compiled_statement = REDSHIFT_QUERY.params(params).compile(
                        bind=conn)
                    for row in conn.execute(compiled_statement):
                        archetype_id = archetype_by_deck_id[row.deck_id]
                        if archetype_id:
                            archetype_ids.add(archetype_id)
                        vals = {
                            "game_date": str(row.game_date),
                            "format": row.format,
                            "player_class": row.player_class,
                            "decklist": row.decklist,
                            "observed_decklist": row.observed_decklist,
                            "play_sequence": row.play_sequence,
                            "label": archetype_id,
                        }
                        rows += 1
                        writer.writerow(vals)
                else:
                    self.stdout.write("No snapshot live on %s" %
                                      current_date.date())

                current_date += one_day

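        # Optionally write a companion CSV mapping numeric labels to archetype names.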
        if options["write_labels"]:
            with open(options["write_labels"], "wt", newline="") as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=["label", "name"])
                writer.writeheader()
                archetype_id_list = sorted(archetype_ids)
                for archetype_id in archetype_id_list:
                    if archetype_id == -1:
                        archetype_name = "Experimental"
                    else:
                        archetype = Archetype.objects.filter(
                            id=archetype_id).first()
                        # The archetype may have been deleted since the snapshot.
                        archetype_name = archetype.name if archetype else "Unknown"
                    writer.writerow({
                        "label": archetype_id,
                        "name": archetype_name
                    })

        self.stdout.write("Done. Wrote %d rows." % rows)