Example #1
def fix_missing_files():
    """ Finds replays set as "ARCHIVED" that are missing a corresponding file stored in S3. Re-adds them
        to GC queue. """
    _error = "MISSING_S3_FILE"

    all_s3_replay_ids = [
        replay_file.key[8:-8] for replay_file in dotabank_bucket.list()
        if replay_file.key[:8] == "replays/"
    ]
    archived_replays_no_file = Replay.query.filter(
        Replay.state == 'ARCHIVED', Replay.id.notin_(all_s3_replay_ids)).all()

    for replay in archived_replays_no_file:
        if not should_fix_be_attempted(replay.id, _error):
            # Tag as "DOWNLOAD_ERROR" because we can't fix this - the problem is entirely in Valve's (or their partners') domain.
            replay.state = "DOWNLOAD_ERROR"
            replay.local_uri = None
            replay.dl_done_time = None
            db.session.add(replay)
            db.session.commit()
            continue

        print(
            "Replay {} is \"ARCHIVED\" but does not have a file stored on S3. Re-adding to GC queue."
            .format(replay.id))
        replay.state = "WAITING_DOWNLOAD"  # Switch state back to WAITING_DOWNLOAD.
        Replay.add_dl_job(replay)
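A note on the key slicing used above and in the examples below: archive keys follow the pattern "replays/<id>.dem.bz2" (the same pattern used when reconstructing keys for deletion later on), so key[:8] checks the "replays/" prefix and key[8:-8] strips both the 8-character prefix and the 8-character ".dem.bz2" suffix, leaving just the match ID. A quick sanity check:

# "replays/" and ".dem.bz2" are both exactly 8 characters long,
# so key[8:-8] isolates the match ID between them.
key = "replays/123456789.dem.bz2"
assert key[:8] == "replays/"
assert key[8:-8] == "123456789"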
Example #2
    def small_replay_exodus(self):
        small_replay_files = {
            replay_file.key[8:-8]: replay_file.size
            for replay_file in dotabank_bucket.list()
            if replay_file.key[:8] == "replays/" and replay_file.size < (1024 * 1024)
        }
        small_replays = Replay.query.filter(
            Replay.id.in_(small_replay_files.keys())).all()

        replays_removed = []  # IDs of removed replays
        for replay in small_replays:
            # Save local URI so we can remove the file from S3 after we've updated the database.
            local_uri = replay.local_uri

            # Clean up metadata associated with an archived replay.
            replay.dl_done_time = None
            replay.local_uri = None
            replay.state = "WAITING_DOWNLOAD"

            # Save new state to the database.
            db.session.add(replay)
            db.session.commit()

            # Remove bad file from S3.
            dotabank_bucket.delete_key(
                local_uri or "replays/{}.dem.bz2".format(replay.id))

            # Add a new download job
            Replay.add_dl_job(replay)

            # Note that we've done things to this replay.
            replays_removed.append(replay.id)

        return jsonify(success=True, replays_removed=replays_removed)
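The enclosing class isn't included in this snippet; given the self.render(...) calls in the admin views below, small_replay_exodus most likely lives on a Flask-Admin view. A minimal sketch of how such a method could be wired up, assuming Flask-Admin's BaseView (the class name and route here are illustrative, not taken from the source):

from flask import jsonify
from flask_admin import BaseView, expose

class ReplayAdminView(BaseView):  # hypothetical class name
    @expose('/small_replay_exodus/')  # illustrative route
    def small_replay_exodus(self):
        # ... body as in the example above ...
        return jsonify(success=True, replays_removed=[])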
Example #3
def fix_small_replays():
    """ Finds replays with a tiny filesize and re-adds them to the GC queue (we probably downloaded an error page).
    """
    _error = "SMALL_REPLAY"

    # FIXME: This step will take longer and longer the more replays we store.  It would be more efficient to store
    # the filesize in our local database after a file has been archived, and then directly query the database.
    small_replay_files = {replay_file.key[8:-8]: replay_file.size for replay_file in dotabank_bucket.list()
                          if replay_file.key[:8] == "replays/" and replay_file.size < (1024 * 1024)}

    small_replays = db.session.query(Replay, db.func.count(ReplayAutoFix.id)).filter(
        Replay.state == "ARCHIVED",                 # Ignore non-archived files (they shouldn't be in S3 if they aren't archived, but vOv)
        Replay.id.in_(small_replay_files.keys()),   # Check the replays that the S3 call above has flagged as small
        ReplayAutoFix.replay_id == Replay.id
    ).group_by(
        ReplayAutoFix.replay_id
    ).having(
        db.func.count(ReplayAutoFix.id) < app.config.get('MAX_REPLAY_FIX_ATTEMPTS')  # Ignore replays that have exceeded max fix attempts
    ).all()

    for replay, fix_attempts in small_replays:
        if not should_fix_be_attempted(replay.id, _error, extra={
            'file_size': small_replay_files[unicode(replay.id)]
        }):
            continue

        print ("Replay {} has a small file stored on s3 ({} bytes).  Re-adding to DL queue.".format(
            replay.id,
            small_replay_files[unicode(replay.id)]
        ))
        replay.state = "WAITING_GC"  # Switch state back to WAITING_GC.
        Replay.add_dl_job(replay)
Example #4
    def index(self):
        """ Renders a list of replays which are atypical.

        human_players_discrepancy: Replays whose human_players property doesn't match the count of ReplayPlayer
        objects we have in our database.

        replay_available_download_error: Replays which are available to download, but that our download script failed to
        retrieve.

        replay_waiting_download_over24hrs: Replays which have been waiting to be downloaded for over 24 hrs.
        """
        human_players_discrepancy = [x for x in db.engine.execute(
            text("""
                SELECT
                    r.id,
                    r.human_players,
                    count(*) as player_count
                FROM {replay_table} r
                LEFT JOIN {player_table} rp ON rp.replay_id = r.id
                WHERE
                    rp.account_id is not NULL  # Exclude bots from count (though there's the chance we have duplicate entries for bots? fack)
                GROUP BY rp.replay_id
            """.format(
                replay_table=Replay.__tablename__,
                player_table=ReplayPlayer.__tablename__)
            )
        ) if x.player_count != x.human_players]

        replay_available_download_error = Replay.query.filter(
            Replay.replay_state == "REPLAY_AVAILABLE",
            Replay.state == "DOWNLOAD_ERROR"
        ).all()

        replay_waiting_download_over24hrs = Replay.query.filter(
            Replay.state == "WAITING_DOWNLOAD",
            Replay.gc_done_time <= (datetime.utcnow() - timedelta(hours=24))  # Over 24 hrs ago
        ).all()

        small_replay_files = {
            replay_file.key[8:-8]: replay_file.size
            for replay_file in dotabank_bucket.list()
            if replay_file.key[:8] == "replays/" and replay_file.size < (1024 * 1024)
        }
        small_replays = Replay.query.filter(
            Replay.id.in_(small_replay_files.keys())).all()

        all_s3_replay_ids = [
            replay_file.key[8:-8] for replay_file in dotabank_bucket.list()
            if replay_file.key[:8] == "replays/"
        ]
        archived_replays_no_file = Replay.query.filter(
            Replay.state == 'ARCHIVED',
            Replay.id.notin_(all_s3_replay_ids)).all()

        return self.render(
            'admin/atypical_replays.html',
            human_players_discrepancy=human_players_discrepancy,
            replay_available_download_error=replay_available_download_error,
            replay_waiting_download_over24hrs=replay_waiting_download_over24hrs,
            small_replays=small_replays,
            small_replay_files=small_replay_files,
            archived_replays_no_file=archived_replays_no_file
        )
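The raw SQL above can also be expressed through the ORM. A rough equivalent (untested, and keeping the same bot-exclusion caveat noted in the inline comment) might look like:

discrepancies = db.session.query(
    Replay.id,
    Replay.human_players,
    db.func.count(ReplayPlayer.id).label('player_count')
).outerjoin(
    ReplayPlayer, ReplayPlayer.replay_id == Replay.id
).filter(
    ReplayPlayer.account_id.isnot(None)  # Exclude bots from the count
).group_by(
    Replay.id
).having(
    db.func.count(ReplayPlayer.id) != Replay.human_players
).all()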
Example #5
def fix_small_replays():
    """ Finds replays with a tiny filesize and re-adds them to the GC queue (we probably downloaded a error page.
    """
    _error = "SMALL_REPLAY"

    # FIXME: This step will take longer and longer the more replays we store.  It would be more efficient to store
    # the filesize in our local database after a file has been archived, and then directly query the database.
    small_replay_files = {
        replay_file.key[8:-8]: replay_file.size
        for replay_file in dotabank_bucket.list()
        if replay_file.key[:8] == "replays/" and replay_file.size < (1024 * 1024)
    }

    small_replays = db.session.query(
        Replay, db.func.count(ReplayAutoFix.id)
    ).filter(
        Replay.state == "ARCHIVED",                 # Ignore non-archived files (they shouldn't be in S3 if they aren't archived, but vOv)
        Replay.id.in_(small_replay_files.keys()),   # Check the replays that the S3 call above has flagged as small
        ReplayAutoFix.replay_id == Replay.id
    ).group_by(
        ReplayAutoFix.replay_id
    ).having(
        db.func.count(ReplayAutoFix.id) < app.config.get('MAX_REPLAY_FIX_ATTEMPTS')  # Ignore replays that have exceeded max fix attempts
    ).all()

    for replay, fix_attempts in small_replays:
        if not should_fix_be_attempted(
                replay.id,
                _error,
                extra={'file_size': small_replay_files[unicode(replay.id)]}):
            continue

        print("Replay {} has a small file stored on S3 ({} bytes). Re-adding to DL queue.".format(
            replay.id, small_replay_files[unicode(replay.id)]))
        replay.state = "WAITING_GC"  # Switch state back to WAITING_GC.
        Replay.add_dl_job(replay)
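The FIXME above points at the obvious optimisation: record each file's size in the local database when the replay is archived, then query it directly instead of listing the whole bucket. A sketch, assuming a hypothetical file_size column on Replay (not present in the source):

# file_size is a hypothetical column, populated at archive time with the
# uploaded key's size; it is not part of the original Replay model.
small_replays = Replay.query.filter(
    Replay.state == "ARCHIVED",
    Replay.file_size < (1024 * 1024)
).all()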
Example #6
def fix_missing_files():
    """ Finds replays set as "ARCHIVED" that are missing a corresponding file stored in S3. Re-adds them
        to the GC queue. """
    _error = "MISSING_S3_FILE"

    all_s3_replay_ids = [
        replay_file.key[8:-8] for replay_file in dotabank_bucket.list()
        if replay_file.key[:8] == "replays/"
    ]
    archived_replays_no_file = Replay.query.filter(
        Replay.state == 'ARCHIVED', Replay.id.notin_(all_s3_replay_ids)).all()

    for replay in archived_replays_no_file:
        if not should_fix_be_attempted(replay.id, _error):
            # Tag as "DOWNLOAD_ERROR" because we can't fix this - the problem is entirely in Valve's (or their partners') domain.
            replay.state = "DOWNLOAD_ERROR"
            replay.local_uri = None
            replay.dl_done_time = None
            db.session.add(replay)
            db.session.commit()
            continue

        print ("Replay {} is \"ARCHIVED\" but does not have a file stored on S3. Re-adding to GC queue.".format(
            replay.id
        ))
        replay.state = "WAITING_DOWNLOAD"  # Switch state back to WAITING_DOWNLOAD.
        Replay.add_dl_job(replay)
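One caveat with Replay.id.notin_(all_s3_replay_ids): the generated SQL embeds every S3 replay ID in a single NOT IN list, which grows with the bucket. Because "not in the whole set" is the same as "not in any chunk of it", the condition can be split into AND-ed chunks if the statement ever hits size limits; a sketch:

# NOT IN distributes over chunks as an AND, so splitting the ID list into
# batches produces an equivalent (if still large) query.
CHUNK = 1000
chunked = [Replay.id.notin_(all_s3_replay_ids[i:i + CHUNK])
           for i in range(0, len(all_s3_replay_ids), CHUNK)]
archived_replays_no_file = Replay.query.filter(
    Replay.state == 'ARCHIVED', db.and_(*chunked)).all()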
Example #7
    def small_replay_exodus(self):
        small_replay_files = {
            replay_file.key[8:-8]: replay_file.size
            for replay_file in dotabank_bucket.list()
            if replay_file.key[:8] == "replays/" and replay_file.size < (1024 * 1024)
        }
        small_replays = Replay.query.filter(
            Replay.id.in_(small_replay_files.keys())).all()

        replays_removed = []  # IDs of removed replays
        for replay in small_replays:
            # Save local URI so we can remove the file from S3 after we've updated the database.
            local_uri = replay.local_uri

            # Clean up metadata associated with an archived replay.
            replay.dl_done_time = None
            replay.local_uri = None
            replay.state = "WAITING_DOWNLOAD"

            # Save new state to the database.
            db.session.add(replay)
            db.session.commit()

            # Remove bad file from S3.
            dotabank_bucket.delete_key(local_uri or "replays/{}.dem.bz2".format(replay.id))

            # Add a new download job
            Replay.add_dl_job(replay)

            # Note that we've done things to this replay.
            replays_removed.append(replay.id)

        return jsonify(
            success=True,
            replays_removed=replays_removed
        )
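For reference, a successful call responds with a JSON body shaped like the following (the IDs are illustrative):

{"success": true, "replays_removed": [123456789, 234567890]}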
Example #8
    def index(self):
        """ Renders a list of replays which are atypical.

        human_players_discrepancy: Replays whose human_players property doesn't match the count of ReplayPlayer
        objects we have in our database.

        replay_available_download_error: Replays which are available to download, but that our download script failed to
        retrieve.

        replay_waiting_download_over24hrs: Replays which have been waiting to be downloaded for over 24 hrs.
        """
        human_players_discrepancy = [
            x for x in db.engine.execute(text("""
                SELECT
                    r.id,
                    r.human_players,
                    count(rp.id) as player_count
                FROM {replay_table} r
                LEFT JOIN {player_table} rp ON rp.replay_id = r.id
                WHERE
                    rp.id is NULL or
                    rp.account_id is not NULL  # Exclude bots from count (though there's the chance we have duplicate entries for bots? fack)
                GROUP BY r.id
            """.format(
                replay_table=Replay.__tablename__,
                player_table=ReplayPlayer.__tablename__)))
            if x.player_count != x.human_players
        ]

        replay_available_download_error = Replay.query.filter(
            Replay.replay_state == "REPLAY_AVAILABLE",
            Replay.state == "DOWNLOAD_ERROR").all()

        replay_waiting_download_over24hrs = Replay.query.filter(
            Replay.state == "WAITING_DOWNLOAD",
            Replay.gc_done_time <= (datetime.utcnow() - timedelta(hours=24))  # Over 24 hrs ago
        ).all()

        small_replay_files = {
            replay_file.key[8:-8]: replay_file.size
            for replay_file in dotabank_bucket.list()
            if replay_file.key[:8] == "replays/" and replay_file.size < (1024 * 1024)
        }
        small_replays = Replay.query.filter(
            Replay.id.in_(small_replay_files.keys())).all()

        all_s3_replay_ids = [
            replay_file.key[8:-8] for replay_file in dotabank_bucket.list()
            if replay_file.key[:8] == "replays/"
        ]
        archived_replays_no_file = Replay.query.filter(
            Replay.state == 'ARCHIVED',
            Replay.id.notin_(all_s3_replay_ids)).all()

        return self.render(
            'admin/atypical_replays.html',
            human_players_discrepancy=human_players_discrepancy,
            replay_available_download_error=replay_available_download_error,
            replay_waiting_download_over24hrs=replay_waiting_download_over24hrs,
            small_replays=small_replays,
            small_replay_files=small_replay_files,
            archived_replays_no_file=archived_replays_no_file)
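index() walks the bucket listing twice, once for small_replay_files and once for all_s3_replay_ids. Both are derived from the same listing, so a single pass would halve the S3 round trips; a sketch:

# Materialise the listing once, then derive both views from it.
replay_keys = [replay_file for replay_file in dotabank_bucket.list()
               if replay_file.key[:8] == "replays/"]
all_s3_replay_ids = [replay_file.key[8:-8] for replay_file in replay_keys]
small_replay_files = {
    replay_file.key[8:-8]: replay_file.size
    for replay_file in replay_keys
    if replay_file.size < (1024 * 1024)
}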