Example 1
def fix_missing_files():
    """ Finds replays set as "ARCHIVED" that are missing a corresponding file stored in S3. Re-adds them
        to GC queue. """
    _error = "MISSING_S3_FILE"

    all_s3_replay_ids = [
        replay_file.key[8:-8] for replay_file in dotabank_bucket.list()
        if replay_file.key[:8] == "replays/"
    ]
    archived_replays_no_file = Replay.query.filter(
        Replay.state == 'ARCHIVED', Replay.id.notin_(all_s3_replay_ids)).all()

    for replay in archived_replays_no_file:
        if not should_fix_be_attempted(replay.id, _error):
            # Tag as "DOWNLOAD_ERROR" because we can't fix this - the problem is entirely in Valve (or their partners) domain.
            replay.state = "DOWNLOAD_ERROR"
            replay.local_uri = None
            replay.dl_done_time = None
            db.session.add(replay)
            db.session.commit()
            continue

        print(
            "Replay {} is \"ARCHIVED\" but does not have a file stored on S3. Re-adding to GC queue."
            .format(replay.id))
        replay.state = "WAITING_DOWNLOAD"  # Switch state back to WAITING_DOWNLOAD.
        Replay.add_dl_job(replay)
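
The gate above calls should_fix_be_attempted, which none of these examples define. Below is a minimal sketch of what such a helper might look like, assuming (as the query in Example 5 suggests) that past attempts are recorded as ReplayAutoFix rows and capped by the MAX_REPLAY_FIX_ATTEMPTS config value; the project's real implementation may differ.

# Illustrative sketch only - not the project's actual helper. It assumes ReplayAutoFix
# rows record past fix attempts and MAX_REPLAY_FIX_ATTEMPTS caps them, as the query in
# Example 5 implies. The constructor arguments and the `extra` handling are hypothetical.
def should_fix_be_attempted(replay_id, error, extra=None):
    attempts = ReplayAutoFix.query.filter(
        ReplayAutoFix.replay_id == replay_id).count()
    if attempts >= app.config.get('MAX_REPLAY_FIX_ATTEMPTS'):
        return False  # Give up; callers then tag the replay as DOWNLOAD_ERROR.

    # Record this attempt so repeated failures eventually hit the cap.
    db.session.add(ReplayAutoFix(replay_id=replay_id, error=error))
    db.session.commit()
    return True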
Example 2
    def small_replay_exodus(self):
        small_replay_files = {replay_file.key[8:-8]: replay_file.size for replay_file in dotabank_bucket.list() if replay_file.key[:8] == "replays/" and replay_file.size < (1024 * 1024)}
        small_replays = Replay.query.filter(Replay.id.in_(small_replay_files.keys())).all()

        replays_removed = []  # IDs of removed replays
        for replay in small_replays:
            # Save local URI so we can remove the file from S3 after we've changed the database.
            local_uri = replay.local_uri

            # Clean up metadata associated with an archived replay.
            replay.dl_done_time = None
            replay.local_uri = None
            replay.state = "WAITING_DOWNLOAD"

            # Save the new state to the database
            db.session.add(replay)
            db.session.commit()

            # Remove bad file from S3.
            dotabank_bucket.delete_key(local_uri or "replays/{}.dem.bz2".format(replay.id))

            # Add a new download job
            Replay.add_dl_job(replay)

            # Note that we've done things to this replay.
            replays_removed.append(replay.id)

        return jsonify(
            success=True,
            replays_removed=replays_removed
        )
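
Both versions of small_replay_exodus (and the other bucket listings in these examples) rely on the slice key[8:-8], which assumes keys of the form replays/<match id>.dem.bz2: the prefix "replays/" and the suffix ".dem.bz2" are each exactly 8 characters long. A quick illustration of that assumed layout:

# Assumed S3 key layout: "replays/<match id>.dem.bz2" (8-character prefix and suffix).
key = "replays/123456789.dem.bz2"  # hypothetical key, for illustration only
assert key[:8] == "replays/"       # the prefix test used when listing the bucket
assert key[8:-8] == "123456789"    # the replay ID compared against database rows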
Example 3
def fix_long_waiting_download():
    """  Finds replays that have been "WAITING_DOWNLOAD" for over 24 hours, and re-adds them to the GC queue. """
    _error = "LONGEST_WAIT_OF_MY_LIFE"

    replay_waiting_download_over24hrs = Replay.query.filter(
        Replay.state == "WAITING_DOWNLOAD",
        Replay.gc_done_time <=
        (datetime.utcnow() - timedelta(hours=24))  # Over 24 hrs ago
    ).all()

    for replay in replay_waiting_download_over24hrs:
        if not should_fix_be_attempted(replay.id, _error):
            # Tag as "DOWNLOAD_ERROR" because we can't fix this - the problem is entirely in Valve (or their partners) domain.
            replay.state = "DOWNLOAD_ERROR"
            replay.local_uri = None
            replay.dl_done_time = None
            db.session.add(replay)
            db.session.commit()
            continue

        print(
            "Replay {} has been \"WAITING_DOWNLOAD\" for over 24 hours. Re-adding to DL queue."
            .format(replay.id))
        replay.state = "WAITING_DOWNLOAD"  # Switch state back to WAITING_DOWNLOAD.
        Replay.add_dl_job(replay)
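
The staleness cutoff in both versions of fix_long_waiting_download is an ordinary datetime comparison; the snippet assumes the standard-library imports shown below.

# Assumed imports for the 24-hour cutoff used above.
from datetime import datetime, timedelta

cutoff = datetime.utcnow() - timedelta(hours=24)
# Replays whose gc_done_time is at or before `cutoff` have been waiting over 24 hours.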
Example 4
    def small_replay_exodus(self):
        small_replay_files = {
            replay_file.key[8:-8]: replay_file.size
            for replay_file in dotabank_bucket.list()
            if replay_file.key[:8] == "replays/" and replay_file.size < (1024 *
                                                                         1024)
        }
        small_replays = Replay.query.filter(
            Replay.id.in_(small_replay_files.keys())).all()

        replays_removed = []  # IDs of removed replays
        for replay in small_replays:
            # Save local URI so we can remove the file from S3 after we've changed the database.
            local_uri = replay.local_uri

            # Clean up metadata associated with an archived replay.
            replay.dl_done_time = None
            replay.local_uri = None
            replay.state = "WAITING_DOWNLOAD"

            # Save the new state to the database
            db.session.add(replay)
            db.session.commit()

            # Remove bad file from S3.
            dotabank_bucket.delete_key(
                local_uri or "replays/{}.dem.bz2".format(replay.id))

            # Add a new download job
            Replay.add_dl_job(replay)

            # Note that we've done things to this replay.
            replays_removed.append(replay.id)

        return jsonify(success=True, replays_removed=replays_removed)
Example 5
def fix_small_replays():
    """ Finds replays with a tiny filesize and re-adds them to the GC queue (we probably downloaded a error page.
    """
    _error = "SMALL_REPLAY"

    # FIXME: This step will take longer and longer the more replays we store.  It would be more efficient to store
    # the filesize in our local database after a file has been archived, and then directly query the database.
    small_replay_files = {replay_file.key[8:-8]: replay_file.size for replay_file in dotabank_bucket.list()
                          if replay_file.key[:8] == "replays/" and replay_file.size < (1024 * 1024)}

    small_replays = db.session.query(Replay, db.func.count(ReplayAutoFix.id)).filter(
        Replay.state == "ARCHIVED",                 # Ignore non-archived files (they shouldnt be in s3 if they aren't archived, but vOv)
        Replay.id.in_(small_replay_files.keys()),   # Check the replays that the S3 call above has flagged as small
        ReplayAutoFix.replay_id == Replay.id
    ).group_by(
        ReplayAutoFix.replay_id
    ).having(
        db.func.count(ReplayAutoFix.id) < app.config.get('MAX_REPLAY_FIX_ATTEMPTS')  # Ignore replays that have exceeded max fix attempts
    ).all()

    for replay, fix_attempts in small_replays:
        if not should_fix_be_attempted(replay.id, _error, extra={
            'file_size': small_replay_files[unicode(replay.id)]
        }):
            continue

        print ("Replay {} has a small file stored on s3 ({} bytes).  Re-adding to DL queue.".format(
            replay.id,
            small_replay_files[unicode(replay.id)]
        ))
        replay.state = "WAITING_GC"  # Switch state back to WAITING_GC.
        Replay.add_dl_job(replay)
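
The FIXME above notes that listing the entire bucket gets slower as the archive grows and suggests recording the file size locally when a replay is archived. A hedged sketch of that suggestion, assuming a hypothetical file_size column on Replay (no such column appears in these examples):

# Sketch of the FIXME's suggestion, not existing code. Replay.file_size is a
# hypothetical column assumed to be populated when the replay is archived to S3.
small_replays = Replay.query.filter(
    Replay.state == "ARCHIVED",
    Replay.file_size < 1024 * 1024  # same "under 1 MiB" threshold as above
).all()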
Example 6
def fix_long_waiting_download():
    """  Finds replays that have been "WAITING_DOWNLOAD" for over 24 hours, and re-adds them to the GC queue. """
    _error = "LONGEST_WAIT_OF_MY_LIFE"

    replay_waiting_download_over24hrs = Replay.query.filter(
        Replay.state == "WAITING_DOWNLOAD",
        Replay.gc_done_time <= (datetime.utcnow() - timedelta(hours=24))  # Over 24 hrs ago
    ).all()

    for replay in replay_waiting_download_over24hrs:
        if not should_fix_be_attempted(replay.id, _error):
            # Tag as "DOWNLOAD_ERROR" because we can't fix this - the problem is entirely in Valve (or their partners) domain.
            replay.state = "DOWNLOAD_ERROR"
            replay.local_uri = None
            replay.dl_done_time = None
            db.session.add(replay)
            db.session.commit()
            continue

        print ("Replay {} has been \"WAITING_DOWNLOAD\" for over 24 hours. Re-adding to DL queue.".format(
            replay.id
        ))
        replay.state = "WAITING_DOWNLOAD"  # Switch state back to WAITING_DOWNLOAD.
        Replay.add_dl_job(replay)
Example 7
    def requeue_waiting_downloads(self):
        waiting_downloads = Replay.query.filter(
            Replay.state == "WAITING_DOWNLOAD").all()

        done = []
        for replay in waiting_downloads:
            if Replay.add_dl_job(replay):
                done.append(replay.id)

        return jsonify(success=True, readded=done)
Example 8
    def requeue_waiting_downloads(self):
        waiting_downloads = Replay.query.filter(Replay.state == "WAITING_DOWNLOAD").all()

        done = []
        for replay in waiting_downloads:
            if Replay.add_dl_job(replay):
                done.append(replay.id)

        return jsonify(
            success=True,
            readded=done
        )
Example 9
def fix_small_replays():
    """ Finds replays with a tiny filesize and re-adds them to the GC queue (we probably downloaded a error page.
    """
    _error = "SMALL_REPLAY"

    # FIXME: This step will take longer and longer the more replays we store.  It would be more efficient to store
    # the filesize in our local database after a file has been archived, and then directly query the database.
    small_replay_files = {
        replay_file.key[8:-8]: replay_file.size
        for replay_file in dotabank_bucket.list()
        if replay_file.key[:8] == "replays/" and replay_file.size < (1024 *
                                                                     1024)
    }

    small_replays = db.session.query(
        Replay, db.func.count(ReplayAutoFix.id)
    ).filter(
        Replay.state ==
        "ARCHIVED",  # Ignore non-archived files (they shouldnt be in s3 if they aren't archived, but vOv)
        Replay.id.in_(small_replay_files.keys(
        )),  # Check the replays that the S3 call above has flagged as small
        ReplayAutoFix.replay_id == Replay.id).group_by(
            ReplayAutoFix.replay_id).having(
                db.func.count(ReplayAutoFix.id) < app.config.get(
                    'MAX_REPLAY_FIX_ATTEMPTS'
                )  # Ignore replays that have exceeded max fix attempts
            ).all()

    for replay, fix_attempts in small_replays:
        if not should_fix_be_attempted(
                replay.id,
                _error,
                extra={'file_size': small_replay_files[unicode(replay.id)]}):
            continue

        print(
            "Replay {} has a small file stored on s3 ({} bytes).  Re-adding to DL queue."
            .format(replay.id, small_replay_files[unicode(replay.id)]))
        replay.state = "WAITING_GC"  # Switch state back to WAITING_GC.
        Replay.add_dl_job(replay)
Example 10
def fix_missing_files():
    """ Finds replays set as "ARCHIVED" that are missing a corresponding file stored in S3. Re-adds them
        to GC queue. """
    _error = "MISSING_S3_FILE"

    all_s3_replay_ids = [replay_file.key[8:-8] for replay_file in dotabank_bucket.list() if replay_file.key[:8] == "replays/"]
    archived_replays_no_file = Replay.query.filter(Replay.state == 'ARCHIVED', Replay.id.notin_(all_s3_replay_ids)).all()

    for replay in archived_replays_no_file:
        if not should_fix_be_attempted(replay.id, _error):
            # Tag as "DOWNLOAD_ERROR" because we can't fix this - the problem is entirely in Valve (or their partners) domain.
            replay.state = "DOWNLOAD_ERROR"
            replay.local_uri = None
            replay.dl_done_time = None
            db.session.add(replay)
            db.session.commit()
            continue

        print ("Replay {} is \"ARCHIVED\" but does not have a file stored on S3. Re-adding to GC queue.".format(
            replay.id
        ))
        replay.state = "WAITING_DOWNLOAD"  # Switch state back to WAITING_DOWNLOAD.
        Replay.add_dl_job(replay)