Example #1
def fix_long_waiting_download():
    """ Finds replays that have been "WAITING_DOWNLOAD" for over 24 hours and re-adds them to the download queue. """
    _error = "LONGEST_WAIT_OF_MY_LIFE"

    replay_waiting_download_over24hrs = Replay.query.filter(
        Replay.state == "WAITING_DOWNLOAD",
        Replay.gc_done_time <=
        (datetime.utcnow() - timedelta(hours=24))  # Over 24 hrs ago
    ).all()

    for replay in replay_waiting_download_over24hrs:
        if not should_fix_be_attempted(replay.id, _error):
            # Tag as "DOWNLOAD_ERROR" because we can't fix this - the problem is entirely within Valve's (or their partners') domain.
            replay.state = "DOWNLOAD_ERROR"
            replay.local_uri = None
            replay.dl_done_time = None
            db.session.add(replay)
            db.session.commit()
            continue

        print(
            "Replay {} has been \"WAITING_DOWNLOAD\" for over 24 hours. Re-adding to DL queue."
            .format(replay.id))
        replay.state = "WAITING_DOWNLOAD"  # Switch state back to WAITING_DOWNLOAD.
        Replay.add_dl_job(replay)
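The should_fix_be_attempted helper these examples call is never shown in the listing. Below is a minimal sketch of what it might look like, assuming it records each attempt as a ReplayAutoFix row and gives up once MAX_REPLAY_FIX_ATTEMPTS is reached; the constructor and field names are assumptions, not the project's actual implementation.

def should_fix_be_attempted(replay_id, error, extra=None):
    """ Hypothetical sketch: record this fix attempt and return False once the
    replay has already been attempted MAX_REPLAY_FIX_ATTEMPTS times. """
    attempts = ReplayAutoFix.query.filter(
        ReplayAutoFix.replay_id == replay_id
    ).count()
    if attempts >= app.config['MAX_REPLAY_FIX_ATTEMPTS']:
        return False

    # Log the attempt so repeated failures eventually stop being retried.
    fix = ReplayAutoFix()
    fix.replay_id = replay_id
    fix.error = error                           # e.g. "LONGEST_WAIT_OF_MY_LIFE"
    fix.extra = repr(extra) if extra else None  # Storage format is a guess.
    db.session.add(fix)
    db.session.commit()
    return True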
Example #2
def fix_small_replays():
    """ Finds replays with a tiny filesize and re-adds them to the GC queue (we probably downloaded an error page).
    """
    _error = "SMALL_REPLAY"

    # FIXME: This step will take longer and longer the more replays we store.  It would be more efficient to store
    # the filesize in our local database after a file has been archived, and then directly query the database.
    small_replay_files = {replay_file.key[8:-8]: replay_file.size for replay_file in dotabank_bucket.list()
                          if replay_file.key[:8] == "replays/" and replay_file.size < (1024 * 1024)}

    small_replays = db.session.query(Replay, db.func.count(ReplayAutoFix.id)).filter(
        Replay.state == "ARCHIVED",                 # Ignore non-archived files (they shouldn't be in S3 if they aren't archived, but vOv)
        Replay.id.in_(small_replay_files.keys()),   # Check the replays that the S3 call above has flagged as small
        ReplayAutoFix.replay_id == Replay.id
    ).group_by(
        ReplayAutoFix.replay_id
    ).having(
        db.func.count(ReplayAutoFix.id) < app.config.get('MAX_REPLAY_FIX_ATTEMPTS')  # Ignore replays that have exceeded max fix attempts
    ).all()

    for replay, fix_attempts in small_replays:
        if not should_fix_be_attempted(replay.id, _error, extra={
            'file_size': small_replay_files[unicode(replay.id)]
        }):
            continue

        print("Replay {} has a small file stored on s3 ({} bytes).  Re-adding to DL queue.".format(
            replay.id,
            small_replay_files[unicode(replay.id)]
        ))
        replay.state = "WAITING_GC"  # Switch state back to WAITING_GC.
        Replay.add_dl_job(replay)
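The FIXME above suggests recording the archived file's size locally instead of listing the whole S3 bucket on every run. A rough sketch of that approach, assuming a hypothetical Replay.file_size column populated when a replay is archived (that column does not appear anywhere in these examples):

def fix_small_replays_from_db():
    """ Hypothetical variant of fix_small_replays that queries a locally
    stored file size rather than listing every key in S3. """
    small_replays = Replay.query.filter(
        Replay.state == "ARCHIVED",
        Replay.file_size != None,          # Assumed column; NULL means size unknown.
        Replay.file_size < (1024 * 1024)   # Same < 1 MiB threshold as above.
    ).all()

    for replay in small_replays:
        if not should_fix_be_attempted(replay.id, "SMALL_REPLAY",
                                       extra={'file_size': replay.file_size}):
            continue
        replay.state = "WAITING_GC"
        Replay.add_dl_job(replay)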
Example #3
    def small_replay_exodus(self):
        small_replay_files = {
            replay_file.key[8:-8]: replay_file.size
            for replay_file in dotabank_bucket.list()
            if replay_file.key[:8] == "replays/" and replay_file.size < (1024 *
                                                                         1024)
        }
        small_replays = Replay.query.filter(
            Replay.id.in_(small_replay_files.keys())).all()

        replays_removed = []  # IDs of removed replays
        for replay in small_replays:
            # Save local URI so we can remove the file from S3 after we've changed the database.
            local_uri = replay.local_uri

            # Clean up metadata associated with an archived replay.
            replay.dl_done_time = None
            replay.local_uri = None
            replay.state = "WAITING_DOWNLOAD"

            # Save the new state to the database.
            db.session.add(replay)
            db.session.commit()

            # Remove bad file from S3.
            dotabank_bucket.delete_key(
                local_uri or "replays/{}.dem.bz2".format(replay.id))

            # Add a new download job
            Replay.add_dl_job(replay)

            # Note that we've done things to this replay.
            replays_removed.append(replay.id)

        return jsonify(success=True, replays_removed=replays_removed)
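The key[8:-8] slicing used above (and in the delete_key call) relies on archive keys following the replays/{match_id}.dem.bz2 pattern: the slice strips the 8-character "replays/" prefix and the 8-character ".dem.bz2" suffix, leaving just the match ID. For example:

key = "replays/123456789.dem.bz2"   # Illustrative key, not a real match.
assert key[:8] == "replays/"        # len("replays/") == 8
assert key[8:-8] == "123456789"     # len(".dem.bz2") == 8; what remains is the match ID.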
Example #4
def fix_long_waiting_download():
    """ Finds replays that have been "WAITING_DOWNLOAD" for over 24 hours and re-adds them to the download queue. """
    _error = "LONGEST_WAIT_OF_MY_LIFE"

    replay_waiting_download_over24hrs = Replay.query.filter(
        Replay.state == "WAITING_DOWNLOAD",
        Replay.gc_done_time <= (datetime.utcnow() - timedelta(hours=24))  # Over 24 hrs ago
    ).all()

    for replay in replay_waiting_download_over24hrs:
        if not should_fix_be_attempted(replay.id, _error):
            # Tag as "DOWNLOAD_ERROR" because we can't fix this - the problem is entirely within Valve's (or their partners') domain.
            replay.state = "DOWNLOAD_ERROR"
            replay.local_uri = None
            replay.dl_done_time = None
            db.session.add(replay)
            db.session.commit()
            continue

        print("Replay {} has been \"WAITING_DOWNLOAD\" for over 24 hours. Re-adding to DL queue.".format(
            replay.id
        ))
        replay.state = "WAITING_DOWNLOAD"  # Switch state back to WAITING_DOWNLOAD.
        Replay.add_dl_job(replay)
Example #5
    def small_replay_exodus(self):
        small_replay_files = {replay_file.key[8:-8]: replay_file.size for replay_file in dotabank_bucket.list() if replay_file.key[:8] == "replays/" and replay_file.size < (1024 * 1024)}
        small_replays = Replay.query.filter(Replay.id.in_(small_replay_files.keys())).all()

        replays_removed = []  # IDs of removed replays
        for replay in small_replays:
            # Save local URI so we can remove the file from S3 after we've changed the database.
            local_uri = replay.local_uri

            # Clean up metadata associated with an archived replay.
            replay.dl_done_time = None
            replay.local_uri = None
            replay.state = "WAITING_DOWNLOAD"

            # Save the new state to the database.
            db.session.add(replay)
            db.session.commit()

            # Remove bad file from S3.
            dotabank_bucket.delete_key(local_uri or "replays/{}.dem.bz2".format(replay.id))

            # Add a new download job
            Replay.add_dl_job(replay)

            # Note that we've done things to this replay.
            replays_removed.append(replay.id)

        return jsonify(
            success=True,
            replays_removed=replays_removed
        )
Example #6
def fix_missing_files():
    """ Finds replays set as "ARCHIVED" that are missing a corresponding file stored in S3. Re-adds them
        to the download queue. """
    _error = "MISSING_S3_FILE"

    all_s3_replay_ids = [
        replay_file.key[8:-8] for replay_file in dotabank_bucket.list()
        if replay_file.key[:8] == "replays/"
    ]
    archived_replays_no_file = Replay.query.filter(
        Replay.state == 'ARCHIVED', Replay.id.notin_(all_s3_replay_ids)).all()

    for replay in archived_replays_no_file:
        if not should_fix_be_attempted(replay.id, _error):
            # Tag as "DOWNLOAD_ERROR" because we can't fix this - the problem is entirely within Valve's (or their partners') domain.
            replay.state = "DOWNLOAD_ERROR"
            replay.local_uri = None
            replay.dl_done_time = None
            db.session.add(replay)
            db.session.commit()
            continue

        print(
            "Replay {} is \"ARCHIVED\" but does not have a file stored on S3. Re-adding to DL queue."
            .format(replay.id))
        replay.state = "WAITING_DOWNLOAD"  # Switch state back to WAITING_DOWNLOAD.
        Replay.add_dl_job(replay)
Example #7
def fix_incorrect_player_counts():
    """ Finds and attempts to fix all replays where `Replay.human_players` does not match the quantity of `ReplayPlayer`
    objects we have in the database.

    Attempts to fix by deleting all ReplayPlayer objects and re-adding the replay to the job queue.
    """
    _error = "PLAYER_COUNT_MISMATCH"

    human_players_discrepancy = db.engine.execute(
        text("""
            SELECT
              r.id,
              r.human_players,
              (
                SELECT count(*) FROM {player_table} rp
                WHERE rp.replay_id=r.id
                and (rp.id is NULL or rp.account_id is not NULL) # Exclude bots from count (though there's the chance we have duplicate entries for bots? fack)
              ) as player_count,
              (SELECT count(*) FROM {auto_fix_table} raf WHERE raf.replay_id=r.id) as fix_attempts
            FROM {replay_table} r
            WHERE
              r.state != "GC_ERROR"
            HAVING
              r.human_players != player_count
              and fix_attempts <= {max_fix_attempts}

        """.format(
            replay_table=Replay.__tablename__,
            player_table=ReplayPlayer.__tablename__,
            auto_fix_table=ReplayAutoFix.__tablename__,
            max_fix_attempts=app.config['MAX_REPLAY_FIX_ATTEMPTS'])
        )
    )

    for replay_id, human_count, player_count, auto_fix_attempts in human_players_discrepancy:
        if not should_fix_be_attempted(replay_id, _error, {'human_count': human_count, 'player_count': player_count}):
            continue

        print("Replay {} has a human_count of {}, but we have {} player objects for this replay.".format(
            replay_id,
            human_count,
            player_count
        ))
        replay = Replay.query.filter(Replay.id == replay_id).one()
        print("\tDeleting ReplayPlayer objects")
        for player in replay.players:
            db.session.delete(player)
        db.session.commit()

        print("\tRe-adding replay to GC queue")
        Replay.add_gc_job(replay)
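The raw SQL above could also be expressed through the ORM. Below is a rough, untested equivalent using correlated scalar subqueries; the bot-exclusion condition is simplified to account_id IS NOT NULL, which is an interpretation of the original query's intent rather than code from the project.

player_count = db.session.query(db.func.count(ReplayPlayer.id)).filter(
    ReplayPlayer.replay_id == Replay.id,
    ReplayPlayer.account_id != None  # Exclude bots, as in the SQL comment above.
).correlate(Replay).as_scalar()

fix_attempts = db.session.query(db.func.count(ReplayAutoFix.id)).filter(
    ReplayAutoFix.replay_id == Replay.id
).correlate(Replay).as_scalar()

human_players_discrepancy = db.session.query(
    Replay.id, Replay.human_players, player_count, fix_attempts
).filter(
    Replay.state != "GC_ERROR",
    Replay.human_players != player_count,
    fix_attempts <= app.config['MAX_REPLAY_FIX_ATTEMPTS']
).all()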
Example #8
def fix_incorrect_player_counts():
    """ Finds and attempts to fix all replays where `Replay.human_players` does not match the quantity of `ReplayPlayer`
    objects we have in the database.

    Attempts to fix by deleting all ReplayPlayer objects and re-adding the replay to the job queue.
    """
    _error = "PLAYER_COUNT_MISMATCH"

    human_players_discrepancy = db.engine.execute(
        text("""
            SELECT
              r.id,
              r.human_players,
              (
                SELECT count(*) FROM {player_table} rp
                WHERE rp.replay_id=r.id
                and (rp.id is NULL or rp.account_id is not NULL) # Exclude bots from count (though there's the chance we have duplicate entries for bots? fack)
              ) as player_count,
              (SELECT count(*) FROM {auto_fix_table} raf WHERE raf.replay_id=r.id) as fix_attempts
            FROM {replay_table} r
            WHERE
              r.state != "GC_ERROR"
            HAVING
              r.human_players != player_count
              and fix_attempts <= {max_fix_attempts}

        """.format(replay_table=Replay.__tablename__,
                   player_table=ReplayPlayer.__tablename__,
                   auto_fix_table=ReplayAutoFix.__tablename__,
                   max_fix_attempts=app.config['MAX_REPLAY_FIX_ATTEMPTS'])))

    for replay_id, human_count, player_count, auto_fix_attempts in human_players_discrepancy:
        if not should_fix_be_attempted(replay_id, _error, {
                'human_count': human_count,
                'player_count': player_count
        }):
            continue

        print(
            "Replay {} has a human_count of {}, but we have {} player objects for this replay."
            .format(replay_id, human_count, player_count))
        replay = Replay.query.filter(Replay.id == replay_id).one()
        print("\tDeleting ReplayPlayer objects")
        for player in replay.players:
            db.session.delete(player)
        db.session.commit()

        print("\tRe-adding replay to GC queue")
        Replay.add_gc_job(replay)
Example #9
def archive_subscriber_matches():
    subscriptions = Subscription.get_valid_subscriptions()

    print "Found {} valid subscribers".format(len(subscriptions))
    for subscription in subscriptions:
        webapi_params = {
            "account_id": subscription.user_id,
            "date_min": None,
            "matches_requested": 100  # 100 Max
        }

        latest_match = SubscriptionLastMatch.query.\
            filter(SubscriptionLastMatch.user_id == subscription.user_id,
                   SubscriptionLastMatch.replay_found == True).\
            order_by(SubscriptionLastMatch.created_at.desc()).\
            first()

        if latest_match:
            webapi_params["date_min"] = latest_match.created_at_timestamp
        else:
            webapi_params["date_min"] = subscription.created_at_timestamp

        matches = steam.api.interface("IDOTA2Match_570").GetMatchHistory(
            **webapi_params).get("result")

        # Log this match check, as well as whether or not we found a match.
        last_match_log = SubscriptionLastMatch(subscription.user_id,
                                               len(matches.get("matches")) > 0)
        db.session.add(last_match_log)
        db.session.commit()

        print "Found {} matches for {}".format(len(matches.get("matches")),
                                               subscription.user_id)
        for match in matches.get("matches"):
            replay_exists = Replay.query.filter(
                Replay.id == match["match_id"]).count() > 0

            if not replay_exists:
                replay = Replay(match["match_id"])
                db.session.add(replay)
                db.session.commit()

                Replay.add_gc_job(replay)

                print "Added {} to database and job queue".format(
                    match["match_id"])
            else:
                print "Match {} already in database, skipping.".format(
                    match["match_id"])
Example #10
def process_match_list(matches):
    """ Iterates through a list of matches and checks whether we already have them in our database. If we do not,
    this code adds the match to our database and creates an associated GC job. """
    if len(matches) > 0:
        for match in matches:
            replay_exists = Replay.query.filter(Replay.id == match["match_id"]).count() > 0

            if not replay_exists:
                replay = Replay(match["match_id"])
                db.session.add(replay)
                db.session.commit()

                Replay.add_gc_job(replay)

                print "Added {} to database and job queue".format(match["match_id"])
            else:
                print "Match {} already in database, skipping.".format(match["match_id"])
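process_match_list expects the "matches" list from a GetMatchHistory response. A minimal usage sketch, assuming the same steam interface wiring as the other examples (some_account_id is a placeholder, not a value from the project):

result = steam.api.interface("IDOTA2Match_570").GetMatchHistory(
    account_id=some_account_id,   # Placeholder account ID.
    matches_requested=100
).get("result")

if result:
    process_match_list(result.get("matches", []))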
Example #11
    def requeue_waiting_downloads(self):
        waiting_downloads = Replay.query.filter(
            Replay.state == "WAITING_DOWNLOAD").all()

        done = []
        for replay in waiting_downloads:
            if Replay.add_dl_job(replay):
                done.append(replay.id)

        return jsonify(success=True, readded=done)
Example #12
def archive_subscriber_matches():
    subscriptions = Subscription.get_valid_subscriptions()

    print "Found {} valid subscribers".format(len(subscriptions))
    for subscription in subscriptions:
        webapi_params = {
            "account_id": subscription.user_id,
            "date_min": None,
            "matches_requested": 100  # 100 Max
        }

        latest_match = SubscriptionLastMatch.query.\
            filter(SubscriptionLastMatch.user_id == subscription.user_id,
                   SubscriptionLastMatch.replay_found == True).\
            order_by(SubscriptionLastMatch.created_at.desc()).\
            first()

        if latest_match:
            webapi_params["date_min"] = latest_match.created_at_timestamp
        else:
            webapi_params["date_min"] = subscription.created_at_timestamp

        matches = steam.api.interface("IDOTA2Match_570").GetMatchHistory(**webapi_params).get("result")

        # Log this match check, as well as whether or not we found a match.
        last_match_log = SubscriptionLastMatch(subscription.user_id, len(matches.get("matches")) > 0)
        db.session.add(last_match_log)
        db.session.commit()

        print "Found {} matches for {}".format(len(matches.get("matches")), subscription.user_id)
        for match in matches.get("matches"):
            replay_exists = Replay.query.filter(Replay.id == match["match_id"]).count() > 0

            if not replay_exists:
                replay = Replay(match["match_id"])
                db.session.add(replay)
                db.session.commit()

                Replay.add_gc_job(replay)

                print "Added {} to database and job queue".format(match["match_id"])
            else:
                print "Match {} already in database, skipping.".format(match["match_id"])
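The per-match loop above duplicates process_match_list (Example #10). A hedged refactor of the tail of the per-subscription loop could delegate to it instead; this is a sketch of the idea, not the project's code:

        matches = steam.api.interface("IDOTA2Match_570").GetMatchHistory(**webapi_params).get("result")
        found = matches.get("matches") if matches else []

        # Log this check, then hand de-duplication and job creation over to
        # process_match_list instead of repeating its loop here.
        last_match_log = SubscriptionLastMatch(subscription.user_id, len(found) > 0)
        db.session.add(last_match_log)
        db.session.commit()

        process_match_list(found)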
Example #13
def process_match_list(matches):
    """ Iterates through a list of matches and checks whether we already have them in our database. If we do not,
    this code adds the match to our database and creates an associated GC job. """
    if len(matches) > 0:
        for match in matches:
            replay_exists = Replay.query.filter(
                Replay.id == match["match_id"]).count() > 0

            if not replay_exists:
                replay = Replay(match["match_id"])
                db.session.add(replay)
                db.session.commit()

                Replay.add_gc_job(replay)

                print "Added {} to database and job queue".format(
                    match["match_id"])
            else:
                print "Match {} already in database, skipping.".format(
                    match["match_id"])
Example #14
    def requeue_waiting_downloads(self):
        waiting_downloads = Replay.query.filter(Replay.state == "WAITING_DOWNLOAD").all()

        done = []
        for replay in waiting_downloads:
            if Replay.add_dl_job(replay):
                done.append(replay.id)

        return jsonify(
            success=True,
            readded=done
        )
Example #15
def fix_small_replays():
    """ Finds replays with a tiny filesize and re-adds them to the GC queue (we probably downloaded an error page).
    """
    _error = "SMALL_REPLAY"

    # FIXME: This step will take longer and longer the more replays we store.  It would be more efficient to store
    # the filesize in our local database after a file has been archived, and then directly query the database.
    small_replay_files = {
        replay_file.key[8:-8]: replay_file.size
        for replay_file in dotabank_bucket.list()
        if replay_file.key[:8] == "replays/" and replay_file.size < (1024 *
                                                                     1024)
    }

    small_replays = db.session.query(
        Replay, db.func.count(ReplayAutoFix.id)
    ).filter(
        Replay.state == "ARCHIVED",                 # Ignore non-archived files (they shouldn't be in S3 if they aren't archived, but vOv)
        Replay.id.in_(small_replay_files.keys()),   # Check the replays that the S3 call above has flagged as small
        ReplayAutoFix.replay_id == Replay.id
    ).group_by(
        ReplayAutoFix.replay_id
    ).having(
        db.func.count(ReplayAutoFix.id) < app.config.get('MAX_REPLAY_FIX_ATTEMPTS')  # Ignore replays that have exceeded max fix attempts
    ).all()

    for replay, fix_attempts in small_replays:
        if not should_fix_be_attempted(
                replay.id,
                _error,
                extra={'file_size': small_replay_files[unicode(replay.id)]}):
            continue

        print(
            "Replay {} has a small file stored on s3 ({} bytes).  Re-adding to DL queue."
            .format(replay.id, small_replay_files[unicode(replay.id)]))
        replay.state = "WAITING_GC"  # Switch state back to WAITING_GC.
        Replay.add_dl_job(replay)
Example #16
def fix_missing_files():
    """ Finds replays set as "ARCHIVED" that are missing a corresponding file stored in S3. Re-adds them
        to the download queue. """
    _error = "MISSING_S3_FILE"

    all_s3_replay_ids = [replay_file.key[8:-8] for replay_file in dotabank_bucket.list() if replay_file.key[:8] == "replays/"]
    archived_replays_no_file = Replay.query.filter(Replay.state == 'ARCHIVED', Replay.id.notin_(all_s3_replay_ids)).all()

    for replay in archived_replays_no_file:
        if not should_fix_be_attempted(replay.id, _error):
            # Tag as "DOWNLOAD_ERROR" because we can't fix this - the problem is entirely within Valve's (or their partners') domain.
            replay.state = "DOWNLOAD_ERROR"
            replay.local_uri = None
            replay.dl_done_time = None
            db.session.add(replay)
            db.session.commit()
            continue

        print("Replay {} is \"ARCHIVED\" but does not have a file stored on S3. Re-adding to DL queue.".format(
            replay.id
        ))
        replay.state = "WAITING_DOWNLOAD"  # Switch state back to WAITING_DOWNLOAD.
        Replay.add_dl_job(replay)
Example #17
#!/srv/www/dotabank.com/dotabank-web/bin/python
"""
Download and archive TI4 matches
"""

from app import steam, db  # .info
from app.replays.models import Replay, ReplayPlayer

THE_INTERNATIONAL_4_ID = 600

matches = steam.api.interface("IDOTA2Match_570").GetMatchHistory(league_id=THE_INTERNATIONAL_4_ID).get("result")

if matches:
    for match in matches.get("matches"):
        replay_exists = Replay.query.filter(Replay.id == match["match_id"]).count() > 0

        if not replay_exists:
            replay = Replay(match["match_id"])
            db.session.add(replay)
            db.session.commit()

            Replay.add_gc_job(replay)

            print "Added {} to database and job queue".format(match["match_id"])
        else:
            print "Match {} already in database, skipping.".format(match["match_id"])
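GetMatchHistory returns at most 100 matches per call, so a league with more matches needs paging. Below is a rough sketch using the WebAPI's start_at_match_id parameter and results_remaining field; the pagination details are an assumption and should be checked against the actual API response, not taken from the project's code.

def fetch_all_league_matches(league_id):
    """ Hypothetical pager: collect every match for a league, 100 at a time. """
    api = steam.api.interface("IDOTA2Match_570")
    all_matches = []
    start_at = None

    while True:
        params = {"league_id": league_id, "matches_requested": 100}
        if start_at is not None:
            params["start_at_match_id"] = start_at
        result = api.GetMatchHistory(**params).get("result")
        if not result or not result.get("matches"):
            break

        page = result["matches"]
        all_matches.extend(page)

        if result.get("results_remaining", 0) <= 0:
            break
        start_at = page[-1]["match_id"] - 1  # Resume just past the last match seen.

    return all_matches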