Exemple #1
0
def main(parsed_args):
    """Rebuild the per-goal summary documents in the goal_stats collection.

    For every goal name in ``goals.goal_check_funcs`` this counts the goal
    documents that mention it, tallies the player of the first ``goals``
    entry of each returned document, and saves the total together with the
    leading players.  Names listed in ``AIs.names`` never appear among the
    leaders.

    parsed_args: parsed command line options; only ``incremental`` is read.
    When it is false, the goal_stats scanner and collection are cleared
    before anything is recomputed.
    """
    database = utils.get_mongo_database()
    goal_collection = database.goals
    stats_collection = database.goal_stats
    goal_names = goals.goal_check_funcs.keys()
    # Tally across all goals; it is filled in below but not read again here.
    overall_counts = collections.defaultdict(int)
    goals_scanner = incremental_scanner.IncrementalScanner('goals', database)
    stats_scanner = incremental_scanner.IncrementalScanner('goal_stats',
                                                           database)

    if not parsed_args.incremental:
        log.warning('resetting scanner and db')
        stats_scanner.reset()
        stats_collection.remove()

    log.info("Starting run: %s", stats_scanner.status_msg())

    # TODO: an "already up to date" early exit (comparing the max game ids of
    # the two scanners) used to live here; it doesn't work now that goal
    # calculation doesn't happen with a scanner.

    log.info('all_goals %s', goal_names)
    for goal_name in goal_names:
        log.info("Working on %s", goal_name)
        query = {'goals.goal_name': goal_name}
        projection = {'goals.player': 1, '_id': 0}
        matches = goal_collection.find(query, projection)
        total = matches.count()
        log.info("Found %d instances of %s", total, goal_name)

        per_player = collections.defaultdict(int)
        for document in matches:
            achiever = document['goals'][0]['player']
            per_player[achiever] += 1
            overall_counts[achiever] += 1

        # Highest count first; the sort is stable, so ties keep dict order.
        ranked = list(per_player.items())
        ranked.sort(key=lambda entry: entry[1], reverse=True)
        ranked_len = len(ranked)

        # Walk the ranking one group of tied counts at a time until at least
        # three leaders have been collected.
        top = []
        leader_total = 0
        position = 0
        while leader_total < 3 and position < ranked_len:
            tie_count = ranked[position][1]
            group_end = position + 1
            while group_end < ranked_len and ranked[group_end][1] == tie_count:
                group_end += 1
            tied_players = [name for name, _ in ranked[position:group_end]
                            if name not in AIs.names]
            position = group_end
            leader_total += len(tied_players)
            if tied_players:
                top.append((tied_players, tie_count))

        stats_collection.save({'_id': goal_name, 'count': total, 'top': top})

    stats_scanner.set_max_game_id(goals_scanner.get_max_game_id())
    stats_scanner.save()
    log.info("Ending run: %s", stats_scanner.status_msg())
def main():
    """Scrape every leaderboard that is missing from the local cache.

    Starting with the day after the last cached leaderboard, each date up to
    and including today is scraped in order: today's board from goko, earlier
    boards from councilroom, falling back to bggdl for dates up to 2013-01-01
    when councilroom does not answer with 200.  A 404 skips that day; any
    other non-200 status stops the run so it can be retried later.
    """
    utils.ensure_exists(output_directory)

    date_of_last_cached_leaderboard = get_date_of_last_cached_leaderboard()
    log.info('date of the last cached leaderboard is %s',
             date_of_last_cached_leaderboard)

    # "Today" is sampled once so that the loop bound and the choice of scrape
    # source cannot disagree if the run crosses midnight.
    today = datetime.date.today()
    bggdl_cutoff = datetime.date(2013, 1, 1)

    one_day_delta = datetime.timedelta(1)
    date = date_of_last_cached_leaderboard + one_day_delta

    while date <= today:
        log.info('Processing %s', date)

        if date == today:
            log.info('scraping from goko')
            status = run_scrape_function_with_retries(
                scrape_leaderboard_from_goko, date)
        else:
            log.info('scraping from councilroom')
            status = run_scrape_function_with_retries(
                scrape_leaderboard_from_councilroom, date)

            if status != 200 and date <= bggdl_cutoff:
                log.info('scraping from bggdl')
                status = run_scrape_function_with_retries(
                    scrape_leaderboard_from_bggdl, date)

        if status == 404:
            log.warning('file not found, so we will assume that it does not exist, and go to the next day')
        elif status != 200:
            # %s rather than %d: the retry helper can hand back the string
            # 'leaderboard updated', which %d cannot format.
            log.warning('Unexpected status of %s, please try again later', status)
            break

        date += one_day_delta
Exemple #3
0
def main(parsed_args):
    """Rebuild the per-goal summary documents in the goal_stats collection.

    For every goal name in ``goals.goal_check_funcs`` this counts the goal
    documents that mention it, tallies the player of the first ``goals``
    entry of each returned document, and saves the total together with the
    leading players.

    parsed_args: parsed command line options; only ``incremental`` is read.
    When it is false, the goal_stats scanner and collection are cleared
    before anything is recomputed.
    """
    mongo = utils.get_mongo_database()
    goals_coll = mongo.goals
    goal_stats_coll = mongo.goal_stats
    names_to_process = goals.goal_check_funcs.keys()
    # Tally across all goals; it is filled in below but not read again here.
    all_goal_counts = collections.defaultdict(int)
    source_scanner = incremental_scanner.IncrementalScanner('goals', mongo)
    target_scanner = incremental_scanner.IncrementalScanner('goal_stats',
                                                            mongo)

    if not parsed_args.incremental:
        log.warning('resetting scanner and db')
        target_scanner.reset()
        goal_stats_coll.remove()

    log.info("Starting run: %s", target_scanner.status_msg())

    # TODO: an "already up to date" early exit (comparing the max game ids of
    # the two scanners) used to live here; it doesn't work now that goal
    # calculation doesn't happen with a scanner.

    log.info('all_goals %s', names_to_process)
    for goal_name in names_to_process:
        log.info("Working on %s", goal_name)
        hits = goals_coll.find({'goals.goal_name': goal_name},
                               {'goals.player': 1, '_id': 0})
        total = hits.count()
        log.info("Found %d instances of %s", total, goal_name)

        counts = collections.defaultdict(int)
        for hit in hits:
            who = hit['goals'][0]['player']
            counts[who] += 1
            all_goal_counts[who] += 1

        # Highest count first; the sort is stable, so ties keep dict order.
        ranking = list(counts.items())
        ranking.sort(key=lambda pair: pair[1], reverse=True)

        # Consume the ranking one group of tied counts at a time until at
        # least three leaders have been collected.
        top = []
        collected = 0
        pos = 0
        while collected < 3 and pos < len(ranking):
            tie_count = ranking[pos][1]
            tied = []
            while pos < len(ranking) and ranking[pos][1] == tie_count:
                tied.append(ranking[pos][0])
                pos += 1
            collected += len(tied)
            top.append((tied, tie_count))

        goal_stats_coll.save({'_id': goal_name, 'count': total, 'top': top})

    target_scanner.set_max_game_id(source_scanner.get_max_game_id())
    target_scanner.save()
    log.info("Ending run: %s", target_scanner.status_msg())
Exemple #4
0
def main(parsed_args):
    """Scan unprocessed games for buy data and store the combined statistics.

    parsed_args: parsed command line options; ``incremental`` and
    ``max_games`` are read.  Without ``incremental`` the scanner state and
    the buys collection are discarded first; with it, the previously stored
    statistics are merged into the freshly scanned ones before saving.
    """
    # Timing and size snapshots are taken but not reported by this function.
    started_at = time.time()
    database = utils.get_mongo_database()
    game_collection = database.games

    fresh_stats = DeckBuyStats()

    scanner = incremental_scanner.IncrementalScanner(BUYS_COL_NAME, database)
    buys = database[BUYS_COL_NAME]

    incremental = parsed_args.incremental
    if not incremental:
        log.warning('resetting scanner and db')
        scanner.reset()
        buys.drop()

    games_before = scanner.get_num_games()
    log.info("Starting run: %s", scanner.status_msg())
    do_scan(scanner, game_collection, fresh_stats, parsed_args.max_games)
    log.info("Ending run: %s", scanner.status_msg())
    games_after = scanner.get_num_games()

    if parsed_args.incremental:
        stored_stats = DeckBuyStats()
        utils.read_object_from_db(stored_stats, buys, '')
        fresh_stats.merge(stored_stats)
        # Log the Estate availability frequency before and after the merge.
        log.info('existing %s decks',
                 stored_stats[dominioncards.Estate].available.frequency())
        log.info('after merge %s decks',
                 fresh_stats[dominioncards.Estate].available.frequency())

    utils.write_object_to_db(fresh_stats, buys, '')

    scanner.save()
def run_scrape_function_with_retries(scrape_function, date, max_attempts=3):
    """Call scrape_function(date) until it gives a final status or gives up.

    scrape_function: callable that takes the date and returns a status.
    date: passed through unchanged to scrape_function.
    max_attempts: upper bound on the number of calls; defaults to 3, the
    limit that used to be hard-coded.  At least one call is always made.

    Returns the status of the last call.  The statuses 200, 404 and the
    string 'leaderboard updated' end the loop immediately; anything else is
    retried until max_attempts calls have been made.
    """
    num_attempts = 0

    while True:
        num_attempts += 1

        status = scrape_function(date)

        if status == 200:
            log.info('successful')
            break
        elif status == 404:
            log.info('file not found')
            break
        elif status == 'leaderboard updated':
            log.warning('the leaderboard was updated after this script was started, so re-run this script')
            break
        elif num_attempts < max_attempts:
            log.info('Status was %s, retrying', status)
        else:
            log.error('reached %d attempts, aborting', max_attempts)
            break

    return status
def run_scrape_function_with_retries(scrape_function, date):
    """Call scrape_function(date) up to three times and return its status.

    scrape_function: callable that takes the date and returns a status.
    date: passed through unchanged to scrape_function.

    The statuses 200, 404 and the string 'leaderboard updated' are final and
    returned right away; any other status triggers another attempt until
    three calls have been made, after which the last status is returned.
    """
    for attempt in range(1, 4):
        status = scrape_function(date)

        if status == 200:
            log.info('successful')
            return status
        if status == 404:
            log.info('file not found')
            return status
        if status == 'leaderboard updated':
            log.warning('the leaderboard was updated after this script was started, so re-run this script')
            return status

        if attempt == 3:
            log.error('reached 3 attempts, aborting')
        else:
            log.info('Status was %s, retrying', status)

    return status
def main(args):
    """Fold card ratio results from unscanned games into optimal_card_ratios.

    args: parsed command line options; ``incremental`` and ``max_games`` are
    read.  Progress is saved every 25000 games and once more at the end.
    """
    commit_interval = 25000
    db = utils.get_mongo_database()
    game_collection = db.games
    ratio_collection = db.optimal_card_ratios
    tracker = None

    scanner = incremental_scanner.IncrementalScanner('optimal_card_ratios', db)

    if not args.incremental:
        log.warning('resetting scanner and db')
        scanner.reset()

    log.info("Starting run: %s", scanner.status_msg())

    game_stream = utils.progress_meter(scanner.scan(game_collection, {}))
    for index, raw_game in enumerate(game_stream):
        # The tracker manager is only built once a first game shows up.
        if not tracker:
            log.debug("Initializing db tracker manager")
            tracker = DBCardRatioTrackerManager(ratio_collection,
                                                args.incremental)
            log.debug("DB tracker manager initialized")

        for final_ratios, progressive_ratios, win_points in process_game(
                Game(raw_game)):
            tracker.integrate_results('final', final_ratios, win_points)
            tracker.integrate_results('progressive', progressive_ratios,
                                      win_points)

        # A negative max_games means "no limit".
        if 0 <= args.max_games <= index:
            log.info("Reached max_games of %d", args.max_games)
            break

        if index > 0 and index % commit_interval == 0:
            commit_started = time.time()
            tracker.save()
            scanner.save()
            log.info("Committed calculations to the DB in %5.2fs",
                     time.time() - commit_started)

    log.info("Ending run: %s", scanner.status_msg())

    if tracker:
        tracker.save()
    scanner.save()
def main(args):
    """Update the optimal_card_ratios collection from games not yet scanned.

    args: parsed command line options; ``incremental`` and ``max_games`` are
    read.  Results are committed every 25000 games and again when the scan
    finishes.
    """
    games_per_commit = 25000
    mongo = utils.get_mongo_database()
    all_games = mongo.games
    ratios = mongo.optimal_card_ratios
    manager = None

    scanner = incremental_scanner.IncrementalScanner('optimal_card_ratios', mongo)

    if not args.incremental:
        log.warning('resetting scanner and db')
        scanner.reset()

    log.info("Starting run: %s", scanner.status_msg())

    for count, document in enumerate(utils.progress_meter(scanner.scan(all_games, {}))):
        # Create the tracker manager lazily, on the first scanned game.
        if not manager:
            log.debug("Initializing db tracker manager")
            manager = DBCardRatioTrackerManager(ratios, args.incremental)
            log.debug("DB tracker manager initialized")

        per_player_results = process_game(Game(document))
        for final_dict, progressive_dict, points in per_player_results:
            manager.integrate_results('final', final_dict, points)
            manager.integrate_results('progressive', progressive_dict, points)

        limit = args.max_games
        if limit >= 0 and count >= limit:
            log.info("Reached max_games of %d", args.max_games)
            break

        # Only commit on non-zero multiples of the commit interval.
        if count == 0 or count % games_per_commit != 0:
            continue

        began = time.time()
        manager.save()
        scanner.save()
        log.info("Committed calculations to the DB in %5.2fs", time.time() - began)

    log.info("Ending run: %s", scanner.status_msg())

    if manager:
        manager.save()
    scanner.save()
def main(args):
    """ Update analysis statistics.  By default, do so incrementally, unless
    --noincremental argument is given.

    args: parsed command line options; ``incremental`` and ``max_games`` are
    read.

    Any exception raised while analyzing or committing a game is logged
    together with the offending raw game and then re-raised.
    """

    commit_after = 25000

    database = utils.get_mongo_database()
    games = database.games

    output_collection_name = 'analysis'
    output_collection = database[output_collection_name]
    game_analysis = GamesAnalysis()

    scanner = incremental_scanner.IncrementalScanner(output_collection_name,
                                                     database)

    if args.incremental:
        # Continue from the analysis that is already stored.
        utils.read_object_from_db(game_analysis, output_collection, '')
    else:
        log.warning('resetting scanner and db')
        scanner.reset()

    # NOTE(review): output_file_name is not read in this part of the
    # function; presumably a later stage writes to it - confirm.
    output_file_name = 'static/output/all_games_card_stats.js'

    if not os.path.exists('static/output'):
        os.makedirs('static/output')

    log.info("Starting run: %s", scanner.status_msg())

    for idx, raw_game in enumerate(
            utils.progress_meter(scanner.scan(games, {}))):
        try:
            game_analysis.analyze_game(Game(raw_game))

            # A negative max_games means "no limit".
            if args.max_games >= 0 and idx >= args.max_games:
                log.info("Reached max_games of %d", args.max_games)
                break

            if idx % commit_after == 0 and idx > 0:
                start = time.time()
                game_analysis.max_game_id = scanner.get_max_game_id()
                game_analysis.num_games = scanner.get_num_games()
                utils.write_object_to_db(game_analysis, output_collection, '')
                scanner.save()
                log.info("Committed calculations to the DB in %5.2fs",
                         time.time() - start)

        except Exception:
            # The handler used to name ``int``, which no raised exception can
            # match, so this diagnostic was never logged.
            log.exception('Exception occurred for %s in raw game %s',
                          Game(raw_game).isotropic_url(), raw_game)
            raise
Exemple #10
0
def main(args):
    """Accumulate card statistics from the scanned games and store them.

    args: parsed command line options; ``incremental`` and ``max_games`` are
    read.  Without ``incremental`` the scanner state is reset and every
    collection named in ``event_detectors`` is dropped first.
    """
    database = utils.get_mongo_database()
    scanner = incremental_scanner.IncrementalScanner('analyze2', database)

    if not args.incremental:
        log.warning('resetting scanner and db')
        scanner.reset()
        # Each event_detectors entry is a pair; only its first item, the
        # collection name, is needed here.
        for name, _detector in event_detectors:
            database[name].drop()

    log.info("Starting run: %s", scanner.status_msg())
    stream = analysis_util.games_stream(scanner, database.games)
    event_accumulator = EventAccumulator()
    accumulate_card_stats(stream, event_accumulator, args.max_games)

    log.info('saving to database')
    log.debug('saving accumulated stats')
    event_accumulator.update_db(database)

    # The scanner state is persisted only after the stats have been written.
    log.info('saving the game scanner state')
    scanner.save()
    log.info("Ending run: %s", scanner.status_msg())
Exemple #11
0
def main(args):
    """Run the 'analyze2' pass: collect card stats and write them to the db.

    args: parsed command line options; ``incremental`` and ``max_games`` are
    read.  A non-incremental run resets the scanner and drops every
    collection named in ``event_detectors`` before scanning.
    """
    mongo = utils.get_mongo_database()
    game_scanner = incremental_scanner.IncrementalScanner('analyze2', mongo)

    full_rebuild = not args.incremental
    if full_rebuild:
        log.warning('resetting scanner and db')
        game_scanner.reset()
        for target_collection, _unused in event_detectors:
            mongo[target_collection].drop()

    log.info("Starting run: %s", game_scanner.status_msg())

    # Build the game stream first, then the accumulator it feeds.
    pending_games = analysis_util.games_stream(game_scanner, mongo.games)
    stats = EventAccumulator()
    accumulate_card_stats(pending_games, stats, args.max_games)

    log.info('saving to database')
    log.debug('saving accumulated stats')
    stats.update_db(mongo)
    log.info('saving the game scanner state')
    game_scanner.save()

    log.info("Ending run: %s", game_scanner.status_msg())
Exemple #12
0
def main(args):
    """ Update analysis statistics.  By default, do so incrementally, unless
    --noincremental argument is given.

    args: parsed command line options; ``incremental`` and ``max_games`` are
    read.

    Any exception raised while analyzing or committing a game is logged
    together with the offending raw game and then re-raised.
    """

    commit_after = 25000

    database = utils.get_mongo_database()
    games = database.games

    output_collection_name = 'analysis'
    output_collection = database[output_collection_name]
    game_analysis = GamesAnalysis()

    scanner = incremental_scanner.IncrementalScanner(output_collection_name,
                                                     database)

    if args.incremental:
        # Continue from the analysis that is already stored.
        utils.read_object_from_db(game_analysis, output_collection, '')
    else:
        log.warning('resetting scanner and db')
        scanner.reset()

    # NOTE(review): output_file_name is not read in this part of the
    # function; presumably a later stage writes to it - confirm.
    output_file_name = 'static/output/all_games_card_stats.js'

    if not os.path.exists('static/output'):
        os.makedirs('static/output')

    log.info("Starting run: %s", scanner.status_msg())

    for idx, raw_game in enumerate(utils.progress_meter(scanner.scan(games, {}))):
        try:
            game_analysis.analyze_game(Game(raw_game))

            # A negative max_games means "no limit".
            if args.max_games >= 0 and idx >= args.max_games:
                log.info("Reached max_games of %d", args.max_games)
                break

            if idx % commit_after == 0 and idx > 0:
                start = time.time()
                game_analysis.max_game_id = scanner.get_max_game_id()
                game_analysis.num_games = scanner.get_num_games()
                utils.write_object_to_db(game_analysis, output_collection, '')
                scanner.save()
                log.info("Committed calculations to the DB in %5.2fs", time.time() - start)

        except Exception:
            # The handler used to name ``int``, which no raised exception can
            # match, so this diagnostic was never logged.
            log.exception('Exception occurred for %s in raw game %s', Game(raw_game).isotropic_url(), raw_game)
            raise
Exemple #13
0
def watch_and_log(signature, log_interval=15, timeout=600):
    """Invoke the celery task via the passed signature, wait for it and
    all its children to complete, and log progress along the way.

    signature: task signature; its ``task`` attribute is used as the name
    in log messages and ``apply_async()`` starts the task.

    log_interval: number of seconds between checking and logging the
    status

    timeout: number of seconds after which to return, when there have
    been no subtask status updates

    Returns the async result from ``apply_async()``, whether the task
    completed or the wait timed out."""
    task_name = signature.task
    log.info("Calling background task %s", task_name)

    async_result = signature.apply_async()

    all_done = False
    last_status_summary = None
    last_status_update = time.time()
    while not all_done:
        # Wait for the log_interval, then check the status
        time.sleep(log_interval)

        c = collections.Counter()
        try:
            # Setting intermediate to False should cause the
            # IncompleteStream exception to be thrown if the task and
            # its children aren't all complete.
            for parent, child in async_result.iterdeps(intermediate=False):
                c[child.state] += 1
            all_done = True
        except celery.exceptions.IncompleteStream:
            status_summary = summarize_task_status(c)
            log.info("Waiting for %s: %s", task_name, status_summary)

            if status_summary != last_status_summary:
                # The status changed: remember it and restart the clock.
                last_status_summary = status_summary
                last_status_update = time.time()
            elif (time.time() - last_status_update) > timeout:
                # The clock is only restarted on a change, so an unchanged
                # status eventually exceeds the timeout.
                break

    if all_done:
        log.info("Done with background task %s: %s", task_name, summarize_task_status(c))
    else:
        log.warning("Returning due to timeout during background task %s: %s", task_name, summarize_task_status(c))
    return async_result
Exemple #14
0
def watch_and_log(signature, log_interval=15, timeout=600):
    """Invoke the celery task via the passed signature, wait for it and
    all its children to complete, and log progress along the way.

    signature: task signature; its ``task`` attribute is used as the name
    in log messages and ``apply_async()`` starts the task.

    log_interval: number of seconds between checking and logging the
    status

    timeout: number of seconds after which to return, when there have
    been no subtask status updates

    Returns the async result from ``apply_async()``, whether the task
    completed or the wait timed out."""
    task_name = signature.task
    log.info("Calling background task %s", task_name)

    async_result = signature.apply_async()

    all_done = False
    last_status_summary = None
    last_status_update = time.time()
    while not all_done:
        # Wait for the log_interval, then check the status
        time.sleep(log_interval)

        c = collections.Counter()
        try:
            # Setting intermediate to False should cause the
            # IncompleteStream exception to be thrown if the task and
            # its children aren't all complete.
            for parent, child in async_result.iterdeps(intermediate=False):
                c[child.state] += 1
            all_done = True
        except celery.exceptions.IncompleteStream:
            status_summary = summarize_task_status(c)
            log.info("Waiting for %s: %s", task_name, status_summary)

            if status_summary != last_status_summary:
                # The status changed: remember it and restart the clock.
                last_status_summary = status_summary
                last_status_update = time.time()
            elif (time.time() - last_status_update) > timeout:
                # The clock is only restarted on a change, so an unchanged
                # status eventually exceeds the timeout.
                break

    if all_done:
        log.info("Done with background task %s: %s", task_name, summarize_task_status(c))
    else:
        log.warning("Returning due to timeout during background task %s: %s", task_name, summarize_task_status(c))
    return async_result
Exemple #15
0
def main(parsed_args):
    """Update the stored buy statistics from games that were not scanned yet.

    parsed_args: parsed command line options; ``incremental`` and
    ``max_games`` are read.  A non-incremental run resets the scanner and
    drops the buys collection; an incremental run merges the statistics
    already in the database into the new ones before they are written back.
    """
    # Timing and size snapshots are taken but not reported by this function.
    run_started = time.time()
    mongo = utils.get_mongo_database()
    all_games = mongo.games
    target_db = mongo

    new_stats = DeckBuyStats()

    buy_scanner = incremental_scanner.IncrementalScanner(BUYS_COL_NAME, target_db)
    stats_collection = target_db[BUYS_COL_NAME]

    if not parsed_args.incremental:
        log.warning('resetting scanner and db')
        buy_scanner.reset()
        stats_collection.drop()

    size_before = buy_scanner.get_num_games()
    log.info("Starting run: %s", buy_scanner.status_msg())
    do_scan(buy_scanner, all_games, new_stats, parsed_args.max_games)
    log.info("Ending run: %s", buy_scanner.status_msg())
    size_after = buy_scanner.get_num_games()

    if parsed_args.incremental:
        previous_stats = DeckBuyStats()
        utils.read_object_from_db(previous_stats, stats_collection, '')
        new_stats.merge(previous_stats)

        def estate_frequency(stats):
            """Return the availability frequency recorded for Estate."""
            return stats[dominioncards.Estate].available.frequency()

        log.info('existing %s decks', estate_frequency(previous_stats))
        log.info('after merge %s decks', estate_frequency(new_stats))

    utils.write_object_to_db(new_stats, stats_collection, '')

    buy_scanner.save()
Exemple #16
0
def run_trueskill_openings(args, db, log, commit_after=25000):
    """Update the stored opening skill table from unscanned games.

    args: parsed options; ``incremental`` and ``max_games`` are read.
    db: database handle providing the games and trueskill_* collections.
    log: logger used for the progress messages.
    commit_after: number of scanned games between intermediate saves.

    A game only contributes when it has at least two decks, the second deck
    has at least five turns, and its rating system (when present) contains
    neither 'adventure' nor 'unknown'.
    """
    games = db.games

    collection = db.trueskill_openings
    # Player-level skill tracking is disabled; only the openings table is
    # set up and updated.
    player_collection = db.trueskill_players
    setup_openings_collection(collection)

    opening_skill_table = DbBackedSkillTable(collection)

    scanner = incremental_scanner.IncrementalScanner('trueskill', db)
    log.info("Starting run: %s", scanner.status_msg())
    if not args.incremental:
        log.warning('resetting scanner and db')
        scanner.reset()
        collection.drop()

    game_stream = utils.progress_meter(scanner.scan(db.games, {}))
    for index, raw_game in enumerate(game_stream):
        # Checks are nested in the original left-to-right order so that
        # later lookups only happen when the earlier ones passed.
        if len(raw_game[DECKS]) >= 2 and len(raw_game[DECKS][1][TURNS]) >= 5:
            if (RATING_SYSTEM not in raw_game
                    or ('adventure' not in raw_game[RATING_SYSTEM]
                        and 'unknown' not in raw_game[RATING_SYSTEM])):
                update_skills_for_game(raw_game, opening_skill_table)

        if index == args.max_games:
            break

        if index > 0 and index % commit_after == 0:
            commit_started = time.time()
            opening_skill_table.save()
            scanner.save()
            log.info("Committed calculations to the DB in %5.2fs", time.time() - commit_started)

    opening_skill_table.save()
    scanner.save()
    log.info("Ending run: %s", scanner.status_msg())
Exemple #17
0
                start = time.time()
                game_analysis.max_game_id = scanner.get_max_game_id()
                game_analysis.num_games = scanner.get_num_games()
                utils.write_object_to_db(game_analysis, output_collection, '')
                scanner.save()
                log.info("Committed calculations to the DB in %5.2fs",
                         time.time() - start)

        except int, exception:
            log.exception('Exception occurred for %s in raw game %s',
                          Game(raw_game).isotropic_url(), raw_game)
            raise

    game_analysis.max_game_id = scanner.get_max_game_id()
    game_analysis.num_games = scanner.get_num_games()
    utils.write_object_to_db(game_analysis, output_collection, '')

    output_file = open(output_file_name, 'w')
    output_file.write('var all_card_data = ')

    json.dump(game_analysis.to_primitive_object(), output_file)
    log.info("Ending run: %s", scanner.status_msg())
    scanner.save()


if __name__ == '__main__':
    # Script entry point: build the argument parser supplied by utils and
    # parse the command line.
    parser = utils.incremental_max_parser()
    args = parser.parse_args()
    # Logging is configured from the parsed ``debug`` option before main()
    # runs.
    dominionstats.utils.log.initialize_logging(args.debug)
    main(args)
Exemple #18
0
            if idx % commit_after == 0 and idx > 0:
                start = time.time()
                game_analysis.max_game_id = scanner.get_max_game_id()
                game_analysis.num_games = scanner.get_num_games()
                utils.write_object_to_db(game_analysis, output_collection, '')
                scanner.save()
                log.info("Committed calculations to the DB in %5.2fs", time.time() - start)

        except int, exception:
            log.exception('Exception occurred for %s in raw game %s', Game(raw_game).isotropic_url(), raw_game)
            raise 

    game_analysis.max_game_id = scanner.get_max_game_id()
    game_analysis.num_games = scanner.get_num_games()
    utils.write_object_to_db(game_analysis, output_collection, '')

    output_file = open(output_file_name, 'w')
    output_file.write('var all_card_data = ')

    json.dump(game_analysis.to_primitive_object(), output_file)
    log.info("Ending run: %s", scanner.status_msg())
    scanner.save()


if __name__ == '__main__':
    # Script entry point: build the argument parser supplied by utils and
    # parse the command line.
    parser = utils.incremental_max_parser()
    args = parser.parse_args()
    # Logging is configured from the parsed ``debug`` option before main()
    # runs.
    dominionstats.utils.log.initialize_logging(args.debug)
    main(args)
Exemple #19
0
def print_totals(checker_output, total):
    """Log one "Totals" line per goal, largest count first.

    checker_output: mapping of goal name to the number of times it was found.
    total: value each count is divided by to report its fraction.
    """
    ranked = list(checker_output.iteritems())
    # Stable sort on the count, so equal counts keep the mapping's order.
    ranked.sort(key=lambda entry: entry[1], reverse=True)
    for goal_name, count in ranked:
        fraction = count / float(total)
        log.info("Totals: %-15s %8d %5.2f", goal_name, count, fraction)
def main():
    """Load cached leaderboard snapshots into the leaderboard_history collection.

    Every ``YYYY-MM-DD.html.bz2`` file in static/leaderboard/ whose date is
    later than the stored 'last_date' (and not listed as a bad leaderboard
    date) is searched with both the iso and the goko table patterns.  One
    history entry ``[date, skill_mean, skill_error, rank,
    eligible_games_played]`` per nickname is pushed to the database.

    Processing stops at the first file with no matches or whose match count
    differs from the last rank seen.  The date of the last successfully
    processed file is written back to the scanner collection in every case.
    """
    filename_pattern = re.compile(r'^(?P<date>\d\d\d\d-\d\d-\d\d)\.html\.bz2$')
    iso_leaderboard_pattern = re.compile(
        r'<td>(?P<skill_mean>-?\d+\.\d+) &plusmn; '
        r'(?P<skill_error>-?\d+\.\d+)</td><td class=c2>'
        r'(?P<rank>\d+)</td><td class=c>'
        r'(?P<eligible_games_played>\d+)</td><td>'
        r'(?P<nickname>[^<]*) <')
    goko_leaderboard_pattern = re.compile(
        r'\s+<td class="leaders-table-item table-item-rank">(?P<rank>\d+)</td>\s*\n'
        r'\s*<td class="leaders-table-item table-item-name"><img [^>]*>(?P<nickname>.*)</td>\s*\n'
        r'\s*<td class="leaders-table-item table-item-points">(?P<skill_mean>\d+)</td>')

    database = utils.get_mongo_database()
    history_collection = database.leaderboard_history
    scanner_collection = database.scanner

    db_val = scanner_collection.find_one({'_id': 'leaderboard_history'})
    # Dates are compared as 'YYYY-MM-DD' strings, so this sentinel sorts
    # before any real date.
    last_date = db_val['last_date'] if db_val else '0000-00-00'

    directory = 'static/leaderboard/'

    filenames = sorted(os.listdir(directory))

    bad_leaderboard_dates = utils.get_bad_leaderboard_dates()

    for filename in filenames:
        match = filename_pattern.search(filename)
        if not match:
            continue

        date = match.group('date')

        if date in bad_leaderboard_dates:
            # don't load data from when the leaderboard was messed up
            log.warning("Skipping %s because the leaderboard was messed up", date)
            continue

        if date <= last_date:
            log.warning("Date %s is less than last date %s", date, last_date)
            continue

        log.info('Processing %s', date)

        # Close the archive even when reading or decoding fails.
        file_obj = bz2.BZ2File(directory + filename)
        try:
            content = file_obj.read().decode('utf-8')
        finally:
            file_obj.close()

        nickname_to_entry = {}
        num_matches = 0
        last_rank = -1

        # First pass: iso-format rows, keyed by the normalized nickname.
        for match in iso_leaderboard_pattern.finditer(content):
            num_matches += 1
            skill_mean = float(match.group('skill_mean'))
            skill_error = float(match.group('skill_error'))
            rank = int(match.group('rank'))
            eligible_games_played = int(match.group('eligible_games_played'))
            nickname = match.group('nickname')

            normed_nickname = name_merger.norm_name(nickname)

            if normed_nickname not in nickname_to_entry:
                nickname_to_entry[normed_nickname] = [date, skill_mean, skill_error, rank, eligible_games_played]
            else:
                log.info('normed nickname %s already exists for %s', normed_nickname, date)

            last_rank = rank

        # Second pass: goko-format rows.  They carry no skill error or game
        # count (both stored as 0) and the nickname is used as matched.
        for match in goko_leaderboard_pattern.finditer(content):
            num_matches += 1
            skill_mean = float(match.group('skill_mean'))
            skill_error = 0
            rank = int(match.group('rank'))
            eligible_games_played = 0
            nickname = match.group('nickname')

            normed_nickname = nickname

            if normed_nickname not in nickname_to_entry:
                nickname_to_entry[normed_nickname] = [date, skill_mean, skill_error, rank, eligible_games_played]
            else:
                log.info('normed nickname %s already exists for %s', normed_nickname, date)

            last_rank = rank

        log.info('%d entries matched', num_matches)

        if num_matches == 0:
            log.error('No entries found, so the regex is probably not doing its job anymore.')
            break

        if num_matches != last_rank:
            log.error('ERROR: # entries does not match last rank, so the regex is probably not doing its job anymore.')
            break

        for nickname, data in nickname_to_entry.items():
            history_collection.update({'_id': nickname}, {'$push': {'history': data}}, upsert=True)

        log.info('%d player histories updated', len(nickname_to_entry))

        last_date = date

    scanner_collection.update({'_id': 'leaderboard_history'}, {'$set': {'last_date': last_date}}, upsert=True)
Exemple #21
0
def main(args):
    """Check scanned games for achieved goals and store them per game.

    args: parsed command line options; ``goals``, ``incremental`` and
    ``max_games`` are read.

    When ``args.goals`` is given, only those goals are re-checked: a freshly
    reset 'subgoals' scanner is used, existing entries for those goals are
    replaced, and the scan stops at the max game id of the main 'goals'
    scanner.  An unrecognized goal name is logged and the process exits with
    status -1.  Otherwise every goal is checked with the 'goals' scanner.
    """
    db = utils.get_mongo_database()
    games_collection = db.games
    output_collection = db.goals
    total_checked = 0

    checker_output = collections.defaultdict(int)

    if args.goals:
        unknown_names = [name for name in args.goals
                         if name not in goal_check_funcs]
        for name in unknown_names:
            log.error("Unrecognized goal name '%s'", name)
        if unknown_names:
            exit(-1)
        goals_to_check = args.goals

        scanner = incremental_scanner.IncrementalScanner('subgoals', db)
        scanner.reset()
        main_scanner = incremental_scanner.IncrementalScanner('goals', db)
        last = main_scanner.get_max_game_id()
    else:
        goals_to_check = None
        scanner = incremental_scanner.IncrementalScanner('goals', db)
        last = None

    if not args.incremental:
        scanner.reset()
        output_collection.remove()
    output_collection.ensure_index('goals.player')

    log.info("Starting run: %s", scanner.status_msg())

    for raw_game in utils.progress_meter(scanner.scan(games_collection, {})):
        total_checked += 1
        game_val = game.Game(raw_game)

        # Start from the stored goal document for this game, if there is one.
        game_id = game_val.get_id()
        stored = output_collection.find_one({'_id': game_id})
        if stored is None:
            stored = {'_id': game_id, 'goals': []}

        # If rechecking, drop the old entries for the goals being rechecked
        # (in place, so the stored list object stays the same).
        if goals_to_check is not None:
            stored['goals'][:] = [old for old in stored['goals']
                                  if old['goal_name'] not in goals_to_check]

        # Add the newly found goals and count them per goal name.
        for found in check_goals(game_val, goals_to_check):
            found_name = found['goal_name']
            stored['goals'].append(found)
            checker_output[found_name] += 1

        output_collection.save(dict(stored))

        if last and game_id == last:
            break
        if args.max_games >= 0 and total_checked >= args.max_games:
            break

    log.info("Ending run: %s", scanner.status_msg())
    scanner.save()
    print_totals(checker_output, total_checked)
Exemple #22
0
def main(parsed_args):
    """Run the whole update pipeline, one stage after another.

    Stage order: scrape raw games, analyze, calculate goals, summarize
    game stats, count buys, run trueskill, compute optimal card ratios,
    compute goal stats, then scrape and load the leaderboard.

    parsed_args is handed unchanged to every sub-script ``main`` that
    takes arguments.
    """

    def days_newest_first():
        # Rebuilt for every stage, so "today" is re-read each time.
        return utils.daterange(datetime.date(2010, 10, 15),
                               datetime.date.today(), reverse=True)

    def run_and_wait(signature):
        # Pass the task signature to watch_and_log and fetch its result.
        return watch_and_log(signature).get()

    # Scrape and load raw games from isotropic, newest day first. A day
    # reporting None is merely logged; a day reporting 0 inserted games
    # ends the stage.
    log.info("Starting scrape for raw games")
    for day in days_newest_first():
        log.info("Invoking scrape_raw_games async task for %s", day)
        num_inserted = run_and_wait(background.tasks.scrape_raw_games.s(day))

        if num_inserted is None:
            log.info("Nothing processed for %s", day)
            continue
        if num_inserted == 0:
            log.info("No games inserted for %s", day)
            break

    # Analyze script
    log.info("Starting analyze")
    analyze.main(parsed_args)

    # Goals: stop at the first day for which the task reports 0
    log.info("Starting search for goals acheived")
    for day in days_newest_first():
        log.info("Invoking calc_goals_for_days async task for %s", day)
        if run_and_wait(background.tasks.calc_goals_for_days.s([day])) == 0:
            log.info("No games parsed for goals on %s", day)
            break

    # Game stats: same stop rule as the goals stage
    log.info("Starting game_stats summarization")
    for day in days_newest_first():
        log.info("Invoking summarize_game_stats_for_days async task for %s", day)
        if run_and_wait(background.tasks.summarize_game_stats_for_days.s([day])) == 0:
            log.info("No new games summarized on %s", day)
            break

    # Sub-scripts whose main() takes the parsed arguments
    for message, script in (
            ("Counting buys", count_buys),
            ("Calculating trueskill", run_trueskill),
            ("Calculating optimal card ratios", optimal_card_ratios),
            ("Calculating goal stats", goal_stats)):
        log.info(message)
        script.main(parsed_args)

    # Leaderboard sub-scripts, whose main() takes no arguments
    for message, script in (
            ("Scraping the leaderboard", scrape_leaderboard),
            ("Loading the leaderboard", load_leaderboard)):
        log.info(message)
        script.main()

    log.info("Done with the update.py process")
Exemple #23
0
def main(args):
    """Check scanned games for achieved goals and store them in ``db.goals``.

    Attributes read from ``args``:
      goals       -- optional list of goal names. When non-empty, only
                     those goals are recomputed: a freshly reset
                     'subgoals' scanner is used and scanning stops at the
                     max game id recorded by the 'goals' scanner. Any
                     name missing from ``goal_check_funcs`` is logged and
                     the process exits with status -1.
      incremental -- when false, the scanner is reset and the goals
                     collection is emptied before scanning.
      max_games   -- when >= 0, stop after that many games were checked.

    One document per game is saved, shaped
    ``{'_id': <game id>, 'goals': [<goal dict>, ...]}``, and per-goal
    totals are logged at the end through ``print_totals``.
    """
    # Local import: `exit` is a helper injected by the `site` module and is
    # not meant for programs; sys.exit raises the same SystemExit.
    import sys

    db = utils.get_mongo_database()
    games_collection = db.games
    output_collection = db.goals
    total_checked = 0

    # goal name -> number of times it was found during this run
    checker_output = collections.defaultdict(int)

    if args.goals:
        unknown_goals = [name for name in args.goals
                         if name not in goal_check_funcs]
        for goal_name in unknown_goals:
            log.error("Unrecognized goal name '%s'", goal_name)
        if unknown_goals:
            sys.exit(-1)
        goals_to_check = args.goals

        # Recheck mode: scan from the start with a scratch scanner, but only
        # as far as the main 'goals' scanner has already reached.
        scanner = incremental_scanner.IncrementalScanner('subgoals', db)
        scanner.reset()
        main_scanner = incremental_scanner.IncrementalScanner('goals', db)
        last = main_scanner.get_max_game_id()
    else:
        goals_to_check = None
        scanner = incremental_scanner.IncrementalScanner('goals', db)
        last = None

    if not args.incremental:
        # NOTE(review): this also runs when only a subset of goals is being
        # rechecked, which empties the whole collection -- confirm intended.
        scanner.reset()
        output_collection.remove()
    output_collection.ensure_index('goals.player')

    log.info("Starting run: %s", scanner.status_msg())

    for raw_game in utils.progress_meter(scanner.scan(games_collection, {})):
        total_checked += 1
        game_val = game.Game(raw_game)

        # Start from the stored goal document for this game, if there is one
        game_id = game_val.get_id()
        mongo_val = output_collection.find_one({'_id': game_id})
        if mongo_val is None:
            mongo_val = {'_id': game_id, 'goals': []}

        # When rechecking, drop stale entries for the goals being recomputed
        if goals_to_check is not None:
            mongo_val['goals'] = [
                old_goal for old_goal in mongo_val['goals']
                if old_goal['goal_name'] not in goals_to_check
            ]

        # Compute the new goals, record them and count them per goal name
        for goal in check_goals(game_val, goals_to_check):
            goal_name = goal['goal_name']
            mongo_val['goals'].append(goal)
            checker_output[goal_name] += 1

        output_collection.save(mongo_val)

        # The current game is always processed before either stop condition
        if last and game_id == last:
            break
        if args.max_games >= 0 and total_checked >= args.max_games:
            break

    log.info("Ending run: %s", scanner.status_msg())
    scanner.save()
    print_totals(checker_output, total_checked)
Exemple #24
0
def print_totals(checker_output, total):
    """Log how often each goal was found, most frequent first.

    checker_output -- mapping of goal name to the number of times it was
                      found.
    total          -- number of games checked; each count is also logged
                      as a fraction of this value. A total of 0 is logged
                      as a 0.00 share instead of raising
                      ZeroDivisionError.
    """
    # .items() exists on both Python 2 and Python 3 mappings, whereas
    # .iteritems() is Python 2 only.
    ranked = sorted(checker_output.items(),
                    key=lambda pair: pair[1], reverse=True)
    for goal_name, count in ranked:
        share = count / float(total) if total else 0.0
        log.info("Totals: %-15s %8d %5.2f", goal_name, count, share)
Exemple #25
0
def main():
    """Load cached leaderboard snapshots into ``leaderboard_history``.

    Every ``YYYY-MM-DD.html.bz2`` file under ``static/leaderboard/`` is
    visited in sorted filename order. Files are skipped when their date is
    listed by ``utils.get_bad_leaderboard_dates()`` or is not newer than
    the ``last_date`` stored in the scanner collection. Dates are compared
    as strings.

    For each remaining file the leaderboard rows are extracted with a
    regex and one entry
    ``[date, skill_mean, skill_error, rank, eligible_games_played]`` is
    pushed onto the ``history`` array of the document keyed by the
    normalized nickname. Only the first row seen for a normalized nickname
    is kept for a given date.

    Processing stops at the first file that yields no rows, or whose row
    count differs from the rank of the last row. Both are treated as a
    sign that the regex no longer fits the page. The newest fully
    processed date is written back to the scanner collection at the end.
    """
    filename_pattern = re.compile(r'^(?P<date>\d\d\d\d-\d\d-\d\d)\.html\.bz2$')
    leaderboard_pattern = re.compile(
        r'<td>(?P<skill_mean>-?\d+\.\d+) &plusmn; '
        r'(?P<skill_error>-?\d+\.\d+)</td><td class=c2>'
        r'(?P<rank>\d+)</td><td class=c>'
        r'(?P<eligible_games_played>\d+)</td><td>'
        r'(?P<nickname>[^<]*) <')

    database = utils.get_mongo_database()
    history_collection = database.leaderboard_history
    scanner_collection = database.scanner

    db_val = scanner_collection.find_one({'_id': 'leaderboard_history'})
    last_date = db_val['last_date'] if db_val else '0000-00-00'

    directory = 'static/leaderboard/'
    filenames = sorted(os.listdir(directory))

    bad_leaderboard_dates = utils.get_bad_leaderboard_dates()

    for filename in filenames:
        name_match = filename_pattern.search(filename)
        if not name_match:
            continue

        date = name_match.group('date')

        if date in bad_leaderboard_dates:
            # don't load data from when the leaderboard was messed up
            log.warning("Skipping %s because the leaderboard was messed up",
                        date)
            continue

        if date <= last_date:
            log.warning("Date %s is less than last date %s", date, last_date)
            continue

        log.info('Processing %s', date)

        # The with-block closes the file even if reading or decoding fails.
        with bz2.BZ2File(directory + filename) as file_obj:
            content = file_obj.read().decode('utf-8')

        nickname_to_entry = {}
        num_matches = 0
        last_rank = -1

        # finditer yields the same successive non-overlapping matches as
        # repeatedly calling search() from the end of the previous match.
        for row in leaderboard_pattern.finditer(content):
            num_matches += 1
            skill_mean = float(row.group('skill_mean'))
            skill_error = float(row.group('skill_error'))
            rank = int(row.group('rank'))
            eligible_games_played = int(row.group('eligible_games_played'))
            nickname = row.group('nickname')

            normed_nickname = name_merger.norm_name(nickname)

            if normed_nickname not in nickname_to_entry:
                nickname_to_entry[normed_nickname] = [
                    date, skill_mean, skill_error, rank, eligible_games_played
                ]
            else:
                log.info('normed nickname %s already exists for %s',
                         normed_nickname, date)

            last_rank = rank

        log.info('%d entries matched', num_matches)

        if num_matches == 0:
            log.error(
                'No entries found, so the regex is probably not doing its job anymore.'
            )
            break

        # Sanity check: the number of rows found should equal the rank of
        # the last row.
        if num_matches != last_rank:
            log.error(
                'ERROR: # entries does not match last rank, so the regex is probably not doing its job anymore.'
            )
            break

        # .items() works on both Python 2 and Python 3 dicts.
        for normed_nickname, entry in nickname_to_entry.items():
            history_collection.update({'_id': normed_nickname},
                                      {'$push': {
                                          'history': entry
                                      }},
                                      upsert=True)

        log.info('%d player histories updated', len(nickname_to_entry))

        last_date = date

    # Record how far loading got, so the next run skips these dates.
    scanner_collection.update({'_id': 'leaderboard_history'},
                              {'$set': {
                                  'last_date': last_date
                              }},
                              upsert=True)
def main():
    """Download every leaderboard snapshot newer than the local cache.

    Starting the day after the last cached leaderboard, each day up to
    and including the current isotropic leaderboard date is scraped. The
    current date comes from isotropic itself; earlier dates are tried on
    councilroom first and then on bggdl if councilroom did not return 200.

    A 404 is logged and the day is skipped. Any status other than 200 or
    404 is logged and stops the run. Nothing is scraped when the current
    isotropic leaderboard date cannot be determined.
    """
    utils.ensure_exists(output_directory)

    last_cached = get_date_of_last_cached_leaderboard()
    log.info('date of the last cached leaderboard is %s', last_cached)

    current = get_date_of_current_isotropic_leaderboard()
    if current is None:
        log.warning('could not determine the date of the current isotropic leaderboard, so please try again later')
        return
    log.info('date of the current isotropic leaderboard is %s', current)

    step = datetime.timedelta(1)
    day = last_cached + step

    while day <= current:
        log.info('Processing %s', day)

        if day != current:
            # Older snapshots: try the councilroom copy, then bggdl
            log.info('scraping from councilroom')
            status = run_scrape_function_with_retries(scrape_leaderboard_from_councilroom, day)

            if status != 200:
                log.info('scraping from bggdl')
                status = run_scrape_function_with_retries(scrape_leaderboard_from_bggdl, day)
        else:
            # The newest snapshot is only available from isotropic
            log.info('scraping from isotropic')
            status = run_scrape_function_with_retries(scrape_leaderboard_from_isotropic, day)

        if status == 404:
            log.warning('file not found, so we will assume that it does not exist, and go to the next day')
        elif status != 200:
            log.warning('Unexpected status of %d, please try again later', status)
            break

        day += step
Exemple #27
0
def main(parsed_args):
    """Run the whole update pipeline, one stage after another.

    Stage order: scrape raw games, analyze, a combined goals and
    game-stats pass, count buys, run trueskill, compute optimal card
    ratios, compute goal stats, scrape and load the leaderboard, and
    finally analyze2.

    parsed_args is handed unchanged to every sub-script ``main`` that
    takes arguments.
    """
    # "today" is read once and shared by both date-driven stages
    today = datetime.date.today()
    first_day = datetime.date(2010,10,14)

    # Scrape and load raw games from goko, newest day first. A day
    # reporting None is merely logged; a day reporting 0 inserted games
    # ends the stage.
    log.info("Starting scrape for raw games")
    for day in utils.daterange(first_day, today, reverse=True):
        log.info("Invoking scrape_raw_games async task for %s", day)
        num_inserted = watch_and_log(background.tasks.scrape_raw_games.s(day)).get()

        if num_inserted is None:
            log.info("Nothing processed for %s", day)
            continue
        if num_inserted == 0:
            log.info("No games inserted for %s", day)
            break

    # Analyze script
    log.info("Starting analyze")
    analyze.main(parsed_args)

    # Goals and game stats are handled together, day by day
    log.info("Starting search for goals acheived")
    log.info("Starting game_stats summarization")
    for day in utils.daterange(first_day, today, reverse=True):
        log.info("Invoking calc_goals_for_days async task for %s", day)
        goals_inserted = watch_and_log(background.tasks.calc_goals_for_days.s([day])).get()

        log.info("Invoking summarize_game_stats_for_days async task for %s", day)
        # NOTE(review): the summarize result is never inspected; leaving
        # the loop depends only on the goals task's count -- confirm.
        summary_result = watch_and_log(background.tasks.summarize_game_stats_for_days.s([day]))
        if goals_inserted == 0:
            log.info("No games parsed for goals on %s", day)
            break

    # Sub-scripts whose main() takes the parsed arguments
    for message, script in (
            ("Counting buys", count_buys),
            ("Calculating trueskill", run_trueskill),
            ("Calculating optimal card ratios", optimal_card_ratios),
            ("Calculating goal stats", goal_stats)):
        log.info(message)
        script.main(parsed_args)

    # Leaderboard sub-scripts, whose main() takes no arguments
    for message, script in (
            ("Scraping the leaderboard", scrape_leaderboard),
            ("Loading the leaderboard", load_leaderboard)):
        log.info(message)
        script.main()

    log.info("Starting analyze2") # This is slow. Is it fast enough on cr?
    analyze2.main(parsed_args)
    log.info("Done with the update.py process")