Ejemplo n.º 1
0
    def collect_training_stats(environment, species, generation):
        """Return aggregate training-cost stats for reaching `generation`.

        Walks the recorded batches in order, accumulating counts and
        timings, and stops at the batch that trained this generation.
        Generation 1 is the starting model, so it has zero cost.

        Returns a dict with keys: num_batches_to_train,
        wall_clock_time_to_train, cpu_seconds_to_train,
        mcts_considerations.
        """
        training_info = TrainingInfo.load(environment, species)

        totals = {
            "num_batches_to_train": 0,
            "wall_clock_time_to_train": 0.0,
            "cpu_seconds_to_train": 0.0,
            "mcts_considerations": 0,
        }
        if generation > 1:
            for batch in training_info.batches:
                totals["num_batches_to_train"] += 1
                totals["wall_clock_time_to_train"] += (
                    batch.self_play_end_time - batch.self_play_start_time
                )

                # Utilization is reported on a 0.0-100.0 scale per CPU,
                # so divide by 100 to convert to CPU-seconds.
                totals["cpu_seconds_to_train"] += batch.self_play_cpu_time / 100.0
                totals["mcts_considerations"] += batch.total_mcts_considerations

                # Stop at the batch that produced this generation.
                if batch.generation_trained == generation:
                    break

        return totals
Ejemplo n.º 2
0
def collate_training_efficiency_stats(environment, tournament_id):
    '''
    Collect per-bot (generation info, tournament stats) pairs for every
    bot that appeared in a tournament.

    :environment ~ "connect_four"

    :tournament_id ~ "1593043900-gbdt_pcrR2-1-21"
    - This is just the basename of the path.

    Returns a list of (GenerationInfo, TournamentStats) tuples, one per
    result row in the tournament results file.
    '''
    bot_figure_stats = []
    tournament_results_path = build_tournament_results_path(tournament_id)

    # Use a context manager so the file handle is closed promptly
    # (the original left it to the garbage collector).
    with open(tournament_results_path, 'r') as results_file:
        results = json.loads(results_file.read())

    # Update all the batch info that needs to be updated
    # - results rows look like (species, generation, skill_level, skill_sigma)
    for species in set(x[0] for x in results):
        training_info = TrainingInfo.load(environment, species)
        training_info.update_batch_stats()

    # Collect info for each bot in tournament
    for species, generation, skill_level, skill_sigma in results:
        gen_info = GenerationInfo.from_generation_info(
            environment,
            species,
            generation,
        )

        tourn_info = TournamentStats(
            environment,
            species,
            generation,
            skill_level,
            skill_sigma,
        )

        bot_figure_stats.append((gen_info, tourn_info))

    return bot_figure_stats
Ejemplo n.º 3
0
def update_batch_stats(environment_name, species_name, num_workers=12):
    """Backfill CPU-time and game statistics for every recorded batch.

    For each batch that hasn't been updated yet (self_play_cpu_time is
    None), query the system monitor for CPU utilization over the batch's
    self-play window, sanity-check the sample density, then fan out over
    `num_workers` processes to tabulate game/position/MCTS counts.
    Saves the updated TrainingInfo at the end.

    :environment_name / :species_name identify the TrainingInfo to load.
    :num_workers is the process-pool size used for stat collection.

    Raises RuntimeError if monitoring sampled too sparsely to trust.
    """
    sys_mon = SystemMonitor()
    tinfo = TrainingInfo.load(environment_name, species_name)
    for batch in tinfo.batches:
        # If batch stats exist, don't redo...
        if batch.self_play_cpu_time is not None:
            print("Batch stats already updated for", batch.batch_num)
            continue

        # Query utilization once; the original code issued this identical
        # query twice, with the first (tuple) result immediately discarded.
        # - query_utilization returns (cpu_time, num_samples).
        self_play_cpu_time, num_samples = sys_mon.query_utilization(
            batch.self_play_start_time,
            batch.self_play_end_time,
        )

        # Sanity check that there was a continuous sampling of cpu utlization.
        # - Check that it got at least a sample every 3 seconds
        min_allowable_samples = ((batch.self_play_end_time - batch.self_play_start_time) / 3)
        if num_samples < min_allowable_samples:
            raise RuntimeError(f"Monitoring didn't take enough samples: {num_samples} < {min_allowable_samples}")
        batch.self_play_cpu_time = self_play_cpu_time

        # Grab batch stats
        worker_args = []
        for worker_num in range(num_workers):
            worker_args.append(
                (
                    environment_name,
                    species_name,
                    batch.batch_num,
                    worker_num,
                    num_workers
                )
            )
        with Pool(num_workers) as p:
            results = p.map(run_worker, worker_args)

        batch.num_games = 0
        batch.num_positions = 0
        batch.total_mcts_considerations = 0
        for worker_num, result in enumerate(results):
            print("Finished", worker_num, species_name, batch.batch_num)
            stats = WorkerStats(*result)
            batch.num_games += stats.num_games
            batch.num_positions += stats.num_positions
            batch.total_mcts_considerations += stats.total_mcts_considerations

        # These are tabulated for both agents, total_mcts_considerations is not.
        batch.num_games = batch.num_games // 2
        batch.num_positions = batch.num_positions // 2

    # Record the batch info
    tinfo.save()
Ejemplo n.º 4
0
    def collect_figure_data(
        self,
        environment: str,
        species_list: List[str],
    ):
        """Build (species, cpu_seconds_to_train, generation) rows for plotting.

        Refreshes each species' batch stats first, then collects one row
        per completed generation (1 up to, excluding, the current
        self-play generation).
        """
        rows = []  # (species, training_time, generation_number)
        for species_name in species_list:
            # Make sure batch stats are current before reading them.
            tinfo = TrainingInfo.load(environment, species_name)
            tinfo.update_batch_stats()

            last_generation = tinfo.current_self_play_generation()
            for gen_num in range(1, last_generation):
                gen_info = GenerationInfo.from_generation_info(
                    environment,
                    species_name,
                    gen_num,
                )
                rows.append((species_name, gen_info.cpu_seconds_to_train, gen_num))

        return rows
Ejemplo n.º 5
0
def training_stats(environment, species, generation):
    """Return batch-count and timing stats for training `generation`.

    Accumulates over batches up to (and including) the one that trained
    this generation. Generation 1 has no training cost. CPU seconds are
    not yet tracked here (placeholder, see XXX below).
    """
    training_info = TrainingInfo.load(environment, species)

    batches_counted = 0
    wall_clock_total = 0.0
    cpu_seconds_total = 0.0
    if generation > 1:
        for batch in training_info.batches:
            batches_counted += 1
            # Full batch span: self-play start through assessment end.
            wall_clock_total += batch.assessment_end_time - batch.self_play_start_time
            cpu_seconds_total += 0.0  # XXX: Update

            # Stop at the batch that produced this generation.
            if batch.generation_trained == generation:
                break

    return dict(
        num_batches_to_train=batches_counted,
        wall_clock_time_to_train=wall_clock_total,
        cpu_seconds_to_train=cpu_seconds_total,
    )
Ejemplo n.º 6
0
    def build(cls, environment, species):
        """Construct an instance from GBDT eval stats of every trained generation.

        Reads the per-generation value/policy model training-info JSON
        files and flattens their eval stats into EvalData rows.
        """
        training_info = TrainingInfo.load(environment, species)

        # Batches with a falsy generation_trained didn't produce a new bot.
        trained_generations = [
            batch.generation_trained
            for batch in training_info.batches
            if batch.generation_trained
        ]

        eval_rows = []
        for gen in trained_generations:
            for model_type in ("value", "policy"):
                model_directory = build_model_directory(environment, species, gen)
                info_path = f"{model_directory}/{model_type}_model_training_info_{gen:06d}.json"
                gbdt_info = GBDTTrainingInfo.load(info_path)
                eval_rows.extend(
                    EvalData(
                        environment=environment,
                        species=species,
                        generation=gen,
                        model_type=model_type,
                        dataset=stat.dataset,
                        metric=stat.metric,
                        iteration=stat.iteration,
                        value=stat.value,
                    )
                    for stat in gbdt_info.eval_stats
                )
        return cls(rows=eval_rows)
Ejemplo n.º 7
0
def run(
    environment,
    species_name,
    num_batches,
    num_workers=settings.SELF_PLAY_THREADS,
    adjusted_win_rate_threshold=0.50,
    num_assessment_games=200,
):
    """Run `num_batches` train/assess cycles for a species.

    Each batch: self-play a set of games, train candidate models for the
    next generation, then assess the candidate head-to-head. A candidate
    is promoted (recorded as generation_trained) only when its adjusted
    win rate meets `adjusted_win_rate_threshold`. Timing and outcome of
    every batch is finalized on `training_info`.

    :environment ~ environment name (e.g. "connect_four")
    :species_name ~ species identifier; resolved via get_species()
    :num_batches ~ how many batches to run in this invocation
    :num_workers ~ parallelism for self-play, training, and assessment
    :adjusted_win_rate_threshold ~ min draw-weighted win rate to promote
    :num_assessment_games ~ target number of head-to-head games
    """
    # Each faceoff round plays ~num_workers games, so round up to reach
    # at least num_assessment_games total.
    num_faceoff_rounds = math.ceil(
        num_assessment_games /
        num_workers)  # Will play at least num_workers per round

    training_info = TrainingInfo.load(environment, species_name)
    # Batch numbering continues from whatever was already recorded.
    final_training_batch = len(training_info.batches) + num_batches
    for _ in range(num_batches):
        current_batch = len(training_info.batches) + 1
        generation_self_play = training_info.current_self_play_generation()
        # Candidate models are trained as the next generation.
        generation_training = generation_self_play + 1

        species = get_species(species_name)

        print(f"\n\nBatch {current_batch} / {final_training_batch}")
        print(f"environment: {environment}, species: {species_name}")
        print(f"self-play generation: {generation_self_play}")

        # Ensure directories are made/etc.
        # - Not sure this actually depends on generation, but maybe it will later.
        setup_filesystem(
            environment,
            species_name,
            generation_self_play,
        )

        # Self play another batch
        games_per_batch = species.self_play_settings(
            environment, generation_self_play)["num_games"]
        print(f"\n\nSelf Play ({games_per_batch} cycles)")
        self_play_start_time = time.time()
        run_self_play(
            environment,
            species_name,
            generation_self_play,
            games_per_batch,
            current_batch,
            num_workers,
        )
        self_play_end_time = time.time()
        elapsed = round(self_play_end_time - self_play_start_time, 1)
        cycles_per_second = round(games_per_batch / elapsed, 1)
        print(f"\nSelf play finished in {elapsed} seconds")
        print(
            f"Cycles ran: {games_per_batch}, cycles per sec: {cycles_per_second}"
        )

        # Train new model
        print("\n\nTraining")
        training_start_time = time.time()
        run_model_training(
            environment,
            species_name,
            generation_training,
            current_batch,
            num_workers,
        )
        training_end_time = time.time()
        elapsed = round(training_end_time - training_start_time, 1)
        print(f"\nTrained new models in {elapsed} seconds")

        # Assess new model
        print("\n\nAssessing")
        assessment_start_time = time.time()
        contender_matchup_info = run_faceoff(
            environment,
            species_name,
            generation_training,
            num_rounds=num_faceoff_rounds,
            num_workers=num_workers,
        )
        assessment_end_time = time.time()
        elapsed = round(assessment_end_time - assessment_start_time, 1)
        print(f"\nAssessed new model in {elapsed} seconds")

        # Draws count as half a win; promote only at/above threshold.
        adjusted_win_rate = contender_matchup_info.win_rate(draw_weight=0.5)
        print("Adjusted Win Rate:", round(adjusted_win_rate, 3))
        generation_trained = None
        if adjusted_win_rate >= adjusted_win_rate_threshold:
            generation_trained = generation_training
            print("FOUND NEW BOT:", generation_trained)

        # Persist this batch's timings and outcome; generation_trained
        # stays None when the candidate failed assessment.
        training_info.finalize_batch(
            self_play_start_time=self_play_start_time,
            self_play_end_time=self_play_end_time,
            training_start_time=training_start_time,
            training_end_time=training_end_time,
            assessment_start_time=assessment_start_time,
            assessment_end_time=assessment_end_time,
            generation_self_play=generation_self_play,
            generation_trained=generation_trained,
            assessed_awr=adjusted_win_rate,
        )