def update_population(population_sorted: list, evolution_id: int, gpus) -> int:
    print("Updating population...")
    print("from evo " + str(evolution_id))

    # Finish the current evolution and record the update timestamp
    models.update_evolution_time(evolution_id, "finished_at_update")
    old_evolution = models.finish_evolution(evolution_id)

    new_evo_id = -1
    if old_evolution is not None:
        # Create the next evolution, advancing the training-step window
        iteration = int(old_evolution['iteration']) + 1
        new_evo_id = models.create_evolution(old_evolution['network'], iteration,
                                             iteration * evolution_size,
                                             (iteration + 1) * evolution_size)
    else:
        print("not found")

    # Pair each top-half agent with its bottom-half counterpart
    for i in range(len(population_sorted) // 2):
        print(i)
        agent_good = models.get_agent(population_sorted[i][0])
        agent_bad = models.get_agent(
            population_sorted[len(population_sorted) - 1 - i][0])
        print(agent_good)
        print(agent_bad)

        # Explore: perturb the top performer's learning rate for the loser's successor
        epsilon = get_epsilon(float(agent_good["lr_values"]))
        new_lr = float(agent_good["lr_values"]) + epsilon

        # Exploit: the top performer keeps its learning rate
        new_agent_good_id = models.create_agent(
            new_evo_id, agent_good["uuid"], float(agent_good["lr_values"]),
            gpus[agent_good["uuid"] % len(gpus)])
        new_agent_bad_id = models.create_agent(
            new_evo_id, agent_bad["uuid"], new_lr,
            # was agent_good["uuid"]; round-robin by the bad agent's own uuid
            gpus[agent_bad["uuid"] % len(gpus)])

        # The loser's successor starts from the winner's weights
        copy_weights(new_agent_good_id, new_agent_bad_id, new_evo_id)
        util.create_config_file(new_agent_bad_id, config_dir, pbt_input_base_dir)
        util.create_config_file(new_agent_good_id, config_dir, pbt_input_base_dir)

    return new_evo_id
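# Hedged sketch of the get_epsilon helper called above; its real definition is
# not part of this excerpt. In PBT, the explore step typically perturbs a
# hyperparameter by a small random amount, so one plausible form is a signed
# offset of up to +/-20% of the current learning rate (the 0.2 factor is an
# assumption, not taken from the source).
import random

def get_epsilon(lr: float) -> float:
    # Signed perturbation proportional to the current value
    return lr * random.uniform(-0.2, 0.2)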
def create_config_file(agent_id: int, config_dir, pbt_input_base_dir):
    agent = models.get_agent(agent_id)
    evolution = models.get_evolution(agent["evolution"])
    evolution_size = int(evolution["steps_stop"] - evolution["steps_start"])
    network = models.get_network(evolution["network"])

    # Start from the shared base config and override per-agent values
    with open(config_dir + "base_agent_config.yaml") as base_config:
        cfg = yaml.safe_load(base_config)

    # Write the agent-specific config
    with open(get_config_file_name(agent["uuid"], config_dir), 'w') as config_file:
        name = str(network["name"]) + "_" + str(agent["uuid"])
        cfg['gpu'] = int(agent['gpu'])
        cfg['name'] = str(name)
        cfg['training']['lr_values'] = [float(agent["lr_values"])]
        cfg['training']['lr_boundaries'] = []
        cfg['training']['total_steps'] = evolution_size
        cfg['training']['checkpoint_steps'] = evolution_size
        cfg['training']['test_steps'] = 1000
        cfg['model'] = {
            'filters': int(network["filters"]),
            'policy_channels': int(network["policy_channels"]),
            'residual_blocks': int(network["residual_blocks"]),
            'se_ratio': int(network["se_ratio"])
        }
        cfg['dataset'] = {
            'input_train': get_pbt_train_path(evolution["iteration"],
                                              pbt_input_base_dir),
            'input_test': get_pbt_test_path(evolution["iteration"],
                                            pbt_input_base_dir),
            'num_chunks': evolution_size * 25,
            # 20 of every 25 chunks go to training, the rest to testing
            'train_ratio': 20 / 25
        }
        yaml.dump(cfg, config_file)
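# Hedged sketches of the path helpers referenced above (get_config_file_name,
# get_pbt_train_path, get_pbt_test_path). Their real definitions are not in
# this excerpt, so the naming scheme below is an assumption inferred from how
# they are called.

def get_config_file_name(uuid: int, config_dir: str) -> str:
    # One YAML config per agent, keyed by the agent's uuid
    return config_dir + "agent_" + str(uuid) + ".yaml"

def get_pbt_train_path(iteration: int, pbt_input_base_dir: str) -> str:
    # Training chunks prepared per evolution iteration by prepare_training_data
    return pbt_input_base_dir + "train_" + str(iteration) + "/"

def get_pbt_test_path(iteration: int, pbt_input_base_dir: str) -> str:
    return pbt_input_base_dir + "test_" + str(iteration) + "/"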
async def get(self):
    """
    description: Get agent info
    params: uuid
    """
    try:
        # The uuid is read from the request's JSON body
        payload = await self.request.json()
        agent_uuid = payload["uuid"]
        data = get_agent(agent_uuid)
        print(
            f"uuid: {data.uuid} agentip: {data.agentip} created_date: {data.agentdate}"
        )
        response_obj = {
            "status": "success",
            "message": f"uuid {data.uuid} agentip {data.agentip} created_date {data.agentdate}",
        }
        return web.Response(text=json.dumps(response_obj), status=200)
    except Exception as error:
        return web.Response(text=f"No Record Found {error}", status=404)
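# Hedged sketch of calling the handler above with aiohttp's client. The host,
# port, route path "/agent", and the example uuid are assumptions for
# illustration; only the request shape (uuid in the JSON body) comes from the
# handler itself.
import asyncio
import aiohttp

async def call_agent_endpoint():
    async with aiohttp.ClientSession() as session:
        async with session.get("http://localhost:8080/agent",
                               json={"uuid": 42}) as resp:
            print(resp.status, await resp.text())

# asyncio.run(call_agent_endpoint())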
def get_base_network_path(agent_id: int, network_dir, name, steps: int) -> str:
    # Networks live under <network_dir>/<name>_<uuid>/<name>_<uuid>-<steps>
    agent = models.get_agent(agent_id)
    base = name + "_" + str(agent["uuid"])
    return network_dir + base + "/" + base + "-" + str(steps)
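# Illustrative call (all values assumed): for an agent whose uuid resolves to
# 7, this returns the checkpoint prefix
#   networks/pbt-64x6_7/pbt-64x6_7-32000
checkpoint = get_base_network_path(7, "networks/", "pbt-64x6", 32000)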
def main(cmd):
    global pbt_config_file
    pbt_config_file = cmd.cfg.read()
    cfg = yaml.safe_load(pbt_config_file)
    print(yaml.dump(cfg, default_flow_style=False))

    if cmd.setup:
        models.setup_database()

    # PBT details
    evolutions = cfg["pbt"]["evolutions"]
    population_size = cfg["pbt"]["population_size"]
    global config_dir
    config_dir = cfg["pbt"]["config_directory"]
    global evolution_size
    evolution_size = cfg["pbt"]["evolution_size"]
    global network_dir
    network_dir = cfg["pbt"]["network_path"]
    global lc0_path
    lc0_path = cfg["pbt"]["lc0_path"]
    global pbt_input_base_dir
    pbt_input_base_dir = cfg["pbt"]["input_base"]
    evaluation_games = int(cfg["pbt"]["evaluation_games"])
    gpus = cfg['gpus']

    # Model details
    filters = cfg['model']['filters']
    residual_blocks = cfg['model']['residual_blocks']
    se_ratio = cfg['model']['se_ratio']
    policy_channels = cfg['model']['policy_channels']

    # Append the model settings to the prefix to name the current network
    prefix = cfg['prefix']
    global name
    name = prefix + "-" + str(filters) + "x" + str(residual_blocks)

    # Pre-split the data into per-evolution chunks; otherwise every evolution
    # would reload the whole dataset (15 GB, 2,500,000 games), which costs
    # significant time.
    if cmd.prep_data:
        util.prepare_training_data(evolutions, evolution_size,
                                   cfg["dataset"]["input_train"],
                                   cfg["dataset"]["input_test"],
                                   pbt_input_base_dir)

    # Create a new network configuration if the name differs from every
    # existing network
    network_id = models.create_network(name, filters, residual_blocks,
                                       se_ratio, policy_channels)

    # Clean up old agent configs
    if network_id != -1:
        util.delete_agent_config_files(config_dir)
    else:
        network_id = models.get_network_by_name(name)["id"]

    evolution = models.get_last_evolution(network_id)
    if evolution is None:
        evolution_id = models.create_evolution(network_id, 0,
                                               0 * evolution_size,
                                               1 * evolution_size)
    else:
        evolution_id = evolution['id']

    # Create the initial population for the created evolution
    init_population(evolution_id, name, population_size, gpus)

    best_agent_id = -1
    for _ in range(evolutions):
        # Train the population
        train(evolution_id, gpus)
        # Play all the matches to evaluate the strength of the population
        play_matches(evolution_id, evaluation_games)
        # Build the ranking used to update the population
        population, best_agent_id = build_ranking(evolution_id)
        # Update the population. Exploitation: keep the top performers.
        # Exploration: copy the top performers' values to the losers and
        # perturb them.
        evolution_id = update_population(population, evolution_id, gpus)

    agent = models.get_agent(best_agent_id)
    print("Best agent was ", agent["uuid"])
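# Hedged sketch of a CLI entry point matching the attributes main() reads from
# cmd (cmd.cfg, cmd.setup, cmd.prep_data). The flag names and help strings are
# assumptions; only the attribute names come from the source.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Run PBT training")
    parser.add_argument("--cfg", type=argparse.FileType('r'), required=True,
                        help="Path to the PBT yaml config")
    parser.add_argument("--setup", action="store_true",
                        help="Initialize the database before running")
    parser.add_argument("--prep-data", action="store_true",
                        help="Split the full dataset into per-evolution chunks")
    main(parser.parse_args())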