def execute(self, agent: Agent, state: SimState) -> None:
    if agent.state() is not AgentState.INFECTIVE:
        return
    if np.random.random() < state.remove_prob():
        agent.set_state(AgentState.REMOVED)
    else:
        agent.update_sick_days()
def main(config_file_path):
    config_parser = get_config_parser(config_file_path)
    config = get_config(config_parser)
    logger = get_logger(config)
    with tf.Session() as sess:
        processor = Processor(config, logger)
        env = Environment(logger, config, processor.price_blocks, processor.timestamp_blocks)
        agent = Agent(sess, logger, config, env)
        agent.train()
        agent.summary_writer.close()
def move_agent(self, agent: Agent, state: SimState) -> None:
    grid = agent.grid()
    if grid.is_fully_occupied():
        return
    if agent.state() is AgentState.DEAD or agent.is_quarantined():
        return  # We don't want zombies
    move_probability = np.random.randint(low=0, high=100)
    if move_probability <= state.get_mixing_value_m() * 100:
        new_grid_pos = get_free_pos(grid)
        old_grid_pos = agent.get_pos()
        grid.move_agent(old_grid_pos, new_grid_pos)
def play_game(p1: Agent, p2: Agent, env: Environment, draw=False):
    print("play game!")
    current_player = None
    while not env.game_over():
        # alternate between players
        if current_player == p1:
            current_player = p2
        else:
            current_player = p1
        # draw the board before the user who wants to see it makes a move
        if draw:
            if draw == 1 and current_player == p1:
                env.draw_board()
            if draw == 2 and current_player == p2:
                env.draw_board()
        # make an action
        current_player.take_action(env)
        # update state history
        state = env.get_state()
        p1.update_state_history(state)
        p2.update_state_history(state)
    if draw:
        env.draw_board()
    # do the value function update
    p1.update(env)
    p2.update(env)
def main(config_file_path):
    config_parser = get_config_parser(config_file_path)
    config = get_config(config_parser)
    logger = get_logger(config)
    with tf.Session() as sess:
        processor = Processor(config, logger)
        env = Environment(logger, config, processor.diff_blocks, processor.price_blocks,
                          processor.timestamp_blocks)
        agent = Agent(sess, logger, config, env)
        agent.train()
        agent.summary_writer.close()
def execute_all(agent: Agent):
    try:
        func_list = filtered[agent.state()]
        for func in func_list:
            func[0].execute(agent, state)
    except KeyError:
        pass
def __init__(self):
    LOG.info('init master')
    self.__loop_count = 0
    self.__train_step = 0
    self.__args = self._set_args()
    LOG.info("the args is {}".format(self.__args))
    self.rainbow = Agent(self.__args, ACTION_SPACE)
    self.rainbow.train()
    self.__count_list = list()
    self.__queue_list = list()
    self.__memory_list = list()
    for _ in range(MAX_WORKER_COUNT):
        self.__count_list.append(0)
        self.__queue_list.append(queue.Queue())
        self.__memory_list.append(ReplayMemory(self.__args, self.__args.memory_capacity))
    self.__priority_weight_increase = (1 - self.__args.priority_weight) / (
        self.__args.T_max - self.__args.learn_start)
def agent_form():
    agent = Agent()
    agent.name = raw_input("Agent name:\n")
    agent.address = raw_input("Agent address:\n")
    agent.telephone = raw_input("Agent telephone:\n")
    agent.category = raw_input("Category the agent belongs to:\n")
    agent_service.add_agent(agent)
def move_agent(self, agent: Agent, state: SimState) -> None:
    grid = agent.grid()
    if grid.is_fully_occupied():
        return
    if agent.state() is AgentState.DEAD or agent.is_quarantined():
        return  # We don't want zombies
    move_probability = np.random.randint(low=0, high=100)
    if move_probability <= state.get_mixing_value_m() * 100:
        radius = state.movement_limit_radius()
        if state.movement_limit_high_distances_are_uncommon():
            # Recalculate radius -> lower radius is more probable
            mean = 0
            standard_deviation = radius / 3
            radius = min(
                max(1, int(np.round(np.abs(norm.rvs(size=1, loc=mean, scale=standard_deviation)[0])))),
                radius)
        try:
            new_grid_pos = get_free_pos_limited(
                grid,
                pos=agent.get_pos(),
                radius=radius,
                metric=state.movement_limit_metric(),
            )
            old_grid_pos = agent.get_pos()
            grid.move_agent(old_grid_pos, new_grid_pos)
        finally:
            return
def main(config_file_path):
    config_parser = get_config_parser(config_file_path)
    config = get_config(config_parser)
    logger = get_logger(config)
    with tf.Session() as sess:
        preprocessor = Preprocessor(config, logger)
        env = Environment(logger, config, preprocessor.price_blocks)
        agent = Agent(sess, logger, config, env)
        summary_writer = tf.summary.FileWriter(config[TENSORBOARD_LOG_DIR])
        summary_writer.add_graph(sess.graph)
        summary_writer.close()
def execute(self, agent: Agent, state: SimState) -> None:
    if agent.state() is not AgentState.INCUBATION:
        return
    if agent.incubation_days() == state.incubation_period():
        agent.set_state(AgentState.INFECTIVE)
    else:
        agent.update_incubation_days()
def spawn_agent(self, grid_pos: GridPos, agent_state: AgentState) -> None:
    """
    Create an agent with the given state at the given position, if it is not already occupied.
    Author: Beil Benedikt
    :param grid_pos:
    :param agent_state:
    :return: Nothing
    """
    if self.is_occupied(grid_pos):
        raise ValueError("This field is already occupied. No agent can be created here.")
    self.set_agent(Agent(self.__scheduler, grid_pos, agent_state, self), grid_pos)
def main(config_path, train_mode=True, weights_path=None):
    """Load the environment, create an agent, and train it."""
    config = get_config(config_path)
    env, brain_name = load_environment()
    state_size, action_size = get_env_metadata(env, brain_name)
    agent = Agent(state_size=state_size, action_size=action_size, config=config, random_seed=10)
    scores = ddpg(env, brain_name, agent, config, train_mode, weights_path)
    env.close()
    return scores
def execute(self, agent: Agent, state: SimState) -> None:
    """Basically the same method as in the DefaultStatusStrategy, but adding the lethality check.

    :param agent: Agent to update
    :param state: State the simulation is in
    """
    if agent.state() is not AgentState.INFECTIVE:
        return
    if np.random.random() < state.remove_prob():
        if np.random.random() < state.lethality():
            agent.set_state(AgentState.DEAD)
        else:
            agent.set_state(AgentState.IMMUNE)
    else:
        agent.update_sick_days()
def execute(self, agent: Agent, state: SimState) -> None:
    if agent.is_quarantined():
        return
    if agent.state() is AgentState.INFECTIVE or agent.state() is AgentState.INCUBATION:
        infection_radius = state.infection_env_radius()
        infection_env_size = infection_radius * 2 + 1
        size = agent.grid().get_size()
        check_list = list()
        grid_pos = agent.get_pos()
        x = grid_pos.row()
        y = grid_pos.col()
        if state.infection_env_metric() == EnvironmentMetric.MANHATTAN:
            for r in range(0, infection_env_size):
                offset = abs(infection_radius - r)
                check_row = y - infection_radius + r
                for c in range(offset, infection_env_size - offset):
                    check_column = x - infection_radius + c
                    check_list.append((check_column, check_row))
        elif state.infection_env_metric() == EnvironmentMetric.EUCLIDEAN:
            for r in range(0, infection_env_size):
                check_row = y - infection_radius + r
                for c in range(0, infection_env_size):
                    check_column = x - infection_radius + c
                    distance = np.round(np.sqrt((infection_radius - r) ** 2 + (infection_radius - c) ** 2))
                    if 0 < distance <= infection_radius:
                        check_list.append((check_column, check_row))
        else:
            raise ValueError('Metric not implemented')

        # Keep only positions that lie inside the grid
        check_list = list(filter(lambda pos: 0 <= pos[0] < size and 0 <= pos[1] < size, check_list))

        for check_pos in check_list:
            to_check = agent.grid().get_agent(GridPos(np.uint(check_pos[0]), np.uint(check_pos[1])))
            if to_check is not None and to_check.state() is AgentState.SUSCEPTIBLE:
                if np.random.random() < state.infection_prob():
                    if state.incubation_period_enabled():
                        to_check.set_state(AgentState.INCUBATION)
                    else:
                        to_check.set_state(AgentState.INFECTIVE)
                    agent.update_infected_count()
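# Standalone sanity-check sketch (not part of the simulation code above): enumerate the same
# Manhattan-neighbourhood offsets that the loop above produces, for an assumed radius of 2
# centred at (0, 0). It only illustrates which cells get checked for infection.
radius = 2
env_size = radius * 2 + 1
offsets = []
for r in range(env_size):
    offset = abs(radius - r)
    row = -radius + r
    for c in range(offset, env_size - offset):
        col = -radius + c
        offsets.append((col, row))
print(offsets)  # diamond of cells with |col| + |row| <= 2, including the centre cell itself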
def generate_quick_agent_observation(reduce_A=True, num_neighbours=2, reduce_A_policies=True,
                                     reduce_A_inference=True):
    idea_levels = 2  # the levels of beliefs that agents can have about the idea (e.g. 'True' vs. 'False', in case `idea_levels` == 2)
    num_H = 2  # the number of hashtags, or observations that can shed light on the idea

    h_idea_mapping = np.eye(num_H)
    h_idea_mapping[:, 0] = utils.softmax(h_idea_mapping[:, 0] * 1.0)
    h_idea_mapping[:, 1] = utils.softmax(h_idea_mapping[:, 1] * 1.0)

    agent_params = {
        "neighbour_params": {
            "ecb_precisions": np.array([[8.0, 8.0], [8.0, 8.0]]),
            "num_neighbours": num_neighbours,
            "env_determinism": 9.0,
            "belief_determinism": np.array([7.0, 7.0])
        },
        "idea_mapping_params": {
            "num_H": num_H,
            "idea_levels": idea_levels,
            "h_idea_mapping": h_idea_mapping
        },
        "policy_params": {
            "initial_action": [np.random.randint(num_H), 0],
            "belief2tweet_mapping": np.eye(num_H),
            "E_lr": 0.7
        },
        "C_params": {
            "preference_shape": None,
            "cohesion_exp": None,
            "cohesion_temp": None
        }
    }

    observation = np.zeros(num_neighbours + 3)
    observation[2] = 1

    agent = Agent(**agent_params, reduce_A=reduce_A, reduce_A_policies=reduce_A_policies,
                  reduce_A_inferennce=reduce_A_inference)

    return agent, observation
import pyglet
from pyglet.window import key
from model.agent import Agent
from game.view import View
import numpy as np
import sys

# define constants
_width, _height = 300, 400

if __name__ == '__main__':
    _screen = View(_width, _height, 'Tetris')
    _board = _screen._board
    if len(sys.argv) == 1:
        _agent = Agent()
        _agent.run(_board)
    else:
        model = sys.argv[1]
        _screen.use_trained_agent(model)
    pyglet.app.run()
class Master:
    """ master, train AI model """

    def __init__(self):
        LOG.info('init master')
        self.__loop_count = 0
        self.__train_step = 0
        self.__args = self._set_args()
        LOG.info("the args is {}".format(self.__args))
        self.rainbow = Agent(self.__args, ACTION_SPACE)
        self.rainbow.train()
        self.__count_list = list()
        self.__queue_list = list()
        self.__memory_list = list()
        for _ in range(MAX_WORKER_COUNT):
            self.__count_list.append(0)
            self.__queue_list.append(queue.Queue())
            self.__memory_list.append(ReplayMemory(self.__args, self.__args.memory_capacity))
        self.__priority_weight_increase = (1 - self.__args.priority_weight) / (
            self.__args.T_max - self.__args.learn_start)

    def send_transition(self, index, state, action_index, reward, done):
        self.__queue_list[index].put((state, action_index, reward, done))
        return

    def __get_action_data(self, idx):
        while True:
            if not self.__queue_list[idx].empty():
                (state, action_index, reward, done) = self.__queue_list[idx].get()
                self.__memory_list[idx].append(state, action_index, reward, done)
                self.__count_list[idx] += 1
                return True
            return False

    def __get_train_data(self):
        index_list = list()
        for idx in range(MAX_WORKER_COUNT):
            if self.__get_action_data(idx) is True:
                index_list.append(idx)
        return index_list

    def __save_train_model(self):
        if self.__train_step % 2e4 == 0:
            st = time.time()
            self.rainbow.save('./Model/', name='model_{}.pth'.format(self.__train_step))
            et = time.time()
            cost_time = (et - st) * 1000
            LOG.info('saving rainbow costs {} ms at train step {}'.format(cost_time, self.__train_step))

    def __print_progress_log(self, start_time):
        if self.__loop_count % LOG_FREQUENCY == 0:
            cost_time = (time.time() - start_time) * 1000
            LOG.info('train rainbow is {} ms at loop count {}'.format(cost_time, self.__loop_count))

    def train(self):
        start_time = time.time()
        index_list = self.__get_train_data()
        if len(index_list) == 0:
            return
        for _ in range(3):
            i = np.random.randint(len(index_list))
            idx = index_list[i]
            if self.__count_list[idx] >= self.__args.learn_start:
                # Anneal importance sampling weight β to 1
                self.__memory_list[idx].priority_weight = min(
                    self.__memory_list[idx].priority_weight + self.__priority_weight_increase, 1)
                if self.__loop_count % self.__args.replay_frequency == 0:
                    start_time = time.time()
                    # Train with n-step distributional double-Q learning
                    self.rainbow.learn(self.__memory_list[idx])
                    self.__print_progress_log(start_time)
                    self.__save_train_model()
                    self.__train_step += 1
                # Update target network
                if self.__loop_count % self.__args.target_update == 0:
                    # LOG.info('master updates target net at train step {}'.format(self.__trainStep))
                    self.rainbow.update_target_net()
        if self.__loop_count % LOG_FREQUENCY == 0:
            LOG.info('train time is {} ms at loop count {}'.format((time.time() - start_time) * 1000,
                                                                   self.__loop_count))
        self.__loop_count += 1
        return

    # pylint: disable=R0201
    def _set_args(self):
        parser = argparse.ArgumentParser(description='Rainbow')
        parser.add_argument('--enable-cuda', action='store_true', help='Enable CUDA')
        parser.add_argument('--enable-cudnn', action='store_true', help='Enable cuDNN')
        parser.add_argument('--T-max', type=int, default=int(50e6), metavar='STEPS',
                            help='Number of training steps (4x number of frames)')
        parser.add_argument('--architecture', type=str, default='canonical',
                            choices=['canonical', 'data-efficient'], metavar='ARCH',
                            help='Network architecture')
        parser.add_argument('--history-length', type=int, default=4, metavar='T',
                            help='Number of consecutive states processed')
        parser.add_argument('--hidden-size', type=int, default=512, metavar='SIZE',
                            help='Network hidden size')
        parser.add_argument('--noisy-std', type=float, default=0.1, metavar='σ',
                            help='Initial standard deviation of noisy linear layers')
        parser.add_argument('--atoms', type=int, default=51, metavar='C',
                            help='Discretised size of value distribution')
        parser.add_argument('--V-min', type=float, default=-10, metavar='V',
                            help='Minimum of value distribution support')
        parser.add_argument('--V-max', type=float, default=10, metavar='V',
                            help='Maximum of value distribution support')
        parser.add_argument('--model', type=str, metavar='PARAMS',
                            help='Pretrained model (state dict)')
        parser.add_argument('--memory-capacity', type=int, default=int(40000), metavar='CAPACITY',
                            help='Experience replay memory capacity')
        parser.add_argument('--replay-frequency', type=int, default=1, metavar='k',
                            help='Frequency of sampling from memory')
        parser.add_argument('--priority-exponent', type=float, default=0.5, metavar='ω',
                            help='Prioritised experience replay exponent (originally denoted α)')
        parser.add_argument('--priority-weight', type=float, default=0.4, metavar='β',
                            help='Initial prioritised experience replay importance sampling weight')
        parser.add_argument('--multi-step', type=int, default=3, metavar='n',
                            help='Number of steps for multi-step return')
        parser.add_argument('--discount', type=float, default=0.99, metavar='γ',
                            help='Discount factor')
        parser.add_argument('--target-update', type=int, default=int(1e3), metavar='τ',
                            help='Number of steps after which to update target network')
        parser.add_argument('--learning-rate', type=float, default=1e-4, metavar='η',
                            help='Learning rate')
        parser.add_argument('--adam-eps', type=float, default=1.5e-4, metavar='ε',
                            help='Adam epsilon')
        parser.add_argument('--batch-size', type=int, default=32, metavar='SIZE',
                            help='Batch size')
        parser.add_argument('--learn-start', type=int, default=int(400), metavar='STEPS',
                            help='Number of steps before starting training')

        # Setup
        args = parser.parse_args()

        # set random seed
        np.random.seed(123)
        torch.manual_seed(np.random.randint(1, 10000))
        args.enable_cuda = True
        args.enable_cudnn = True

        # set torch device
        if torch.cuda.is_available() and args.enable_cuda:
            args.device = torch.device('cuda')
            torch.cuda.manual_seed(np.random.randint(1, 10000))
            torch.backends.cudnn.enabled = args.enable_cudnn
        else:
            args.device = torch.device('cpu')
        return args
import pytorch_lightning as pl

from data.rufspiel_lange_karte import RufspielLangeKarteDataModule
from model.agent import Agent

trainer = pl.Trainer()
model = Agent(embedding_dim=16)
data = RufspielLangeKarteDataModule('games/rufspiel_lange_karte')
trainer.fit(model, data)
import pyglet
from model.agent import Agent
from game.view import View
import sys

# define constants
WIDTH, HEIGHT = 300, 400

if __name__ == '__main__':
    screen = View(WIDTH, HEIGHT, 'Tetris')
    board = screen.board
    if len(sys.argv) == 2:
        if sys.argv[1] == 'train':
            _agent = Agent()
            _agent.run(board)
    elif len(sys.argv) >= 3:
        model = sys.argv[2]
        if sys.argv[1] == 'play':
            screen.use_trained_agent(model)
        if sys.argv[1] == 'train':
            _agent = Agent(model_num=model)
            _agent.run(board)
    pyglet.app.run()
    return agent_params

# %%
fig, axs = plt.subplots(2, 2)
# plt.figure(figsize=(12,8))
fig.set_figheight(20)
fig.set_figwidth(20)

env_d = 8
c = 0
for i, ecb in enumerate(np.linspace(3, 9, 2)):
    print("ECB")
    print(ecb)
    for j, belief_d in enumerate(np.linspace(3, 9, 2)):
        print("BELIEF D")
        print(belief_d)
        agent_params = agent_p(belief_d=belief_d, env_d=env_d, ecb=ecb)
        agent = Agent(**agent_params, reduce_A=True)

        T = 100

        neighbour_0_tweets = 1 * np.ones(T)  # neighbour 1 tweets a bunch of Hashtag 1's
        neighbour_1_tweets = 2 * np.ones(T)  # neighbour 2 tweets a bunch of Hashtag 2's

        my_first_neighbour = 0
        my_first_tweet = 0
        if my_first_neighbour == 0:
            observation = (my_first_tweet, int(neighbour_0_tweets[0]), 0, my_first_neighbour)
        elif my_first_neighbour == 1:
            observation = (my_first_tweet, 0, int(neighbour_1_tweets[0]), my_first_neighbour)

        # history of my own posterior over the truth/falsity of the idea
        history_of_idea_beliefs = np.zeros((T, idea_levels))
        # history of my posterior beliefs about the beliefs of my two neighbours about the truth/falsity of the idea
        history_of_beliefs_about_other = np.zeros((T, agent.genmodel.num_states[1], num_neighbours))
import numpy as np

ENV_NAME = 'BreakoutDeterministic-v4'

# Create environment
game_wrapper = GameWrapper(MAX_NOOP_STEPS)
print("The environment has the following {} actions: {}".format(
    game_wrapper.env.action_space.n,
    game_wrapper.env.unwrapped.get_action_meanings()))

# Create agent
MAIN_DQN = build_q_network(LEARNING_RATE, input_shape=INPUT_SHAPE)
TARGET_DQN = build_q_network(input_shape=INPUT_SHAPE)
replay_buffer = ReplayBuffer(size=MEM_SIZE, input_shape=INPUT_SHAPE)
agent = Agent(MAIN_DQN, TARGET_DQN, replay_buffer, input_shape=INPUT_SHAPE)

print('Loading model...')
# We only want to load the replay buffer when resuming training
agent.load('./saved_models/save-02502048/', load_replay_buffer=False)
print('Loaded.')

terminal = True
eval_rewards = []
evaluate_frame_number = 0

for frame in range(EVAL_LENGTH):
    if terminal:
        game_wrapper.reset(evaluation=True)
        life_lost = True
        episode_reward_sum = 0
def main_function(location, num_of_panels, num_of_turbines, num_of_batteries):
    # Get arguments
    if len(sys.argv) > 1:
        episodes_num = int(sys.argv[1])
    else:
        episodes_num = 2000

    # House dependent parameters
    # location = 'California'
    # num_of_panels = 30  # Number of 250-watt solar panels
    # num_of_turbines = 2  # Number of 400 KW wind turbines
    # num_of_batteries = 2
    house = House(location, num_of_panels, num_of_turbines, num_of_batteries)

    # Main dependent parameters
    num_of_months = 12
    num_of_days = 30  # number of days per episode
    num_time_states = 4
    epsilon = 0.5
    alpha = 0.8

    # Initiate Agent
    agent = Agent()
    Q = agent.initialize_Q()
    avg_Q_old = np.mean(Q)

    # For printing and plots
    print_iteration = 50
    # ARMAN: What is a print_flag?
    print_flag = False
    # ARMAN: Needs comments
    rList = []
    solarList = []
    windList = []
    ffList = []
    battstorageList = []
    battusedList = []
    energyList = []
    solarSubList = []
    windSubList = []
    ffSubList = []
    battstorageSubList = []
    battusedSubList = []
    final_itr = []
    final_list = []
    final_solar = []
    solar_dict = {0: [], 1: [], 2: [], 3: []}
    final_wind = []
    wind_dict = {0: [], 1: [], 2: [], 3: []}
    final_ff = []
    ff_dict = {0: [], 1: [], 2: [], 3: []}
    final_battery = []
    battery_dict = {0: [], 1: [], 2: [], 3: []}

    ## for realtime plotting
    # fig, ax = plt.subplots()
    # ax.set_ylabel("Energy (kWh)")
    # ax.set_title("Evolution of Energy Use")

    for itr in range(episodes_num):
        if itr % print_iteration == 0:
            print_flag = True

        # The house stays constant for every episode
        env = EnergyEnvironment(house)
        cur_state = env.state
        total_reward = 0

        solar_avg = 0
        wind_avg = 0
        ff_avg = 0
        batt_storage_avg = 0
        batt_used_avg = 0

        # for month in range(num_of_months):
        #     env.state[env.month_index] = month
        for day in range(num_of_days):
            total_solar_energy = 0
            total_wind_energy = 0
            total_grid_energy = 0
            total_battery_used = 0
            for i in range(num_time_states):
                action, cur_state_index, action_index = agent.get_action(cur_state, Q, epsilon)
                reward, next_state = env.step(action, cur_state)
                Q = agent.get_Q(action, cur_state, Q, epsilon, cur_state_index, action_index,
                                reward, alpha)
                cur_state = next_state
                total_reward += reward

                # calculate total
                total_solar_energy += env.solar_energy
                total_wind_energy += env.wind_energy
                total_grid_energy += env.grid_energy
                total_battery_used += env.battery_used

                if itr == (episodes_num - 1):
                    solar_dict[i].append(env.solar_energy)
                    wind_dict[i].append(env.wind_energy)
                    ff_dict[i].append(env.grid_energy)
                    battery_dict[i].append(env.battery_used)

            # store how much is stored in the battery at the end of each day
            total_battery_stored = env.battery_energy

            # save total daily energy produced from different sources
            solarSubList.append(total_solar_energy)
            windSubList.append(total_wind_energy)
            ffSubList.append(total_grid_energy)
            battstorageSubList.append(total_battery_stored)
            battusedSubList.append(total_battery_used)

        solar_avg = np.mean(solarSubList)
        wind_avg = np.mean(windSubList)
        ff_avg = np.mean(ffSubList)
        batt_storage_avg = np.mean(battstorageSubList)
        batt_used_avg = np.mean(battusedSubList)

        if print_flag:
            avg_Q_new = np.mean(Q)
            avg_Q_change = abs(avg_Q_new - avg_Q_old)
            utils.print_info(itr, env, solar_avg, wind_avg, ff_avg, batt_storage_avg,
                             batt_used_avg, avg_Q_change)
            avg_Q_old = avg_Q_new

            solarList.append(solar_avg)
            windList.append(wind_avg)
            ffList.append(ff_avg)
            battstorageList.append(batt_storage_avg)
            battusedList.append(np.mean(batt_used_avg))

            # plt.ion()
            # plots.real_time_plot([[solar_avg], [wind_avg], [ff_avg],
            #                       [batt_storage_avg], [batt_used_avg]],
            #                      colors=['b', 'g', 'r', 'purple', 'gray'],
            #                      legends=["Solar Energy", "Wind Energy", "Fossil Fuel Energy",
            #                               "Battery Storage", "Battery Usage"], ax=ax)

            solarSubList = []
            windSubList = []
            ffSubList = []
            battstorageSubList = []
            battusedSubList = []
            print_flag = False

        # total reward per episode appended for learning curve visualization
        rList.append(total_reward)

        # decrease exploration factor by a little bit every episode
        epsilon = max(0, epsilon - 0.0005)
        alpha = max(0, alpha - 0.0005)

    # plt.close()
    print("Score over time: " + str(sum(rList) / episodes_num))
    print("Q-values:", Q)
    plots.plot_learning_curve(rList)

    for i in range(num_time_states):
        final_solar.append(np.mean(solar_dict[i]))
        final_wind.append(np.mean(wind_dict[i]))
        final_ff.append(np.mean(ff_dict[i]))
        final_battery.append(np.mean(battery_dict[i]))

    energyList.append(solarList)
    energyList.append(windList)
    energyList.append(ffList)
    # energyList.append(battstorageList)
    energyList.append(battusedList)

    final_itr.append(final_solar)
    final_itr.append(final_wind)
    final_itr.append(final_ff)
    final_itr.append(final_battery)

    # plots.multiBarPlot_final(list(range(4)), final_itr, colors=['b', 'g', 'r', 'purple', 'gray'],
    #                          ylabel="Energy (kWh)", title="Final Iteration of Energy Use",
    #                          legends=["Solar Energy", "Wind Energy", "Fossil Fuel Energy",
    #                                   "Battery Storage", "Battery Usage"])
    #
    # plots.multiBarPlot(list(range(len(solarList))), energyList, colors=['b', 'g', 'r', 'purple', 'gray'],
    #                    ylabel="Energy (kWh)", title="Evolution of Energy Use",
    #                    legends=["Solar Energy", "Wind Energy", "Fossil Fuel Energy",
    #                             "Battery Storage", "Battery Usage"])

    return list(range(len(solarList))), energyList, list(range(len(final_solar))), final_itr, \
        list(range(len(rList))), rList
def execute(self, agent: Agent, state: SimState) -> None:
    """Updates the agent's 'vaccine' status before executing other checks."""
    if agent.state() == AgentState.SUSCEPTIBLE and self.days == state.vaccine_time() \
            and np.random.random() < state.vaccine_share():
        agent.set_state(AgentState.IMMUNE)
# TODO: Move this to another module
# Create or load the agent
if LOAD_FROM is None:
    frame_number = 0
    rewards = []
    loss_list = []

    # Build main and target networks
    MAIN_DQN = build_q_network(LEARNING_RATE, input_shape=INPUT_SHAPE)
    TARGET_DQN = build_q_network(input_shape=INPUT_SHAPE)

    replay_buffer = ReplayBuffer(size=MEM_SIZE, input_shape=INPUT_SHAPE)
    agent = Agent(MAIN_DQN, TARGET_DQN, replay_buffer, input_shape=INPUT_SHAPE, batch_size=BATCH_SIZE)
else:
    # TODO: LOADING IS A LITTLE BROKEN AT THE MOMENT!
    # Load the agent instead
    print('Loading from', LOAD_FROM)
    meta = agent.load(LOAD_FROM, LOAD_REPLAY_BUFFER)

    # Apply information loaded from meta
    frame_number = meta['frame_number']
    rewards = meta['rewards']
    loss_list = meta['loss_list']
    print('Loaded')
def run_Game(model, env_name, lifes, episodes):
    if model == 'DQN':
        from model.cnnBrain import DQN_Brain as Brain
        from model.agent import DQN_Agent as Agent
    elif model == 'DDQN':
        from model.cnnBrain import DDQN_Brain as Brain
        from model.agent import DDQN_Agent as Agent
    elif model == 'PDQN':
        from model.cnnBrain import PDQN_Brain as Brain
        from model.agent import PDQN_Agent as Agent
    elif model == 'PDDQN':
        from model.cnnBrain import PDDQN_Brain as Brain
        from model.agent import PDDQN_Agent as Agent
    elif model == 'DQN_PER':
        from model.cnnBrain import DQN_PER_Brain as Brain
        from model.agent import DQN_PER_Agent as Agent
    elif model == 'DDQN_PER':
        from model.cnnBrain import DDQN_PER_Brain as Brain
        from model.agent import DDQN_PER_Agent as Agent

    # lifes = 5
    # env_name = 'Breakout'
    env = gym.make("{}NoFrameskip-v4".format(env_name))  # select which gym environment to use

    print('\nThe config:\n', configs, '\n')
    filters_per_layer = configs['Brain']['filters_per_layer']
    kernel_size_per_layer = configs['Brain']['kernel_size_per_layer']
    conv_strides_per_layer = configs['Brain']['conv_strides_per_layer']
    learning_rate = configs['Brain']['learning_rate']
    output_graph = configs['Brain']['output_graph']
    restore = configs['Brain']['restore']

    reward_decay = configs['Agent']['reward_decay']
    replace_target_iter = configs['Agent']['replace_target_iter']
    memory_size = configs['Agent']['memory_size']
    batch_size = configs['Agent']['batch_size']
    MAX_EPSILON = configs['Agent']['MAX_EPSILON']
    MIN_EPSILON = configs['Agent']['MIN_EPSILON']
    LAMBDA = configs['Agent']['LAMBDA']

    replay_start_size = configs['ExperienceReplay']['replay_start_size']
    update_frequency = configs['ExperienceReplay']['update_frequency']

    brain = Brain(n_actions=env.action_space.n,
                  observation_width=84,
                  observation_height=84,
                  observation_depth=4,
                  learning_rate=learning_rate,
                  filters_per_layer=filters_per_layer,
                  kernel_size_per_layer=kernel_size_per_layer,
                  conv_strides_per_layer=conv_strides_per_layer,
                  restore=restore,
                  output_graph=output_graph,
                  checkpoint_dir=(env_name + '_' + model + '_CNN_Net'))

    agent = Agent(
        brain=brain,
        n_actions=env.action_space.n,
        observation_space_shape=env.observation_space.shape,
        reward_decay=reward_decay,
        MAX_EPSILON=MAX_EPSILON,  # maximum value of epsilon
        MIN_EPSILON=MIN_EPSILON,  # minimum value of epsilon
        LAMBDA=LAMBDA,
        replace_target_iter=replace_target_iter,
        memory_size=memory_size,
        batch_size=batch_size,
    )

    dataStorage = DataStorage()

    env = wrap_env(env)
    run_AtariGame(episodes, model, env, env_name, lifes, agent, brain, dataStorage,
                  replay_start_size, update_frequency, False)  # last param = True records q values
def initialize_network(G, agent_constructor_params, T):
    """
    Initializes a network object G that stores agent-level information (e.g. parameters of individual
    generative models, global node-indices, ...) and information about the generative process.
    """
    single_node_attrs = {
        'agent': {},
        'self_global_label_mapping': {},
        'qs': {},
        'q_pi': {},
        'o': {},
        'selected_actions': {},
        'my_tweet': {},
        'other_tweet': {},
        'sampled_neighbors': {}
    }

    single_node_attrs['stored_data'] = {i: list(single_node_attrs.keys()) for i in G.nodes()}

    for agent_i in G.nodes():
        agent = Agent(**agent_constructor_params[agent_i])
        self_global_label_mapping = dict(zip(range(G.degree(agent_i)), list(nx.neighbors(G, agent_i))))

        single_node_attrs['agent'][agent_i] = agent

        single_node_attrs['self_global_label_mapping'][agent_i] = self_global_label_mapping

        single_node_attrs['qs'][agent_i] = np.empty(
            (T, agent.genmodel.num_factors), dtype=object
        )  # history of the posterior beliefs about hidden states of `agent_i`

        single_node_attrs['q_pi'][agent_i] = np.empty(
            (T, len(agent.genmodel.policies)), dtype=object
        )  # history of the posterior beliefs about policies of `agent_i`

        single_node_attrs['o'][agent_i] = np.zeros(
            (T + 1, agent.genmodel.num_modalities), dtype=int
        )  # history of the indices of the observations made by `agent_i`. One extra time index for the last timestep, which has no subsequent active inference loop

        single_node_attrs['selected_actions'][agent_i] = np.zeros(
            (T, 2), dtype=int
        )  # history of the indices of the actions selected by `agent_i`

        single_node_attrs['my_tweet'][agent_i] = np.zeros(
            T + 1
        )  # history of indices of `my_tweet` (same as G.nodes()[agent_i]['o'][:, 0])

        single_node_attrs['other_tweet'][agent_i] = np.zeros(
            T + 1
        )  # history of indices of `other_tweet` (same as G.nodes()[agent_i]['o'][t, n+1]), where `n` is the index of the selected neighbour at time t

        single_node_attrs['sampled_neighbors'][agent_i] = np.zeros(T + 1)

    for attr, attr_dict in single_node_attrs.items():
        nx.set_node_attributes(G, attr_dict, attr)

    return G
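# A minimal usage sketch for initialize_network (not from the original repo): build a small
# fully connected graph of agents and attach the history buffers. `build_agent_params` is a
# hypothetical helper standing in for whatever produces the per-agent constructor kwargs
# (e.g. the agent_params dicts assembled in generate_quick_agent_observation above).
import networkx as nx

T = 50                                    # assumed number of simulation timesteps
G = nx.complete_graph(4)                  # 4 agents, everyone is everyone else's neighbour
constructor_params = {i: build_agent_params(num_neighbours=G.degree(i)) for i in G.nodes()}
G = initialize_network(G, constructor_params, T)
print(G.nodes[0]['qs'].shape)             # (T, num_factors) history buffer for agent 0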
def execute(self, agent: Agent, state: SimState) -> None:
    """
    Isolate (remove from the grid) a given share of infected people for the sickness duration.
    Afterwards they need to be added to the grid again as removed/dead/immune.
    """
    if agent.is_quarantined():
        if agent.state() is AgentState.DEAD or agent.state() is AgentState.IMMUNE \
                or agent.state() is AgentState.REMOVED:
            grid = agent.grid()
            # Place the agent back onto the first free cell
            for row in range(grid.get_size()):
                for col in range(grid.get_size()):
                    grid_pos = GridPos(np.uint(row), np.uint(col))
                    if not grid.is_occupied(grid_pos):
                        grid.set_agent(agent, grid_pos)
                        agent.set_pos(grid_pos)
                        agent.set_quarantined(False)
                        agent.grid().get_quarantinedAgents().remove(agent)
                        state.add_to_quarantined_count(-1)
                        return
    else:
        isolate_share = state.quarantine_share()  # Share of infected cells to isolate
        infected = state.infected_count()
        if agent.state() == AgentState.INFECTIVE and state.get_quarantined_count() < isolate_share * (
                infected + state.get_quarantined_count()):
            agent.set_quarantined(True)
            agent.grid().get_quarantinedAgents().append(agent)
            agent.grid().set_agent(None, agent.get_pos())
            agent.get_scheduler().update_gui_state(agent.get_pos(), AgentState.EMPTY)
            state.add_to_quarantined_count(1)
from model.agent import Agent
from model.environment import Environment
from model.human import Human
from state_util import initialV_x, initialV_o, play_game, get_state_hash_and_winner

if __name__ == '__main__':
    # train the agent
    p1 = Agent()
    p2 = Agent()

    # set initial V for p1 and p2
    env = Environment()
    state_winner_triples = get_state_hash_and_winner(env)
    Vx = initialV_x(env, state_winner_triples)
    p1.setV(Vx)
    Vo = initialV_o(env, state_winner_triples)
    p2.setV(Vo)

    # give each player their symbol
    p1.set_symbol(env.x)
    p2.set_symbol(env.o)

    T = 1000000
    for t in range(T):
        if t % 1000 == 0:
            print(t)
        play_game(p1, p2, Environment())

    # play human vs. agent
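    # A minimal sketch of the "play human vs. agent" step the dangling comment above points to,
    # continuing the __main__ block. Assumptions: Human() exposes set_symbol() like Agent, and
    # play_game's draw parameter behaves as in the play_game snippet earlier in this collection
    # (draw=2 draws the board before player 2's move). This is an illustrative guess, not the
    # original author's code.
    human = Human()
    human.set_symbol(env.o)
    while True:
        play_game(p1, human, Environment(), draw=2)
        answer = input("Play again? [Y/n]: ")
        if answer and answer.lower()[0] == 'n':
            break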
# Set global parameters
rewards = {
    'lose_reward': -2.5,
    'win_reward': 2.5,
    'yolo_reward': 0.1,
    'rep_point_reward': -0.5,
    'open_point_reward': 2.2
}
pictures = [
    'open.png', '1.png', '2.png', '3.png', '4.png', '5.png', '6.png',
    '7.png', '8.png', 'close.png', 'mine.png', 'open_mine.png'
]
agent = Agent(decision_field, checkpoint_dir)

# Set initial parameters for minesweeper
layout = start_layout()
window = sg.Window('Minesweeper', layout)

# Game loop
while True:
    event, values = window.read()

    # Case of "exit" button
    if event in (sg.WIN_CLOSED, 'Exit'):
        break

    # Case of "Start game" button
    if event in ('grid'):
def run_Game(model, env_name, episodes):
    if model == 'DQN':
        from model.mlpBrain import DQN_Brain as Brain
        from model.agent import DQN_Agent as Agent
    elif model == 'DDQN':
        from model.mlpBrain import DDQN_Brain as Brain
        from model.agent import DDQN_Agent as Agent
    elif model == 'PDQN':
        from model.mlpBrain import PDQN_Brain as Brain
        from model.agent import PDQN_Agent as Agent
    elif model == 'PDDQN':
        from model.mlpBrain import PDDQN_Brain as Brain
        from model.agent import PDDQN_Agent as Agent
    elif model == 'DQN_PER':
        from model.mlpBrain import DQN_PER_Brain as Brain
        from model.agent import DQN_PER_Agent as Agent
    elif model == 'DDQN_PER':
        from model.mlpBrain import DDQN_PER_Brain as Brain
        from model.agent import DDQN_PER_Agent as Agent
    elif model == 'DQN_InAday':
        from model.mlpBrain import DQN_InAday_Brain as Brain
        from model.agent import DQN_InAday_Agent as Agent
    elif model == 'DQN_PER_Ipm':
        from model.mlpBrain import DQN_PER_Ipm_Brain as Brain
        from model.agent import DQN_PER_Ipm_Agent as Agent
    elif model == 'DDQN_PER_Ipm':
        from model.mlpBrain import DDQN_PER_Ipm_Brain as Brain
        from model.agent import DDQN_PER_Ipm_Agent as Agent
    elif model == 'PDQN_RePER':
        from model.mlpBrain import PDQN_RePER_Brain as Brain
        from model.agent import PDQN_RePER_Agent as Agent

    env = gym.make(env_name)  # select which gym environment to use
    # env = env.unwrapped  # if left commented out, the total reward per episode is capped at 200

    n_actions = 11 if env_name == 'Pendulum-v0' else env.action_space.n

    print('\nThe config:\n', configs, '\n')
    neurons_per_layer = configs['Brain']['neurons_per_layer']
    learning_rate = configs['Brain']['learning_rate']
    output_graph = configs['Brain']['output_graph']
    restore = configs['Brain']['restore']

    reward_decay = configs['Agent']['reward_decay']
    replace_target_iter = configs['Agent']['replace_target_iter']
    memory_size = configs['Agent']['memory_size']
    batch_size = configs['Agent']['batch_size']
    MAX_EPSILON = configs['Agent']['MAX_EPSILON']
    MIN_EPSILON = configs['Agent']['MIN_EPSILON']
    LAMBDA = configs['Agent']['LAMBDA']

    # learning_rate matters
    # tune restore together with MAX_EPSILON
    brain = Brain(n_actions=n_actions,
                  n_features=env.observation_space.shape[0],
                  neurons_per_layer=neurons_per_layer,
                  learning_rate=learning_rate,
                  output_graph=output_graph,
                  restore=restore,
                  checkpoint_dir=(env_name + '_' + model + '_MLP_Net'))

    agent = Agent(
        brain=brain,
        n_actions=n_actions,
        observation_space_shape=env.observation_space.shape,
        reward_decay=reward_decay,
        replace_target_iter=replace_target_iter,
        memory_size=memory_size,
        batch_size=batch_size,
        MAX_EPSILON=MAX_EPSILON,
        MIN_EPSILON=MIN_EPSILON,
        LAMBDA=LAMBDA,
    )

    dataStorage = DataStorage()

    if env_name == 'Pendulum-v0':
        run_Pendulum(episodes, env, agent, False)
    else:
        run_controlGame(episodes, env, agent, dataStorage, True)  # 4th param = True records q values

    writeData2File(model, env_name, brain, agent, dataStorage)