def __init__(self, sim, brain, name="QLearn", train_every_nth=5,
             train_batch_size=32, max_experience=300000,
             exploration_period=10000, epsilon_final=0.015,
             discount_factor=0.99):
    Agent.__init__(self, name)
    self.sim = sim
    self.brain = brain
    self.train_every_nth = train_every_nth
    self.train_batch_size = train_batch_size
    self.epsilon_final = epsilon_final
    self.discount_factor = discount_factor
    self.max_experience = max_experience
    self.exploration_period = exploration_period
    self.actions_executed = 0
    self.memory = []
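
# --- Hypothetical companion sketch (not part of the original snippet) ---
# A minimal illustration of how the fields above are commonly used: experiences
# are appended to `memory` (capped at `max_experience`) and the brain is trained
# on a random batch every `train_every_nth` actions. The names `store_experience`
# and `brain.train_on_batch` are assumptions, not the original API.
import random

def store_experience(self, observation, action, reward, next_observation):
    self.actions_executed += 1
    self.memory.append((observation, action, reward, next_observation))
    if len(self.memory) > self.max_experience:
        self.memory.pop(0)  # drop the oldest experience
    if (self.actions_executed % self.train_every_nth == 0
            and len(self.memory) >= self.train_batch_size):
        batch = random.sample(self.memory, self.train_batch_size)
        self.brain.train_on_batch(batch)  # assumed brain API
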
def main():
    """Runs everything needed by the agent."""
    settings = Settings()
    agent = Agent(settings)
    agent.start()  # The agent is executing
    try:
        agent.join()
    except KeyboardInterrupt:
        agent.stop()

from agents import Model, Agent, run

epidemic_model = Model("Epidemimodel", 100, 100)
epidemic_model.add_agent(Agent())
run(epidemic_model)

from agents import Agent
from simulation import Simulation
from state import Environment
import numpy as np

# TODO: CHOICE TO ϕ DICTIONARY???

# Init the Agent's environment
env = Environment()

# Init the expert agent
# Feed it the expert trajectories
a = Agent(type='expert',
          action_list=['a', 'b', 'c', 'd', 'e', 'f', 'g'],
          environment=env,
          trajectories=[['a', 'b', 'c', 'e', 'g', 'b', 'c', 'e', 'g', 'c'],
                        ['a', 'b', 'c', 'a', 'g', 'g', 'a', 'g', 'g', 'c'],
                        ['c', 'd', 'f', 'b', 'c', 'a', 'd', 'f', 'b', 'c']])

# Build said expert trajectories
a.build_trajectories()

# Build the Agent's initial state distribution
a.build_D()

# Init a standalone environment for the state itself
simul_env = Environment()

# Init the simulation
sim = Simulation(agents=a, environment=simul_env, alpha=1)

def perform_agent_action(self, agent: Agent, action: str):
    # Perform the specified action if possible, wait otherwise
    if action == 'UP' and self.tile_is_free(agent.x, agent.y - 1):
        agent.y -= 1
        agent.add_event(e.MOVED_UP)
    elif action == 'DOWN' and self.tile_is_free(agent.x, agent.y + 1):
        agent.y += 1
        agent.add_event(e.MOVED_DOWN)
    elif action == 'LEFT' and self.tile_is_free(agent.x - 1, agent.y):
        agent.x -= 1
        agent.add_event(e.MOVED_LEFT)
    elif action == 'RIGHT' and self.tile_is_free(agent.x + 1, agent.y):
        agent.x += 1
        agent.add_event(e.MOVED_RIGHT)
    elif action == 'BOMB' and agent.bombs_left:
        self.logger.info(
            f'Agent <{agent.name}> drops bomb at {(agent.x, agent.y)}')
        self.bombs.append(
            Bomb((agent.x, agent.y), agent, s.BOMB_TIMER, s.BOMB_POWER,
                 agent.color, custom_sprite=agent.bomb_sprite))
        agent.bombs_left = False
        agent.add_event(e.BOMB_DROPPED)
    elif action == 'WAIT':
        agent.add_event(e.WAITED)
    else:
        agent.add_event(e.INVALID_ACTION)

import threading
import time
from signal import SIGINT, SIGTERM, signal

from agents import Agent, Message

# generate public and private keys for server and client
server_public_key, server_private_key = Agent.curve_keypair()
wrong_server_public_key, wrong_server_private_key = Agent.curve_keypair()
client_public_key, client_private_key = Agent.curve_keypair()
client2_public_key, client2_private_key = Agent.curve_keypair()


class NotificationBroker(Agent):
    def setup(self, name=None, pub_address=None, sub_address=None):
        self.create_notification_broker(
            pub_address,
            sub_address,
            options=self.curve_server_config(server_private_key),
        )


class Sender(Agent):
    def setup(self, name=None, pub_address=None, sub_address=None):
        self.counter = 0
        self.pub, self.sub = self.create_notification_client(
            pub_address,
            sub_address,
            options=self.curve_client_config(
                server_public_key, client_public_key, client_private_key),

def run_rotate():
    '''
    Runs the simulations and saves the JSON files with an arbitrary rotation
    about the center of the screen.
    '''
    def rotate(obj, theta=-20, origin=(500, 300)):
        '''
        Rotates objects about a center
        '''
        # Translate w/r to visual origin (500,300)
        obj.body.position -= Vec2d(origin)
        # Degrees to radians
        theta = radians(theta)
        x, y = obj.body.position
        x_ = x * cos(theta) - y * sin(theta)
        y_ = y * cos(theta) + x * sin(theta)
        obj.body.position = [x_, y_]
        # Translate w/r to actual origin (0,0)
        obj.body.position += Vec2d(origin)

    video = False
    thetas = list(range(-19, -9)) + list(range(10, 19))
    for scene in scenarios.__experiment3__:
        theta = choice(thetas)
        sim = getattr(scenarios, scene)
        env = sim(True)
        env.run()
        # Gather position data
        pos = env.position_dict
        agent_positions = env.position_dict['agent']
        patient_positions = env.position_dict['patient']
        fireball_positions = env.position_dict['fireball']
        # Setup pygame and pymunk
        space = pymunk.Space()
        space.damping = 0.05
        screen = pygame.display.set_mode((1000, 600))
        options = pymunk.pygame_util.DrawOptions(screen)
        clock = pygame.time.Clock()
        if video:
            save_screen = make_video(screen)
        # Setup empty agents
        agent = Agent(0, 0, 'blue', 0, [])
        patient = Agent(0, 0, 'green', 0, [])
        fireball = Agent(0, 0, 'red', 0, [])
        # Add agents to space
        space.add(agent.body, agent.shape,
                  patient.body, patient.shape,
                  fireball.body, fireball.shape)
        pygame.init()
        running = True
        while running:
            try:
                # Extract position data
                a_pos = agent_positions.pop(0)
                p_pos = patient_positions.pop(0)
                f_pos = fireball_positions.pop(0)
                # Set positions of objects
                agent.body.position = Vec2d(a_pos['x'], a_pos['y'])
                patient.body.position = Vec2d(p_pos['x'], p_pos['y'])
                fireball.body.position = Vec2d(f_pos['x'], f_pos['y'])
                # Rotate objects about the center
                rotate(agent, theta)
                rotate(patient, theta)
                rotate(fireball, theta)
                # Render space on screen (if requested)
                screen.fill((255, 255, 255))
                space.debug_draw(options)
                pygame.display.flip()
                clock.tick(60)
                space.step(1 / 60.0)
                if video:
                    next(save_screen)
            except Exception as e:
                running = False
                pygame.quit()
                pygame.display.quit()
        if video:
            vid_from_img("final_" + scene)

# return dictionary of agents
if __name__ == "__main__":
    G = makegraphs.ringGraph(3)  # initialize graph here
    c = 2  # number of commodities
    agentlist = defaultdict(Agent)
    for i in range(G.number_of_nodes()):
        # initialize params for agent
        u = np.zeros(c)
        e = np.ones(c)
        p = np.ones(c)
        subplans = np.zeros((G.number_of_nodes(), c))
        agentlist[i] = Agent(u, e, p, subplans)
    nx.set_node_attributes(G, 'agentprop', agentlist)
    check_eq = False
    num_rounds = 0
    # iterate until the equilibrium check succeeds
    while not check_eq:
        agents_old = nx.get_node_attributes(G, 'agentprop')
        agents_new = changePlans(G)
        nx.set_node_attributes(G, 'agentprop', agents_new)
        check_eq = checkEquilibrium(agents_old, agents_new)
        num_rounds += 1
    print(str(num_rounds - 1) + ' rounds needed to reach equilibrium.')
    drawNetwork(G, 'agentprop', 'test.png')

def test_agent():
    a = Agent()
    assert isinstance(a, Agent)

def test_ModelBasedReflexAgentProgram():
    loc_A = (0, 0)
    loc_B = (1, 0)
    model = {loc_A: None, loc_B: None}

    class Rule:
        def __init__(self, state, action):
            self.__state = state
            self.action = action

        def matches(self, state):
            return self.__state == state

    # create rules for a two-state Vacuum Environment
    rules = [
        Rule((loc_A, "Dirty"), "Suck"),
        Rule((loc_A, "Clean"), "Right"),
        Rule((loc_B, "Dirty"), "Suck"),
        Rule((loc_B, "Clean"), "Left")
    ]

    def update_state(state, action, percept, model):
        loc, status = percept
        # the other location
        loc2 = tuple(map(lambda x: x[0] - x[1], zip((1, 0), loc)))
        # initial guess of the other location
        if not state or not action or not model[loc2]:
            model[loc2] = random.choice(['Dirty', 'Clean'])
        model[loc] = status
        # the model assumes the environment stays clean if the agent chose to suck last step
        if action == 'Suck':
            state = percept
            return state
        # rubbish may appear suddenly, so the model guesses randomly
        if status == 'Clean':
            status = random.choice(['Dirty', 'Clean'])
            model[loc] = status
        # moving right or left does not influence the environment
        state = (loc, model[loc])
        return state

    # create a program and then an object of the ModelBasedReflexAgentProgram
    program = ModelBasedReflexAgentProgram(rules, update_state, model)
    agent = Agent(program)
    # create an object of TrivialVacuumEnvironment
    environment = TrivialVacuumEnvironment()
    # add agent to the environment
    environment.add_thing(agent)
    # run the environment
    environment.run()
    # check final status of the environment
    assert environment.status == {(1, 0): 'Clean', (0, 0): 'Clean'}
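
# --- Hedged sketch of the kind of program being tested above ---
# A minimal model-based reflex agent program consistent with the test: it keeps
# an internal state, updates it from the latest percept via `update_state`, and
# returns the action of the first matching rule. This is an illustrative
# reconstruction, not necessarily the library's ModelBasedReflexAgentProgram.
def make_model_based_reflex_program(rules, update_state, model):
    state, action = None, None

    def program(percept):
        nonlocal state, action
        state = update_state(state, action, percept, model)
        for rule in rules:
            if rule.matches(state):
                action = rule.action
                return action
        return None  # no rule matched

    return program
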
class Evaluator(J30Runner):
    def __init__(self, model_name, model):
        super().__init__(train=False)
        self.model_name = model_name
        self.model = model.model
        self.agent = Agent(self.projects, model)
        self.result = []

    def load_weights(self, number):
        self.model.load_weights('.\\models\\' + self.model_name + '\\' +
                                self.model_name + '-' + str(number) + '.h5')

    def evaluate_project(self, project) -> float:
        t = 0
        while not project.is_finished():
            t += project.next(*self.act(project))
        return t

    def evaluate(self, num_of_iterations=100):
        """Evaluates a single project for the number of iterations."""
        durations = {}
        for project in self.projects:
            project_list = np.array([
                Project(project.path, stochastic=project.stochastic)
                for _ in range(num_of_iterations)
            ])
            durations[project.path[-8:]] = np.vectorize(
                self.evaluate_project, otypes=[float])(project_list)
        return durations

    def evaluate_all(self, num_of_models, num_of_iterations=100):
        for num_of_model in range(num_of_models):
            print('evaluating model', num_of_model)
            self.load_weights(num_of_model)
            self.result.append(self.evaluate(num_of_iterations))
            pickle.dump(
                self.result,
                open(self.model_name + '-result-' + str(num_of_model), 'wb'))

    def act(self, project):
        """The action with the highest value is executed.

        This function is different from the act-function during training:
        If no tasks are running, the model cannot choose the wait/void action.
        This prevents infinite loops if the wait/void action for such a state
        has the highest q-value.

        :return: the best action and the durations of the tasks in the action
        """
        state, durations = project.state()
        actions = project.get_actions()
        if len(actions) > 1:
            best_action = self.get_best_action(state, actions, project)
            return best_action, durations
        else:
            best_action = []
            return best_action, durations

    def get_best_action(self, state, actions, project):
        inputs = np.squeeze(
            np.array([
                self.agent.input_vector(state, action) for action in actions
            ]))
        action_values = np.squeeze(self.model.predict(inputs, len(inputs)))
        max_val = np.argmax(action_values)
        # the wait/void action must not be the best action if there are no running tasks
        if len(project.running) == 0 and actions[max_val] == []:
            max_val = np.argmax(action_values[1:]) + 1
        return actions[max_val]
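
# --- Illustrative example of the argmax guard in get_best_action above ---
# If the first (wait/void) action happens to have the highest q-value while no
# tasks are running, the argmax is recomputed over the remaining actions.
# The values below are made up purely for illustration.
import numpy as np

actions = [[], [1], [2, 3]]                # actions[0] is the wait/void action
action_values = np.array([0.9, 0.4, 0.7])
max_val = np.argmax(action_values)          # -> 0, the void action wins
if max_val == 0:                            # no running tasks: void must not win
    max_val = np.argmax(action_values[1:]) + 1
assert actions[max_val] == [2, 3]
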
class TestAgent_Good_Input(unittest.TestCase):
    def setUp(self):
        Supervisors, Projects, Students = Hierarchy(3), Hierarchy(2), Hierarchy(1)

        """ Some Supervisors """
        self.KB = Agent("1", Supervisors, capacities=[0, 1], preferences=None,
                        abilities=None, name="Ken Brown")
        self.UK = Agent("2", Supervisors, capacities=[1, 2], preferences=None,
                        abilities=None, name="Uli Kraehmer")
        self.RS = Agent("3", Supervisors, capacities=[2, 2], preferences=None,
                        abilities=None, name="Richard Steiner")
        self.TB = Agent("4", Supervisors, capacities=[1, 3], preferences=None,
                        abilities=None, name="Tara Brendle")
        self.AB = Agent("5", Supervisors, capacities=[0, 1], preferences=None,
                        abilities=None, name="Andy Baker")

        """ Some projects """
        self.Hopf = Agent("1", Projects, capacities=[0, 1],
                          preferences=[self.UK, self.KB, self.AB],
                          abilities=None, name="Hopf algebras")
        self.Alg_no = Agent("2", Projects, capacities=[0, 1],
                            preferences=[self.KB, self.AB],
                            abilities=None, name="Algebraic number theory")
        self.Cat_thy = Agent("3", Projects, capacities=[0, 1],
                             preferences=[self.RS, self.UK],
                             abilities=None, name="Category Theory")
        self.Group_thy = Agent("4", Projects, capacities=[0, 1],
                               preferences=[self.TB, self.AB],
                               abilities=None, name="Group Theory")
        self.Topology = Agent("5", Projects, capacities=[0, 1],
                              preferences=[self.AB, self.TB, self.RS],
                              abilities=None, name="Topology")

        """ Some students """
        self.Paul = Agent("0700874", Students, [0, 1],
                          preferences=[self.Hopf, self.Alg_no, self.Group_thy],
                          abilities=None, name="Paul Gilmartin")
        self.Scott = Agent("0700875", Students, [0, 1],
                           preferences=[self.Alg_no, self.Hopf],
                           abilities=None, name="Scott Gilmartin")
        self.Jim = Agent("0700876", Students, [0, 1],
                         preferences=[self.Cat_thy, self.Topology, self.Hopf],
                         abilities=None, name="Jimmy Boyd")
        self.Rachael = Agent("0700877", Students, [0, 1],
                             preferences=[self.Group_thy, self.Alg_no, self.Cat_thy],
                             abilities=None, name="Rachael Hayhoe")
        self.Nameless = Agent("0700878", Students, [0, 1],
                              preferences=[self.Topology],
                              abilities=None, name=None)

    def test_give_name_1(self):
        self.Nameless.give_name("Big Man")
        self.assertEqual("Big Man", self.Nameless.name)

    def test_give_name_2(self):
        self.Paul.give_name("Paul G")
        self.assertEqual("Paul G", self.Paul.name)

    def test_upper_capacity_1(self):
        self.assertEqual(self.KB.upper_capacity, 1)

    def test_upper_capacity_2(self):
        self.assertEqual(self.TB.upper_capacity, 3)

    def test_preference_position_1(self):
        self.assertEqual(self.Paul.preference_position(self.Hopf), 1)

    def test_preference_position_2(self):
        self.assertEqual(self.Topology.preference_position(self.RS), 3)

    def test_lower_capacity_1(self):
        self.assertEqual(self.AB.lower_capacity, 0)

    def test_lower_capacity_2(self):
        self.assertEqual(self.RS.lower_capacity, 2)

    def test_capacity_difference_1(self):
        self.assertEqual(self.RS.capacity_difference, 0)

    def test_capacity_difference_2(self):
        self.assertEqual(self.Alg_no.capacity_difference, 1)


if __name__ == "__main__":
    unittest.main()

def __init__(self, action_set, reward_function, prior_variance, noise_variance,
             feature_extractor, prior_network, num_ensemble,
             hidden_dims=[10, 10], learning_rate=5e-4, buffer_size=50000,
             batch_size=64, num_batches=100, starts_learning=5000,
             discount=0.99, target_freq=10, verbose=False, print_every=1,
             test_model_path=None):
    Agent.__init__(self, action_set, reward_function)
    self.prior_variance = prior_variance
    self.noise_variance = noise_variance

    self.feature_extractor = feature_extractor
    self.feature_dim = self.feature_extractor.dimension
    dims = [self.feature_dim] + hidden_dims + [len(self.action_set)]

    self.prior_network = prior_network
    self.num_ensemble = num_ensemble  # number of models in ensemble
    self.index = np.random.randint(self.num_ensemble)

    # build Q network
    # we use a multilayer perceptron
    if test_model_path is None:
        self.test_mode = False
        self.learning_rate = learning_rate
        self.buffer_size = buffer_size
        self.batch_size = batch_size
        self.num_batches = num_batches
        self.starts_learning = starts_learning
        self.discount = discount
        self.timestep = 0

        self.buffer = Buffer(self.buffer_size)
        self.models = []
        for i in range(self.num_ensemble):
            if self.prior_network:
                '''
                Second network is a prior network whose weights are fixed and
                first network is the difference network that is learned,
                i.e., its weights are mutable
                '''
                self.models.append(
                    DQNWithPrior(dims, scale=np.sqrt(self.prior_variance)).to(device))
            else:
                self.models.append(MLP(dims).to(device))
            self.models[i].initialize()

        '''
        prior network weights are immutable, so it is enough to keep the
        difference network
        '''
        self.target_nets = []
        for i in range(self.num_ensemble):
            if self.prior_network:
                self.target_nets.append(
                    DQNWithPrior(dims, scale=np.sqrt(self.prior_variance)).to(device))
            else:
                self.target_nets.append(MLP(dims).to(device))
            self.target_nets[i].load_state_dict(self.models[i].state_dict())
            self.target_nets[i].eval()

        self.target_freq = target_freq  # target nn updated every target_freq episodes
        self.num_episodes = 0

        self.optimizer = []
        for i in range(self.num_ensemble):
            self.optimizer.append(
                torch.optim.Adam(self.models[i].parameters(), lr=self.learning_rate))

        # for debugging purposes
        self.verbose = verbose
        self.running_loss = 1.
        self.print_every = print_every
    else:
        self.models = []
        self.test_mode = True
        if self.prior_network:
            self.models.append(DQNWithPrior(dims, scale=self.prior_variance))
        else:
            self.models.append(MLP(dims))
        self.models[0].load_state_dict(torch.load(test_model_path))
        self.models[0].eval()
        self.index = 0
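
# --- Hedged sketch of a network with an additive fixed prior ---
# Illustrates the idea behind the DQNWithPrior used above: a trainable network
# plus a frozen, randomly initialised prior network whose scaled output is added
# to the trainable output. The class below is a simplified stand-in written for
# illustration, not the original implementation.
import torch.nn as nn

class QNetWithPrior(nn.Module):
    def __init__(self, dims, scale=1.0):
        super().__init__()

        def mlp():
            layers = []
            for d_in, d_out in zip(dims[:-1], dims[1:]):
                layers += [nn.Linear(d_in, d_out), nn.ReLU()]
            return nn.Sequential(*layers[:-1])  # no activation on the output layer

        self.trainable = mlp()
        self.prior = mlp()
        for p in self.prior.parameters():       # prior weights stay fixed
            p.requires_grad_(False)
        self.scale = scale

    def forward(self, x):
        return self.trainable(x) + self.scale * self.prior(x)
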
    'BATCH_SIZE': 32,
    'DISCOUNT': 0.99,
    'TARGET_UPDATE_STEPS': 100,
    'LEARNING_RATE': 1e-3,
    'REPLAY_BUFFER_SIZE': 1000,
    'MIN_REPLAY_BUFFER_SIZE': 100,
    'EPSILON_START': 1,
    'EPSILON_END': 0.1,
    'EPSILON_DECAY_DURATION': 5000,
}

# Allow changing hyperparameters from command-line arguments
args = get_args(default_args=args_dict)

# Create wrapped environment
env = make_env(args.ENV_ID)

# Set seed
set_seed(env, args.SEED)

# GPU or CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Create agent
agent = Agent(env, device, args)

# Load agent
agent.load()

# Test agent
agent.test(render=False)

def init_agents(self):
    for i in range(self.population_size):
        agent = Agent(self.agent_hps, self.game_type,
                      self.compute_input_channels(self.game_type))
        self.agent_steps[i] = 0
        self.agents.append(agent)

def init_agents(self):
    for i in range(self.population_size):
        agent = Agent(self.agent_hps, self.vision_hps)
        self.agent_reward_logs[i] = []
        self.agents.append(agent)

def test_Agent():
    def constant_prog(percept):
        return percept

    agent = Agent(constant_prog)
    result = agent.program(5)
    assert result == 5

                    server_public_key, public_key, private_key
                )
            else:
                options = {}
            self.pub, self.sub = self.create_notification_client(
                pub_address, sub_address, options=options
            )
            self.sub.observable.subscribe(
                lambda x: self.log.info(f"received: {x}"))


if __name__ == "__main__":
    with tempfile.TemporaryDirectory() as trusted_keys_path, \
            tempfile.TemporaryDirectory() as untrusted_keys_path:
        # create key pairs in corresponding directories
        Agent.create_curve_certificates(trusted_keys_path, "server")
        Agent.create_curve_certificates(trusted_keys_path, "listener")
        Agent.create_curve_certificates(untrusted_keys_path, "listener2")

        # load key pairs
        server_public_key, server_private_key = Agent.load_curve_certificate(
            os.path.join(trusted_keys_path, "server.key_secret")
        )
        listener_public_key, listener_private_key = Agent.load_curve_certificate(
            os.path.join(trusted_keys_path, "listener.key_secret")
        )
        listener2_public_key, listener2_private_key = Agent.load_curve_certificate(
            os.path.join(untrusted_keys_path, "listener2.key_secret")
        )

        broker = NotificationBroker(

"num_actions": 3, "activation": "relu", # MLP "hidden_sizes": (64, ) * 2, "load_model": True, "load_model_from_path": '/home/llama/tb_logs/spatial_softmax_2019-12-02_21-26-53/Agent0_1300.pt', } agent_ids = ["Agent0"] #, "Agent1"] agents: Dict[str, Agent] = { agent_id: Agent(SpatialSoftMaxModel(agent_config), name=agent_id) #agent_id: Agent(CoordConvModel(agent_config), name=agent_id) for agent_id in agent_ids } trainer_config = { # Trainer settings "agents_to_optimize": None, # ids of agents that should be optimized "batch_size": 2000, # Agent settings "optimizer": "adam", "optimizer_kwargs": { "lr": 1e-4, "betas": (0.9, 0.999), "eps": 1e-7, "weight_decay": 0,
class MetaBestFirstSearchEnv(gym.Env):
    """A meta-MDP for best first search with a deterministic transition model."""
    Node = namedtuple('Node', ('state', 'path', 'reward', 'done'))
    State = namedtuple('State', ('frontier', 'reward_to_state', 'best_done'))
    TERM = 'TERM'

    def __init__(self, env, eval_node, expansion_cost=0.01):
        super().__init__()
        self.env = env
        self.expansion_cost = -abs(expansion_cost)
        # This guy interacts with the external environment, what a chump!
        self.surface_agent = Agent()
        self.surface_agent.register(self.env)
        self.eval_node = eval_node

    def _reset(self):
        self.env.reset()
        self.model = Model(self.env)  # warning: this breaks if env resets again
        start = self.Node(self.env._state, [], 0, False)
        # this is really part of the Meta Policy
        frontier = PriorityQueue(key=self.eval_node(noisy=True))
        frontier.push(start)
        reward_to_state = defaultdict(lambda: -np.inf)
        best_done = None
        # Warning: state is mutable (and we mutate it!)
        self._state = self.State(frontier, reward_to_state, best_done)
        return self._state

    def _step(self, action):
        """Expand a node in the frontier."""
        if action is self.TERM:
            # The return of one episode in the external env is
            # one reward in the MetaSearchEnv.
            trace = self._execute_plan()
            external_reward = trace['return']
            return None, external_reward, True, {'trace': trace}
        else:
            return self._expand_node(action), self.expansion_cost, False, {}

    def _execute_plan(self):
        frontier, reward_to_state, best_done = self._state
        if not best_done:
            raise RuntimeError('Cannot make plan.')
        policy = FixedPlanPolicy(best_done.path)
        self.surface_agent.register(policy)
        trace = self.surface_agent.run_episode(reset=False)
        return trace
        # elif frontier:
        #     plan = min(best_done, frontier.pop(), key=eval_node)
        #     plan = frontier.pop()

    def _expand_node(self, node):
        frontier, reward_to_state, best_done = self._state
        s0, p0, r0, _ = node
        for a, s1, r, done in self.model.options(s0):
            node1 = self.Node(s1, p0 + [a], r0 + r, done)
            if node1.reward <= reward_to_state[s1] - 0.002:
                continue  # cannot be better than an existing node
            reward_to_state[s1] = node1.reward
            if done:
                best_done = max((best_done, node1),
                                key=self.eval_node(noisy=False))
            else:
                frontier.push(node1)
        self._state = self.State(frontier, reward_to_state, best_done)
        return self._state
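
# --- Hedged sketch of the frontier data structure used above ---
# The PriorityQueue implementation is not shown in this snippet. A minimal
# key-based priority queue consistent with the push/pop calls might look like
# this; popping the item with the largest key is assumed here (eval_node scores
# nodes), so flip the sign if the original pops the minimum instead.
import heapq
import itertools

class KeyedPriorityQueue:
    def __init__(self, key):
        self.key = key
        self._heap = []
        self._counter = itertools.count()  # tie-breaker for equal keys

    def push(self, item):
        heapq.heappush(self._heap, (-self.key(item), next(self._counter), item))

    def pop(self):
        return heapq.heappop(self._heap)[-1]

    def __bool__(self):
        return bool(self._heap)

    def __len__(self):
        return len(self._heap)
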
from skopt import gp_minimize
import pandas as pd
import numpy as np

from agents import Agent
from policies import LiederPolicy, FixedPlanPolicy, MaxQPolicy
from value_functions import LiederQ

from contexttimer import Timer
from toolz import partition_all
from joblib import Parallel, delayed

from utils import cum_returns

__ENVS = None
__AGENT = Agent()
__CHUNKS = None


def eval_one(i):
    __AGENT.register(__ENVS[i])
    return __AGENT.run_episode()['return']


def eval_chunk(i, return_mean=True):
    # Each process should start with a different random seed.
    np.random.seed(np.random.randint(1000) + i)
    returns = []
    for env in __CHUNKS[i]:
        __AGENT.register(env)
        returns.append(__AGENT.run_episode()['return'])
    if return_mean:
        return np.mean(returns)

class Environment:
    def __init__(self, a_params, p_params, f_params, vel, handlers=None,
                 view=True, std_dev=0, frict=0.05):
        '''
        Environment class that contains all necessary components to configure
        and run scenarios.

        a_params::dict -- parameters for the Blue Agent
        p_params::dict -- parameters for the Green Agent
        f_params::dict -- parameters for the Fireball
        vel::tuple -- velocities associated with each agent in the scenario
        handlers::tuple -- optional collision handlers
        view::bool -- flag for whether you want to view the scenario or not
        frict::float -- friction value for pymunk physics
        std_dev::float -- standard deviation value for noisy counterfactual simulation
        '''
        self.view = view
        self.std_dev = std_dev
        # Objects in environment
        self.agent = Agent(a_params['loc'][0], a_params['loc'][1],
                           a_params['color'], a_params['coll'],
                           a_params['moves'])
        self.patient = Agent(p_params['loc'][0], p_params['loc'][1],
                             p_params['color'], p_params['coll'],
                             p_params['moves'])
        self.fireball = Agent(f_params['loc'][0], f_params['loc'][1],
                              f_params['color'], f_params['coll'],
                              f_params['moves'])
        # Initial location of objects in environment
        self.p_loc = p_params['loc']
        self.a_loc = a_params['loc']
        self.f_loc = f_params['loc']
        # Pymunk space friction
        self.friction = frict
        # Agent velocities
        self.vel = vel
        self.pf_lock = False
        self.af_lock = False
        self.ap_lock = False
        # Engine parameters
        self.space = None
        self.screen = None
        self.options = None
        self.clock = None
        # Collision handlers
        self.coll_handlers = [x for x in handlers] if handlers else handlers
        # Values needed for rendering the scenario in Blender
        self.tick = 0
        self.agent_collision = None
        self.agent_patient_collision = None
        self.agent_fireball_collision = None
        self.patient_fireball_collision = 0
        self.position_dict = {'agent': [], 'patient': [], 'fireball': []}
        self.screen_size = (1000, 600)
        # Configure and run environment
        self.configure()

    def configure(self):
        '''
        Configuration method for Environments. Sets up the pymunk space
        for scenarios.
        '''
        # Configure pymunk space and pygame engine parameters (if any)
        if self.view:
            pygame.init()
            self.screen = pygame.display.set_mode((1000, 600))
            self.options = pymunk.pygame_util.DrawOptions(self.screen)
            self.clock = pygame.time.Clock()
        self.space = pymunk.Space()
        self.space.damping = self.friction
        # Configure collision handlers (if any)
        if self.coll_handlers:
            for ob1, ob2, rem in self.coll_handlers:
                ch = self.space.add_collision_handler(ob1, ob2)
                ch.data["surface"] = self.screen
                ch.post_solve = rem
        # Add agents to the pymunk space
        self.space.add(self.agent.body, self.agent.shape,
                       self.patient.body, self.patient.shape,
                       self.fireball.body, self.fireball.shape)

    def update_blender_values(self):
        '''
        All scenarios are rendered in the physics engine Blender. In order to
        do this, we store relevant values such as object position, simulation
        tick count, and collisions in a JSON file. This file is passed into a
        bash script that uses it to render the relevant scenario in Blender.

        This method is used to update the JSON files for each scenario.
        '''
        # Append positional information to the dict
        self.position_dict['agent'].append({
            'x': self.agent.body.position[0],
            'y': self.agent.body.position[1]
        })
        self.position_dict['patient'].append({
            'x': self.patient.body.position[0],
            'y': self.patient.body.position[1]
        })
        self.position_dict['fireball'].append({
            'x': self.fireball.body.position[0],
            'y': self.fireball.body.position[1]
        })
        # Record when the Agent collides with someone else
        if handlers.PF_COLLISION and not self.pf_lock:
            self.agent_collision = self.tick
            self.pf_lock = True
        if handlers.AP_COLLISION and not self.ap_lock:
            self.agent_patient_collision = self.tick
            self.ap_lock = True
        if handlers.AF_COLLISION and not self.af_lock:
            self.agent_fireball_collision = self.tick
            self.af_lock = True

    def run(self, video=False, filename=""):
        '''
        Forward method for Environments. Actually runs the scenarios you view
        on (or off) screen.

        video::bool -- whether you want to record the simulation
        filename::str -- the name of the video file
        '''
        # Agent velocities
        a_vel, p_vel, f_vel = self.vel
        # Agent action generators (yield actions of agents)
        a_generator = self.agent.act(a_vel, self.clock, self.screen,
                                     self.space, self.options, self.view,
                                     self.std_dev)
        p_generator = self.patient.act(p_vel, self.clock, self.screen,
                                       self.space, self.options, self.view,
                                       self.std_dev)
        f_generator = self.fireball.act(f_vel, self.clock, self.screen,
                                        self.space, self.options, self.view,
                                        self.std_dev)
        # Running flag
        running = True
        # Video creation
        save_screen = make_video(self.screen)
        # Main loop. Run simulation until collision between Green Agent
        # and Fireball
        while running and not handlers.PF_COLLISION:
            try:
                # Generate the next tick in the simulation for each object
                next(a_generator)
                next(p_generator)
                next(f_generator)
                # Render space on screen (if requested)
                if self.view:
                    self.screen.fill((255, 255, 255))
                    self.space.debug_draw(self.options)
                    pygame.display.flip()
                    self.clock.tick(50)
                self.space.step(1 / 50.0)
                # Update the values for the Blender JSON file
                self.update_blender_values()
                # Increment the simulation tick
                self.tick += 1
                if video:
                    next(save_screen)
            except Exception as e:
                running = False
        if self.view:
            pygame.quit()
            pygame.display.quit()
        # Record whether Green Agent and Fireball collision occurred
        self.patient_fireball_collision = 1 if handlers.PF_COLLISION else 0
        # Reset collision handlers
        handlers.PF_COLLISION = []
        handlers.AP_COLLISION = []
        handlers.AF_COLLISION = []
        if video:
            vid_from_img(filename)

    def counterfactual_run(self, std_dev, video=False, filename=''):
        '''
        Forward method for Environments. Actually runs the scenarios you view
        on (or off) screen.
        std_dev::float -- noise parameter for simulation
        video::bool -- whether you want to record the simulation
        filename::str -- file name for video
        '''
        # We remove the agent from the environment
        self.space.remove(self.space.shapes[0])
        self.space.remove(self.space.bodies[0])
        # Reinitialize pygame
        pygame.init()
        # If viewing, draw simulation to screen
        if self.view:
            pygame.init()
            self.screen = pygame.display.set_mode((1000, 600))
            self.options = pymunk.pygame_util.DrawOptions(self.screen)
            self.clock = pygame.time.Clock()
        # Set noise parameter
        self.std_dev = std_dev
        save_screen = make_video(self.screen)
        # Agent velocities
        _, p_vel, f_vel = self.vel
        # Counterfactual ticks for agents
        self.patient.counterfactual_tick = self.agent_patient_collision
        self.fireball.counterfactual_tick = self.agent_fireball_collision
        # Agent action generators (yield actions of agents)
        p_generator = self.patient.act(p_vel, self.clock, self.screen,
                                       self.space, self.options, self.view,
                                       self.std_dev)
        f_generator = self.fireball.act(f_vel, self.clock, self.screen,
                                        self.space, self.options, self.view,
                                        self.std_dev)
        # Running flag
        running = True
        # Main loop. Run simulation until collision between Green Agent
        # and Fireball
        while running and not handlers.PF_COLLISION:
            try:
                # Generate the next tick in the simulation for each object
                next(p_generator)
                next(f_generator)
                # Render space on screen (if requested)
                if self.view:
                    self.screen.fill((255, 255, 255))
                    self.space.debug_draw(self.options)
                    pygame.display.flip()
                    self.clock.tick(50)
                self.space.step(1 / 50.0)
                # Update the values for the Blender JSON file
                self.update_blender_values()
                # Increment the simulation tick
                self.tick += 1
                # Increment ticks in agents
                self.patient.tick = self.tick
                self.fireball.tick = self.tick
                if video:
                    next(save_screen)
            except:
                running = False
        if self.view:
            pygame.quit()
            pygame.display.quit()
        # Record whether Green Agent and Fireball collision occurred
        self.patient_fireball_collision = 1 if handlers.PF_COLLISION else 0
        # Reset collision handlers
        handlers.PF_COLLISION = []
        handlers.AP_COLLISION = []
        handlers.AF_COLLISION = []
        if video:
            vid_from_img(filename)

_d_log = "logs" _f_info = "_info.log" _f_checkpoint = "_checkpoint" _episode = 0 _scores = [] if os.path.exists(_f_info): with open(_f_info, "r") as _f: for _line in _f.readlines(): _a, _b = _line.split("\t") _episode = int(_a) _scores.append(float(_b)) _n_games = _episode + 5000 _agent = Agent((8,), 4) if os.path.exists(_f_checkpoint): _agent.net.load_checkpoint(_f_checkpoint) _writer = SummaryWriter(_d_log) _is_quit = False while _episode < _n_games: _observation = _env.reset() _done = False _score = 0.0 while not _done: _action = _agent.get_action(_observation) _next_observation, _reward, _done, _info = _env.step(_action) _score += _reward _agent.learn(_observation, _reward, _next_observation, _done) _observation = _next_observation
def setUp(self) -> None:
    self.test_players = [Agent(), Agent()]

pi_historical = []
w_historical = []
r_historical = []
t_historical = []

# For intra-algo
mu_bar_historical = []
mu_historical = []

# Init the Agent's environment
env = Environment()

# Init the expert agent
# Feed it the expert trajectories
a = Agent(type='expert',
          action_list=['l', 'r'],
          environment=env,
          trajectories=[['r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r']])

# Build said expert trajectories
a.build_trajectories()

# Build the Agent's initial state distribution
a.build_D()

# Init a standalone environment for the state itself
simul_env = Environment()

# Init the simulation
sim = Simulation(agents=a, environment=simul_env, alpha=.02)

# This method will initialize a matrix of state-action pairs and their values (currently set to init all

from disc_bertrand import DiscrBertrand
from config import avg_profit_gain

# Parameters
env = DiscrBertrand()
from config import PARAMS
GAMMA = PARAMS[0]
ALPHA = PARAMS[1]
BETA = PARAMS[2]
NUM_EPISODES = PARAMS[3].astype(int)
nA = PARAMS[5].astype(int)
ITER_BREAK = PARAMS[7].astype(int)
CONV = PARAMS[8].astype(int)

# Objects
agent1 = Agent()
agent2 = Agent()

# Initializations
writer = SummaryWriter(comment="-q-iteration")
iter_no = 0

# Q learning Algorithm
profits = np.zeros((ITER_BREAK + 2, NUM_EPISODES + 2))
for ep in range(NUM_EPISODES):
    print(ep)
    # 1: initialise Qs
    env.reset()
    agent1.reset()
    agent2.reset()
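
# --- Hedged sketch of the per-step Q update such a loop typically performs ---
# The Agent class and environment API used above are not shown, so this
# standalone example only illustrates a tabular epsilon-greedy Q-learning step
# using the hyperparameter names from the snippet (ALPHA, GAMMA).
import numpy as np

def epsilon_greedy(Q, s, eps, rng):
    # epsilon-greedy action selection in state s
    if rng.random() < eps:
        return int(rng.integers(Q.shape[1]))
    return int(np.argmax(Q[s]))

def q_update(Q, s, a, r, s_next, alpha, gamma):
    # standard Q-learning target: r + gamma * max_a' Q(s', a')
    Q[s, a] += alpha * (r + gamma * np.max(Q[s_next]) - Q[s, a])
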
f.write("\n max_num_steps: " + str(max_num_steps))
f.write("\n num_steps: " + str(num_steps))
f.write("\n mini_batch_size: " + str(mini_batch_size))
f.write("\n ppo_epochs: " + str(ppo_epochs))
f.write("\n GAMMA: " + str(GAMMA))
f.write("\n GAE_LAMBDA: " + str(GAE_LAMBDA))
f.write("\n PPO_EPSILON: " + str(PPO_EPSILON))
f.write("\n CRICIC_DISCOUNT: " + str(CRICIC_DISCOUNT))
f.write("\n ENTROPY_BETA: " + str(ENTROPY_BETA))
f.write("\n eta: " + str(eta))
f.write("\n LSTM: Yes")
f.write("\n Architecture: 1")
f.close()

agent = Agent(state_size_map, state_size_depth, state_size_goal, num_outputs,
              hidden_size, stack_size, lstm_layers, load_model, MODELPATH, lr,
              mini_batch_size, num_envs, lr_decay_epoch, init_lr, writer, eta)

max_frames = 500000
test_rewards = []
episode_length = []

for i in range(0, num_envs):
    episode_length.append(max_num_steps)
envs.set_episode_length(episode_length)

early_stop = True
best_reward = 0

map_state, depth_state, goal_state = envs.reset()

def play(forever, w, h):
    global records, returnsBool, steps
    # variables for graphing
    countTo10 = 0
    myCount = 0
    counter = [0 for i in range(10)]
    num = w * h
    gamma = 0.9
    e = 0.3
    pts = [0 for i in range(int(forever / 10))]
    for i in range(forever):
        # reset the rewards boolean value
        returnsBool = np.zeros(shape=(3, 3, 3, 3, 3, 3, 3, 3, 8))
        records = []  # reset
        # initialize the board
        agent = Agent(w, h)
        ships = agent.ships
        board = agent.enemyBoard
        actionSet = {i for i in range(num)}
        win = False
        while not win:
            # select a random action from the actionSet
            action = random.choice(tuple(actionSet))
            actionSet.remove(action)
            y = int(action / w)
            x = action % h
            hit = checkHit([y, x], ships, board)
            state = getState([y, x], board, w, h)
            # print("rand")
            if hit:
                # if action was a hit, enter Monte Carlo guessing
                records = []  # reset
                win = monteCarlo([y, x], ships, board, e, w, h, gamma)
            else:
                # update the board location to be a miss
                board[y][x] = 1
        # for graphing purposes
        counter[countTo10] = steps / 3  # ave num of hits to sink a ship
        countTo10 += 1
        steps = 0
        if countTo10 == 10:
            countTo10 = 0
            pts[myCount] = statistics.mean(counter)
            myCount += 1
    episodes = np.array([i for i in range(1, forever + 1, 10)])
    pts = np.array(pts)
    plt.figure(1)
    plt.plot(episodes, pts)
    plt.xlabel('Number of Episodes')
    plt.ylabel('Time Steps to Sink a Ship')
    plt.title('Convergence of Monte Carlo')
    plt.show()
    return board
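
# --- Hedged sketch of the value update behind monteCarlo() ---
# The actual monteCarlo() implementation is not shown above. This standalone
# example illustrates the usual every-visit Monte Carlo update: walk an episode
# backwards, accumulate the discounted return G, and average it into the value
# estimate for each visited state. Names here are illustrative only.
def monte_carlo_update(episode, values, counts, gamma):
    """episode: list of (state, reward) pairs, in the order they occurred."""
    G = 0.0
    for state, reward in reversed(episode):
        G = reward + gamma * G
        counts[state] = counts.get(state, 0) + 1
        old = values.get(state, 0.0)
        values[state] = old + (G - old) / counts[state]  # incremental mean of returns
    return values
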
def setup(model):
    model.reset()
    model.add_agent(Agent())

    'LEARNING_RATE': 5e-4,
    'REPLAY_BUFFER_SIZE': 100000,
    'MIN_REPLAY_BUFFER_SIZE': 1000,
    'EPSILON_START': 1,
    'EPSILON_END': 0.1,
    'EPSILON_DECAY_DURATION': 50000,
}

# Allow changing hyperparameters from command-line arguments
args = get_args(default_args=args_dict)

# Create wrapped environment
env = make_env(args.ENV_ID)

# Set seed
set_seed(env, args.SEED)

# GPU or CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Create agent
agent = Agent(env, device, args)

# Train agent for args.NB_FRAMES
agent.train()

# Save agent
agent.save()

# Test agent
agent.test(render=False)
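
# --- Illustrative helper for the epsilon schedule implied by EPSILON_START /
# EPSILON_END / EPSILON_DECAY_DURATION above. The real Agent.train()
# implementation is not shown; this is only a common way such a linear
# schedule is computed.
def epsilon_by_frame(frame, start=1.0, end=0.1, decay_duration=50000):
    """Linearly anneal epsilon from `start` to `end` over `decay_duration` frames."""
    fraction = min(frame / decay_duration, 1.0)
    return start + fraction * (end - start)
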
def create_all(self, pops, regions):
    """Based on regions and population data, create agents, families,
    houses, and firms"""
    agent_id = 0
    house_id = 0
    family_id = 0
    firm_id = 0
    my_agents = {}
    my_families = {}
    my_houses = {}
    my_firms = {}
    for region_id, region in regions.items():
        logger.info('Generating region {}'.format(region_id))
        num_houses = 0
        regional_agents = {}
        pop_cols = list(list(pops.values())[0].columns)
        if not self.sim.PARAMS['SIMPLIFY_POP_EVOLUTION']:
            list_of_possible_ages = pop_cols[1:]
        else:
            list_of_possible_ages = [0] + pop_cols[1:]
        loop_age_control = list(list_of_possible_ages)
        loop_age_control.pop(0)
        for age in loop_age_control:
            for gender in ['male', 'female']:
                cod_mun = region.id
                pop = pop_age_data(pops[gender], cod_mun, age,
                                   self.sim.PARAMS['PERCENTAGE_ACTUAL_POP'])
                for individual in range(pop):
                    # Qualification
                    # To see a histogram check test:
                    qualification = self.qual(cod_mun)
                    r_age = self.seed.randint(
                        list_of_possible_ages[
                            list_of_possible_ages.index(age) - 1] + 1,
                        age)
                    money = self.seed.randrange(50, 100)
                    month = self.seed.randrange(1, 13, 1)
                    a = Agent(agent_id, gender, r_age, qualification, money, month)
                    regional_agents[agent_id] = a
                    agent_id += 1
                    num_houses += 1
        for agent in regional_agents.keys():
            my_agents[agent] = regional_agents[agent]
        num_families = int(num_houses / self.sim.PARAMS['MEMBERS_PER_FAMILY'])
        num_houses = int(num_houses / self.sim.PARAMS['MEMBERS_PER_FAMILY'] *
                         (1 + self.sim.PARAMS['HOUSE_VACANCY']))
        num_firms = int(num_emp[num_emp['cod_mun'] == int(region.id)]['num_est'].iloc[0] *
                        self.sim.PARAMS['PERCENTAGE_ACTUAL_POP'])
        regional_families = self.create_family(num_families, family_id)
        family_id += num_families
        regional_houses = self.create_household(num_houses, region, house_id)
        house_id += num_houses
        regional_firms = self.create_firm(num_firms, region, firm_id)
        firm_id += num_firms
        for family in regional_families.keys():
            my_families[family] = regional_families[family]
        for house in regional_houses.keys():
            my_houses[house] = regional_houses[house]
        for firm in regional_firms.keys():
            my_firms[firm] = regional_firms[firm]
        regional_agents, regional_families = self.allocate_to_family(
            regional_agents, regional_families)
        regional_families = self.allocate_to_households(
            regional_families, regional_houses)
        # Set ownership of remaining houses for random families
        for house in regional_houses.keys():
            if regional_houses[house].owner_id is None:
                family = self.seed.choice(list(regional_families.keys()))
                regional_houses[house].owner_id = regional_families[family].id
    return my_agents, my_houses, my_families, my_firms

    print('Rounds played: ' + str(num_rounds))
    return G


if __name__ == '__main__':
    G = makegraphs.ec_toy()
    c = 2
    agentlist = defaultdict(Agent)
    # id num, utility, endowment, prices, subplans, middlemen.
    # Agent 1 degree central, agent 2 between central
    agentlist[2] = Agent(2, np.array((10, 1)), np.array((0.01, 0.98)),
                         np.array((10, 10)), loan=LOAN_AMT)
    agentlist[1] = Agent(1, np.array((10, 10)), np.array((0.01, 0.01)),
                         np.array((10, 10)), loan=LOAN_AMT)  # type 1 util player
    agentlist[3] = Agent(3, np.array((1, 10)), np.array((0.98, 0.01)),
                         np.array((1, 10)), loan=LOAN_AMT)
    agentlist[6] = Agent(6,