Example #1
 def __init__(self,
              sim,
              brain,
              name="QLearn",
              train_every_nth=5,
              train_batch_size=32,
              max_experience=300000,
              exploration_period=10000,
              epsilon_final=0.015,
              discount_factor=0.99):
   Agent.__init__(self, name)
   self.sim = sim
   self.brain = brain
   self.train_every_nth = train_every_nth
   self.train_batch_size = train_batch_size
   self.epsilon_final = epsilon_final
   self.discount_factor = discount_factor
   self.max_experience = max_experience
   self.exploration_period = exploration_period
   self.actions_executed = 0
   self.memory = []
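
Such agents typically anneal the exploration rate from 1.0 down to epsilon_final over exploration_period actions. A minimal, self-contained sketch of that schedule (assuming linear annealing; the agent's actual schedule is not shown above):

def linear_annealing(actions_executed, exploration_period=10000, epsilon_final=0.015):
    """Exploration rate after `actions_executed` actions: 1.0 decaying linearly to epsilon_final."""
    if actions_executed >= exploration_period:
        return epsilon_final
    return 1.0 - (1.0 - epsilon_final) * actions_executed / exploration_period

print(linear_annealing(5000))  # ~0.5075, halfway through the exploration period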
def main():
    """Runs everything needed by the agent"""
    settings = Settings()
    agent = Agent(settings)
    agent.start()

    # The agent is executing
    try:
        agent.join()
    except KeyboardInterrupt:
        agent.stop()
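
The main() above assumes an Agent that runs on its own thread and exposes start/join/stop. A minimal sketch of that contract (the class name and internals here are assumptions, not the project's actual implementation):

import threading
import time


class ThreadedAgent(threading.Thread):
    def __init__(self, settings):
        super().__init__()
        self.settings = settings
        self._stop_event = threading.Event()

    def run(self):
        # Main loop: one unit of work per iteration until stop() is called.
        while not self._stop_event.is_set():
            time.sleep(0.1)  # placeholder for real work

    def stop(self):
        self._stop_event.set()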
Example #3
    def setUp(self):
        Supervisors, Projects, Students = Hierarchy(3), Hierarchy(2), Hierarchy(1)
        """"
        Some Supervisors
        """
        self.KB = Agent("1", Supervisors, capacities=[0, 1], preferences=None, abilities=None, name="Ken Brown")
        self.UK = Agent("2", Supervisors, capacities=[1, 2], preferences=None, abilities=None, name="Uli Kraehmer")
        self.RS = Agent("3", Supervisors, capacities=[2, 2], preferences=None, abilities=None, name="Richard Steiner")
        self.TB = Agent("4", Supervisors, capacities=[1,3], preferences=None, abilities=None, name="Tara Brendle")
        self.AB = Agent("5", Supervisors, capacities=[0,1], preferences=None, abilities=None, name="Andy Baker")

        """
        Some projects
        """
        self.Hopf = Agent("1", Projects, capacities=[0, 1], preferences=[self.UK, self.KB, self.AB],
                          abilities=None, name="Hopf algebras")
        self.Alg_no = Agent("2", Projects, capacities=[0, 1], preferences=[self.KB, self.AB], abilities=None,
                            name="Algebraic number theory")
        self.Cat_thy = Agent("3", Projects, capacities=[0, 1], preferences=[self.RS, self.UK], abilities=None,
                             name="Category Theory")
        self.Group_thy = Agent("4", Projects, capacities=[0, 1], preferences=[self.TB, self.AB], abilities=None,
                               name="Group Theory")
        self.Topology = Agent("5", Projects, capacities=[0, 1], preferences=[self.AB, self.TB, self.RS],
                              abilities=None, name="Topology")

        """
        Some students
        """

        self.Paul = Agent("0700874", Students, [0, 1], preferences=[self.Hopf, self.Alg_no, self.Group_thy],
                          abilities=None, name="Paul Gilmartin")
        self.Scott = Agent("0700875", Students, [0, 1], preferences=[self.Alg_no, self.Hopf], abilities=None,
                           name="Scott Gilmartin")
        self.Jim = Agent("0700876", Students, [0, 1], preferences=[self.Cat_thy, self.Topology, self.Hopf], abilities=None,
                         name="Jimmy Boyd")
        self.Rachael = Agent("0700877", Students, [0,1], preferences=[self.Group_thy, self.Alg_no, self.Cat_thy],
                             abilities=None, name="Rachael Hayhoe")
        self.Nameless = Agent("0700878", Students, [0,1], preferences=[self.Topology], abilities=None, name=None)
Example #4
from agents import Model, Agent, run

epidemic_model = Model("Epidemimodel", 100, 100)

epidemic_model.add_agent(Agent())

run(epidemic_model)
from agents import Agent
from simulation import Simulation
from state import Environment
import numpy as np

# TODO: CHOICE TO ϕ DICTIONARY???

# Init the Agent's environment
env = Environment()

# Init the expert agent
# Feed it the expert trajectories
a = Agent(type='expert',
          action_list=['a', 'b', 'c', 'd', 'e', 'f', 'g'],
          environment=env,
          trajectories=[['a', 'b', 'c', 'e', 'g', 'b', 'c', 'e', 'g', 'c'],
                        ['a', 'b', 'c', 'a', 'g', 'g', 'a', 'g', 'g', 'c'],
                        ['c', 'd', 'f', 'b', 'c', 'a', 'd', 'f', 'b', 'c']])

# Build said expert trajectories
a.build_trajectories()

# Build the Agent's initial state distribution
a.build_D()

# Init a standalone environment for the state itself
simul_env = Environment()

# Init the simulation
sim = Simulation(agents=a, environment=simul_env, alpha=1)
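
One plausible way for build_D to derive the initial state distribution is to count the first state of each expert trajectory; a self-contained sketch of that idea (the real Agent.build_D may differ):

from collections import Counter


def initial_state_distribution(trajectories):
    """Empirical distribution over the first state of each trajectory."""
    firsts = Counter(traj[0] for traj in trajectories if traj)
    total = sum(firsts.values())
    return {state: count / total for state, count in firsts.items()}


print(initial_state_distribution([['a', 'b'], ['a', 'c'], ['c', 'd']]))
# {'a': 0.666..., 'c': 0.333...}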
 def perform_agent_action(self, agent: Agent, action: str):
     # Perform the specified action if possible, wait otherwise
     if action == 'UP' and self.tile_is_free(agent.x, agent.y - 1):
         agent.y -= 1
         agent.add_event(e.MOVED_UP)
     elif action == 'DOWN' and self.tile_is_free(agent.x, agent.y + 1):
         agent.y += 1
         agent.add_event(e.MOVED_DOWN)
     elif action == 'LEFT' and self.tile_is_free(agent.x - 1, agent.y):
         agent.x -= 1
         agent.add_event(e.MOVED_LEFT)
     elif action == 'RIGHT' and self.tile_is_free(agent.x + 1, agent.y):
         agent.x += 1
         agent.add_event(e.MOVED_RIGHT)
     elif action == 'BOMB' and agent.bombs_left:
         self.logger.info(
             f'Agent <{agent.name}> drops bomb at {(agent.x, agent.y)}')
         self.bombs.append(
             Bomb((agent.x, agent.y),
                  agent,
                  s.BOMB_TIMER,
                  s.BOMB_POWER,
                  agent.color,
                  custom_sprite=agent.bomb_sprite))
         agent.bombs_left = False
         agent.add_event(e.BOMB_DROPPED)
     elif action == 'WAIT':
         agent.add_event(e.WAITED)
     else:
         agent.add_event(e.INVALID_ACTION)
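
perform_agent_action relies on a tile_is_free helper that is not shown; a minimal sketch of what it might look like on a 2D arena where 0 marks a free tile (crate and bomb handling omitted, names assumed):

import numpy as np


def tile_is_free(arena: np.ndarray, x: int, y: int) -> bool:
    """True if (x, y) lies inside the arena and holds neither a wall nor a crate."""
    inside = 0 <= x < arena.shape[0] and 0 <= y < arena.shape[1]
    return inside and arena[x, y] == 0


arena = np.zeros((5, 5), dtype=int)
arena[2, 2] = 1  # a wall
print(tile_is_free(arena, 2, 1), tile_is_free(arena, 2, 2))  # True False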
import threading
import time
from signal import SIGINT, SIGTERM, signal

from agents import Agent, Message

# generate public and private keys for server and client
server_public_key, server_private_key = Agent.curve_keypair()
wrong_server_public_key, wrong_server_private_key = Agent.curve_keypair()
client_public_key, client_private_key = Agent.curve_keypair()
client2_public_key, client2_private_key = Agent.curve_keypair()


class NotificationBroker(Agent):
    def setup(self, name=None, pub_address=None, sub_address=None):
        self.create_notification_broker(
            pub_address,
            sub_address,
            options=self.curve_server_config(server_private_key),
        )


class Sender(Agent):
    def setup(self, name=None, pub_address=None, sub_address=None):
        self.counter = 0
        self.pub, self.sub = self.create_notification_client(
            pub_address,
            sub_address,
            options=self.curve_client_config(server_public_key,
                                             client_public_key,
                                             client_private_key),
def run_rotate():
    '''
    Runs the simulations and saves the JSON files with an arbitrary rotation
    about the center of the screen.
    '''
    def rotate(obj,theta=-20,origin=(500,300)):
        '''
        Rotates objects about a center
        '''
        # Translate w/r to visual origin (500,300)
        obj.body.position -= Vec2d(origin)
        # Degrees to radians
        theta = radians(theta)
        x, y = obj.body.position
        x_ = x*cos(theta) - y*sin(theta)
        y_ = y*cos(theta) + x*sin(theta)
        obj.body.position = [x_,y_]
        # Translate w/r to actual origin (0,0)
        obj.body.position += Vec2d(origin)

    video = False
    thetas = list(range(-19,-9))+list(range(10,19))
    for scene in scenarios.__experiment3__:
        theta = choice(thetas)
        sim = getattr(scenarios, scene)
        env = sim(True)
        env.run()
        # Gather position data
        pos = env.position_dict
        agent_positions = env.position_dict['agent']
        patient_positions = env.position_dict['patient']
        fireball_positions = env.position_dict['fireball']

        # Setup pygame and pymunk
        space = pymunk.Space()
        space.damping = 0.05
        screen = pygame.display.set_mode((1000,600))
        options = pymunk.pygame_util.DrawOptions(screen)
        clock = pygame.time.Clock()
        if video:
            save_screen = make_video(screen)
        # Setup empty agents
        agent = Agent(0,0,'blue',0,[])
        patient = Agent(0,0,'green',0,[])
        fireball = Agent(0,0,'red',0,[])
        # Add agent to space
        space.add(agent.body, agent.shape,
                  patient.body, patient.shape,
                  fireball.body, fireball.shape)
        pygame.init()
        running = True

        while running:
            # print(agent_positions[0])
            try:
                # Extract position data
                a_pos = agent_positions.pop(0)
                p_pos = patient_positions.pop(0)
                f_pos = fireball_positions.pop(0)
                # Set positions of objects
                agent.body.position = Vec2d(a_pos['x'],a_pos['y'])
                patient.body.position = Vec2d(p_pos['x'],p_pos['y'])
                fireball.body.position = Vec2d(f_pos['x'],f_pos['y'])
                # Rotate objects about the center
                rotate(agent,theta)
                rotate(patient,theta)
                rotate(fireball,theta)
                # Render space on screen (if requested)
                screen.fill((255,255,255))
                space.debug_draw(options)
                pygame.display.flip()
                clock.tick(60)
                space.step(1/60.0)
                if video:
                    next(save_screen)
            except Exception as e:
                running = False
        pygame.quit()
        pygame.display.quit()
        if video:
            vid_from_img("final_"+scene)
Example #9
    #return dictionary of agents


if __name__ == "__main__":
    G = makegraphs.ringGraph(3)
    #initialize graph here

    c = 2  #number of commodities
    agentlist = defaultdict(Agent)
    for i in range(G.number_of_nodes()):
        #initialize params for agent
        u = np.zeros(c)
        e = np.ones(c)
        p = np.ones(c)
        subplans = np.zeros((G.number_of_nodes(), c))
        agentlist[i] = Agent(u, e, p, subplans)

    nx.set_node_attributes(G, 'agentprop', agentlist)

    check_eq = False
    num_rounds = 0
    while not check_eq:
        agents_old = nx.get_node_attributes(G, 'agentprop')
        agents_new = changePlans(G)
        nx.set_node_attributes(G, 'agentprop', agents_new)
        check_eq = checkEquilibrium(agents_old, agents_new)
        num_rounds += 1

    print(str(num_rounds - 1) + ' rounds needed to reach equilibrium.')

    drawNetwork(G, 'agentprop', 'test.png')
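
checkEquilibrium is not shown; one simple reading is that equilibrium is reached when no agent's plans changed between rounds. A sketch under that assumption (it presumes the Agent stores its plans in a `subplans` attribute, which the snippet does not confirm):

import numpy as np


def checkEquilibrium(agents_old, agents_new):
    """True when every agent's subplans are numerically unchanged between rounds."""
    return all(np.allclose(agents_old[i].subplans, agents_new[i].subplans)
               for i in agents_old)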
def test_agent():
    a = Agent()
    assert isinstance(a, Agent)
Example #11
def test_ModelBasedReflexAgentProgram():

    loc_A = (0, 0)
    loc_B = (1, 0)

    model = {loc_A: None, loc_B: None}

    class Rule:
        def __init__(self, state, action):
            self.__state = state
            self.action = action

        def matches(self, state):
            return self.__state == state

    # create rules for a two state Vacuum Environment
    rules = [
        Rule((loc_A, "Dirty"), "Suck"),
        Rule((loc_A, "Clean"), "Right"),
        Rule((loc_B, "Dirty"), "Suck"),
        Rule((loc_B, "Clean"), "Left")
    ]

    def update_state(state, action, percept, model):
        loc, status = percept

        # the other location
        loc2 = tuple(map(lambda x: x[0] - x[1], zip((1, 0), loc)))

        # initial guess of the other location
        if not state or not action or not model[loc2]:
            model[loc2] = random.choice(['Dirty', 'Clean'])

        model[loc] = status

        # the model assumes the environment stays clean if the agent chose to Suck last step
        if action == 'Suck':
            state = percept
            return state

        # dirt may appear suddenly, so the model guesses randomly
        if status == 'Clean':
            status = random.choice(['Dirty', 'Clean'])
            model[loc] = status

        # moving Right or Left does not change the environment
        state = (loc, model[loc])
        return state

    # create a program and then an object of the ModelBasedReflexAgentProgram

    program = ModelBasedReflexAgentProgram(rules, update_state, model)
    agent = Agent(program)

    # create an object of TrivialVacuumEnvironment
    environment = TrivialVacuumEnvironment()
    # add agent to the environment
    environment.add_thing(agent)
    # run the environment
    environment.run()
    # check final status of the environment
    assert environment.status == {(1, 0): 'Clean', (0, 0): 'Clean'}
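
For context, a model-based reflex agent program couples the caller-supplied update_state with a rule lookup; a minimal sketch in the spirit of the AIMA implementation (the library's actual code may differ in details):

def ModelBasedReflexAgentProgram(rules, update_state, model):
    """Return a program that tracks internal state and acts by matching rules against it."""
    def program(percept):
        program.state = update_state(program.state, program.action, percept, model)
        rule = next(r for r in rules if r.matches(program.state))
        program.action = rule.action
        return program.action

    program.state = None
    program.action = None
    return program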
Example #12
class Evaluator(J30Runner):
    def __init__(self, model_name, model):
        super().__init__(train=False)

        self.model_name = model_name
        self.model = model.model
        self.agent = Agent(self.projects, model)
        self.result = []

    def load_weights(self, number):
        self.model.load_weights('.\\models\\' + self.model_name + '\\' +
                                self.model_name + '-' + str(number) + '.h5')

    def evaluate_project(self, project) -> float:
        t = 0

        while not project.is_finished():
            t += project.next(*self.act(project))

        return t

    def evaluate(self, num_of_iterations=100):
        """Evaluates a single project for the number of iterations."""
        durations = {}

        for project in self.projects:
            project_list = np.array([
                Project(project.path, stochastic=project.stochastic)
                for _ in range(num_of_iterations)
            ])
            durations[project.path[-8:]] = np.vectorize(
                self.evaluate_project, otypes=[float])(project_list)

        return durations

    def evaluate_all(self, num_of_models, num_of_iterations=100):
        for num_of_model in range(num_of_models):
            print('evaluating model', num_of_model)
            self.load_weights(num_of_model)
            self.result.append(self.evaluate(num_of_iterations))
            pickle.dump(
                self.result,
                open(self.model_name + '-result-' + str(num_of_model), 'wb'))

    def act(self, project):
        """The action with the highest value is executed.

        This function is different from the act-function during training: If no
        tasks are running, the model cannot choose the wait/void action. This
        prevents infinite loops if the wait/void action for such a state has the
        highest q-value.

        :return: the best action and the durations of the tasks in the action
        """
        state, durations = project.state()
        actions = project.get_actions()

        if len(actions) > 1:
            best_action = self.get_best_action(state, actions, project)
            return best_action, durations
        else:
            best_action = []
            return best_action, durations

    def get_best_action(self, state, actions, project):
        inputs = np.squeeze(
            np.array([
                self.agent.input_vector(state, action) for action in actions
            ]))
        action_values = np.squeeze(self.model.predict(inputs, len(inputs)))
        max_val = np.argmax(action_values)
        # the wait/void action must not be the best action if there are no running tasks
        if len(project.running) == 0 and actions[max_val] == []:
            max_val = np.argmax(action_values[1:]) + 1
        return actions[max_val]
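
evaluate_all pickles the growing result list once per checkpoint; a small sketch of how such a dump could be read back and summarised (the file name is hypothetical, following the naming scheme above):

import pickle

import numpy as np

with open('my_model-result-9', 'rb') as f:  # hypothetical model name, checkpoint 9
    results = pickle.load(f)                # one durations dict per evaluated checkpoint

for checkpoint, durations in enumerate(results):
    means = {name: float(np.mean(runs)) for name, runs in durations.items()}
    print(checkpoint, means)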
Example #13
class TestAgent_Good_Input(unittest.TestCase):

    def setUp(self):
        Supervisors, Projects, Students = Hierarchy(3), Hierarchy(2), Hierarchy(1)
        """"
        Some Supervisors
        """
        self.KB = Agent("1", Supervisors, capacities=[0, 1], preferences=None, abilities=None, name="Ken Brown")
        self.UK = Agent("2", Supervisors, capacities=[1, 2], preferences=None, abilities=None, name="Uli Kraehmer")
        self.RS = Agent("3", Supervisors, capacities=[2, 2], preferences=None, abilities=None, name="Richard Steiner")
        self.TB = Agent("4", Supervisors, capacities=[1,3], preferences=None, abilities=None, name="Tara Brendle")
        self.AB = Agent("5", Supervisors, capacities=[0,1], preferences=None, abilities=None, name="Andy Baker")

        """
        Some projects
        """
        self.Hopf = Agent("1", Projects, capacities=[0, 1], preferences=[self.UK, self.KB, self.AB],
                          abilities=None, name="Hopf algebras")
        self.Alg_no = Agent("2", Projects, capacities=[0, 1], preferences=[self.KB, self.AB], abilities=None,
                            name="Algebraic number theory")
        self.Cat_thy = Agent("3", Projects, capacities=[0, 1], preferences=[self.RS, self.UK], abilities=None,
                             name="Category Theory")
        self.Group_thy = Agent("4", Projects, capacities=[0, 1], preferences=[self.TB, self.AB], abilities=None,
                               name="Group Theory")
        self.Topology = Agent("5", Projects, capacities=[0, 1], preferences=[self.AB, self.TB, self.RS],
                              abilities=None, name="Topology")

        """
        Some students
        """

        self.Paul = Agent("0700874", Students, [0, 1], preferences=[self.Hopf, self.Alg_no, self.Group_thy],
                          abilities=None, name="Paul Gilmartin")
        self.Scott = Agent("0700875", Students, [0, 1], preferences=[self.Alg_no, self.Hopf], abilities=None,
                           name="Scott Gilmartin")
        self.Jim = Agent("0700876", Students, [0, 1], preferences=[self.Cat_thy, self.Topology, self.Hopf], abilities=None,
                         name="Jimmy Boyd")
        self.Rachael = Agent("0700877", Students, [0,1], preferences=[self.Group_thy, self.Alg_no, self.Cat_thy],
                             abilities=None, name="Rachael Hayhoe")
        self.Nameless = Agent("0700878", Students, [0,1], preferences=[self.Topology], abilities=None, name=None)



    def test_give_name_1(self):
        self.Nameless.give_name("Big Man")
        self.assertEqual("Big Man", self.Nameless.name)

    def test_give_name_2(self):
        self.Paul.give_name("Paul G")
        self.assertEqual("Paul G", self.Paul.name)

    def test_upper_capacity_1(self):
        self.assertEqual(self.KB.upper_capacity, 1)

    def test_upper_capacity_2(self):
        self.assertEqual(self.TB.upper_capacity, 3)

    def test_preference_position_1(self):
        self.assertEqual(self.Paul.preference_position(self.Hopf), 1)

    def test_preference_position_2(self):
        self.assertEqual(self.Topology.preference_position(self.RS), 3)

    def test_lower_capacity_1(self):
        self.assertEqual(self.AB.lower_capacity, 0)

    def test_lower_capacity_2(self):
        self.assertEqual(self.RS.lower_capacity, 2)

    def test_capacity_difference_1(self):
        self.assertEqual(self.RS.capacity_difference, 0)

    def test_capacity_difference_2(self):
        self.assertEqual(self.Alg_no.capacity_difference, 1)

    if __name__ == "__main__" :
        unittest.main()
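
The assertions above pin down preference_position as a 1-based index into the agent's preference list; a sketch consistent with those tests (the real Agent implementation is not shown here):

    def preference_position(self, other):
        """1-based rank of `other` in this agent's preference list."""
        return self.preferences.index(other) + 1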
Example #14
    def __init__(self,
                 action_set,
                 reward_function,
                 prior_variance,
                 noise_variance,
                 feature_extractor,
                 prior_network,
                 num_ensemble,
                 hidden_dims=[10, 10],
                 learning_rate=5e-4,
                 buffer_size=50000,
                 batch_size=64,
                 num_batches=100,
                 starts_learning=5000,
                 discount=0.99,
                 target_freq=10,
                 verbose=False,
                 print_every=1,
                 test_model_path=None):
        Agent.__init__(self, action_set, reward_function)

        self.prior_variance = prior_variance
        self.noise_variance = noise_variance

        self.feature_extractor = feature_extractor
        self.feature_dim = self.feature_extractor.dimension

        dims = [self.feature_dim] + hidden_dims + [len(self.action_set)]

        self.prior_network = prior_network
        self.num_ensemble = num_ensemble  # number of models in ensemble

        self.index = np.random.randint(self.num_ensemble)

        # build Q network
        # we use a multilayer perceptron

        if test_model_path is None:
            self.test_mode = False
            self.learning_rate = learning_rate
            self.buffer_size = buffer_size
            self.batch_size = batch_size
            self.num_batches = num_batches
            self.starts_learning = starts_learning
            self.discount = discount
            self.timestep = 0

            self.buffer = Buffer(self.buffer_size)
            self.models = []
            for i in range(self.num_ensemble):
                if self.prior_network:
                    '''
                    The second network is a prior network whose weights stay fixed;
                    the first is the learned "difference" network, whose weights are trainable.
                    '''
                    self.models.append(
                        DQNWithPrior(dims, scale=np.sqrt(
                            self.prior_variance)).to(device))
                else:
                    self.models.append(MLP(dims).to(device))
                self.models[i].initialize()
            '''
            The prior networks' weights are immutable, so it is enough to keep
            the difference networks.
            '''
            self.target_nets = []
            for i in range(self.num_ensemble):
                if self.prior_network:
                    self.target_nets.append(
                        DQNWithPrior(dims, scale=np.sqrt(
                            self.prior_variance)).to(device))
                else:
                    self.target_nets.append(MLP(dims).to(device))
                # Keep each target network in sync with its online network (both branches)
                self.target_nets[i].load_state_dict(
                    self.models[i].state_dict())
                self.target_nets[i].eval()

            self.target_freq = target_freq  #   target nn updated every target_freq episodes
            self.num_episodes = 0

            self.optimizer = []
            for i in range(self.num_ensemble):
                self.optimizer.append(
                    torch.optim.Adam(self.models[i].parameters(),
                                     lr=self.learning_rate))

            # for debugging purposes
            self.verbose = verbose
            self.running_loss = 1.
            self.print_every = print_every

        else:
            self.models = []
            self.test_mode = True
            if self.prior_network:
                self.models.append(
                    DQNWithPrior(dims, scale=self.prior_variance))
            else:
                self.models.append(MLP(dims))
            self.models[0].load_state_dict(torch.load(test_model_path))
            self.models[0].eval()
            self.index = 0
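
DQNWithPrior is referenced but not defined here; in the randomized-prior-functions idea it is the sum of a trainable network and a fixed, randomly initialised prior scaled by `scale`. A minimal PyTorch sketch of that idea (an assumption about the actual class):

import torch.nn as nn


class DQNWithPrior(nn.Module):
    """Trainable MLP plus a frozen, randomly initialised prior MLP."""

    def __init__(self, dims, scale=1.0):
        super().__init__()

        def mlp():
            layers = []
            for d_in, d_out in zip(dims[:-1], dims[1:]):
                layers += [nn.Linear(d_in, d_out), nn.ReLU()]
            return nn.Sequential(*layers[:-1])  # no activation on the output layer

        self.net = mlp()    # the learnable "difference" network
        self.prior = mlp()  # fixed prior: never trained
        for p in self.prior.parameters():
            p.requires_grad_(False)
        self.scale = scale

    def forward(self, x):
        return self.net(x) + self.scale * self.prior(x)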
    'BATCH_SIZE': 32,
    'DISCOUNT': 0.99,
    'TARGET_UPDATE_STEPS': 100,
    'LEARNING_RATE': 1e-3,
    'REPLAY_BUFFER_SIZE': 1000,
    'MIN_REPLAY_BUFFER_SIZE': 100,
    'EPSILON_START': 1,
    'EPSILON_END': 0.1,
    'EPSILON_DECAY_DURATION': 5000,
}
# Allow changing hyperparameters from command-line arguments
args = get_args(default_args=args_dict)

# Create wrapped environment
env = make_env(args.ENV_ID)

# Set Seed
set_seed(env, args.SEED)

# GPU or CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Create agent
agent = Agent(env, device, args)

# Load agent
agent.load()

# Test agent
agent.test(render=False)
Example #16
 def init_agents(self):
     for i in range(self.population_size):
         agent = Agent(self.agent_hps, self.game_type,
                       self.compute_input_channels(self.game_type))
         self.agent_steps[i] = 0
         self.agents.append(agent)
Example #17
 def init_agents(self):
     for i in range(self.population_size):
         agent = Agent(self.agent_hps, self.vision_hps)
         self.agent_reward_logs[i] = []
         self.agents.append(agent)
Example #18
def test_Agent():
    def constant_prog(percept):
        return percept
    agent = Agent(constant_prog)
    result = agent.program(5)
    assert result == 5
                server_public_key, public_key, private_key
            )
        else:
            options = {}
        self.pub, self.sub = self.create_notification_client(
            pub_address, sub_address, options=options
        )
        self.sub.observable.subscribe(lambda x: self.log.info(f"received: {x}"))


if __name__ == "__main__":

    with tempfile.TemporaryDirectory() as trusted_keys_path, tempfile.TemporaryDirectory() as untrusted_keys_path:

        # create key pairs in corresponding directories
        Agent.create_curve_certificates(trusted_keys_path, "server")
        Agent.create_curve_certificates(trusted_keys_path, "listener")
        Agent.create_curve_certificates(untrusted_keys_path, "listener2")

        # load key pairs
        server_public_key, server_private_key = Agent.load_curve_certificate(
            os.path.join(trusted_keys_path, "server.key_secret")
        )
        listener_public_key, listener_private_key = Agent.load_curve_certificate(
            os.path.join(trusted_keys_path, "listener.key_secret")
        )
        listener2_public_key, listener2_private_key = Agent.load_curve_certificate(
            os.path.join(untrusted_keys_path, "listener2.key_secret")
        )

        broker = NotificationBroker(
Example #20
    "num_actions":
    3,
    "activation":
    "relu",

    # MLP
    "hidden_sizes": (64, ) * 2,
    "load_model":
    True,
    "load_model_from_path":
    '/home/llama/tb_logs/spatial_softmax_2019-12-02_21-26-53/Agent0_1300.pt',
}

agent_ids = ["Agent0"]  #, "Agent1"]
agents: Dict[str, Agent] = {
    agent_id: Agent(SpatialSoftMaxModel(agent_config), name=agent_id)
    #agent_id: Agent(CoordConvModel(agent_config), name=agent_id)
    for agent_id in agent_ids
}

trainer_config = {
    # Trainer settings
    "agents_to_optimize": None,  # ids of agents that should be optimized
    "batch_size": 2000,
    # Agent settings
    "optimizer": "adam",
    "optimizer_kwargs": {
        "lr": 1e-4,
        "betas": (0.9, 0.999),
        "eps": 1e-7,
        "weight_decay": 0,
Example #21
class MetaBestFirstSearchEnv(gym.Env):
    """A meta-MDP for best first search with a deterministic transition model."""
    Node = namedtuple('Node', ('state', 'path', 'reward', 'done'))
    State = namedtuple('State', ('frontier', 'reward_to_state', 'best_done'))
    TERM = 'TERM'

    def __init__(self, env, eval_node, expansion_cost=0.01):
        super().__init__()
        self.env = env
        self.expansion_cost = -abs(expansion_cost)

        # This guy interacts with the external environment, what a chump!
        self.surface_agent = Agent()
        self.surface_agent.register(self.env)
        self.eval_node = eval_node

    def _reset(self):
        self.env.reset()
        self.model = Model(self.env)  # warning: this breaks if env resets again
        start = self.Node(self.env._state, [], 0, False)
        # The frontier ordering is really part of the meta policy.
        frontier = PriorityQueue(key=self.eval_node(noisy=True))
        frontier.push(start)
        reward_to_state = defaultdict(lambda: -np.inf)
        best_done = None
        # Warning: state is mutable (and we mutate it!)
        self._state = self.State(frontier, reward_to_state, best_done)
        return self._state

    def _step(self, action):
        """Expand a node in the frontier."""
        if action is self.TERM:
            # The return of one episode in the external env is
            # one reward in the MetaSearchEnv.
            trace = self._execute_plan()
            external_reward = trace['return']
            return None, external_reward, True, {'trace': trace}
        else:
            return self._expand_node(action), self.expansion_cost, False, {}

    def _execute_plan(self):
        frontier, reward_to_state, best_done = self._state

        if not best_done:
            raise RuntimeError('Cannot make plan.')

        policy = FixedPlanPolicy(best_done.path)
        self.surface_agent.register(policy)
        trace = self.surface_agent.run_episode(reset=False)
        return trace

        # elif frontier:
        #     plan = min(best_done, frontier.pop(), key=eval_node)
        #     plan = frontier.pop()

    def _expand_node(self, node):
        frontier, reward_to_state, best_done = self._state
        s0, p0, r0, _ = node

        for a, s1, r, done in self.model.options(s0):
            node1 = self.Node(s1, p0 + [a], r0 + r, done)
            if node1.reward <= reward_to_state[s1] - 0.002:
                continue  # cannot be better than an existing node
            reward_to_state[s1] = node1.reward
            if done:
                best_done = max((best_done, node1),
                                key=self.eval_node(noisy=False))
            else:
                frontier.push(node1)

        self._state = self.State(frontier, reward_to_state, best_done)
        return self._state
from skopt import gp_minimize
import pandas as pd
import numpy as np

from agents import Agent
from policies import LiederPolicy, FixedPlanPolicy, MaxQPolicy
from value_functions import LiederQ
from contexttimer import Timer

from toolz import partition_all
from joblib import Parallel, delayed

from utils import cum_returns

__ENVS = None
__AGENT = Agent()
__CHUNKS = None

def eval_one(i):
    __AGENT.register(__ENVS[i])
    return __AGENT.run_episode()['return']

def eval_chunk(i, return_mean=True):
    # Each process should start with a different random seed.
    np.random.seed(np.random.randint(1000) + i)
    returns = []
    for env in __CHUNKS[i]:
        __AGENT.register(env)
        returns.append(__AGENT.run_episode()['return'])
    if return_mean:
        return np.mean(returns)
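
The toolz/joblib imports suggest the chunks are evaluated in parallel by index; a toy driver showing that pattern (the chunk size, environment stand-ins, and chunk_mean function are made up; the threading backend is used so the module-level chunk list is shared):

from joblib import Parallel, delayed
from toolz import partition_all

envs = list(range(100))                   # stand-in for real environment objects
__CHUNKS = list(partition_all(25, envs))  # 4 chunks of 25 environments each

def chunk_mean(i):
    # stand-in for eval_chunk: just average the chunk contents
    return sum(__CHUNKS[i]) / len(__CHUNKS[i])

means = Parallel(n_jobs=2, backend="threading")(
    delayed(chunk_mean)(i) for i in range(len(__CHUNKS)))
print(means)  # [12.0, 37.0, 62.0, 87.0]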
Example #23
class Environment:
    def __init__(self,
                 a_params,
                 p_params,
                 f_params,
                 vel,
                 handlers=None,
                 view=True,
                 std_dev=0,
                 frict=0.05):
        '''
		Environment class that contains all necessary components to configure
		and run scenarios.

		a_params::dict -- parameters for the Blue Agent
		p_params::dict -- parameters for the Green Agent
		f_params::dict -- parameters for the Fireball
		vel::tuple     -- velocities associated with each agent in the scenario
		handlers::tuple -- optional collision handlers
		view::bool     -- flag for whether you want to view the scenario or not
		frict::float   -- friction value for pymunk physics
		std_dev::float -- standard deviation value for noisy counterfactual simulation
		'''
        self.view = view
        self.std_dev = std_dev
        # Objects in the environment
        self.agent = Agent(a_params['loc'][0], a_params['loc'][1],
                           a_params['color'], a_params['coll'],
                           a_params['moves'])
        self.patient = Agent(p_params['loc'][0], p_params['loc'][1],
                             p_params['color'], p_params['coll'],
                             p_params['moves'])
        self.fireball = Agent(f_params['loc'][0], f_params['loc'][1],
                              f_params['color'], f_params['coll'],
                              f_params['moves'])
        # Initial location of objects in environment
        self.p_loc = p_params['loc']
        self.a_loc = a_params['loc']
        self.f_loc = f_params['loc']
        # Pymunk space friction
        self.friction = frict
        # Agent velocities
        self.vel = vel
        self.pf_lock = False
        self.af_lock = False
        self.ap_lock = False
        # Engine parameters
        self.space = None
        self.screen = None
        self.options = None
        self.clock = None
        # Collision handlers
        self.coll_handlers = [x for x in handlers] if handlers else handlers
        # Values needed for rendering the scenario in Blender
        self.tick = 0
        self.agent_collision = None
        self.agent_patient_collision = None
        self.agent_fireball_collision = None
        self.patient_fireball_collision = 0
        self.position_dict = {'agent': [], 'patient': [], 'fireball': []}
        self.screen_size = (1000, 600)
        # Configure and run environment
        self.configure()

    def configure(self):
        '''
		Configuration method for Environments. Sets up the pymunk space
		for scenarios.
		'''
        # Configure pymunk space and pygame engine parameters (if any)
        if self.view:
            pygame.init()
            self.screen = pygame.display.set_mode((1000, 600))
            self.options = pymunk.pygame_util.DrawOptions(self.screen)
            self.clock = pygame.time.Clock()
        self.space = pymunk.Space()
        self.space.damping = self.friction
        # Configure collision handlers (if any)
        if self.coll_handlers:
            for ob1, ob2, rem in self.coll_handlers:
                ch = self.space.add_collision_handler(ob1, ob2)
                ch.data["surface"] = self.screen
                ch.post_solve = rem
        # Add agents to the pymunk space
        self.space.add(self.agent.body, self.agent.shape, self.patient.body,
                       self.patient.shape, self.fireball.body,
                       self.fireball.shape)

    def update_blender_values(self):
        '''
		All scenarios are rendered in Blender. To do this, we store relevant
		values such as object positions, simulation tick counts, and collision
		ticks in a JSON file. That file is passed to a bash script that renders
		the scenario in Blender.

		This method is used to update the JSON files for each scenario.
		'''
        # Append positional information to the dict
        self.position_dict['agent'].append({
            'x': self.agent.body.position[0],
            'y': self.agent.body.position[1]
        })
        self.position_dict['patient'].append({
            'x': self.patient.body.position[0],
            'y': self.patient.body.position[1]
        })
        self.position_dict['fireball'].append({
            'x':
            self.fireball.body.position[0],
            'y':
            self.fireball.body.position[1]
        })
        # Record when the Agent collides with someone else
        if handlers.PF_COLLISION and not self.pf_lock:
            self.agent_collision = self.tick
            self.pf_lock = True
        if handlers.AP_COLLISION and not self.ap_lock:
            self.agent_patient_collision = self.tick
            self.ap_lock = True
        if handlers.AF_COLLISION and not self.af_lock:
            self.agent_fireball_collision = self.tick
            self.af_lock = True

    def run(self, video=False, filename=""):
        '''
		Forward method for Environments. Actually runs the scenarios you
		view on (or off) screen.

		video::bool   -- whether you want to record the simulation
		filename::str -- the name of the video file
		'''
        # Agent velocities
        a_vel, p_vel, f_vel = self.vel
        # Agent action generators (yield actions of agents)
        a_generator = self.agent.act(a_vel, self.clock, self.screen,
                                     self.space, self.options, self.view,
                                     self.std_dev)
        p_generator = self.patient.act(p_vel, self.clock, self.screen,
                                       self.space, self.options, self.view,
                                       self.std_dev)
        f_generator = self.fireball.act(f_vel, self.clock, self.screen,
                                        self.space, self.options, self.view,
                                        self.std_dev)
        # Running flag
        running = True
        # Video creation
        save_screen = make_video(self.screen)
        # Main loop. Run simulation until collision between Green Agent
        # 	and Fireball
        while running and not handlers.PF_COLLISION:
            try:
                # Generate the next tick in the simulation for each object
                next(a_generator)
                next(p_generator)
                next(f_generator)
                # Render space on screen (if requested)
                if self.view:
                    self.screen.fill((255, 255, 255))
                    self.space.debug_draw(self.options)
                    pygame.display.flip()
                    self.clock.tick(50)
                self.space.step(1 / 50.0)
                # Update the values for the Blender JSON file
                self.update_blender_values()
                # Increment the simulation tick
                self.tick += 1
                if video:
                    next(save_screen)
            except Exception as e:
                running = False
        if self.view:
            pygame.quit()
            pygame.display.quit()
        # Record whether Green Agent and Fireball collision occurred
        self.patient_fireball_collision = 1 if handlers.PF_COLLISION else 0
        # Reset collision handler
        handlers.PF_COLLISION = []
        handlers.AP_COLLISION = []
        handlers.AF_COLLISION = []
        if video:
            vid_from_img(filename)

    def counterfactual_run(self, std_dev, video=False, filename=''):
        '''
		Counterfactual forward method for Environments. Re-runs the scenario
		with the Blue Agent removed and noise added to the other agents' paths.

		std_dev::float -- noise parameter for simulation
		video::bool    -- whether you want to record the simulation
		filename::str  -- file name for video
		'''
        # We remove the agent from the environment
        self.space.remove(self.space.shapes[0])
        self.space.remove(self.space.bodies[0])
        # Reinitialize pygame
        pygame.init()
        # If viewing, draw simulation to screen
        if self.view:
            pygame.init()
            self.screen = pygame.display.set_mode((1000, 600))
            self.options = pymunk.pygame_util.DrawOptions(self.screen)
            self.clock = pygame.time.Clock()
        # Set noise parameter
        self.std_dev = std_dev
        save_screen = make_video(self.screen)
        # Agent velocities
        _, p_vel, f_vel = self.vel
        # Counterfactual ticks for agents
        self.patient.counterfactual_tick = self.agent_patient_collision
        self.fireball.counterfactual_tick = self.agent_fireball_collision
        # Agent action generators (yield actions of agents)
        p_generator = self.patient.act(p_vel, self.clock, self.screen,
                                       self.space, self.options, self.view,
                                       self.std_dev)
        f_generator = self.fireball.act(f_vel, self.clock, self.screen,
                                        self.space, self.options, self.view,
                                        self.std_dev)
        # Running flag
        running = True
        # Main loop. Run simulation until collision between Green Agent
        # 	and Fireball
        while running and not handlers.PF_COLLISION:
            try:
                # Generate the next tick in the simulation for each object
                next(p_generator)
                next(f_generator)
                # Render space on screen (if requested)
                if self.view:
                    self.screen.fill((255, 255, 255))
                    self.space.debug_draw(self.options)
                    pygame.display.flip()
                    self.clock.tick(50)
                self.space.step(1 / 50.0)
                # Update the values for the Blender JSON file
                self.update_blender_values()
                # Increment the simulation tick
                self.tick += 1
                # Increment ticks in agents
                self.patient.tick = self.tick
                self.fireball.tick = self.tick
                if video:
                    next(save_screen)
            except Exception:
                running = False
        if self.view:
            pygame.quit()
            pygame.display.quit()
        # Record whether Green Agent and Fireball collision occurred
        self.patient_fireball_collision = 1 if handlers.PF_COLLISION else 0
        # Reset collision handler
        handlers.PF_COLLISION = []
        handlers.AP_COLLISION = []
        handlers.AF_COLLISION = []
        if video:
            vid_from_img(filename)
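
make_video and vid_from_img are used above but not shown; a plausible sketch of the make_video generator, which saves one frame per next() call (the file naming is an assumption):

import pygame


def make_video(screen, prefix="frame"):
    """Generator: each next() call saves the current screen as a numbered PNG."""
    frame = 0
    while True:
        pygame.image.save(screen, "{}{:05d}.png".format(prefix, frame))
        frame += 1
        yield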
Example #24
    _d_log = "logs"
    _f_info = "_info.log"
    _f_checkpoint = "_checkpoint"

    _episode = 0
    _scores = []
    if os.path.exists(_f_info):
        with open(_f_info, "r") as _f:
            for _line in _f.readlines():
                _a, _b = _line.split("\t")
                _episode = int(_a)
                _scores.append(float(_b))
    _n_games = _episode + 5000

    _agent = Agent((8,), 4)
    if os.path.exists(_f_checkpoint):
        _agent.net.load_checkpoint(_f_checkpoint)

    _writer = SummaryWriter(_d_log)
    _is_quit = False
    while _episode < _n_games:
        _observation = _env.reset()
        _done = False
        _score = 0.0
        while not _done:
            _action = _agent.get_action(_observation)
            _next_observation, _reward, _done, _info = _env.step(_action)
            _score += _reward
            _agent.learn(_observation, _reward, _next_observation, _done)
            _observation = _next_observation
Example #25
    def __init__(self,
                 a_params,
                 p_params,
                 f_params,
                 vel,
                 handlers=None,
                 view=True,
                 std_dev=0,
                 frict=0.05):
        '''
		Environment class that contains all necessary components to configure
		and run scenarios.

		a_params::dict -- parameters for the Blue Agent
		p_params::dict -- parameters for the Green Agent
		f_params::dict -- parameters for the Fireball
		vel::tuple     -- velocities associated with each agent in the scenario
		handlers::tuple -- optional collision handlers
		view::bool     -- flag for whether you want to view the scenario or not
		frict::float   -- friction value for pymunk physics
		std_dev::float -- standard deviation value for noisy counterfactual simulation
		'''
        self.view = view
        self.std_dev = std_dev
        # Objects in the environment
        self.agent = Agent(a_params['loc'][0], a_params['loc'][1],
                           a_params['color'], a_params['coll'],
                           a_params['moves'])
        self.patient = Agent(p_params['loc'][0], p_params['loc'][1],
                             p_params['color'], p_params['coll'],
                             p_params['moves'])
        self.fireball = Agent(f_params['loc'][0], f_params['loc'][1],
                              f_params['color'], f_params['coll'],
                              f_params['moves'])
        # Initial location of objects in environment
        self.p_loc = p_params['loc']
        self.a_loc = a_params['loc']
        self.f_loc = f_params['loc']
        # Pymunk space friction
        self.friction = frict
        # Agent velocities
        self.vel = vel
        self.pf_lock = False
        self.af_lock = False
        self.ap_lock = False
        # Engine parameters
        self.space = None
        self.screen = None
        self.options = None
        self.clock = None
        # Collision handlers
        self.coll_handlers = [x for x in handlers] if handlers else handlers
        # Values needed for rendering the scenario in Blender
        self.tick = 0
        self.agent_collision = None
        self.agent_patient_collision = None
        self.agent_fireball_collision = None
        self.patient_fireball_collision = 0
        self.position_dict = {'agent': [], 'patient': [], 'fireball': []}
        self.screen_size = (1000, 600)
        # Configure and run environment
        self.configure()
Example #26
 def setUp(self) -> None:
     self.test_players = [Agent(), Agent()]
Example #27
pi_historical = []
w_historical = []
r_historical = []
t_historical = []

# For intra-algo
mu_bar_historical = []
mu_historical = []

# Init the Agent's environment
env = Environment()

# Init the expert agent
# Feed it the expert trajectories
a = Agent(type='expert',
          action_list=['l', 'r'],
          environment=env,
          trajectories=[['r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r']])

# Build said expert trajectories
a.build_trajectories()

# Build the Agent's initial state distribution
a.build_D()

# Init a standalone environment for the state itself
simul_env = Environment()

# Init the simulation
sim = Simulation(agents=a, environment=simul_env, alpha=.02)

# This method will initialize a matrix of state-action pairs and their values (currently set to init all
from disc_bertrand import DiscrBertrand
from config import avg_profit_gain

# Parameters
env = DiscrBertrand()

from config import PARAMS
GAMMA = PARAMS[0]
ALPHA = PARAMS[1]
BETA = PARAMS[2]
NUM_EPISODES = PARAMS[3].astype(int)
nA = PARAMS[5].astype(int)
ITER_BREAK = PARAMS[7].astype(int)
CONV = PARAMS[8].astype(int)
# Objects
agent1 = Agent()
agent2 = Agent()

# Initializations
writer = SummaryWriter(comment="-q-iteration")
iter_no = 0

# Q learning Algorithm
profits = np.zeros((ITER_BREAK + 2, NUM_EPISODES + 2))

for ep in range(NUM_EPISODES):
    print(ep)
    # 1: initialise Qs
    env.reset()
    agent1.reset()
    agent2.reset()
Example #29
f.write("\n max_num_steps: " + str(max_num_steps))
f.write("\n num_steps: " + str(num_steps))
f.write("\n mini_batch_size: " + str(mini_batch_size))
f.write("\n ppo_epochs: " + str(ppo_epochs))
f.write("\n GAMMA: " + str(GAMMA))
f.write("\n GAE_LAMBDA: " + str(GAE_LAMBDA))
f.write("\n PPO_EPSILON: " + str(PPO_EPSILON))
f.write("\n CRICIC_DISCOUNT: " + str(CRICIC_DISCOUNT))
f.write("\n ENTROPY_BETA: " + str(ENTROPY_BETA))
f.write("\n eta: " + str(eta))
f.write("\n LSTM: Yes")
f.write("\n Architecture: 1")

f.close()

agent = Agent(state_size_map, state_size_depth, state_size_goal, num_outputs,
              hidden_size, stack_size, lstm_layers, load_model, MODELPATH, lr,
              mini_batch_size, num_envs, lr_decay_epoch, init_lr, writer, eta)
max_frames = 500000
test_rewards = []

episode_length = []
for i in range(0, num_envs):
    episode_length.append(max_num_steps)

envs.set_episode_length(episode_length)


early_stop = True

best_reward = 0

map_state,depth_state, goal_state = envs.reset()
Example #30
def play(forever, w, h):
    global records, returnsBool, steps

    #variables for graphing
    countTo10 = 0
    myCount = 0
    counter = [0 for i in range(10)]

    num = w * h
    gamma = 0.9
    e = 0.3
    pts = [0 for i in range(int(forever / 10))]

    for i in range(forever):
        # reset the rewards boolean value
        returnsBool = np.zeros(shape=(3, 3, 3, 3, 3, 3, 3, 3, 8))
        records = []  # reset
        # initialize the board
        agent = Agent(w, h)
        ships = agent.ships
        board = agent.enemyBoard
        actionSet = {i for i in range(num)}

        win = False
        while not win:
            # select a random action from the actionSet
            action = random.choice(tuple(actionSet))
            actionSet.remove(action)
            y = int(action / w)
            x = action % h
            hit = checkHit([y, x], ships, board)
            state = getState([y, x], board, w, h)
            #print("rand")
            if hit:
                # if action was a hit, enter Monte Carlo guessing
                records = []  # reset
                win = monteCarlo([y, x], ships, board, e, w, h, gamma)

            else:
                # update the board location to be a miss
                board[y][x] = 1

        # For graphing purposes
        # print(steps / 3)
        counter[countTo10] = steps / 3  # average number of hits to sink a ship
        countTo10 += 1
        steps = 0
        if countTo10 == 10:
            countTo10 = 0
            pts[myCount] = statistics.mean(counter)
            myCount += 1

    episodes = np.array([i for i in range(1, forever + 1, 10)])
    pts = np.array(pts)

    plt.figure(1)
    plt.plot(episodes, pts)

    plt.xlabel('Number of Episodes')
    plt.ylabel('Time Steps to Sink a Ship')
    plt.title('Convergence of Monte Carlo')
    plt.show()

    return board
Example #31
def setup(model):
    model.reset()
    model.add_agent(Agent())
Example #32
    'LEARNING_RATE': 5e-4,
    'REPLAY_BUFFER_SIZE': 100000,
    'MIN_REPLAY_BUFFER_SIZE': 1000,
    'EPSILON_START': 1,
    'EPSILON_END': 0.1,
    'EPSILON_DECAY_DURATION': 50000,
}
# Allow changing hyperparameters from command-line arguments
args = get_args(default_args=args_dict)

# Create wrapped environment
env = make_env(args.ENV_ID)

# Set Seed
set_seed(env, args.SEED)

# GPU or CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Create agent
agent = Agent(env, device, args)

# Train agent for args.NB_FRAMES
agent.train()

# Save agent
agent.save()

# Test agent
agent.test(render=False)
Example #33
    def create_all(self, pops, regions):
        """Based on regions and population data,
        create agents, families, houses, and firms"""
        agent_id = 0
        house_id = 0
        family_id = 0
        firm_id = 0
        my_agents = {}
        my_families = {}
        my_houses = {}
        my_firms = {}
        for region_id, region in regions.items():
            logger.info('Generating region {}'.format(region_id))
            num_houses = 0
            regional_agents = {}

            pop_cols = list(list(pops.values())[0].columns)
            if not self.sim.PARAMS['SIMPLIFY_POP_EVOLUTION']:
                list_of_possible_ages = pop_cols[1:]
            else:
                list_of_possible_ages = [0] + pop_cols[1:]

            loop_age_control = list(list_of_possible_ages)
            loop_age_control.pop(0)

            for age in loop_age_control:
                for gender in ['male', 'female']:
                    cod_mun = region.id
                    pop = pop_age_data(
                        pops[gender], cod_mun, age,
                        self.sim.PARAMS['PERCENTAGE_ACTUAL_POP'])
                    for individual in range(pop):
                        # Qualification
                        # To see a histogram check test:
                        qualification = self.qual(cod_mun)
                        r_age = self.seed.randint(
                            list_of_possible_ages[
                                (list_of_possible_ages.index(age, ) - 1)] + 1,
                            age)
                        money = self.seed.randrange(50, 100)
                        month = self.seed.randrange(1, 13, 1)
                        a = Agent(agent_id, gender, r_age, qualification,
                                  money, month)
                        regional_agents[agent_id] = a
                        agent_id += 1
                        num_houses += 1

            for agent in regional_agents.keys():
                my_agents[agent] = regional_agents[agent]

            num_families = int(num_houses /
                               self.sim.PARAMS['MEMBERS_PER_FAMILY'])
            num_houses = int(num_houses /
                             self.sim.PARAMS['MEMBERS_PER_FAMILY'] *
                             (1 + self.sim.PARAMS['HOUSE_VACANCY']))
            num_firms = int(num_emp[num_emp['cod_mun'] == int(
                region.id)]['num_est'].iloc[0] *
                            self.sim.PARAMS['PERCENTAGE_ACTUAL_POP'])

            regional_families = (self.create_family(num_families, family_id))
            family_id += num_families

            regional_houses = self.create_household(num_houses, region,
                                                    house_id)
            house_id += num_houses

            regional_firms = self.create_firm(num_firms, region, firm_id)
            firm_id += num_firms

            for family in regional_families.keys():
                my_families[family] = regional_families[family]

            for house in regional_houses.keys():
                my_houses[house] = regional_houses[house]

            for firm in regional_firms.keys():
                my_firms[firm] = regional_firms[firm]

            regional_agents, regional_families = self.allocate_to_family(
                regional_agents, regional_families)
            regional_families = self.allocate_to_households(
                regional_families, regional_houses)

            # Set ownership of remaining houses for random families
            for house in regional_houses.keys():
                if regional_houses[house].owner_id is None:
                    family = self.seed.choice(list(regional_families.keys()))
                    regional_houses[house].owner_id = regional_families[
                        family].id
        return my_agents, my_houses, my_families, my_firms
Example #34
def test_Agent():
    def constant_prog(percept):
        return percept
    agent = Agent(constant_prog)
    result = agent.program(5)
    assert result == 5
Example #35
    print('Rounds played:  ' + str(num_rounds))

    return G


if __name__ == '__main__':
    G = makegraphs.ec_toy()
    c = 2
    agentlist = defaultdict(Agent)
    #id num, utility, endowment, prices, subplans,

    # middlemen: Agent 1 is degree-central, Agent 2 is betweenness-central

    agentlist[2] = Agent(2,
                         np.array((10, 1)),
                         np.array((0.01, 0.98)),
                         np.array((10, 10)),
                         loan=LOAN_AMT)
    agentlist[1] = Agent(1,
                         np.array((10, 10)),
                         np.array((0.01, 0.01)),
                         np.array((10, 10)),
                         loan=LOAN_AMT)

    #type 1 util player
    agentlist[3] = Agent(3,
                         np.array((1, 10)),
                         np.array((0.98, 0.01)),
                         np.array((1, 10)),
                         loan=LOAN_AMT)
    agentlist[6] = Agent(6,