Example #1
def play(agent1: Agent, agent2: Agent, n_holes=7, n_stones=7, max_game_length=200):
    game = Mancala(n_holes, n_stones)
    player = random.choice(['north', 'south'])
    game_length = 0
    finished = False

    while not finished:

        if player == 'north':
            move = agent1.get_move(game, 'north')
        else:
            move = agent2.get_move(game, 'south')
        game.step(player, move)
        player = game.next_player

        game_length += 1
        if game.game_over or game_length > max_game_length:
            finished = True

    if game.winner == 'north':
        winner = agent1
    elif game.winner == 'south':
        winner = agent2
    else:  # tie
        winner = None
    return winner
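
A minimal way to exercise play() is to pit two agents against each other many times and tally the outcomes. The sketch below is not part of the original code; agent_a and agent_b stand for any two Agent implementations from the same codebase.

def tournament(agent_a, agent_b, n_games=100):
    # Count wins per agent; the None key collects ties, matching play()'s return.
    wins = {agent_a: 0, agent_b: 0, None: 0}
    for _ in range(n_games):
        winner = play(agent_a, agent_b)
        wins[winner] += 1
    return wins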
Example #2
 def __init__(self, environment: dict, verbose=False):
     """ Initializer for the simulator helper
     Args:
         environment (dict): dictionary housing the obj map (bitmap) and more
         verbose (bool): flag to print debug prints
     """
     self.environment = environment
     self.params = create_simulator_params(verbose)
     self.episode_params = None
     self.algo_name = "lite"  # by default there is no robot (or algorithm)
     self.obstacle_map = None
     # keep track of all agents in dictionary with names as the key
     self.agents = {}
     # keep track of all robots in dictionary with names as the key
     self.robots = {}
     # keep track of all prerecorded humans in a dictionary, like the agents above
     self.backstage_prerecs = {}
     self.prerecs = {}
     # keep a single (important) robot as a value
     self.robot = None
     self.sim_states = {}
     self.wall_clock_time: float = 0
     self.sim_t: float = 0.0
     self.dt: float = 0  # will be updated in simulator based off dt
     # metadata of agents
     self.total_agents: int = 0
     self.num_collided_agents: int = 0
     self.num_completed_agents: int = 0
     self.num_timeout_agents: int = 0  # updated with (non-robot) add_agent
     # restart agent coloring on every instance of the simulator to be consistent across episodes
     Agent.restart_coloring()
Example #3
 def reset(self):
     Agent.reset(self)
     self.Q = self.model_lambda()
     self.target_Q = self.model_lambda()
     self.target_Q.set_weights(self.Q.get_weights())
     self.buffer.reset()
     self.updates_since_target_updated = 0
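
The set_weights(get_weights()) call above is a hard target-network sync. As a framework-agnostic illustration (not taken from the source project), the same idea on plain lists of numpy arrays looks like this, next to the soft (Polyak) variant:

import numpy as np

def hard_update(online_weights):
    # Target becomes an exact copy of the online network's weights.
    return [w.copy() for w in online_weights]

def soft_update(target_weights, online_weights, tau=0.01):
    # Polyak averaging: move the target a small step toward the online weights.
    return [(1.0 - tau) * t + tau * w
            for t, w in zip(target_weights, online_weights)]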
Example #4
    def __init__(self, action_space, observation_space, params):
        # Use super init
        Agent.__init__(self, action_space, observation_space, params)

        #Initialize table with all zeros
        self.Q = np.zeros([observation_space.n, action_space.n])

        # Set learning parameters
        self.episode_count = self.params[0]  # Number of episodes
Example #5
    def test_agent(self):
        _, image = Loader.get_action('cylinder-cube-1', '2019-03-26-09-08-16-480', 'ed-v')

        if TEST_WITH_GPU:
            agent = Agent()
            result = agent.infer([image], SelectionMethod.Max)

            self.assertEqual(result.safe, True)
            self.assertEqual(result.method, SelectionMethod.Max)
Example #6
def spawn_agent(agent_def=None, test_run_name=None):
    '''
        Spawn a new creature and give it an agent.
    '''
    import importlib
    # agent_def has the form "module/ClassName/kwargs-dict-literal"
    mod_str, cls_str, arg_str = agent_def.split("/")
    Agent = getattr(importlib.import_module(mod_str), cls_str)
    kwargs = eval(arg_str)
    if len(kwargs) > 0:
        return Agent(observ_space, action_space, test_run_name=test_run_name, **kwargs)
    return Agent(observ_space, action_space)
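
For reference, agent_def is split into a module name, a class name, and a kwargs dict literal. The tiny demo below only exercises that parsing step; the module and class names are hypothetical.

agent_def = "my_agents/RandomAgent/{'seed': 0}"  # hypothetical value
mod_str, cls_str, arg_str = agent_def.split("/")
kwargs = eval(arg_str)
print(mod_str, cls_str, kwargs)  # -> my_agents RandomAgent {'seed': 0}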
Example #7
    def test_no_transfer_if_no_bet(self):
        with mock.patch('agents.agent.PredictionMarketAdapter', autospec=True) \
                    as MockPredictionMarket:
            mock_prediction_market = MockPredictionMarket.return_value
            account = '42'
            agent = Agent(account, logging=False)
            agent.prediction_history = [None, None, None]

            agent.collect_reward()

            mock_prediction_market.transfer_reward.assert_not_called()
Example #8
    def __init__(self, action_space,observation_space,params,discreet=False):
        # Use super init
        Agent.__init__(self, action_space, observation_space, params)
        self.discreet = discreet
        if discreet:
            self.inputN = self.observation_space.n
        else:
            self.inputN = self.observation_space.shape[0]
        self.actionN = self.action_space.n
        
        # Set learning parameters
        self.episode_count = self.params[0]  # Number of episodes
        self.learnRate = self.params[1]  # Learning rate
        self.dicount = self.params[2]  # Discount factor for future rewards
        self.epsi = self.params[3]  # Epsilon for greedy picking
        self.epsi_decay = self.params[4]
        self.epsi_min = 0.001
        self._timeTot = 200
        #define TF graph
        tf.reset_default_graph()
        #graph1 = tf.Graph()
        #with graph1.as_default():
        #These lines establish the feed-forward part of the network used to choose actions
        
        n_hidden_1  = 64
        n_hidden_2  = 32
        self.inputs1 = tf.placeholder(shape=[1,self.inputN],dtype=tf.float32)
        #W1 = tf.Variable(tf.random_uniform([self.inputN,self.actionN],0,0.01))
        
        W1 = tf.Variable(tf.random_normal([self.inputN,n_hidden_1]))
        W2 = tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2]))
        W3 = tf.Variable(tf.random_normal([n_hidden_2, self.actionN]))
        
        layer_1 = tf.nn.relu(tf.matmul(self.inputs1, W1))
        layer_2 = tf.nn.relu(tf.matmul(layer_1, W2))
        self.Qout = tf.matmul(layer_2, W3)
        
        #self.Qout = tf.matmul(self.inputs1,self.W)
        self.predict = tf.argmax(self.Qout,1)
        
        self.time = 0
        self.currEpisode = 0  # Current training stage episode
        self.currQs = None # Current prediction for the Q values using current observation

        #Below we obtain the loss by taking the sum of squares difference between the target and prediction Q values.
        self.nextQ = tf.placeholder(shape=[1,self.actionN],dtype=tf.float32)
        loss = tf.reduce_sum(tf.square(self.nextQ - self.Qout))
        trainer = tf.train.AdamOptimizer(learning_rate=self.learnRate)
        #trainer = tf.train.GradientDescentOptimizer(learning_rate=self.learnRate)
        self.updateModel = trainer.minimize(loss)
        
        init = tf.global_variables_initializer()
        self.session = tf.Session()
        self.session.run(init)
Example #9
    def __init__(self, action_space, observation_space, params):
        # Use super init
        Agent.__init__(self, action_space, observation_space, params)

        #Initialize table with all zeros
        self.Q = np.zeros([observation_space.n, action_space.n])

        # Set learning parameters
        self.episode_count = self.params[0]  # Number of episodes
        self.lr = self.params[1]  # Learning rate (e.g. 0.5)
        self.y = self.params[2]  # Discount factor (e.g. 0.8)
        self.currEpisode = 0  # Current training stage episode
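
The table initialized above is usually updated with the standard tabular Q-learning rule. The helper below is a self-contained sketch of that rule, not the project's actual learn method:

import numpy as np

def q_learning_step(Q, s, a, r, s_next, lr=0.5, y=0.8):
    # Q[s, a] <- Q[s, a] + lr * (r + y * max_a' Q[s_next, a'] - Q[s, a])
    td_target = r + y * np.max(Q[s_next])
    Q[s, a] += lr * (td_target - Q[s, a])
    return Q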
Example #10
 def __init__(self, obs):
     Agent.__init__(self)
     self.capacity = 5
     self.occupation = 0
     self.type = "Taxi"
     self.body.mass = 1000
     self.stat = 0
     self.clients = []
     self.body.fustrum.radius = 200
     self.body.vitesseMax = 15
     self.observerM = obs
     self.observer = None
     self.policy = TaxisPolicy.NONE 
Example #11
def run(env_name='Ant-v2', num_steps=1000):
    env = gym.make(env_name)
    agent = Agent(env.observation_space, env.action_space)

    state = env.reset()
    reward = None
    done = False
    for _ in range(num_steps):
        env.render()
        action, _ = agent.act(state, reward, done)
        state, reward, done, info = env.step(action)
        print(reward)
        if done:
            state = env.reset()
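
This loop uses the classic Gym API, where step() returns a 4-tuple and reset() returns only the observation. Under the newer Gymnasium API the shapes differ, so an equivalent loop would look roughly like the sketch below (a random policy stands in for the agent):

import gymnasium as gym

def run_gymnasium(env_name='CartPole-v1', num_steps=1000):
    env = gym.make(env_name)
    state, info = env.reset()
    for _ in range(num_steps):
        action = env.action_space.sample()  # stand-in for agent.act(...)
        state, reward, terminated, truncated, info = env.step(action)
        if terminated or truncated:
            state, info = env.reset()
    env.close()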
Example #12
    def __init__(self, max_sims=50):
        # Takes an instance of a Board and optionally some keyword
        # arguments.  Initializes the list of game states and the
        # statistics tables.

        Agent.__init__(self)

        self.total_simulations = 0
        self.root_node = None
        self.if_debug = False
        self.loglevel = 0

        # parameters to change for how deep it goes
        self.max_sims = max_sims
Example #13
def interact(env: Env, agent: Agent,
             start_obs: Arrayable) -> Tuple[array, array, array]:
    """One step interaction between env and agent.

    :args env: environment
    :args agent: agent
    :args start_obs: initial observation

    :return: (next observation, reward, terminal?)
    """
    action = agent.step(start_obs)
    next_obs, reward, done, information = env.step(action)
    time_limit = information.get('time_limit')
    agent.observe(next_obs, reward, done, time_limit)
    return next_obs, reward, done
Example #14
 def __init__(self, f):
     Agent.__init__(self)
     self.body = BoidsBody()
     self.type = "StandardAgent"
     self.famille = f
     self.body.mass = 80
     self.body.fustrum.radius = 100
     self.body.vitesseMax = 150.0
     self.body.vitesseMin = 20.0
     self.velocity = [
         random.uniform(-50.0, 50.0),
         random.uniform(-50.0, 50.0)
     ]
     self.avoidanceFactor = 7.5
     self.obstacleFactor = 500
     self.target = Vector2D(0, 0)
Example #15
 def test_invalid(self):
     dummy_agent = Agent()
     with self.assertRaises(AssertionError):
         Player(dummy_agent, INVALID_PLAYER_ID_1, DUMMY_NAME, DUMMY_COLOUR_NAME, YELLOW)
     with self.assertRaises(AssertionError):
         Player(dummy_agent, INVALID_PLAYER_ID_2, DUMMY_NAME, DUMMY_COLOUR_NAME, YELLOW)
     with self.assertRaises(AssertionError):
         Player(dummy_agent, PLAYER2_ID, DUMMY_NAME, DUMMY_COLOUR_NAME, INVALID_COLOUR_RGB)
Example #16
 def __init__(self, memory_length=5):
     """
     Empty constructor
     """
     Agent.__init__(self)
     self.memoryLength = memory_length
     self.color_memory = [''] * memory_length  # previous color
     # build independent per-slot lists (a comprehension avoids [[]] * n aliasing)
     self.move_memory = [[] for _ in range(memory_length)]  # previous move locations [piece, i, j]
     self.piece_memory = [[] for _ in range(memory_length)]  # previously played piece structures
     self._colors: List[str] = ['_', 'P', 'G', 'B', 'Y', 'O',
                                'V']  # Piece colors
     self._to_update = 0
     self._update_limit = memory_length - 1
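
A quick self-contained note on these fixed-length buffers: [[]] * n creates n references to one shared list, so independent slots need a comprehension.

shared = [[]] * 3
shared[0].append('x')
print(shared)       # [['x'], ['x'], ['x']]  -- one list, three references

independent = [[] for _ in range(3)]
independent[0].append('x')
print(independent)  # [['x'], [], []]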
Example #17
    def __init__(self,
                 action_space,
                 observation_space,
                 params,
                 discreet=False):
        # Use super init
        Agent.__init__(self, action_space, observation_space, params)
        self.discreet = discreet
        if discreet:
            self.inputN = self.observation_space.n
        else:
            self.inputN = self.observation_space.shape[0]
        self.actionN = self.action_space.n

        # Set learning parameters
        self.episode_count = self.params[0]  # Number of episodes
        self.learnRate = self.params[1]  # Learning rate
        self.discount = self.params[2]  # Time range value for reward
        self.epsi = self.params[3]  # Epsilon for greedy picking
        self.epsi_decay = self.params[4]

        self.pretrainEpi = 250  # Number of steps before first train
        self.batch_size = 200  #Size of training batch
        self.trainPadding = 5  # Every xth step a training occurs
        self.tau = 0.01  #Amount to update target network at each step.
        self.method = self.selectMethod("e-greedy")

        self.epsi_min = 0.001

        self.currEpisode = 0  # Current training stage episode
        self.time = 0  # Current frame within one episode
        self._timeTot = 200  # Maximal time in one episode
        self.currQs = None  # Current prediction for the Q values using current observation

        tf.reset_default_graph()
        self.qNet = Q_Network([[self.inputN, 128, self.actionN],
                               self.learnRate])
        self.targetQNet = Q_Network([[self.inputN, 128, self.actionN],
                                     self.learnRate])
        self.myBuffer = ExperienceBuffer()

        init = tf.global_variables_initializer()
        trainables = tf.trainable_variables()
        self.targetOps = Q_Network.updateTargetGraph(trainables, self.tau)
        self.session = tf.Session()
        self.session.run(init)
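
ExperienceBuffer here comes from the surrounding project and is not shown. Purely as an illustrative stand-in (an assumption, not that project's code), a minimal deque-based replay buffer with uniform sampling could look like:

import random
from collections import deque

class SimpleReplayBuffer:
    def __init__(self, capacity=50000):
        self.buffer = deque(maxlen=capacity)

    def add(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniformly sample transitions and regroup them column-wise.
        batch = random.sample(self.buffer, min(batch_size, len(self.buffer)))
        return list(zip(*batch))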
Example #18
def train(nb_steps: int, env: Env, agent: Agent, start_obs: Arrayable):
    """Trains for one epoch.

    :args nb_steps: number of interaction steps
    :args env: environment
    :args agent: interacting agent
    :args start_obs: starting observation

    :return: final observation
    """
    agent.train()
    agent.reset()
    obs = start_obs
    for _ in range(nb_steps):
        # interact
        obs, _, _ = interact(env, agent, obs)
    return obs
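
As a companion to train(), an evaluation loop can reuse interact() from Example #13 to tally reward. The helper below is a sketch, not part of the original code; it assumes only the interface shown above.

import numpy as np

def evaluate(nb_steps: int, env, agent, start_obs):
    obs = start_obs
    total_reward = 0.0
    for _ in range(nb_steps):
        obs, reward, _ = interact(env, agent, obs)
        total_reward += float(np.sum(reward))  # reward may be array-valued
    return obs, total_reward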
Example #19
    def __init__(self):
        """
        Initializes random DQN model
        """
        Agent.__init__(self)

        # Initialize DQN
        dqn_input_dim = len(SquareStackerGame().get_state_vector())
        dqn_output_dim = len(move_to_vector([0, 0, 0]))
        self._dqn = Sequential([
            Dense(128, input_dim=dqn_input_dim),
            Activation('relu'),
            Dense(128),
            Activation('relu'),
            Dense(dqn_output_dim),
        ])
        self._dqn.compile(optimizer=Adam(), loss='mse', metrics=['accuracy'])
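
Once compiled, the network maps a state vector to one score per move, so move selection is typically a predict followed by an argmax. A hedged usage sketch (any masking of invalid moves depends on the game code, which is not shown):

import numpy as np

def greedy_move(dqn, state_vector):
    # state_vector: 1-D array with length equal to the DQN input dimension.
    q = dqn.predict(state_vector[np.newaxis, :], verbose=0)[0]
    return int(np.argmax(q))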
Example #20
 def test_valid(self):
     dummy_agent = Agent()
     subject = Player(dummy_agent, PLAYER1_ID, DUMMY_NAME, DUMMY_COLOUR_NAME, YELLOW)
     self.assertEqual(subject.agent, dummy_agent)
     self.assertEqual(subject.piece_id, PLAYER1_ID)
     self.assertEqual(subject.name, DUMMY_NAME)
     self.assertEqual(subject.colour_name, DUMMY_COLOUR_NAME)
     self.assertEqual(subject.colour_rgb, YELLOW)
Example #21
    def __init__(self,
                 env,
                 tuning_parameters,
                 replicated_device=None,
                 thread_id=0,
                 create_target_network=True):
        Agent.__init__(self, env, tuning_parameters, replicated_device,
                       thread_id)
        self.main_network = NetworkWrapper(tuning_parameters,
                                           create_target_network,
                                           self.has_global, 'main',
                                           self.replicated_device,
                                           self.worker_device)
        self.networks.append(self.main_network)
        self.q_values = Signal("Q")
        self.signals.append(self.q_values)

        self.reset_game(do_not_reset_env=True)
Example #22
 def __init__(self):
     Agent.__init__(self)
     self.timeout = 600
     self.destination = Destination(0, 0)
     self.onboard = -1
     self.type = "Client"
     self.body.mass = 80
     self.body.vitesseMax = 1
     self.body.fustrum.radius = 100
     self.policy = ClientsPolicy.NONE
     self.observer = ClientObserver(self.id, time.time(),
                                    self.body.location)
     self.cohesionFactor = 0.03
     self.velocity = [
         random.uniform(-50.0, 50.0),
         random.uniform(-50.0, 50.0)
     ]
     self.allignFactor = 0.045
Example #23
def main(game_name, game_length):
    #Game description
    reward_mode = 'base'
    reward_scale = 1.0
    elite_prob = 0
    env = Env(
        game_name, game_length, {
            'reward_mode': reward_mode,
            'reward_scale': reward_scale,
            'elite_prob': elite_prob
        })

    #Network
    latent_shape = (512, )
    dropout = 0
    lr = .0001
    gen = Generator(latent_shape, env, 'nearest', dropout, lr)

    #Agent
    num_processes = 1
    experiment = "Experiments"
    lr = .00025
    model = 'base'
    dropout = .3
    reconstruct = None
    r_weight = .05
    Agent.num_steps = 5
    Agent.entropy_coef = .01
    Agent.value_loss_coef = .1
    agent = Agent(env, num_processes, experiment, 0, lr, model, dropout,
                  reconstruct, r_weight)

    #Training
    gen_updates = 1e4
    gen_batch = 32
    gen_batches = 1
    diversity_batches = 0
    rl_batch = 1e4
    pretrain = 0
    elite_persist = False
    elite_mode = 'mean'
    load_version = 0
    notes = ''
    agent.writer.add_hparams(
        {
            'Experiment': experiment,
            'RL_LR': lr,
            'Minibatch': gen_batch,
            'RL_Steps': rl_batch,
            'Notes': notes
        }, {})
    t = Trainer(gen, agent, experiment, load_version, elite_mode,
                elite_persist)
    t.loss = lambda x, y: x.mean().pow(2)
    t.train(gen_updates, gen_batch, gen_batches, diversity_batches, rl_batch,
            pretrain)
Example #24
def main(game_name, game_length):
    #Game description
    reward_mode = 'time'
    reward_scale = 1.0
    elite_prob = .5
    env = Env(
        game_name, game_length, {
            'reward_mode': reward_mode,
            'reward_scale': reward_scale,
            'elite_prob': elite_prob
        })

    #Network
    latent_shape = (512, )
    dropout = .2
    lr = .0001
    gen = Generator(latent_shape, env, 'pixel', dropout, lr)

    #Agent
    num_processes = 16
    experiment = "Experiment_Paper"
    lr = .00025
    model = 'resnet'
    dropout = 0
    reconstruct = gen
    r_weight = .05
    Agent.num_steps = 5
    Agent.entropy_coef = .01
    Agent.value_loss_coef = .1
    agent = Agent(env, num_processes, experiment, 0, lr, model, dropout,
                  reconstruct, r_weight)

    #Training
    gen_updates = 100
    gen_batch = 128
    gen_batches = 10
    diversity_batches = 90
    rl_batch = 1e6
    pretrain = 2e7
    elite_persist = True
    elite_mode = 'max'
    load_version = 0
    notes = 'Configured to match paper results'
    agent.writer.add_hparams(
        {
            'Experiment': experiment,
            'RL_LR': lr,
            'Minibatch': gen_batch,
            'RL_Steps': rl_batch,
            'Notes': notes
        }, {})
    t = Trainer(gen, agent, experiment, load_version, elite_mode,
                elite_persist)
    t.train(gen_updates, gen_batch, gen_batches, diversity_batches, rl_batch,
            pretrain)
Example #25
 def simulate(self):
     """ A function that simulates an entire episode. The gen_agents are updated with simultaneous
     threads running their update() functions and updating the robot with commands from the
     external joystick process.
     """
     # initialize pre-simulation metadata
     self.init_sim_data()
     # keep track of wall-time in the simulator
     start_time = time.time()
     # get initial state
     current_state = self.save_state()
     # initialize robot update thread
     r_t = self.init_robot_listener_thread(current_state)
     # start iteration
     iteration = 0
     self.print_sim_progress(iteration)
     # run simulation
     while self.sim_t <= self.episode_params.max_time and self.loop_condition():
         wall_t = time.time()
         # update the time for all agents
         Agent.set_sim_t(self.sim_t)
         # initiate thread operations
         self.pedestrians_update(current_state)
         if self.robot is not None:
             # calls a single iteration of the robot update
             self.robot.update()
         # update simulator time
         self.sim_t += self.dt
         # capture time after all the gen_agents have updated
         # Takes screenshot of the new simulation state
         current_state = self.save_state(wall_t - start_time)
         if self.robot:
             self.robot.update_world(current_state)
         # update iteration count
         iteration += 1
         # print simulation progress
         self.print_sim_progress(iteration)
         # synchronize time with real-world if running in asynchronous mode
         self.synchronize(wall_t)
     # finish the simulation
     self.conclude_simulation(start_time, iteration, r_t)
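
synchronize(wall_t) is defined elsewhere in that simulator. When running pseudo-synchronously with real time, it typically amounts to sleeping off the unused part of the step budget; the sketch below is an assumption, not the repository's implementation.

import time

def synchronize_sketch(step_start_wall_t, dt):
    # Sleep away whatever part of the dt budget this step did not use.
    elapsed = time.time() - step_start_wall_t
    if elapsed < dt:
        time.sleep(dt - elapsed)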
Example #26
 def init_sim_data(self, verbose: bool = True):
     self.total_agents = len(self.agents) + len(self.backstage_prerecs)
     # Create pre-simulation metadata
     if verbose:
         print("Running simulation on", self.total_agents, "agents")
     # scale the simulator time
     self.dt = self.params.delta_t_scale * self.params.dt
     # update the baseline agents' simulation refresh rate
     Agent.set_sim_dt(self.dt)
     Agent.set_sim_t(self.sim_t)
     # add the first (when t=0) agents to the self.prerecs dict
     self.init_prerec_agent_threads(current_state=None)
     # save initial state before the simulator is spawned
     self.sim_t = 0.0
     if self.dt < self.params.dt:
         print(
             "%sSimulation dt is too small; either lower the gen_agents' dt's"
             % color_red, self.params.dt,
             "or increase simulation delta_t%s" % color_reset)
         exit(1)
Example #27
File: boids.py Project: BDafflon/PAMELA
 def __init__(self):
     Agent.__init__(self)
     self.body = BoidsBody()
     self.collisionDVel = 1
     self.type = "Boid"
     self.famille = 1
     self.body.mass = 80
     self.body.fustrum.radius = 100
     self.body.vitesseMax = 150.0
     self.body.vitesseMin = 20.0
     self.repultion = 150
     self.cohesionFactor = 0.03
     self.collisionDistance = 10
     self.velocity = [
         random.uniform(-50.0, 50.0),
         random.uniform(-50.0, 50.0)
     ]
     self.allignFactor = 0.045
     self.avoidanceFactor = 7.5
     self.attractorFactor = 0.35
     self.obstacleFactor = 500
Example #28
    def __init__(self, action_space, observation_space, params):
        # Use super init
        Agent.__init__(self, action_space, observation_space, params)

        # Set learning parameters
        self.episode_count = params[0]  # Number of episodes
        self.lr = params[1]  # Learning rate (e.g. 0.5)
        self.y = params[2]  # Discount factor (e.g. 0.8)
        self.binsize = params[3]  # Should be odd to distinguish -epsi and +epsi
        self.currEpisode = 0  # Current training stage episode

        #Initialize table with all zeros
        self.Q = np.zeros([
            np.power(self.binsize, observation_space.shape[0]), action_space.n
        ])

        # Determine Bins
        self.low = [-0.5, -2, -0.25, -2]  #self.observation_space.low
        self.high = [0.5, 2, 0.25, 2]  # self.observation_space.high
        self.createBins()
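
createBins() itself is not shown in this excerpt. One common way to build such bins, sketched here as an assumption rather than the project's code, is to place binsize - 1 cut points per dimension and flatten the per-dimension indices into a single Q-table row:

import numpy as np

def make_bins(low, high, binsize):
    # binsize - 1 interior cut points per dimension -> binsize bins each.
    return [np.linspace(l, h, binsize - 1) for l, h in zip(low, high)]

def state_index(observation, bins, binsize):
    # np.digitize maps each value to a bin id in [0, binsize - 1]; the ids
    # are then combined like digits of a base-binsize number.
    ids = [int(np.digitize(x, b)) for x, b in zip(observation, bins)]
    index = 0
    for i in ids:
        index = index * binsize + i
    return index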
Example #29
    def init_control_pipeline(self):
        # NOTE: this is like an init() run *after* obtaining episode metadata
        # robot start and goal to satisfy the old Agent.planner
        self.start_config = generate_config_from_pos_3(self.get_robot_start())
        self.goal_config = generate_config_from_pos_3(self.get_robot_goal())
        # rest of the 'Agent' params used for the joystick planner
        self.agent_params = create_agent_params(with_planner=True,
                                                with_obstacle_map=True)
        # update generic 'Agent params' with joystick-specific params
        self.agent_params.episode_horizon_s = self.joystick_params.episode_horizon_s
        self.agent_params.control_horizon_s = self.joystick_params.control_horizon_s
        # init obstacle map
        self.obstacle_map = self.init_obstacle_map()
        self.obj_fn = Agent._init_obj_fn(self, params=self.agent_params)
        psc_obj = Agent._init_psc_objective(params=self.agent_params)
        self.obj_fn.add_objective(psc_obj)

        # Initialize Fast-Marching-Method map for agent's pathfinding
        Agent._init_fmm_map(self, params=self.agent_params)

        # Initialize system dynamics and planner fields
        self.planner = Agent._init_planner(self, params=self.agent_params)
        self.vehicle_data = self.planner.empty_data_dict()
        self.system_dynamics = Agent._init_system_dynamics(
            self, params=self.agent_params)
        # init robot current config from the starting position
        self.robot_current = self.current_ep.get_robot_start().copy()
        # init a list of commands that will be sent to the robot
        self.commands = None
Example #30
class TestAgentController(TestCase):
    controller = LastChanceAgentController(Agent())

    def test_is_betting_period(self):
        self.assertTrue(only_once_during_first_half_day(self.controller.is_betting_period))

    def test_is_ranking_period(self):
        self.assertTrue(only_once_during_first_half_day(self.controller.is_ranking_period))

    def test_is_collecting_period(self):
        self.assertTrue(only_once_during_second_half_day(self.controller.is_collecting_period))

    def test_bet_before_rank(self):
        self.assertTrue(first_this_then_that(self.controller.is_betting_period,
                        self.controller.is_ranking_period))
Example #31
 def __init__(self, *args, **kwargs):
     Agent.__init__(self, *args, **kwargs)
     if self.knowledge is None:
         self.knowledge = set()
     self.knowledge = self._convert_to_set(self.knowledge)
     assert isinstance(self.knowledge, set)