def test_multiDimNetworkQLearningAgent(self): # pylint: disable=line-too-long
    q_agent = NetworkQLearningAgent(self.multi_dim_ndp, Ne=5, Rplus=2,
                                    alpha=lambda n: 60. / (59 + n),
                                    delta=0.5, max_iterations=100, calc_status=True)

    for _ in range(101):
        q_agent.reset()
        run_single_trial(q_agent, self.test_multi_dim_mdp, self.sensor_model, self.motor_model)

    for status in ['energy', 'water']:
        l.debug('----- ' + status + '------')
        U, pi = q_agent.Q_to_U_and_pi()[status]

        # print the utilities and the policy also
        U1 = sorted(U.items(), key=lambda x: x[0])
        pi1 = sorted(pi.items(), key=lambda x: x[0])
        print_grid(U1)
        print_grid(pi1)

    save_csv_file('two_dim.csv', [self.multi_dim_ndp.history], self.ndp.history_headers, OUTPUT_DIR)

    l.debug('test_multiDimNetworkQLearningAgent:', self.multi_dim_ndp.statuses)
def test_networkQLearningAgentPrintDetails(self):
    for i in range(100):
        l.debug('**** TRIAL: ', i, ' ****')
        q_agent = NetworkQLearningAgent(self.ndp, Ne=5, Rplus=2,
                                        alpha=lambda n: 60. / (59 + n),
                                        delta=0.5, max_iterations=100, calc_status=True)
        q_agent.reset()
        run_single_trial(q_agent, self.test_mdp, self.sensor_model, self.motor_model, DEBUG_MODE)
class GridAgent(Agent):

    def __init__(self, objectives, landmarks): # pylint: disable=line-too-long, too-many-locals
        super().__init__(None, 'grid_agent')

        N = Network(None, objectives)
        SENSOR = N.add_SENSOR_node
        self.status = N.get_NEEDs()
        self.status_history = {'energy': [], 'water': []}

        # Create sensors
        SENSOR(Water)
        SENSOR(Energy)

        # create one SENSOR for each square
        sensor_dict = {}
        for lm in landmarks:
            sensor_dict[frozenset([SENSOR(Landmark, lm)])] = lm
        network_model = NetworkModel(sensor_dict)

        M = MotorNetwork(motors, motors_to_action)

        # NOTE: init=agent_start_pos, using a location here (only for debugging),
        #       is a state when MDP:s are used
        self.ndp = NetworkDP(agent_start_pos, self.status, motor_model, .9, network_model)
        self.q_agent = NetworkQLearningAgent(self.ndp, Ne=0, Rplus=2,
                                             alpha=lambda n: 60. / (59 + n),
                                             epsilon=0.2, delta=0.5)

        # compose applies the functions from right to left
        self.program = compose(
            do(partial(l.debug, 'mnetwork.update')),
            M.update,
            do(partial(l.debug, 'q_agent')),
            self.q_agent,
            do(partial(l.debug, N)),
            do(partial(l.debug, 'network.update')),
            N.update,
            do(partial(l.debug, 'percept')),
            lambda x: do(partial(l.debug, '*** ENERGY FOUND ***'))(x) if 'energy' in x[1] and x[1]['energy'] > 0.0 else x,
            lambda x: do(partial(l.debug, '*** WATER FOUND ***'))(x) if 'water' in x[1] and x[1]['water'] > 0.0 else x,
            do(self.printU))

    def __repr__(self):
        return '<{} ({})>'.format(self.__name__, self.__class__.__name__)

    def printU(self, _):
        for status in ['energy', 'water']:
            l.info('----- ' + status + '------')
            U, pi = self.q_agent.Q_to_U_and_pi()[status]
            l.info('Utilities:')
            U = {k: '{0:.3f}'.format(v) for k, v in U.items()}
            print_grid(U)
            l.info('Policy:')
            print_grid(pi)
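# A minimal sketch of what the composed `program` above reduces to, assuming
# (as the comment says) that `compose` applies its arguments right to left and
# that `do(f)` runs `f` purely for its side effect and returns its input
# unchanged, which is what the logging and printU steps appear to rely on.
# Leaving out the debug/print steps, the pipeline is roughly equivalent to:
#
#     def program(percept):
#         percept = N.update(percept)     # update the sensor network first
#         action = self.q_agent(percept)  # Q-learning agent chooses an action
#         return M.update(action)         # motor network maps it to motor output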
def __init__(self, objectives): # pylint: disable=line-too-long
    super().__init__(None, 'calf')

    motors = ['eat_and_forward', 'forward', 'dive_and_forward', 'up_and_forward']
    eat_and_forward, forward = frozenset([0]), frozenset([1])
    dive_and_forward, up_and_forward = frozenset([2]), frozenset([3])
    motors_to_action = {eat_and_forward: 'eat_and_forward',
                        forward: 'forward',
                        dive_and_forward: 'dive_and_forward',
                        up_and_forward: 'up_and_forward',
                        '*': '-'}
    motor_model = MotorModel(motors_to_action)

    self.network = N = Network(None, objectives)
    self.status = N.get_NEEDs()
    self.status_history = {'energy': []}
    s1 = N.add_SENSOR_node(Squid)
    s2 = N.add_SENSOR_node(Song)
    self.network_model = NetworkModel({frozenset([]): 'no_sensors',
                                       frozenset([s1]): 'squid',
                                       frozenset([s2]): 'song',
                                       frozenset([s1, s2]): 'squid_and_song'})
    self.motor_network = M = MotorNetwork(motors, motors_to_action)

    # NOTE: init=calf_start_pos, using a location here (only for debugging),
    #       is a state when MDP:s are used
    self.ndp = NetworkDP(calf_start_pos, self.status, motor_model, gamma=.9,
                         network_model=self.network_model)
    self.q_agent = NetworkQLearningAgent(self.ndp, Ne=0, Rplus=2,
                                         alpha=lambda n: 60. / (59 + n),
                                         epsilon=0.2, delta=0.5)

    # compose applies the functions from right to left
    self.program = compose(do(partial(l.debug, 'Calf mnetwork.update'))
                           , do(partial(l.debug, M))
                           , lambda a: do(partial(l.debug, '*** CALF EATING! ***'))(a) if a == 'eat_and_forward' else a
                           , M.update
                           , do(partial(l.debug, 'Calf q_agent'))
                           , self.q_agent
                           , do(partial(l.debug, N))
                           , lambda p: do(partial(l.debug, '*** CALF HEARD SONG! ***'))(p) if s2 in p[0] else p
                           , lambda p: do(partial(l.debug, '*** CALF FOUND SQUID! ***'))(p) if s1 in p[0] else p
                           , do(partial(l.debug, 'Calf network.update'))
                           , N.update
                           , do(partial(l.debug, 'Calf percept'))
                           )
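# Note on the percept checks above (an inference from this code, not from any
# documented API): the percept that flows through the pipeline appears to be a
# tuple whose first element, p[0], is the set of SENSOR node ids that fired, so
# `s1 in p[0]` means the Squid sensor triggered and `s2 in p[0]` means the Song
# sensor did. The lambdas only log a message and pass the percept on unchanged.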
def __init__(self, objectives): # pylint: disable=line-too-long, too-many-locals
    # program=None
    super().__init__(None, 'mom')

    # Motors and actions
    motors = ['sing_eat_and_forward', 'forward', 'dive_and_forward', 'up_and_forward']
    sing_eat_and_forward, forward = frozenset([0]), frozenset([1])
    dive_and_forward, up_and_forward = frozenset([2]), frozenset([3])
    motors_to_action = {sing_eat_and_forward: 'sing_eat_and_forward',
                        forward: 'forward',
                        dive_and_forward: 'dive_and_forward',
                        up_and_forward: 'up_and_forward',
                        '*': '-'}
    motor_model = MotorModel(motors_to_action)

    self.network = N = Network(None, objectives)
    self.status = N.get_NEEDs()
    self.status_history = {'energy': []}
    s1 = N.add_SENSOR_node(Squid)
    self.network_model = NetworkModel({frozenset(): 'no_sensors',
                                       frozenset([s1]): 'squid'})
    self.motor_network = M = MotorNetwork(motors, motors_to_action)

    # NOTE: init=mom_start_pos, using a location here (only for debugging),
    #       is a state when MDP:s are used
    self.ndp = NetworkDP(mom_start_pos, self.status, motor_model, gamma=.9,
                         network_model=self.network_model)
    self.q_agent = NetworkQLearningAgent(self.ndp, Ne=0, Rplus=2,
                                         alpha=lambda n: 60. / (59 + n),
                                         epsilon=0.2, delta=0.5)

    # compose applies the functions from right to left
    self.program = compose(do(partial(l.debug, 'Mom mnetwork.update'))
                           , do(partial(l.debug, M))
                           , M.update
                           , do(partial(l.debug, 'Mom q_agent'))
                           , self.q_agent
                           , do(partial(l.debug, N))
                           , do(partial(l.debug, 'Mom network.update'))
                           , N.update
                           , do(partial(l.debug, 'Mom percept'))
                           )
def test_networkQLearningAgent(self): # pylint: disable=line-too-long
    self.assertTrue(
        self.sensor_model.model == {
            (True, False, False, False, False, False, False, False, False, False, False): 'a',
            (False, True, False, False, False, False, False, False, False, False, False): 'b',
            (False, False, True, False, False, False, False, False, False, False, False): 'c',
            (False, False, False, True, False, False, False, False, False, False, False): 'd',
            (False, False, False, False, True, False, False, False, False, False, False): 'e',
            (False, False, False, False, False, True, False, False, False, False, False): 'f',
            (False, False, False, False, False, False, True, False, False, False, False): 'g',
            (False, False, False, False, False, False, False, True, False, False, False): 'h',
            (False, False, False, False, False, False, False, False, True, False, False): 'i',
            (False, False, False, False, False, False, False, False, False, True, False): 'j',
            (False, False, False, False, False, False, False, False, False, False, True): 'k'
        })
    self.assertTrue(
        self.ndp.actlist == [(False, True), (False, False), (True, True), (True, False)])

    q_agent = NetworkQLearningAgent(self.ndp, Ne=5, Rplus=2,
                                    alpha=lambda n: 60. / (59 + n),
                                    delta=0.5, max_iterations=100, calc_status=True)

    for i in range(100):
        l.debug('**** TRIAL: ', i, ' ****')
        q_agent.reset()
        run_single_trial(q_agent, self.test_mdp, self.sensor_model, self.motor_model, DEBUG_MODE)

    U, pi = q_agent.Q_to_U_and_pi()['energy']

    # print the utilities and the policy also
    U1 = sorted(U.items(), key=lambda x: x[0])
    pi1 = sorted(pi.items(), key=lambda x: x[0])
    l.debug('------------------')
    print_grid(U1)
    print_grid(pi1)
    l.debug('AAA', U, U1)
    l.debug('BBB', pi, pi1)
    l.debug('CCC', q_agent)

    save_csv_file('one_dim.csv', [self.ndp.history], self.ndp.history_headers, OUTPUT_DIR)

    # check utilities and policy
    # TODO: Seems difficult to get the same result at each run. Probably due to tests running in parallel.
    #self.assertTrue(U == {'h': 0.5675987591078075, 'i': 0.4286810287409787, 'j': 0.3330852421527908, 'k': -0.04, 'g': 0.48303073762721593, 'f': 0.35799047401701395, 'e': 0.7127484585669493, 'c': 1.302492662358114, 'd': 0.4742671906118568, 'a': 0.8593590286870549, 'b': 1.0802110658809299})
    #self.assertTrue(pi == {'h': '^', 'i': '<', 'j': '<', 'k': None, 'g': '<', 'f': '<', 'e': '^', 'c': '>', 'd': '>', 'a': '>', 'b': '>'})

    l.debug('test_networkQLearningAgent:', self.ndp.statuses)
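# For reference: judging from the commented-out assertions above, Q_to_U_and_pi()
# returns, per status, a pair (U, pi) where U maps each state name ('a'..'k') to
# its learned utility estimate and pi maps each state name to the greedy action
# ('>', '<', '^', or None for terminal states). U1 and pi1 are the same data
# sorted by state name, presumably so that print_grid renders a stable layout.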