import numpy as np

from env.roroDeck import RoRoDeck

# The agent classes used in these tests (DQLearningAgent, SARSA) are assumed to be
# importable from the project's agent package; adjust the import paths to the
# actual repository layout.


def test_dqn_agent():
    env = RoRoDeck(lanes=8, rows=12)
    n_games = 2
    agent = DQLearningAgent(env=env, module_path=None, gamma=0.999, epsilon=1.0, alpha=0.0005,
                            mem_size=10000000, batch_size=64, epsilon_min=0.01, epsilon_dec=0.99999)
    actions_taken = 0

    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()

        while not done:
            action = env.action_space_sample()
            state_actions = env.possible_actions
            observation_, reward, done, info = env.step(action)
            actions_taken += 1
            new_state_actions = env.possible_actions
            score += reward

            agent.memory.store_transition(observation, action, reward, observation_, done)
            observation = observation_
            agent.learn()

    # Every transition should be stored, with exactly one terminal flag per episode.
    assert np.shape(np.nonzero(agent.memory.reward_memory))[1] == actions_taken
    assert np.shape(np.nonzero(agent.memory.terminal_memory))[1] == 2


def test_env_parameter():
    env = RoRoDeck()
    env.reset()

    assert env.vehicle_data.shape[0] == 6
    assert 0 <= env.current_Lane <= env.lanes
    assert env.grid.shape == (env.rows, env.lanes)
    assert env.sequence_no >= 0 and isinstance(env.sequence_no, int)
    assert len(env.reward_system) == 4


def test_reefer_positions():
    env = RoRoDeck()
    env.vehicle_data[5][4] = 1  # enable reefer for test
    env.reset()

    env.current_Lane = 0
    assert env.grid_reefer.T[env.current_Lane][-1] == 1
    assert len(env.vehicle_data.T) == len(env.get_possible_actions_of_state()) == 5

    env.current_Lane = 4
    assert env.grid_reefer.T[env.current_Lane][-1] == 0
    assert len(env.get_possible_actions_of_state()) == 4
    assert 4 not in env.get_possible_actions_of_state()


def test_is_action_legal():
    env = RoRoDeck()
    env.reset()
    number_vehicle = env.vehicle_data[1][0]

    assert env._is_action_legal(0)
    for i in range(number_vehicle):
        assert env._is_action_legal(0)
        env.step(0)
    assert not env._is_action_legal(0)


def test_reset_method():
    env = RoRoDeck()

    # Snapshot the initial environment state.
    vehicle_data = env.vehicle_data.copy()
    end_of_lanes = env.end_of_lanes.copy()
    grid = env.grid.copy()
    grid_destination = env.grid_destination.copy()
    grid_vehicle_type = env.grid_vehicle_type.copy()
    capacity = env.total_capacity.copy()
    vehicle_counter = env.vehicle_Counter.copy()
    mandatory_cargo_mask = env.mandatory_cargo_mask.copy()
    loaded_vehicles = env.loaded_vehicles.copy()
    reward_system = env.reward_system.copy()
    sequence_no = env.sequence_no
    current_lane = env.current_Lane.copy()
    action_space = env.action_space.copy()
    possible_actions = env.possible_actions.copy()
    number_of_vehicles_loaded = env.number_of_vehicles_loaded.copy()

    env.reset()
    env.step(env.action_space_sample())
    env.step(env.action_space_sample())
    env.reset()

    # After reset the environment must match the initial snapshot.
    _vehicle_data = env.vehicle_data
    _end_of_lanes = env.end_of_lanes
    _grid = env.grid
    _grid_destination = env.grid_destination
    _grid_vehicle_type = env.grid_vehicle_type
    _capacity = env.total_capacity
    _vehicle_counter = env.vehicle_Counter
    _mandatory_cargo_mask = env.mandatory_cargo_mask
    _loaded_vehicles = env.loaded_vehicles
    _reward_system = env.reward_system
    _sequence_no = env.sequence_no
    _current_lane = env.current_Lane
    _action_space = env.action_space
    _possible_actions = env.possible_actions
    _number_of_vehicles_loaded = env.number_of_vehicles_loaded

    assert (_vehicle_data == vehicle_data).all()
    assert (_end_of_lanes == end_of_lanes).all()
    assert (_grid == grid).all()
    assert (_grid_destination == grid_destination).all()
    assert (_grid_vehicle_type == grid_vehicle_type).all()
    assert (_capacity == capacity).all()
    assert (_vehicle_counter == vehicle_counter).all()
    assert (_mandatory_cargo_mask == mandatory_cargo_mask).all()
    assert (_loaded_vehicles == loaded_vehicles).all()
    assert (_reward_system == reward_system).all()
    assert _sequence_no == sequence_no
    assert (_current_lane == current_lane).all()
    assert (_action_space == action_space).all()
    assert (_possible_actions == possible_actions).all()
    assert (_number_of_vehicles_loaded == number_of_vehicles_loaded).all()


def test_shifts_caused():
    env = RoRoDeck()
    env.reset()
    current_lane = env.current_Lane
    env.step(0)
    env.current_Lane = current_lane

    assert env._get_number_of_shifts(1) == 1
    assert env._get_number_of_shifts(0) == 0


def test_get_current_state():
    env = RoRoDeck()
    state = env.current_state
    assert np.shape(state) == (25,)

    env.reset()
    state = env.current_state
    assert np.shape(state) == (25,)

    env.step(env.action_space_sample())
    assert not np.all(state == env.current_state)


def test_sarsa_agent():
    env = RoRoDeck()
    env.rows = 12
    env.lanes = 12
    env.reset()

    # Set all vehicle types to equal length.
    env.vehicle_data[4][1] = 2
    env.vehicle_data[4][3] = 2

    agent = SARSA(env, None)
    assert len(agent.q_table.keys()) == 0

    agent.number_of_episodes = 1
    agent.train()
    assert len(agent.q_table.keys()) == 61


def test_possible_actions():
    env = RoRoDeck()
    env.vehicle_data[5][4] = 0  # disable reefer for test
    env.reset()
    number_of_vehicle = env.vehicle_data[1][0]

    assert len(env.vehicle_data.T) == len(env.possible_actions) == 5
    for i in range(number_of_vehicle):
        assert len(env.vehicle_data.T) == len(env.possible_actions) == 5
        env.step(0)
    assert len(env.possible_actions) == len(env.vehicle_data.T) - 1


def test_roro_deck():
    env = RoRoDeck()
    env.reset()
    done = False

    i = 0
    while not done:
        observation_, reward, done, info = env.step(env.action_space_sample())
        i += 1
        assert i <= 100


def test_end_of_lane_method():
    env = RoRoDeck()
    initial_end_of_lanes = env.initial_end_of_lanes
    env.reset()
    end_of_lanes = env.end_of_lanes.copy()
    assert np.all(end_of_lanes == initial_end_of_lanes)

    current_lane = env.current_Lane
    length = env.vehicle_data[4][env.possible_actions[0]]
    env.step(env.possible_actions[0])
    assert env.end_of_lanes[current_lane] == end_of_lanes[current_lane] + length


def test_stepMethod():
    env = RoRoDeck()
    env.reset()

    vehicle_data = env.vehicle_data.copy()
    end_of_lanes = env.end_of_lanes.copy()
    grid = env.grid.copy()
    grid_destination = env.grid_destination.copy()
    vehicle_counter = env.vehicle_Counter.copy()
    mandatory_cargo_mask = env.mandatory_cargo_mask.copy()
    loaded_vehicles = env.loaded_vehicles.copy()
    reward_system = env.reward_system.copy()
    sequence_no = env.sequence_no
    current_lane = env.current_Lane.copy()
    action_space = env.action_space.copy()
    possible_actions = env.possible_actions.copy()

    np.random.seed(0)
    action = action_space[mandatory_cargo_mask][0]
    if action not in possible_actions:
        action = env.action_space_sample()
    env.step(action)

    # Replay the expected effect of the step on the local copies.
    destination = vehicle_data[3][action]
    length = vehicle_data[4][action]
    for i in range(length):
        grid.T[current_lane][end_of_lanes[current_lane] + i] = sequence_no
        grid_destination.T[current_lane][end_of_lanes[current_lane] + i] = destination
    loaded_vehicles[current_lane][vehicle_counter[current_lane]] = action

    assert (env.grid == grid).all()
    assert (env.grid_destination == grid_destination).all()
    assert env.end_of_lanes[current_lane] == end_of_lanes[current_lane] + length
    assert env.sequence_no == sequence_no + 1
    assert (env.loaded_vehicles == loaded_vehicles).all()
    assert env.vehicle_Counter[current_lane] == vehicle_counter[current_lane] + 1

    # The step must not alter the vehicle data or the reward system.
    _vehicle_data = env.vehicle_data
    _reward_system = env.reward_system
    assert (_vehicle_data == vehicle_data).all()
    assert (_reward_system == reward_system).all()


def test_max_action_method_sarsa():
    env = RoRoDeck()
    env.rows = 12
    env.lanes = 12
    state = env.reset()

    agent = SARSA(env, None)
    agent.q_table[state.tobytes()] = np.zeros(4)
    assert np.count_nonzero(agent.q_table[state.tobytes()]) == 0

    agent.q_table[state.tobytes()][2] = 1
    agent.q_table[state.tobytes()][3] = 2
    assert agent.max_action(state, env.possible_actions) == 3

    env.possible_actions = np.array([0, 1, 2])
    assert agent.max_action(state, env.possible_actions) == 2


"EnvRows": self.env.rows, "VehicleData": self.env.vehicle_data, "TrainingTime": self.training_time } info = "TDQ" + "_L" + str(self.env.lanes) + "-R" + str(self.env.rows) super().save_model(path, info) # Example of usage if __name__ == '__main__': np.random.seed(0) loggingBase = LoggingBase() module_path = loggingBase.module_path env = RoRoDeck(lanes=10, rows=12) number_of_episodes = 5000 agent = TDQLearning(env=env, module_path=module_path, number_of_episodes=number_of_episodes) model, total_rewards, steps_to_exit, eps_history, state_expansion = agent.train( ) plotter = Plotter(module_path, agent.number_of_episodes, show_plot=True) plotter.plotRewardPlot(total_rewards) plotter.plotStateExp(state_expansion) plotter.plotEPSHistory(np.array(eps_history)) plotter.plot_cargo_units_loaded(np.array(steps_to_exit)) evaluator = Evaluator(env.vehicle_data, env.grid)
import numpy as np

from env.roroDeck import RoRoDeck
from analysis.evaluator import *

# Preparation for tests: create two random stowage plans with a fixed seed.
np.random.seed(0)

env1 = RoRoDeck()
env2 = RoRoDeck()
env1.reset()
env2.reset()

done = False
total_rewards_env1 = 0
while not done:
    action = env1.action_space_sample()
    observation_, reward, done, info = env1.step(action)
    total_rewards_env1 += reward

done = False
total_rewards_env2 = 0
while not done:
    action = env2.action_space_sample()
    observation_, reward, done, info = env2.step(action)
    total_rewards_env2 += reward


# Test whether the Evaluator and the agent's estimate agree.
def test_shifts():
    random_actions_1 = [0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2, 4, 3, 2, 3, 0,
                        0, 0, 3, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2, 3, 2, 3, 4, 4, 2, 1, 2, 0, 3,
                        0, 2, 4, 3, 2, 1, 2, 1, 4, 1, 0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2, 4, 0, 0,
                        2, 0, 4, 4, 4, 1, 1, 4, 1, 1, 3, 1, 3, 3, 1, 1, 1, 0, 3, 0, 4, 1, 4, 0, 3]
    # The second environment replays the identical action sequence.
    random_actions_2 = list(random_actions_1)
    random_actions_3 = [0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2, 4, 3, 2, 3, 0, 0,
                        0, 3, 0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2, 4, 3, 2, 3,
                        0, 0, 0, 3, 2, 1, 4, 1, 0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2, 4, 0, 0, 2, 0, 4,
                        4, 4, 1, 1, 4, 1, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2, 4, 1, 2, 4, 0, 0, 2, 0,
                        4, 4, 4, 1, 1, 4, 1, 4, 1, 3, 1, 3, 3, 1, 1, 1, 0, 3, 0, 4, 1, 4, 0, 3, 2, 4]

    i = 0
    env1.reset()
    done_ = False
    while not done_:
        action_ = random_actions_1[i]
        _, _, done_, _ = env1.step(action_)
        i += 1

    i = 0
    env2.reset()
    done_ = False
    while not done_:
        action_ = random_actions_2[i]
        _, _, done_, _ = env2.step(action_)
        i += 1

    env3 = RoRoDeck(lanes=8, rows=20)
    env3.reset()
    i = 0
    done_ = False
    while not done_:
        action_ = random_actions_3[i % len(random_actions_3)]
        _, _, done_, _ = env3.step(action_)
        i += 1

    evaluator1 = Evaluator(env1.vehicle_data, env1.grid)
    shifts1 = evaluator1.evaluate(env1.get_stowage_plan()).shifts

    evaluator2 = Evaluator(env2.vehicle_data, env2.grid)
    shifts2 = evaluator2.evaluate(env2.get_stowage_plan()).shifts

    evaluator3 = Evaluator(env3.vehicle_data, env3.grid)
    shifts3 = evaluator3.evaluate(env3.get_stowage_plan()).shifts

    assert shifts1 == 3
    assert shifts2 == 3
    assert shifts3 == 19


def test_find_current_lane():
    env = RoRoDeck(lanes=8, rows=12)
    env.reset()
    assert env.current_Lane == 3

    env.step(env.action_space_sample())
    assert env.current_Lane == 4


def test_is_terminal_state():
    env = RoRoDeck()
    assert not env._is_terminal_state()

    env.reset()
    assert not env._is_terminal_state()

    done = False
    for i in range(4):
        state, reward, done, info = env.step(env.action_space_sample())
        assert not env._is_terminal_state()

    while not done:
        state, reward, done, info = env.step(env.action_space_sample())
    assert env._is_terminal_state()