예제 #1
0
def test_dqn_agent():
    env = RoRoDeck(lanes=8, rows=12)
    n_games = 2
    agent = DQLearningAgent(env=env,
                            module_path=None,
                            gamma=0.999,
                            epsilon=1.0,
                            alpha=0.0005,
                            mem_size=10000000,
                            batch_size=64,
                            epsilon_min=0.01,
                            epsilon_dec=0.99999)

    actions_taken = 0
    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()
        while not done:
            action = env.action_space_sample()
            state_actions = env.possible_actions
            observation_, reward, done, info = env.step(action)
            actions_taken += 1
            new_state_actions = env.possible_actions
            score += reward
            agent.memory.store_transition(observation, action, reward,
                                          observation_, done)
            observation = observation_
            agent.learn()

    assert np.shape(np.nonzero(agent.memory.reward_memory))[1] == actions_taken
    assert np.shape(np.nonzero(agent.memory.terminal_memory))[1] == 2
예제 #2
0
def test_env_parameter():
    env = RoRoDeck()
    env.reset()

    assert env.vehicle_data.shape[0] == 6
    assert 0 <= env.current_Lane <= env.lanes
    assert env.grid.shape == (env.rows, env.lanes)
    assert env.sequence_no >= 0 and isinstance(env.sequence_no, int)
    assert len(env.reward_system) == 4
예제 #3
0
def test_reefer_positions():
    env = RoRoDeck()
    env.vehicle_data[5][4] = 1  # enable reefer for test
    env.reset()

    env.current_Lane = 0
    assert env.grid_reefer.T[env.current_Lane][-1] == 1
    assert len(env.vehicle_data.T) == len(env.get_possible_actions_of_state()) == 5

    env.current_Lane = 4
    assert env.grid_reefer.T[env.current_Lane][-1] == 0
    assert len(env.get_possible_actions_of_state()) == 4
    assert 4 not in env.get_possible_actions_of_state()
예제 #4
0
def test_is_action_legal():
    env = RoRoDeck()
    env.reset()
    number_vehicle = env.vehicle_data[1][0]

    assert env._is_action_legal(0)
    for i in range(number_vehicle):
        assert env._is_action_legal(0)
        env.step(0)
    assert not env._is_action_legal(0)
예제 #5
0
def test_reset_method():
    env = RoRoDeck()
    vehicle_data = env.vehicle_data.copy()
    end_of_lanes = env.end_of_lanes.copy()
    grid = env.grid.copy()
    grid_destination = env.grid_destination.copy()
    grid_vehicle_type = env.grid_vehicle_type.copy()
    capacity = env.total_capacity.copy()
    vehicle_counter = env.vehicle_Counter.copy()
    mandatory_cargo_mask = env.mandatory_cargo_mask.copy()
    loaded_vehicles = env.loaded_vehicles.copy()
    reward_system = env.reward_system.copy()
    sequence_no = env.sequence_no
    current_lane = env.current_Lane.copy()
    action_space = env.action_space.copy()
    possible_actions = env.possible_actions.copy()
    number_of_vehicles_loaded = env.number_of_vehicles_loaded.copy()

    env.reset()
    env.step(env.action_space_sample())
    env.step(env.action_space_sample())
    env.reset()

    _vehicleData = env.vehicle_data
    _endOFLanes = env.end_of_lanes
    _grid = env.grid
    _gridDestination = env.grid_destination
    _gridVehicleType = env.grid_vehicle_type
    _capacity = env.total_capacity
    _vehicleCounter = env.vehicle_Counter
    _mandatoryCargoMask = env.mandatory_cargo_mask
    _loadedVehicles = env.loaded_vehicles
    _rewardSystem = env.reward_system
    _sequence_no = env.sequence_no
    _currentLane = env.current_Lane
    _actionSpace = env.action_space
    _possibleActions = env.possible_actions
    _numberOfVehiclesLoaded = env.number_of_vehicles_loaded

    assert (_vehicleData == vehicle_data).all()
    assert (_endOFLanes == end_of_lanes).all()
    assert (_grid == grid).all()
    assert (_gridDestination == grid_destination).all()
    assert (_gridVehicleType == grid_vehicle_type).all()
    assert (_capacity == capacity).all()
    assert (_vehicleCounter == vehicle_counter).all()
    assert (_mandatoryCargoMask == mandatory_cargo_mask).all()
    assert (_loadedVehicles == loaded_vehicles).all()
    assert (_rewardSystem == reward_system).all()
    assert _sequence_no == sequence_no
    assert (_currentLane == current_lane).all()
    assert (_actionSpace == action_space).all()
    assert (_possibleActions == possible_actions).all()
    assert (_numberOfVehiclesLoaded == number_of_vehicles_loaded).all()
예제 #6
0
def test_shifts_caused():
    env = RoRoDeck()
    env.reset()

    current_lane = env.current_Lane
    env.step(0)
    env.current_Lane = current_lane
    assert env._get_number_of_shifts(1) == 1
    assert env._get_number_of_shifts(0) == 0
예제 #7
0
def test_get_current_state():
    env = RoRoDeck()

    state = env.current_state
    assert np.shape(state) == (25,)
    env.reset()
    state = env.current_state
    assert np.shape(state) == (25,)

    env.step(env.action_space_sample())
    assert not np.all(state == env.current_state)
예제 #8
0
def test_sarsa_agent():
    env = RoRoDeck()
    env.rows = 12
    env.lanes = 12
    env.reset()
    # set all units to equal length
    env.vehicle_data[4][1] = 2
    env.vehicle_data[4][3] = 2

    agent = SARSA(env, None)

    assert len(agent.q_table.keys()) == 0
    agent.number_of_episodes = 1
    agent.train()

    assert len(agent.q_table.keys()) == 61
예제 #9
0
def test_possible_actions():
    env = RoRoDeck()
    env.vehicle_data[5][4] = 0  # disable reefer for test
    env.reset()
    number_of_vehicle = env.vehicle_data[1][0]

    assert len(env.vehicle_data.T) == len(env.possible_actions) == 5

    for i in range(number_of_vehicle):
        assert len(env.vehicle_data.T) == len(env.possible_actions) == 5
        env.step(0)

    assert len(env.possible_actions) == len(env.vehicle_data.T) - 1
예제 #10
0
def test_roro_deck():
    env = RoRoDeck()
    env.reset()
    done = False
    i = 0

    while not done:
        observation_, reward, done, info = env.step(env.action_space_sample())
        i += 1
        assert i <= 100
예제 #11
0
def test_end_of_lane_method():
    env = RoRoDeck()
    initial_end_of_lanes = env.initial_end_of_lanes
    env.reset()
    end_of_lanes = env.end_of_lanes.copy()
    assert np.all(end_of_lanes == initial_end_of_lanes)

    current_lane = env.current_Lane
    length = env.vehicle_data[4][env.possible_actions[0]]

    env.step(env.possible_actions[0])

    assert env.end_of_lanes[current_lane] == end_of_lanes[current_lane] + length
예제 #12
0
def test_stepMethod():
    env = RoRoDeck()
    env.reset()

    vehicle_data = env.vehicle_data.copy()
    end_of_lanes = env.end_of_lanes.copy()
    grid = env.grid.copy()
    grid_destination = env.grid_destination.copy()
    vehicle_counter = env.vehicle_Counter.copy()
    mandatory_cargo_mask = env.mandatory_cargo_mask.copy()
    loaded_vehicles = env.loaded_vehicles.copy()
    reward_system = env.reward_system.copy()
    sequence_no = env.sequence_no
    current_lane = env.current_Lane.copy()
    action_space = env.action_space.copy()
    possible_actions = env.possible_actions.copy()

    np.random.seed(0)

    action = action_space[mandatory_cargo_mask][0]
    if action not in possible_actions:
        action = env.action_space_sample()

    env.step(action)

    destination = vehicle_data[3][action]
    length = vehicle_data[4][action]

    for i in range(length):
        grid.T[current_lane][end_of_lanes[current_lane] + i] = sequence_no
        grid_destination.T[current_lane][end_of_lanes[current_lane] + i] = destination

    loaded_vehicles[current_lane][vehicle_counter[current_lane]] = action

    assert (env.grid == grid).all()
    assert (env.grid_destination == grid_destination).all()
    assert env.end_of_lanes[current_lane] == end_of_lanes[current_lane] + length
    assert env.sequence_no == sequence_no + 1
    assert (env.loaded_vehicles == loaded_vehicles).all()
    assert (env.vehicle_Counter[current_lane] == vehicle_counter[current_lane] + 1)

    _vehicle_data = env.vehicle_data
    _reward_system = env.reward_system

    assert (_vehicle_data == vehicle_data).all()
    assert (_reward_system == reward_system).all()
예제 #13
0
def test_max_action_method_sarsa():
    env = RoRoDeck()
    env.rows = 12
    env.lanes = 12
    state = env.reset()

    agent = SARSA(env, None)

    agent.q_table[state.tobytes()] = np.zeros(4)

    assert np.count_nonzero(agent.q_table[state.tobytes()]) == 0
    agent.q_table[state.tobytes()][2] = 1
    agent.q_table[state.tobytes()][3] = 2

    assert agent.max_action(state, env.possible_actions) == 3

    env.possible_actions = np.array([0, 1, 2])

    assert agent.max_action(state, env.possible_actions) == 2
예제 #14
0
파일: tdq.py 프로젝트: sbraun92/thesisSB
            "EnvRows": self.env.rows,
            "VehicleData": self.env.vehicle_data,
            "TrainingTime": self.training_time
        }
        info = "TDQ" + "_L" + str(self.env.lanes) + "-R" + str(self.env.rows)

        super().save_model(path, info)


# Example of usage
if __name__ == '__main__':
    np.random.seed(0)

    loggingBase = LoggingBase()
    module_path = loggingBase.module_path
    env = RoRoDeck(lanes=10, rows=12)
    number_of_episodes = 5000

    agent = TDQLearning(env=env,
                        module_path=module_path,
                        number_of_episodes=number_of_episodes)

    model, total_rewards, steps_to_exit, eps_history, state_expansion = agent.train(
    )
    plotter = Plotter(module_path, agent.number_of_episodes, show_plot=True)
    plotter.plotRewardPlot(total_rewards)
    plotter.plotStateExp(state_expansion)
    plotter.plotEPSHistory(np.array(eps_history))
    plotter.plot_cargo_units_loaded(np.array(steps_to_exit))

    evaluator = Evaluator(env.vehicle_data, env.grid)
예제 #15
0
def test_shifts():
    random_actions_1 = [
        0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2, 4, 3, 2, 3,
        0, 0, 0, 3, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2, 3, 2, 3, 4, 4, 2, 1, 2,
        0, 3, 0, 2, 4, 3, 2, 1, 2, 1, 4, 1, 0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2,
        4, 0, 0, 2, 0, 4, 4, 4, 1, 1, 4, 1, 1, 3, 1, 3, 3, 1, 1, 1, 0, 3, 0, 4,
        1, 4, 0, 3
    ]

    random_actions_2 = [
        0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2, 4, 3, 2, 3,
        0, 0, 0, 3, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2, 3, 2, 3, 4, 4, 2, 1, 2,
        0, 3, 0, 2, 4, 3, 2, 1, 2, 1, 4, 1, 0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2,
        4, 0, 0, 2, 0, 4, 4, 4, 1, 1, 4, 1, 1, 3, 1, 3, 3, 1, 1, 1, 0, 3, 0, 4,
        1, 4, 0, 3
    ]

    random_actions_3 = [
        0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2, 4, 3, 2, 3,
        0, 0, 0, 3, 0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2,
        4, 3, 2, 3, 0, 0, 0, 3, 2, 1, 4, 1, 0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2,
        4, 0, 0, 2, 0, 4, 4, 4, 1, 1, 4, 1, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2,
        4, 1, 2, 4, 0, 0, 2, 0, 4, 4, 4, 1, 1, 4, 1, 4, 1, 3, 1, 3, 3, 1, 1, 1,
        0, 3, 0, 4, 1, 4, 0, 3, 2, 4
    ]

    i = 0
    env1.reset()
    done_ = False
    while not done_:
        action_ = random_actions_1[i]
        _, _, done_, _ = env1.step(action_)
        i += 1

    i = 0
    env2.reset()
    done_ = False
    while not done_:
        action_ = random_actions_2[i]
        _, _, done_, _ = env2.step(action_)
        i += 1

    env3 = RoRoDeck(lanes=8, rows=20)
    env3.reset()
    i = 0
    done_ = False
    while not done_:
        action_ = random_actions_3[i % len(random_actions_3)]
        _, _, done_, _ = env3.step(action_)
        i += 1
    evaluator1 = Evaluator(env1.vehicle_data, env1.grid)
    shifts1 = evaluator1.evaluate(env1.get_stowage_plan()).shifts

    evaluator2 = Evaluator(env2.vehicle_data, env2.grid)
    shifts2 = evaluator2.evaluate(env2.get_stowage_plan()).shifts

    evaluator3 = Evaluator(env3.vehicle_data, env3.grid)
    shifts3 = evaluator3.evaluate(env3.get_stowage_plan()).shifts

    assert shifts1 == 3
    assert shifts2 == 3
    assert shifts3 == 19
예제 #16
0
from env.roroDeck import RoRoDeck
from analysis.evaluator import *

# Preparation for Tests
np.random.seed(0)

# Create a random stowagePlan
env1 = RoRoDeck()
env2 = RoRoDeck()

env1.reset()
env2.reset()

done = False
total_rewards_env1 = 0

while not done:
    action = env1.action_space_sample()
    observation_, reward, done, info = env1.step(action)
    total_rewards_env1 += reward

done = False
total_rewards_env2 = 0

while not done:
    action = env2.action_space_sample()
    observation_, reward, done, info = env2.step(action)
    total_rewards_env2 += reward


# Test if the Evaluator and the agents estimate are consensually
예제 #17
0
def test_find_current_lane():
    env = RoRoDeck(lanes=8, rows=12)
    env.reset()
    assert env.current_Lane == 3
    env.step(env.action_space_sample())
    assert env.current_Lane == 4
예제 #18
0
def test_is_terminal_state():
    env = RoRoDeck()
    assert not env._is_terminal_state()

    env.reset()
    assert not env._is_terminal_state()

    done = False
    for i in range(4):
        state, reward, done, info = env.step(env.action_space_sample())
    assert not env._is_terminal_state()

    while not done:
        state, reward, done, info = env.step(env.action_space_sample())
    assert env._is_terminal_state()