예제 #1
0
파일: tdq.py 프로젝트: sbraun92/thesisSB
        }
        info = "TDQ" + "_L" + str(self.env.lanes) + "-R" + str(self.env.rows)

        super().save_model(path, info)


# Usage example: train a TDQLearning agent on a RoRoDeck, plot the training
# curves and print the evaluation of the resulting stowage plan.
if __name__ == '__main__':
    # Seed NumPy's global RNG before any other object is created.
    np.random.seed(0)

    # --- set-up ------------------------------------------------------------
    loggingBase = LoggingBase()
    module_path = loggingBase.module_path
    env = RoRoDeck(lanes=10, rows=12)
    number_of_episodes = 5000

    agent = TDQLearning(
        env=env,
        module_path=module_path,
        number_of_episodes=number_of_episodes,
    )

    # --- training ----------------------------------------------------------
    # train() hands back five values; they are unpacked in the order returned.
    training_output = agent.train()
    (model,
     total_rewards,
     steps_to_exit,
     eps_history,
     state_expansion) = training_output

    # --- plots -------------------------------------------------------------
    plotter = Plotter(module_path, agent.number_of_episodes, show_plot=True)
    plotter.plotRewardPlot(total_rewards)
    plotter.plotStateExp(state_expansion)
    # The last two plots receive their data converted to NumPy arrays.
    eps_array = np.array(eps_history)
    plotter.plotEPSHistory(eps_array)
    steps_array = np.array(steps_to_exit)
    plotter.plot_cargo_units_loaded(steps_array)

    # --- evaluation --------------------------------------------------------
    evaluator = Evaluator(env.vehicle_data, env.grid)
    stowage_plan = env.get_stowage_plan()
    evaluation = evaluator.evaluate(stowage_plan)
    print(evaluation)
예제 #2
0
def test_shifts():
    """Replay fixed action sequences and check the evaluator's shift counts.

    Three environments are each reset and stepped with a hard-coded action
    list until ``step`` reports the episode as done. The resulting stowage
    plans are then evaluated and the ``shifts`` attribute of each evaluation
    is compared against a fixed expected value (3, 3 and 19).

    NOTE(review): ``env1`` and ``env2`` are not created in this function;
    presumably they are module-level fixtures of the test file - confirm.
    """

    def play_out(deck, actions, wrap_around):
        """Reset ``deck`` and feed it ``actions`` until the episode is done.

        With ``wrap_around`` the action list is cycled via modulo indexing;
        without it the list is indexed directly, so running past its end
        raises ``IndexError``.
        """
        deck.reset()
        step_no = 0
        finished = False
        while not finished:
            if wrap_around:
                position = step_no % len(actions)
            else:
                position = step_no
            _, _, finished, _ = deck.step(actions[position])
            step_no += 1

    def count_shifts(deck):
        """Evaluate the stowage plan of ``deck`` and return its shift count."""
        judge = Evaluator(deck.vehicle_data, deck.grid)
        return judge.evaluate(deck.get_stowage_plan()).shifts

    actions_first = [
        0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2, 4, 3, 2, 3,
        0, 0, 0, 3, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2, 3, 2, 3, 4, 4, 2, 1, 2,
        0, 3, 0, 2, 4, 3, 2, 1, 2, 1, 4, 1, 0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2,
        4, 0, 0, 2, 0, 4, 4, 4, 1, 1, 4, 1, 1, 3, 1, 3, 3, 1, 1, 1, 0, 3, 0, 4,
        1, 4, 0, 3
    ]

    # The second sequence is element-for-element the same as the first one.
    actions_second = list(actions_first)

    actions_third = [
        0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2, 4, 3, 2, 3,
        0, 0, 0, 3, 0, 3, 3, 0, 3, 0, 2, 1, 0, 4, 0, 3, 4, 4, 1, 0, 2, 4, 4, 2,
        4, 3, 2, 3, 0, 0, 0, 3, 2, 1, 4, 1, 0, 0, 0, 0, 0, 1, 0, 3, 1, 4, 1, 2,
        4, 0, 0, 2, 0, 4, 4, 4, 1, 1, 4, 1, 4, 4, 2, 2, 4, 1, 3, 3, 3, 1, 2, 2,
        4, 1, 2, 4, 0, 0, 2, 0, 4, 4, 4, 1, 1, 4, 1, 4, 1, 3, 1, 3, 3, 1, 1, 1,
        0, 3, 0, 4, 1, 4, 0, 3, 2, 4
    ]

    # Roll out all three episodes first; evaluation happens afterwards.
    play_out(env1, actions_first, wrap_around=False)
    play_out(env2, actions_second, wrap_around=False)

    # Only the third environment is built here, and only its action list is
    # cycled when the episode outlasts the list.
    env3 = RoRoDeck(lanes=8, rows=20)
    play_out(env3, actions_third, wrap_around=True)

    shifts_first = count_shifts(env1)
    shifts_second = count_shifts(env2)
    shifts_third = count_shifts(env3)

    assert shifts_first == 3
    assert shifts_second == 3
    assert shifts_third == 19