def test_warehouse_04():
    """Run the default MCTS policy on the fixed test warehouse scenario and print the result.

    Prints the simulation finish time and the number of orders left uncompleted.
    """
    env = make_test_warehouse_env_01()
    policy = MCTSPolicy()
    we.execute(env, policy)
    print('**' * 30)
    print('[Result]')
    print('Finish time clock value=', env.finish_time_clock, ':uncompleted orders=', len(env.available_orders))
def test_warehouse_greedy(order_count):
    """Run the greedy policy on a warehouse scenario with `order_count` orders.

    Prints the simulation finish time, the number of uncompleted orders,
    and the wall-clock runtime.
    """
    start = time.time()
    env = make_test_warehouse_env(order_count)  # scenario sized by order_count (originally tuned for 60 orders)
    policy = GreedyPolicy()
    we.execute(env, policy)
    print('**' * 30)
    print('[Result]GreedyPolicy')
    print('Finish time clock value=', env.finish_time_clock, ':uncompleted orders=', len(env.available_orders))
    end = time.time()
    print('time', (end - start))
def test_warehouse_random(order_count, max_iteration):
    """Run the random policy `max_iteration` times and report the best finish time.

    Each iteration builds a fresh scenario with `order_count` orders, executes
    it under RandomPolicy, and the minimum finish time across all runs is kept.
    Prints the best finish time and the wall-clock runtime.
    """
    start = time.time()
    best = float('inf')  # any real finish time beats this sentinel
    for _ in range(max_iteration):
        env = make_test_warehouse_env(order_count)  # fresh scenario each iteration (originally tuned for 60 orders)
        policy = RandomPolicy()
        we.execute(env, policy)
        best = min(best, env.finish_time_clock)
    print('[Result] RandomPolicy')
    print("Random Best=", best)
    end = time.time()
    print('time', (end - start))
def test_warehouse_mcts(order_count, max_iteration=200, exploration_constant=1.41):
    """Run the MCTS policy on a warehouse scenario with `order_count` orders.

    Args:
        order_count: number of orders in the generated scenario.
        max_iteration: MCTS search iteration budget.
        exploration_constant: UCT exploration constant passed to the policy.

    Prints the finish time, uncompleted order count, the resulting order /
    forklift assignments, and the wall-clock runtime.
    """
    start = time.time()
    env = make_test_warehouse_env(order_count)  # scenario sized by order_count (originally tuned for 60 orders)
    # BUG FIX: the parameters were previously ignored — hard-coded 200 / 1.41
    # were passed to MCTSPolicy regardless of caller arguments.
    policy = MCTSPolicy(max_iteration=max_iteration, exploration_constant=exploration_constant)
    we.execute(env, policy)
    print('**' * 30)
    print('[Result] MCTS')
    print('Finish time clock value=', env.finish_time_clock, ':uncompleted orders=', len(env.available_orders))
    print('Order=', policy.result_order)
    print('Folklift=', policy.result_folklift)
    end = time.time()
    print('time', (end - start))
def rollout(self, env, node):
    """Estimate the value of `node` with a fast random rollout.

    The order prefix already fixed by the tree path is replayed in sequence;
    every remaining order is shuffled randomly. The full schedule is then
    simulated on a copy of `env`, and the negated finish time is returned
    (so earlier completion == higher reward).
    """
    sim_env = env.copy()  # simulate on a copy so the search state is untouched
    # Split orders into the fixed tree-path prefix and the free remainder.
    fixed_prefix = []
    remaining = sim_env.orders.copy()
    for order_no in node.order_no_sequence:
        order = sim_env.get_order_by_no(order_no)
        remaining.remove(order)
        fixed_prefix.append(order)
    np.random.shuffle(remaining)  # fast rollout: randomize the unscheduled tail
    rollout_policy = SequentialPolicy(fixed_prefix + remaining)
    we.execute(sim_env, rollout_policy)
    return 0 - sim_env.finish_time_clock