Example #1
    # Replay buffer holding up to 100,000 transitions
    memory = Memory(100000)
    if render_map:
        # Optional live map display (init_map is defined elsewhere in the project)
        root, canvas = init_map()

    steps = 0
    scores = []
    epsilon = 1.0   # start fully exploratory
    for episode in range(hp.num_episodes):
        state = env.reset()
        state = pre_process(state)
        # Stack the first frame four times to form the initial (84, 84, 4) history
        history = np.stack((state, state, state, state), axis=2)
        history = np.reshape(history, (84, 84, 4))

        # Take a few random actions so the 4-frame history holds distinct frames
        for _ in range(3):
            action = env.action_space.sample()
            next_state, reward, done, info = env.step(action)
            next_state = pre_process(next_state)
            next_state = np.reshape(next_state, (84, 84, 1))
            # Prepend the newest frame and drop the oldest one
            history = np.append(next_state, history[:, :, :3], axis=2)

        score = 0
        prev_life = 20          # starting life/health value
        episode_len = 0
        while True:
            env.render(mode='rgb_array')
            steps += 1

            # Q-values for the current 4-frame history; pick an epsilon-greedy action
            qvalue = model(to_tensor(history).unsqueeze(0))
            action = get_action(epsilon, qvalue, num_actions)
            next_state, reward, done, info = env.step(action)
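Example #1 leans on several helpers (pre_process, to_tensor, get_action, Memory) that are defined elsewhere in the project. As a rough sketch only, here is what the first three typically look like in a DQN pipeline, assuming grayscale 84x84 preprocessing and epsilon-greedy action selection; the project's actual implementations may differ:

import random

import cv2
import numpy as np
import torch


def pre_process(frame):
    # Grayscale and shrink the raw RGB frame to the 84x84 input the network expects
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    return cv2.resize(gray, (84, 84)) / 255.0


def to_tensor(history):
    # (84, 84, 4) channel-last history -> (4, 84, 84) float32 tensor for PyTorch
    return torch.from_numpy(np.transpose(history, (2, 0, 1))).float()


def get_action(epsilon, qvalue, num_actions):
    # Epsilon-greedy: random action with probability epsilon, else the greedy one
    if random.random() < epsilon:
        return random.randrange(num_actions)
    return int(qvalue.argmax(dim=1).item())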
Example #2
import cv2
import numpy as np
import torch

# model, env, prep_image, write_results, inp_dim, confidence, num_classes,
# nms_thesh and CUDA are assumed to be set up earlier (detector utility code).
# Record the episode as a 20 fps MJPG video at 800x600.
video = cv2.VideoWriter('record/mob-fun.avi',
                        cv2.VideoWriter_fourcc(*'MJPG'), 20, (800, 600))

done = False
write = False
batch_size = 1

for _ in range(1):      # single recorded episode
    score = 0
    count = 0
    env.reset()
    while True:
        count += 1
        env.render(mode="rgb_array")
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        # Malmo/MarLo-style info fields describing what arrived since the last step
        num_states = info['number_of_observations_since_last_state']
        num_rewards = info['number_of_rewards_since_last_state']
        observation = info['observation']
        # print(num_states, num_rewards)

        score += reward
        # Rebuild the flat pixel buffer into a 600x800 RGB frame, then
        # resize/pad it into the detector's input format
        obs = np.reshape(obs, (600, 800, 3))
        img, origin_img, dim = prep_image(obs, inp_dim)

        # Forward pass without gradient tracking, then filter detections by
        # confidence and apply non-maximum suppression
        with torch.no_grad():
            prediction = model(img, CUDA)

        prediction = write_results(prediction, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)
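The excerpt stops right after NMS, so video (and the write flag) are never used. Below is a hedged sketch of how the loop could be closed out. It assumes prep_image letterboxes the 800x600 frame into a square inp_dim input and that write_results returns the int 0 when nothing survives NMS, both common in PyTorch YOLOv3 utility code; the box rescaling is illustrative and must match the real prep_image:

        if not isinstance(prediction, int):  # assumed: int 0 means "no detections"
            # Map boxes from the letterboxed inp_dim x inp_dim frame back to 800x600
            scale = min(inp_dim / 800, inp_dim / 600)
            pad_x = (inp_dim - scale * 800) / 2
            pad_y = (inp_dim - scale * 600) / 2
            for det in prediction:
                x1 = int((det[1] - pad_x) / scale)
                y1 = int((det[2] - pad_y) / scale)
                x2 = int((det[3] - pad_x) / scale)
                y2 = int((det[4] - pad_y) / scale)
                cv2.rectangle(origin_img, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Frame must stay 800x600 to match the VideoWriter; convert RGB->BGR
        # with cv2.cvtColor first if the recorded colors look swapped
        video.write(origin_img)

        if done:
            break

video.release()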