Example #1
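The snippet is a fragment of a larger training script. A minimal sketch of the setup it assumes is shown below; the environment id, the Q-network graph, and names such as actionSpace, counter, RM (the replay memory) and the project's own ImageProcessing module all come from earlier in the original script and are only assumed here.

import gym
import universe  # registers the flashgames.* environments
import numpy as np
import tensorflow as tf

env = gym.make('flashgames.CoasterRacer-v0')  # assumed environment id
env.configure(remotes=1)
observation_n = env.reset()

counter = 0
# ... build the Q-network graph, actionSpace and the replay memory RM here ...
sess = tf.Session()
saver = tf.train.Saver()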
saver.restore(sess, '/home/carl/PycharmProjects/OpenAIUniverseRL/train')

while True:
    action_n = [actionSpace[action] for ob in observation_n]  # your agent here
    observation_n, reward_n, done_n, info = env.step(action_n)
    rewardNormalized = np.array([reward_n[0] / 40])  # to make the max reward closer to 1.0
    if observation_n[0]:
        pixels_raw = observation_n[0].get("vision")[
            84:592, 18:818]  # Size 508x800. Will be resized by 0.1
        grayscaleImg = ImageProcessing.pre_process_image(
            pixels_raw)  # Makes a 51x80 'uint8' list
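        # (sketch, assumption) pre_process_image is the project's own helper; the
        # comments above suggest it roughly does a grayscale conversion plus a
        # 0.1x resize, e.g. with OpenCV:
        #   gray = cv2.cvtColor(pixels_raw, cv2.COLOR_RGB2GRAY)
        #   grayscaleImg = cv2.resize(gray, (80, 51)).astype(np.uint8)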
        counter += 1
        if counter == 1:
            motionTracer = ImageProcessing.MotionTracer(
                pixels_raw)  # create the object
        else:
            motionTracer.process(pixels_raw)  # do stuff to create motion image
            if counter > 399:  # Ignore the unearned reward while the car still has its initial momentum at the start
                # Add data to the replay memory
                # Corresponding action and reward get added one env.step later than state and q_values
                RM.stateArray = np.vstack(
                    (RM.stateArray, state))  # 'state' comes from previous step
                RM.q_valueArray = np.concatenate(
                    (RM.q_valueArray, Q_values_est),
                    axis=0)  # 'Q_values_est' comes from previous step
                RM.actionArray = np.concatenate(
                    (RM.actionArray, action_as_1D_array), axis=0)
                RM.rewardArray = np.concatenate(
                    (RM.rewardArray, rewardNormalized), axis=0)
        # 'state' holds the 'grayscale' channel [0] and the 'grayscale-motion trace' channel [1]
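        # (sketch, assumption) 'state', 'Q_values_est', 'action' and
        # 'action_as_1D_array' are produced at the end of each iteration in the
        # original script, roughly:
        #   state = motionTracer.get_state()             # channels [0] and [1]
        #   Q_values_est = qNetwork.get_q_values(state)  # hypothetical Q-network helper
        #   action = np.argmax(Q_values_est[0])
        #   action_as_1D_array = np.array([action])
        # so that on the next env.step they are stored in RM together with the
        # matching action and reward.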