Esempio n. 1
0
def main():
    """Load a replay buffer from disk, shuffle its experiences, and save it back.

    Side effects: reads and overwrites the pickle file at
    ``replay_buffer_path_1``; prints timing and size information.
    """
    replay_memory_size = 400000
    replay_buffer_path_1 = "/home/pulver/replay_2.pickle"

    replay_buffer_1 = ExperienceReplayBuffer(capacity=replay_memory_size)

    # Load the replay buffer from file, timing the operation.
    timer_start = time.time()
    replay_buffer_1.load(replay_buffer_path_1)
    timer_stop = time.time()

    print("Time episode: " + str(timer_stop - timer_start) + " seconds")
    print("Time episode: " + str((timer_stop - timer_start) / 60) + " minutes")
    print("Size_1 : " + str(replay_buffer_1.return_size()))
    print("")

    print("Starting shuffling...")
    replay_buffer_1.shuffle()
    print("Shuffle completed!")
    print("Starting saving...")
    # Reuse the path variable instead of duplicating the literal, so the
    # load and save targets cannot silently drift apart.
    replay_buffer_1.save(replay_buffer_path_1)

    print("Size_1 : " + str(replay_buffer_1.return_size()))
Esempio n. 2
0
def main():
    """Load a replay buffer, drop a fixed number of experiences, and save the result.

    Side effects: reads ``replay_buffer_path_1``, writes the cleaned buffer
    to ``replay_buffer_path``; prints timing and size information.
    """
    replay_memory_size = 400000
    replay_buffer_path_1 = "/home/pulver/Desktop/marine_descending/replay_buffer.pickle"
    replay_buffer_path = "/home/pulver/Desktop/marine_descending/replay_buffer_neutral_clean.pickle"
    replay_buffer_1 = ExperienceReplayBuffer(capacity=replay_memory_size)

    # Load the replay buffer from file, timing the operation.
    timer_start = time.time()
    replay_buffer_1.load(replay_buffer_path_1)
    timer_stop = time.time()

    print("Time episode: " + str(timer_stop - timer_start) + " seconds")
    print("Time episode: " + str((timer_stop - timer_start) / 60) + " minutes")
    print("Size_1 : " + str(replay_buffer_1.return_size()))
    print("")

    # Remove 27300 experiences, then persist the cleaned buffer to the new file.
    replay_buffer_1.pop_experience(27300)
    replay_buffer_1.save(replay_buffer_path)

    print("Size_1 : " + str(replay_buffer_1.return_size()))
Esempio n. 3
0
def main():
    """Remove every "descend" experience from a replay buffer and report statistics.

    Side effects: reads ``replay_buffer_path``, writes the cleaned buffer to
    a sibling ``*_clean.pickle`` file; prints before/after sizes and the
    removal percentage.
    """
    replay_memory_size = 400000
    replay_buffer_path = "/home/thebeast/Desktop/simulation_7/replay_buffer_positive.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)

    # Load the replay buffer from file.
    replay_buffer.load(replay_buffer_path)
    original_size = replay_buffer.return_size()
    print("Original size: " + str(original_size))
    # Drop every experience whose action is "descend"; clean_action returns
    # how many were removed.
    counter_removed = replay_buffer.clean_action("descend")
    print("New size: " + str(replay_buffer.return_size()))
    print("Number experiences removed:" + str(counter_removed))
    # float() keeps the division exact under Python 2 integer semantics.
    print("Percentage removed experiences: " + str(100 * counter_removed / float(original_size)) + "%")
    replay_buffer.save("/home/thebeast/Desktop/simulation_7/replay_buffer_positive_clean.pickle")
Esempio n. 4
0
def main():
    """Shuffle a large replay buffer and split it into ten equal sub-buffers.

    Side effects: reads the big shuffled buffer from disk and writes ten
    ``replay_buffer_negative_<i>.pickle`` files under ``root_path``.
    """
    # TODO: Variables to change based on paths and buffer size
    to_keep_original_experiences = 370664  #remove with a pop right the experiences in the original buffer (useful if corrupted)
    sub_buffer_size = 37066  #size of the sub-buffer created
    replay_memory_size = 500000
    root_path = "/media/pulver/6a87c355-c51f-4e43-9527-f0c5bd730ec4/pulver/Desktop/simulation_11/negative/"
    replay_buffer_path = "/media/pulver/6a87c355-c51f-4e43-9527-f0c5bd730ec4/pulver/Desktop/simulation_11/negative/replay_buffer_negative_big_shuffled_370664.pickle"

    # Load the main buffer
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    if os.path.isfile(replay_buffer_path):
        print("Replay buffer loading from file: " + str(replay_buffer_path))
        replay_buffer.load(replay_buffer_path)
        print("Size: " + str(replay_buffer.return_size()))
    else:
        print("Buffer replay does not exist!")

    # Optional pruning step (disabled): pop-right down to
    # to_keep_original_experiences before splitting.
    # to_prune = replay_buffer.return_size() - to_keep_original_experiences
    # replay_buffer.pop_experience(to_prune, side="right")

    print("Starting shuffling experiences...")
    replay_buffer.shuffle()
    print("Done!")

    print("Starting multisplit...")
    print("Sit back and relax, it may take a while...")
    # Ten destination files, each receiving sub_buffer_size experiences;
    # build the (path, size) pairs instead of repeating the tuple by hand.
    multibuffer_list = [
        (root_path + "replay_buffer_negative_" + str(i) + ".pickle",
         sub_buffer_size) for i in range(1, 11)
    ]
    replay_buffer.multisplit(multibuffer_list)
    print("Done!")
Esempio n. 5
0
def main():
    """Display the first ten experiences of a replay buffer for visual inspection.

    Loads the buffer from disk, fetches five consecutive experience pairs
    via ``debug_experience``, stacks the ten frames vertically and shows
    them in an OpenCV window until 'q' is pressed.
    """
    rospy.init_node("DeepReinforcedLanding")
    rospy.loginfo("----- Deep Reinforced Landing Node -----")
    replay_memory_size = 400000
    replay_buffer_path = "./replay_buffer_test.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)

    # Load the Replay buffer from file
    replay_buffer.load(replay_buffer_path)

    # ----------------- PRINT EXPERIENCE FOR TESTING -----------------
    index = 0
    increment = 1
    # Each debug_experience() call returns a pair of frames; gather five
    # consecutive pairs in a loop instead of five copy-pasted calls.
    images = []
    for i in range(5):
        frame_a, frame_b = replay_buffer.debug_experience(index=index +
                                                          increment * i)
        images.extend((frame_a, frame_b))
    image_vstack = np.vstack(images)
    cv2.imshow("experience 0-9 vertical stack", image_vstack)
    # Block until the user presses 'q', then close the window.
    while True:
        if cv2.waitKey(33) == ord('q'):
            cv2.destroyAllWindows()
            break
Esempio n. 6
0
def main():
    """Count the "descend" experiences with reward -1.0 in a replay buffer.

    Side effects: reads the buffer from ``replay_buffer_path``; prints the
    buffer size and the count of matching experiences.
    """
    replay_memory_size = 400000
    replay_buffer_path = "/home/thebeast/Desktop/buffer_to_merge/negative/buffer_negative.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)

    # Load the Replay buffer from file.
    replay_buffer.load(replay_buffer_path)
    original_size = replay_buffer.return_size()
    print("Original size: " + str(original_size))
    counter = replay_buffer.count_experiences("descend", -1.0)
    # Fixed typo in the report string ("wrond" -> "wrong").
    print("Tot wrong experiences: " + str(counter))
Esempio n. 7
0
def main():
    """
    Initialize and run the rospy node.

    Fills the replay buffer with experiences obtained by executing random
    actions in the simulated world, saving the buffer to disk every
    ``saving_every_tot_experiences`` experiences and once more when the
    buffer reaches ``replay_memory_size``.
    """
    timer_total_start = time.time()
    rospy.init_node("ReplayBufferFiller")
    rospy.loginfo("----- Replay Buffer Filler -----")

    replay_memory_size = 400000
    replay_buffer_path = "./replay_buffer.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    # Resume from a previously saved buffer when one exists on disk.
    if (os.path.isfile(replay_buffer_path) == True):
        print("Replay buffer loading from file: " + str(replay_buffer_path))
        replay_buffer.load(replay_buffer_path)
    else:
        print "Buffer replay does not exist!"

    # A second buffer, when present, is merged into the first.
    replay_buffer_path_2 = "./replay_buffer_2.pickle"
    replay_buffer_2 = ExperienceReplayBuffer(capacity=replay_memory_size)
    if (os.path.isfile(replay_buffer_path_2) == True):
        print("Replay buffer loading from file: " + str(replay_buffer_path_2))
        replay_buffer_2.load(replay_buffer_path_2)
        # Merge the two replay buffers in the first
        replay_buffer.append(replay_buffer_2)
    else:
        print "Second replay buffer does not exist!"

    # Create a subscriber for the greyscale image
    rospy.Subscriber("/quadrotor/ardrone/bottom/ardrone/bottom/image_raw",
                     ROSImage, image_callback)

    images_stack_size = 4
    tot_steps = 3000000  # finite-horizon simulation
    frame_preliminary = 0
    saving_every_tot_experiences = 2500
    noop_time = 2.0  # pause in seconds between actions
    steps_per_episodes = 20
    #saving_every_tot_experiences = 450  #TODO SET TO 250 JUST FOR TEST
    #r = rospy.Rate(10)  # 10hz
    num_ground_plane = 70
    # Frames collected per ground texture before switching to the next one.
    frame_per_ground_plane = int(replay_memory_size / num_ground_plane)
    actual_ground_index = 0

    episode = 1
    while True:
        # Stop collecting once the buffer is full.
        if replay_buffer.return_size() >= replay_memory_size:
            break

        # Reset the ground texture every frame_per_ground_plane frames.
        ground_index = replay_buffer.return_size() / frame_per_ground_plane
        if ground_index != actual_ground_index:
            generate_new_world(ground_index)
        actual_ground_index = ground_index

        cumulated_reward = 0
        print ""
        print "Preliminary Episode: " + str(episode)
        # Reset UAV at random pose
        reset_pose()
        send_action('stop')
        rospy.sleep(1.0)
        #get_image()
        image_t = _last_image
        # When the replay buffer is empty, fill it with the same picture 4
        # times
        image_t = np.stack([image_t] * images_stack_size,
                           axis=2)  # create a stack of X images
        timer_start = time.time()
        actual_time = rospy.get_rostime()
        rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
        frame_episode = 0
        for step in range(tot_steps):
            # Execute a random action in the world and observe the reward and
            # state_t1.
            action = get_random_action()
            #print "Action taken: " + action
            send_action(action)
            rospy.sleep(noop_time)
            # Acquire a new frame and convert it in a numpy array
            #get_image()
            image_t1 = _last_image
            # Get the reward and done status
            done_reward = get_done_reward()
            reward = done_reward.reward
            done = done_reward.done
            # Calculate the new cumulated_reward
            cumulated_reward += reward
            # state_t1, reward, done, info = env.step(action)
            image_t1 = np.expand_dims(image_t1, 2)
            # stack the images: drop the oldest frame, append the new one
            image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
            # Store the experience in the replay buffer
            replay_buffer.add_experience(image_t, action, reward, image_t1,
                                         done)
            frame_preliminary += 1  # To call every time a frame is obtained
            image_t = image_t1
            timer_episode_stop = time.time()
            frame_episode += 1
            send_action("stop")
            # Force episode end after a fixed number of frames.
            if frame_episode >= steps_per_episodes:
                done = True
            # Save the buffer every saving_every_tot_experiences experiences
            if replay_buffer.return_size() % saving_every_tot_experiences == 0:
                timer_saving_start = time.time()
                print "Saving the replay buffer in: " + replay_buffer_path
                print "Sit back and relax, it may take a while..."
                replay_buffer.save(replay_buffer_path)
                timer_saving_stop = time.time()
                print "Done!"
                print "Time to save the buffer: " + str(
                    timer_saving_stop - timer_saving_start) + " seconds"
                print "Time to save the buffer: " + str(
                    (timer_saving_stop - timer_saving_start) / 60) + " minutes"
            if done:
                # Episode bookkeeping: report timings and buffer statistics.
                episode += 1
                timer_stop = time.time()
                actual_time = rospy.get_rostime()
                rospy_stop_time = actual_time.secs + actual_time.nsecs / 1000000000.0
                rospy_time_elapsed = rospy_stop_time - rospy_start_time
                print "Replay Buffer Size: " + str(replay_buffer.return_size(
                )) + " out of " + str(replay_memory_size)
                print "Replay buffer memory uses (KB): " + str(
                    replay_buffer.return_size_byte(value="kilobyte"))
                print "Replay buffer memory uses (MB): " + str(
                    replay_buffer.return_size_byte(value="megabyte"))
                print "Replay buffer memory uses (GB): " + str(
                    replay_buffer.return_size_byte(value="gigabyte"))
                print "Frame counter: " + str(frame_preliminary)
                print "Time episode: " + str(timer_stop -
                                             timer_start) + " seconds"
                print("Ros time episode: " + str(rospy_time_elapsed) +
                      " seconds")
                print "Replay Buffer experiences: " + str(
                    replay_buffer.return_size())
                if cumulated_reward >= 0:
                    rospy.logwarn("Positive reward obtained!")
                print "Cumulated reward: " + str(cumulated_reward)
                print "Episode finished after {} timesteps".format(step + 1)
                break

    timer_total_stop = time.time()
    print "Total time simulation: " + str(
        (timer_total_stop - timer_total_start) / 60.0) + " minutes"
    print "Total time simulation: " + str(
        (timer_total_stop - timer_total_start) / 3600.0) + " hours"
    # Once the buffer is filled, save it to disk
    timer_saving_start = time.time()
    print "Saving the replay buffer in: " + replay_buffer_path
    print "Sit back and relax, it may take a while..."
    replay_buffer.save(replay_buffer_path)
    print "Done!"
    timer_saving_stop = time.time()
    print "Time to save the buffer: " + str(timer_saving_stop -
                                            timer_saving_start) + " seconds"
    print "Time to save the buffer: " + str(
        (timer_saving_stop - timer_saving_start) / 60) + " minutes"
    # Shutdown the node
    rospy.signal_shutdown("Rospy Shutdown!")
Esempio n. 8
0
def main():
    """Split a fixed number of experiences out of one replay buffer into another.

    Side effects: reads buffer 1 from disk, calls ``split`` with both file
    paths, then reloads buffer 1; prints timing and the resulting sizes.
    """
    replay_memory_size = 400000
    replay_buffer_path_1 = "/home/pulver/Desktop/replay_buffer_positive_soil.pickle"
    replay_buffer_path_2 = "/home/pulver/Desktop/new_replay_buffer.pickle"
    split_experiences = 3000
    replay_buffer_1 = ExperienceReplayBuffer(capacity=replay_memory_size)
    replay_buffer_2 = ExperienceReplayBuffer(capacity=replay_memory_size)

    # Load the replay buffer from file, timing the operation.
    timer_start = time.time()
    replay_buffer_1.load(replay_buffer_path_1)
    timer_stop = time.time()

    print("Time episode: " + str(timer_stop - timer_start) + " seconds")
    print("Time episode: " + str((timer_stop - timer_start) / 60) + " minutes")
    print("Size_1 : " + str(replay_buffer_1.return_size()))
    print("")

    print("Starting splitting...")
    # Move split_experiences experiences from buffer 1 into buffer 2.
    # NOTE(review): split() presumably persists both buffers to the given
    # paths — the reload below suggests so; confirm against its definition.
    replay_buffer_1.split(replay_buffer_path_1, replay_buffer_2,
                          replay_buffer_path_2, split_experiences)
    print("Splitting completed!")
    print("Starting saving...")
    # Reload buffer 1 so its in-memory state matches what split() wrote.
    replay_buffer_1.load(replay_buffer_path_1)
    print("Size_1 : " + str(replay_buffer_1.return_size()))
    print("Size_2 : " + str(replay_buffer_2.return_size()))
Esempio n. 9
0
def main():
    """
    Initialize and run the rospy node.

    Collects experiences by executing random actions in the simulated
    world and routes each one into a shared buffer plus one of three
    reward-specific buffers (positive / negative / neutral), saving all
    four to disk every ``saving_every_tot_experiences`` experiences.
    """
    timer_total_start = time.time()
    rospy.init_node("ReplayBufferFiller")
    rospy.loginfo("----- Replay Buffer Filler -----")

    replay_memory_size = 20000
    replay_buffer_path = "./replay_buffer_shared.pickle"
    replay_buffer_path_neutral = "./replay_buffer_neutral.pickle"
    replay_buffer_path_positive = "./replay_buffer_positive.pickle"
    replay_buffer_path_negative = "./replay_buffer_negative.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    replay_buffer_neutral = ExperienceReplayBuffer(capacity=replay_memory_size)
    replay_buffer_positive = ExperienceReplayBuffer(
        capacity=replay_memory_size)
    replay_buffer_negative = ExperienceReplayBuffer(
        capacity=replay_memory_size)
    # Load the Replay buffer from file or accumulate experiences
    if (os.path.isfile(replay_buffer_path) == True):
        print("Replay buffer loading from file: " + str(replay_buffer_path))
        replay_buffer.load(replay_buffer_path)
    else:
        print('No buffer_1 found')

    if (os.path.isfile(replay_buffer_path_neutral) == True):
        print("Replay buffer loading from file: " +
              str(replay_buffer_path_neutral))
        replay_buffer_neutral.load(replay_buffer_path_neutral)
    else:
        print('No buffer_2 found')

    if (os.path.isfile(replay_buffer_path_positive) == True):
        print("Replay buffer loading from file: " +
              str(replay_buffer_path_positive))
        replay_buffer_positive.load(replay_buffer_path_positive)
    else:
        # NOTE(review): message says buffer_2 but this branch is for the
        # positive buffer — likely a copy-paste slip in the log text.
        print('No buffer_2 found')

    if (os.path.isfile(replay_buffer_path_negative) == True):
        print("Replay buffer loading from file: " +
              str(replay_buffer_path_negative))
        replay_buffer_negative.load(replay_buffer_path_negative)
    else:
        # NOTE(review): same copy-paste slip (this is the negative buffer).
        print('No buffer_2 found')

    # Create a subscriber for the greyscale image
    rospy.Subscriber("/quadrotor/ardrone/bottom/ardrone/bottom/image_raw",
                     ROSImage, image_callback)

    images_stack_size = 4
    tot_steps = 3000000  # finite-horizon simulation
    frame_preliminary = 0

    saving_every_tot_experiences = 250
    is_buffer_saved = True
    noop_time = 2.0  # pause in seconds between actions
    steps_per_episodes = 30
    total_experience_counter = 0.0
    episode = 0
    start_episode = 1
    tot_episodes = 20000
    wrong_altitude = False
    quadrotor_pose = ModelState()
    quadrotor_pose.model_name = "quadrotor"
    quadrotor_pose.reference_frame = "world"
    for episode in range(start_episode, tot_episodes):
        cumulated_reward = 0
        print ""
        print "Preliminary Episode: " + str(episode)
        # print "Ground counter : " + str(ground_counter)
        # Reset UAV at random pose
        reset_pose()
        send_action('stop')
        rospy.sleep(3.0)
        # get_image()
        image_t = _last_image
        # When the replay buffer is empty, fill it with the same picture 4
        # times
        image_t = np.stack([image_t] * images_stack_size,
                           axis=2)  # create a stack of X images
        timer_start = time.time()
        actual_time = rospy.get_rostime()
        rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
        frame_episode = 0

        done_reward = get_done_reward()
        update_quadrotor_pose(quadrotor_pose, done_reward)

        for step in range(tot_steps):
            # Execute a random action in the world and observe the reward and
            # state_t1.
            action = get_random_action()
            send_action(action)
            # "descend" gets extra settling time before the next frame.
            if action == "descend":
                rospy.sleep(5.0)
                send_action("stop")
                rospy.sleep(1.0)
            else:
                rospy.sleep(noop_time)
            # Acquire a new frame and convert it in a numpy array
            image_t1 = _last_image
            done_reward = get_done_reward()
            # NOTE: moved here to fix problem with baricenter (partially
            # reduced)
            send_action("stop")

            # Get the reward and done status

            reward = done_reward.reward
            done = done_reward.done
            print "Step(" + str(
                step) + "), Action: " + action + ", Altitude: " + str(
                    done_reward.z) + ", Reward: " + str(reward)
            wrong_altitude = done_reward.wrong_altitude
            if wrong_altitude == True:
                rospy.logerr("[ERROR] Wrong altitude!")
            # Calculate the new cumulated_reward
            cumulated_reward += reward
            # state_t1, reward, done, info = env.step(action)
            image_t1 = np.expand_dims(image_t1, 2)
            # stack the images: drop the oldest frame, append the new one
            image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
            # Route the experience by reward: positive and -1.0 rewards are
            # only expected for "descend" (anything else is logged as an
            # error); every other reward goes to the neutral buffer.
            if reward > 0:
                if action == "descend":
                    replay_buffer_positive.add_experience(
                        image_t, action, reward, image_t1, done)
                    is_buffer_saved = False
                    # pass
                else:
                    rospy.logerr(
                        "[POSITIVE]Wrong action for positive reward: %s",
                        action)
            elif reward == -1.0:
                if action == "descend":
                    replay_buffer_negative.add_experience(
                        image_t, action, reward, image_t1, done)
                    # pass
                else:
                    rospy.logerr(
                        "[NEGATIVE]Wrong action for negative reward: %s",
                        action)
            else:
                # pass
                replay_buffer_neutral.add_experience(image_t, action, reward,
                                                     image_t1, done)
            # Add all the experiences without distinction into the shared
            # buffer
            replay_buffer.add_experience(image_t, action, reward, image_t1,
                                         done)

            frame_preliminary += 1  # To call every time a frame is obtained
            total_experience_counter += 1
            image_t = image_t1
            timer_episode_stop = time.time()
            frame_episode += 1
            update_quadrotor_pose(quadrotor_pose, done_reward)

            # rospy.sleep(2.0) #NOTE: fix the descend bug affecting the
            # altitude
            if frame_episode >= steps_per_episodes:
                done = True
            # Save all four buffers every saving_every_tot_experiences
            # experiences.
            # if replay_buffer_positive.return_size() %
            # saving_every_tot_experiences == 0 and is_buffer_saved == False:
            if total_experience_counter % saving_every_tot_experiences == 0:
                timer_start = time.time()
                print("")
                print("Saving the replay buffer in: " + replay_buffer_path)
                print("Sit back and relax, it may take a while...")
                replay_buffer.save(replay_buffer_path)
                timer_stop = time.time()
                print "Time episode: " + str(timer_stop -
                                             timer_start) + " seconds"
                print "Time episode: " + str(
                    (timer_stop - timer_start) / 60) + " minutes"
                print("Done!")
                timer_start = time.time()
                print("")
                print("Saving the replay buffer in: " +
                      replay_buffer_path_neutral)
                print("Sit back and relax, it may take a while...")
                replay_buffer_neutral.save(replay_buffer_path_neutral)
                timer_stop = time.time()
                print "Time episode: " + str(timer_stop -
                                             timer_start) + " seconds"
                print "Time episode: " + str(
                    (timer_stop - timer_start) / 60) + " minutes"
                print("Done!")
                print("")
                print("Saving the replay buffer in: " +
                      replay_buffer_path_positive)
                print("Sit back and relax, it may take a while...")
                replay_buffer_positive.save(replay_buffer_path_positive)
                timer_stop = time.time()
                print "Time episode: " + str(timer_stop -
                                             timer_start) + " seconds"
                print "Time episode: " + str(
                    (timer_stop - timer_start) / 60) + " minutes"
                print("Done!")
                print("")
                print("Saving the replay buffer in: " +
                      replay_buffer_path_negative)
                print("Sit back and relax, it may take a while...")
                replay_buffer_negative.save(replay_buffer_path_negative)
                timer_stop = time.time()
                print "Time episode: " + str(timer_stop -
                                             timer_start) + " seconds"
                print "Time episode: " + str(
                    (timer_stop - timer_start) / 60) + " minutes"
                print("Done!")
                print("")
                is_buffer_saved = True
            if done:
                # Episode bookkeeping: report timings and buffer sizes.
                # NOTE(review): incrementing the for-loop variable has no
                # effect on the range() iteration sequence.
                episode += 1
                timer_stop = time.time()
                actual_time = rospy.get_rostime()
                rospy_stop_time = actual_time.secs + actual_time.nsecs / 1000000000.0
                rospy_time_elapsed = rospy_stop_time - rospy_start_time
                print "Replay Buffer Size: " + str(replay_buffer.return_size(
                )) + " out of " + str(replay_memory_size)
                print "Replay Buffer Neutral Size: " + str(
                    replay_buffer_neutral.return_size()) + " out of " + str(
                        replay_memory_size)
                print "Replay Buffer Positive Size: " + str(
                    replay_buffer_positive.return_size()) + " out of " + str(
                        replay_memory_size)
                print "Replay Buffer Negative Size: " + str(
                    replay_buffer_negative.return_size()) + " out of " + str(
                        replay_memory_size)
                print "Frame counter: " + str(frame_preliminary)
                print "Time episode: " + str(timer_stop -
                                             timer_start) + " seconds"
                print("Ros time episode: " + str(rospy_time_elapsed) +
                      " seconds")
                if cumulated_reward >= 0:
                    rospy.logwarn("Positive reward obtained!")
                print "Cumulated reward: " + str(cumulated_reward)
                print "Episode finished after {} timesteps".format(step + 1)
                break
Esempio n. 10
0
def main():
    """Load four replay buffers (shared/positive/negative/neutral) and print
    their sizes and per-experience counters for comparison.

    Side effects: reads four pickle files from disk; prints timing, sizes,
    and the output of each buffer's count_experience().
    """
    replay_memory_size = 400000
    root_path = "/home/pulver/Desktop/buffers_comparison/"
    # (label, path) pairs drive the loading and reporting loops below,
    # replacing four copy-pasted sections.
    buffer_specs = [
        ("shared", root_path + "replay_buffer_shared.pickle"),
        ("positive", root_path + "replay_buffer_positive.pickle"),
        ("negative", root_path + "replay_buffer_negative.pickle"),
        ("neutral", root_path + "replay_buffer_neutral.pickle"),
    ]

    # Load every buffer from disk, timing the whole operation.
    timer_start = time.time()
    buffers = []
    for label, path in buffer_specs:
        buf = ExperienceReplayBuffer(capacity=replay_memory_size)
        buf.load(path)
        buffers.append((label, buf))
    timer_stop = time.time()

    print("Time episode: " + str(timer_stop - timer_start) + " seconds")
    print("Time episode: " + str((timer_stop - timer_start) / 60) + " minutes")
    for label, buf in buffers:
        print("Size " + label + " buffer: " + str(buf.return_size()))
    print("")

    #----------------- PRINT EXPERIENCE COUNTER -----------------
    for label, buf in buffers:
        print(label.capitalize() + " buffer")
        buf.count_experience()
        print("")
Esempio n. 11
0
def main():
    """Merge a second replay buffer into the first and save the result.

    Side effects: reads both pickle files, appends the second buffer's
    experiences to the first, and overwrites the first buffer's file.

    Fixes relative to the original: the trailing unconditional merge block
    ended in ``eplay_buffer.save(...)`` — a NameError typo on a
    mis-indented line that made the whole file unparseable — and it
    repeated the merge already performed inside the guarded branch (and
    would crash when the second file was absent). The duplicated tail is
    removed; the guarded merge+save is kept.
    """
    replay_memory_size = 100000
    replay_buffer_path = "/home/pulver/Desktop/simulation_11/replay_buffer_negative_sand_laptop.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    if os.path.isfile(replay_buffer_path):
        print("Replay buffer loading from file: " + str(replay_buffer_path))
        replay_buffer.load(replay_buffer_path)
        print("Size: " + str(replay_buffer.return_size()))
    else:
        print("Buffer replay does not exist!")

    # Load the second replay buffer
    replay_buffer_positive_path = "/home/pulver/Desktop/simulation_11/replay_buffer_negative_sand.pickle"
    replay_buffer_positive = ExperienceReplayBuffer(capacity=replay_memory_size)
    if os.path.isfile(replay_buffer_positive_path):
        print("Replay buffer loading from file: " + str(replay_buffer_positive_path))
        replay_buffer_positive.load(replay_buffer_positive_path)
        # Merge the second buffer's experiences into the first, straight
        # from its file, then persist the merged result.
        replay_buffer.append_from_file(replay_buffer_positive_path)
        print("Merge done! New size: " + str(replay_buffer.return_size()))
        print("Saving the replay buffer in: " + replay_buffer_path)
        print("Sit back and relax, it may take a while...")
        replay_buffer.save(replay_buffer_path)
        print("Done!")
    else:
        print("Second replay buffer does not exist!")
def main():
    """
    Initialize and run the rospy node.

    Endless data-collection loop for a quadrotor replay buffer: the world
    ground plane is re-randomized periodically, the UAV is reset, random
    actions are executed, and each (state, action, reward, next_state, done)
    experience with a neutral reward is stored in a replay buffer that is
    periodically pickled to disk.  Runs until the process is killed.
    """
    timer_total_start = time.time()
    rospy.init_node("ReplayBufferFiller")
    rospy.loginfo("----- Replay Buffer Filler -----")

    # Ground-plane texture names used to randomize the simulated world.
    ground_list = [
        "water1", "water2", "water3", "water4", "water5", "water6", "water7",
        "water8", "water9", "water10"
    ]

    replay_memory_size = 400000
    replay_buffer_path = "./replay_buffer.pickle"
    # replay_buffer_path_positive = "./replay_buffer_positive.pickle"
    # replay_buffer_path_negative = "./replay_buffer_negative.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    # replay_buffer_positive = ExperienceReplayBuffer(capacity=replay_memory_size)
    # replay_buffer_negative = ExperienceReplayBuffer(capacity=replay_memory_size)
    # Load the Replay buffer from file or accumulate experiences
    if (os.path.isfile(replay_buffer_path) == True):
        print("Replay buffer loading from file: " + str(replay_buffer_path))
        replay_buffer.load(replay_buffer_path)
    else:
        print('No buffer_1 found')

    # if(os.path.isfile(replay_buffer_path_positive) == True):
    #     print("Replay buffer loading from file: " +
    #           str(replay_buffer_path_positive))
    #     replay_buffer_positive.load(replay_buffer_path_positive)
    # else:

#     print('No buffer_2 found')

# if(os.path.isfile(replay_buffer_path_negative) == True):
#     print("Replay buffer loading from file: " +
#           str(replay_buffer_path_negative))
#     replay_buffer_negative.load(replay_buffer_path_negative)
# else:
#     print('No buffer_2 found')

# Create a subscriber for the greyscale image
# (image_callback is expected to update the module-level _last_image)
    rospy.Subscriber("/quadrotor/ardrone/bottom/ardrone/bottom/image_raw",
                     ROSImage, image_callback)

    images_stack_size = 4
    tot_steps = 3000000  # finite-horizont simulation
    frame_preliminary = 0

    saving_every_tot_experiences = 2500
    is_buffer_saved = True

    noop_time = 2.0  # pause in seconds between actions
    steps_per_episodes = 30
    #saving_every_tot_experiences = 450  #TODO SET TO 250 JUST FOR TEST
    #r = rospy.Rate(10)  # 10hz
    num_ground_plane = 15
    frame_per_ground_plane = int(replay_memory_size / num_ground_plane)
    # NOTE(review): the line below overrides the computed value above —
    # presumably an intentional tuning constant; confirm before changing.
    frame_per_ground_plane = 3125  #!M positive / 4 classes / 10 grounds / 8 transformations
    actual_ground_index = 0
    episode_per_ground = 50
    #ground_counter = replay_buffer_positive.return_size() / frame_per_ground_plane
    ground_counter = 1
    positive_experience_counter = 0
    positive_experience_print_episode = 50
    old_positive_experience_counter = 0
    total_experience_counter = 0.0
    old_total_experience_counter = 0.0001
    episode = 1
    wrong_altitude = False
    quadrotor_pose = ModelState()
    quadrotor_pose.model_name = "quadrotor"
    quadrotor_pose.reference_frame = "world"
    # Acquisition loop: one iteration per preliminary episode; never exits.
    while True:
        # if replay_buffer_positive.return_size() >= replay_memory_size:
        #     break

        # if replay_buffer_positive.return_size() <= ground_counter * frame_per_ground_plane and episode != 1:
        #     pass
        # else:
        #     print ground_counter
        #     generate_new_world(ground_list, ground_counter)
        #     ground_counter = ground_counter + 1
        # Regenerate the world with a new random ground every
        # episode_per_ground episodes (and always on the first episode).
        if (ground_counter < episode_per_ground) and episode != 1:
            ground_counter = ground_counter + 1
        else:
            ground = choose_random_ground(ground_list)
            generate_new_world(ground, ground_list)
            ground_counter = 1

        cumulated_reward = 0
        print ""
        print "Preliminary Episode: " + str(episode)
        print "Ground counter value: " + str(ground_counter)
        # Reset UAV at random pose
        reset_pose()
        send_action('stop')
        rospy.sleep(3.0)
        #get_image()
        image_t = _last_image
        # When the replay buffer is empty, fill it with the same picture 4
        # times
        image_t = np.stack([image_t] * images_stack_size,
                           axis=2)  # create a stack of X images
        timer_start = time.time()
        actual_time = rospy.get_rostime()
        rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
        frame_episode = 0

        done_reward = get_done_reward()
        update_quadrotor_pose(quadrotor_pose, done_reward)

        # Inner loop: execute random actions until the episode is done
        # (crash/landing reported by the simulator, or step budget reached).
        for step in range(tot_steps):
            # Execute a random action in the world and observe the reward and
            # state_t1.
            action = get_random_action()
            send_action(action)
            if action == "descend":
                # setpoint = round( quadrotor_pose.pose.position.z ) - 0.8
                # while True:
                #     done_reward = get_done_reward()
                #     update_quadrotor_pose(quadrotor_pose, done_reward)
                #     if quadrotor_pose.pose.position.z < setpoint + 0.05 and quadrotor_pose.pose.position.z > setpoint - 0.05:
                #         print "Setpoint: " + str(setpoint)
                #         send_action("stop")
                #         rospy.sleep(2.0)
                #         break
                # Descend needs extra settling time before stopping.
                rospy.sleep(5.0)
                send_action("stop")
                rospy.sleep(1.0)
                #quadrotor_pose.pose.position.z = adjust_altitude(quadrotor_pose.pose.position.z)
                #set_pose(quadrotor_pose)
            else:
                #print "Action taken: " + action
                #send_action(action)
                rospy.sleep(noop_time)
            # Acquire a new frame and convert it in a numpy array
            image_t1 = _last_image
            done_reward = get_done_reward()
            send_action(
                "stop"
            )  #NOTE: moved here to fix problem with baricenter (partially reduced)

            # Get the reward and done status

            reward = done_reward.reward
            done = done_reward.done
            print "Step(" + str(
                step) + "), Action: " + action + ", Altitude: " + str(
                    done_reward.z) + ", Reward: " + str(reward)
            wrong_altitude = done_reward.wrong_altitude
            if wrong_altitude == True:
                rospy.logerr("[ERROR] Wrong altitude!")
            # Calculate the new cumulated_reward
            cumulated_reward += reward
            # state_t1, reward, done, info = env.step(action)
            image_t1 = np.expand_dims(image_t1, 2)
            # stack the images
            image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
            # Store the experience in the replay buffer.
            # Only neutral-reward experiences are kept here; positive and
            # negative ones are deliberately discarded (buffers commented out),
            # but a non-"descend" action producing them is logged as an error.
            if reward > 0:
                if action == "descend":
                    # replay_buffer_positive.add_experience(image_t, action, reward, image_t1, done)
                    # is_buffer_saved = False
                    pass
                else:
                    rospy.logerr(
                        "[POSITIVE]Wrong action for positive reward: %s",
                        action)
            elif reward == -1.0:
                if action == "descend":
                    # replay_buffer_negative.add_experience(image_t, action, reward, image_t1, done)
                    pass
                else:
                    rospy.logerr(
                        "[NEGATIVE]Wrong action for negative reward: %s",
                        action)
            else:
                # pass
                replay_buffer.add_experience(image_t, action, reward, image_t1,
                                             done)
            frame_preliminary += 1  # To call every time a frame is obtained
            total_experience_counter += 1
            image_t = image_t1
            timer_episode_stop = time.time()
            frame_episode += 1
            update_quadrotor_pose(quadrotor_pose, done_reward)

            #rospy.sleep(2.0) #NOTE: fix the descend bug affecting the altitude
            if frame_episode >= steps_per_episodes:
                done = True
            # Save the buffer every saving_every_tot_experiences experiences
            # if replay_buffer_positive.return_size() % saving_every_tot_experiences == 0 and is_buffer_saved == False:
            if replay_buffer.return_size() % saving_every_tot_experiences == 0:
                timer_start = time.time()
                print("")
                print("Saving the replay buffer in: " + replay_buffer_path)
                print("Sit back and relax, it may take a while...")
                replay_buffer.save(replay_buffer_path)
                timer_stop = time.time()
                print "Time episode: " + str(timer_stop -
                                             timer_start) + " seconds"
                print "Time episode: " + str(
                    (timer_stop - timer_start) / 60) + " minutes"
                print("Done!")
                # timer_start = time.time()
                # print("")
                # print("Saving the replay buffer in: " + replay_buffer_path_positive)
                # print("Sit back and relax, it may take a while...")
                # replay_buffer_positive.save(replay_buffer_path_positive)
                # timer_stop = time.time()
                # print "Time episode: " + str(timer_stop - timer_start) + " seconds"
                # print "Time episode: " + str((timer_stop - timer_start) / 60) + " minutes"
                # print("Done!")
                # print("")
                # print("Saving the replay buffer in: " + replay_buffer_path_negative)
                # print("Sit back and relax, it may take a while...")
                # replay_buffer_negative.save(replay_buffer_path_negative)
                # timer_stop = time.time()
                # print "Time episode: " + str(timer_stop - timer_start) + " seconds"
                # print "Time episode: " + str((timer_stop - timer_start) / 60) + " minutes"
                # print("Done!")
                # print("")
                # is_buffer_saved = True
            if done:
                # Episode summary: buffer fill level and wall-clock/ROS timing.
                episode += 1
                timer_stop = time.time()
                actual_time = rospy.get_rostime()
                rospy_stop_time = actual_time.secs + actual_time.nsecs / 1000000000.0
                rospy_time_elapsed = rospy_stop_time - rospy_start_time
                print "Replay Buffer Size: " + str(replay_buffer.return_size(
                )) + " out of " + str(replay_memory_size)
                # print "Replay Buffer Positive Size: " + str(replay_buffer_positive.return_size()) + " out of " + str(replay_memory_size)
                # print "Replay Buffer Negative Size: " + str(replay_buffer_negative.return_size()) + " out of " + str(replay_memory_size)
                print "Frame counter: " + str(frame_preliminary)
                print "Time episode: " + str(timer_stop -
                                             timer_start) + " seconds"
                print("Ros time episode: " + str(rospy_time_elapsed) +
                      " seconds")
                if cumulated_reward >= 0:
                    rospy.logwarn("Positive reward obtained!")
                print "Cumulated reward: " + str(cumulated_reward)
                print "Episode finished after {} timesteps".format(step + 1)
                break

    # timer_total_stop = time.time()
    # print "Total time simulation: " + str((timer_total_stop - timer_total_start) / 60.0) + " minutes"
    # print "Total time simulation: " + str((timer_total_stop - timer_total_start) / 3600.0) + " hours"
    # # Once the buffer is filled, save it to disk
    # timer_saving_start = time.time()
    # print "Saving the replay buffer in: " + replay_buffer_positive_path
    # print "Sit back and relax, it may take a while..."
    # replay_buffer_positive.save(replay_buffer_positive_path)
    # print "Done!"
    # timer_saving_stop = time.time()
    # print "Time to save the buffer: " + str(timer_saving_stop - timer_saving_start) + " seconds"
    # print "Time to save the buffer: " + str((timer_saving_stop - timer_saving_start) / 60) + " minutes"
    # timer_saving_start = time.time()
    # print "Saving the replay buffer in: " + replay_buffer_negative_path
    # print "Sit back and relax, it may take a while..."
    # replay_buffer_negative.save(replay_buffer_negative_path)
    # print "Done!"
    # timer_saving_stop = time.time()
    # print "Time to save the buffer: " + str(timer_saving_stop - timer_saving_start) + " seconds"
    # print "Time to save the buffer: " + str((timer_saving_stop - timer_saving_start) / 60) + " minutes"
    # Shutdown the node
    rospy.signal_shutdown("Rospy Shutdown!")
Esempio n. 13
0
def main():

    replay_memory_size = 400000
    replay_buffer_path = "/media/pulver/Pulver HD/replay_buffer.pickle"
    #replay_buffer_path = "./replay_buffer.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)

    timer_start = time.time()
    # Load the Replay buffer from file or accumulate experiences
    replay_buffer.load(replay_buffer_path)
    timer_stop = time.time()

    print "Time episode: " + str(timer_stop - timer_start) + " seconds"
    print "Time episode: " + str((timer_stop - timer_start) / 60) + " minutes"
    print "Size : " + str(replay_buffer.return_size())
    print("")

    #----------------- PRINT EXPERIENCE FOR TESTING-----------------

    index = 97000
    increment = 1
    image, image_t1 = replay_buffer.debug_experience(index=index)
    image_t2, image_t3 = replay_buffer.debug_experience(index=index +
                                                        increment)
    image_t4, image_t5 = replay_buffer.debug_experience(index=index +
                                                        increment * 2)
    image_t6, image_t7 = replay_buffer.debug_experience(index=index +
                                                        increment * 3)
    image_t8, image_t9 = replay_buffer.debug_experience(index=index +
                                                        increment * 4)
    image_vstack = np.vstack(
        (image, image_t1, image_t2, image_t3, image_t4, image_t5, image_t6,
         image_t7, image_t8, image_t9))
    cv2.imshow("experience 0-9 vertical stack", image_vstack)

    index = 98000
    increment = 1
    image, image_t1 = replay_buffer.debug_experience(index=index)
    image_t2, image_t3 = replay_buffer.debug_experience(index=index +
                                                        increment)
    image_t4, image_t5 = replay_buffer.debug_experience(index=index +
                                                        increment * 2)
    image_t6, image_t7 = replay_buffer.debug_experience(index=index +
                                                        increment * 3)
    image_t8, image_t9 = replay_buffer.debug_experience(index=index +
                                                        increment * 4)
    image_vstack = np.vstack(
        (image, image_t1, image_t2, image_t3, image_t4, image_t5, image_t6,
         image_t7, image_t8, image_t9))
    cv2.imshow("experience 0-9 vertical stack - buffer 2", image_vstack)
    while True:
        if cv2.waitKey(33) == ord('q'):
            cv2.destroyAllWindows()
            break
Esempio n. 14
0
def main():
    replay_memory_size = 100000
    replay_buffer_path = "/home/pulver/Desktop/simulation_11/sand/replay_buffer_positive_sand.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    if (os.path.isfile(replay_buffer_path) == True):
        print("Replay buffer loading from file: " + str(replay_buffer_path))
        replay_buffer.load(replay_buffer_path)
        print "Size: " + str(replay_buffer.return_size())
    else:
        print "Buffer replay does not exist!"
        return

    # experience = replay_buffer.buffer[0]
    # image_t = experience[0]
    # action_t = experience[1]
    # reward_t = experience[2]
    # image_t1 = experience[3]
    # done_t1 = experience[4]

    # Modify the whole buffer or just add a single experience
    replay_buffer.rotate_and_push(rotations_list=[
        '90', '180', '270', 'vflip', 'vflip90', 'vflip180', 'vflip270'
    ],
                                  tot_elements=None)
    replay_buffer.shuffle()
    #replay_buffer.add_experience_and_rotate(image_t, action_t, reward_t, image_t1, done_t1, rotation_list=['90', '180', '270', 'vflip', 'vflip90', 'vflip180', 'vflip270'])
    print("Rotation done! New size: " + str(replay_buffer.return_size()))
    print("Saving the buffer...")
    replay_buffer.save(
        file_name=
        "/home/pulver/Desktop/simulation_11/sand/replay_buffer_positive_sand_rotated_"
        + str(replay_buffer.return_size()) + ".pickle")
    print("Done!")
Esempio n. 15
0
def main():

    #Load the main buffer
    replay_memory_size = 500000
    replay_buffer_path = "/home/pulver/Desktop/marine_descending/replay_buffer_192k.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    if (os.path.isfile(replay_buffer_path) == True):
        print("Replay buffer loading from file: " + str(replay_buffer_path))
        replay_buffer.load(replay_buffer_path)
        print "Size: " + str(replay_buffer.return_size())
    else:
        print "Buffer replay does not exist!"

    #Here we load all the mini-buffer needed later
    print("Loading the buffer list...")
    max_capacity = 200000
    path = "/home/pulver/Desktop/marine_descending/replay_buffer_194k.pickle"
    buffer_1 = ExperienceReplayBuffer(capacity=max_capacity)
    buffer_1.load(path)
    print "Size: " + str(buffer_1.return_size())

    # max_capacity = 100000
    # path = "/home/pulver/Desktop/marine_descending/replay_buffer_negative_8910.pickle"
    # buffer_2 = ExperienceReplayBuffer(capacity=max_capacity)
    # buffer_2.load(path)
    # print "Size: " + str(buffer_2.return_size())

    # max_capacity = 100000
    # path = "/home/pulver/Desktop/simulation_11/soil/replay_buffer_negative_soil.pickle"
    # buffer_3 = ExperienceReplayBuffer(capacity=max_capacity)
    # buffer_3.load(path)
    # print "Size: " + str(buffer_3.return_size())

    # max_capacity = 100000
    # path = "/home/pulver/Desktop/simulation_11/snow/replay_buffer_negative_snow.pickle"
    # buffer_4 = ExperienceReplayBuffer(capacity=max_capacity)
    # buffer_4.load(path)
    # print "Size: " + str(buffer_4.return_size())

    #max_capacity = 100000
    #path = "/home/pulver/Desktop/simulation_11/snow/replay_buffer_positive_snow_rotated_78200.pickle"
    #buffer_5 = ExperienceReplayBuffer(capacity=max_capacity)
    #buffer_5.load(path)

    #Here we append the file to the main buffer
    print("Starting multiappend...")
    print "Sit back and relax, it may take a while..."
    multibuffer_list = [buffer_1]  #, buffer_2]#, buffer_3, buffer_4]
    replay_buffer.multiappend(multibuffer_list)
    print "New size: " + str(replay_buffer.return_size())
    print("Done!")

    #In the end we shuffle and save
    print("Starting shuffling experiences...")
    replay_buffer.shuffle()
    print("Done!")

    print("Starting save buffer...")
    replay_buffer.save(
        "/home/pulver/Desktop/marine_descending/replay_buffer_neutral_final.pickle"
    )
    print("Done!")
Esempio n. 16
0
def main():
    
    replay_memory_size = 400000
    replay_buffer_path = "/home/pulver/Desktop/test/replay_buffer_positive.pickle"
    replay_buffer_path_2 = "/home/pulver/Desktop/replay_buffer_positive_rick.pickle"
    #replay_buffer_path = "./replay_buffer.pickle"
    replay_buffer = ExperienceReplayBuffer(capacity=replay_memory_size)
    replay_buffer_2 = ExperienceReplayBuffer(capacity=replay_memory_size)
    # Save the empty buffer
    if(os.path.isfile(replay_buffer_path_2) == True):  
        print "The second buffer exist"
    else:
        print "Buffer not existing. I am saving an empty one."
        replay_buffer_2.save(replay_buffer_path_2)
    
    timer_start = time.time()
    # Load the Replay buffer from file or accumulate experiences
    replay_buffer.load(replay_buffer_path)
    timer_stop = time.time()
    
    replay_buffer.copy_experience_to(replay_buffer_2, replay_buffer_path_2, 2000)
    replay_buffer_2.load(replay_buffer_path_2)

    print "Time episode: " + str(timer_stop - timer_start) + " seconds"                
    print "Time episode: " + str((timer_stop - timer_start) / 60) + " minutes"
    print "Size : " + str(replay_buffer.return_size())
    print "Size 2: " + str(replay_buffer_2.return_size())
    print("")