def main():
    """
    Main function for running the trained DQN policy network and visualising its activations during autonomous landing.
    """
    # ATTENTION: If you want to restore files from a previous simulation you
    # must pass valid values for these variables:
    policy_weights_path_1 = '/home/pulver/Desktop/episode_113250/policy/policy_checkpoint.ckp'
    root_images = "/home/pulver/Desktop/network_testing/" + \
        str(datetime.datetime.now().time()) + "/"
    # NOTE: openCV doesn't write in a folder that does not exist
    if not os.path.exists(root_images):
        os.makedirs(root_images)
    source = 3  # NOTE: 1 for real drone, 2 for gazebo, 3 for URL

    screen_width = 84  # original is 160
    screen_height = 84  # original is 210
    images_stack_size = 4
    # Use only the first 6 actions for this simulation
    # action_list = ['left', 'right', 'forward', 'backward', 'stop', 'descend']
    # 0 (left), 1 (right), 2 (forward), 3 (backward), 4 (stop), 5 (descend)
    tot_actions = 6
    batch_size = 32  # size of the experience batch

    rospy.init_node("DRLTrainingNode")
    rospy.loginfo("----- DRL Training Node -----")

    # Create a subscriber for the greyscale image

    # 1) Real drone
    if source == 1:
        rospy.Subscriber("/drl/grey_camera", ROSImage, image_callback)
    elif source == 2:
        # 2) Gazebo
        rospy.Subscriber("/quadrotor/ardrone/bottom/ardrone/bottom/image_raw",
                         ROSImage, image_callback, queue_size=30)  # Store the last 30 messages
    elif source == 3:
        # 3) URL
        # video_stream_url = 'http://10.188.34.59:8080/videofeed'
        video_stream_url = "http://10.5.5.9:8080/live/amba.m3u8"
        bytes = ''
    else:
        print "Insert a correct source value (1 for real drone, 2 for gazebo, 3 for URL)"

    images_stack_size = 4
    tot_steps = 3000000  # finite-horizon simulation

    r = rospy.Rate(1)  # 1 Hz

    # Init session and networks
    sess = tf.Session()
    summary_folder = ""  # if empty the summary is written in ./log/ + current time
    if(summary_folder == ""):
        tf_summary_writer = tf.summary.FileWriter(
            './log/' + str(datetime.datetime.now().time()), sess.graph)
    else:
        tf_summary_writer = tf.summary.FileWriter(
            summary_folder, sess.graph)
    policy_network_1 = QNetwork(sess, tot_actions=tot_actions, image_shape=(
        screen_width, screen_height, images_stack_size), batch_size=batch_size,
        network_name="policy_net")

    # Initialize all TensorFlow variables
    init = tf.global_variables_initializer()
    sess.run(init)

    # Load Neural Networks weights from memory if a valid checkpoint path is
    # passed
    if(policy_weights_path_1 != ""):
        print("Loading weights 1 from memory...")
        policy_network_1.load_weights(policy_weights_path_1)
    else:
        print("The networks path are empty.")
    #     sys.exit()

    if source == 3:
        state = acquire_frame(video_stream_url)
        time.sleep(1)
    state = _last_image
    # Save first image
    image_path = root_images + "image_0.png"
    cv2.imwrite(image_path, state)
    # 1 - Create a stack of X images
    image_t = np.stack([state] * images_stack_size, axis=2)
    matplotlib.interactive(True)
    # fig = plt.figure()
    f, (ax3, ax2) = plt.subplots(2, 1)
    f.tight_layout()
    # gs = gridspec.GridSpec(2, 2, width_ratios=[1, 1])
    # gs.update(left=0.05, right=0.95, wspace=0.05, hspace=0)
    # fig.set_size_inches(8, 4)
    f.patch.set_facecolor('gray')

    for step in range(1, 100000000):
        ##########################################
        ##                CAMERA                ##
        ##########################################
        pad_size = 1
        pad_value = 0
        # print "Shape image_t:" + str(np.shape(image_t))
        image = np.lib.pad(image_t[:, :, 0], (pad_size, pad_size),
                           'constant', constant_values=(pad_value, pad_value))
        for d in range(1, images_stack_size):
            image_stack_padded = np.lib.pad(
                image_t[:, :, d], (pad_size, pad_size), 'constant', constant_values=(pad_value, pad_value))
            image = np.append(image, image_stack_padded, axis=1)
        # print"shape: " + str(np.shape(image))
        # Plot in the first row the camera images
        # ax1 = plt.subplot(2, 1, 1)
        # ax1 = plt.subplot(gs[0, :])
        # but first clear the old one
        # ax1.clear()
        # ax1.axis("off")
        # specify greyscale instead of BGR
        # ax1.imshow(image, cmap='gray')
        #########################################

        # 2 - Forward in input to NN
        # action_distribution_1 = policy_network_1.return_action_distribution(
        # input_data=np.reshape(image_t, (1, 84, 84, images_stack_size)),
        # softmax=False)
        action_distribution_1, conv1, conv2, conv3 = policy_network_1.return_everything(
            input_data=np.reshape(image_t, (1, 84, 84, images_stack_size)), softmax=False)
        action = np.argmax(action_distribution_1)
        action = convert_action_int_to_str(action)
        print "######################"
        print "Action distribution: " + str(action_distribution_1)
        print "Action_1 selected: " + action
        # print "Shape action dis:" + str(np.shape(action_distribution_1))
        # print "Shape conv1:" + str(np.shape(conv1))
        # print "Shape conv2:" + str(np.shape(conv2))
        # print "Shape conv3:" + str(np.shape(conv3))

        ##########################################
        ##                FILTERS               ##
        ##########################################
        ax3.clear()
        ax3.axis("off")

        padding = np.ones((1, 21 * 16))
        # 1 Conv layer ###########################
        conv1 = np.reshape(conv1, (21, 21, 32))
        image_cv1_1 = np.reshape(conv1[:, :, 0:16], (21, 21 * 16), order='F')
        image_cv1_2 = np.reshape(conv1[:, :, 16:32], (21, 21 * 16), order='F')
        # image_cv1_1 = np.reshape(conv1[:, :, 0:8], (21, 21 * 8), order='F')
        # image_cv1_2 = np.reshape(conv1[:, :, 8:16], (21, 21 * 8), order='F')
        # image_cv1_3 = np.reshape(conv1[:, :, 16:24], (21, 21 * 8), order='F')
        # image_cv1_4 = np.reshape(conv1[:, :, 24:32], (21, 21 * 8), order='F')
        image_cv1 = np.concatenate(
            (padding, image_cv1_1, image_cv1_2), axis=0)
        # Save filters
        filter_path = root_images + "filters/step_" + \
            str(step) + "/"
        if not os.path.exists(filter_path):
            os.makedirs(filter_path)
        filter_path = filter_path + "conv_1.jpg"

        I = image_cv1
        I8 = (((I - I.min()) / (I.max() - I.min())) * 255.9).astype(np.uint8)
        image_cv1_resized = cv2.resize(I8, (84 * 4, 21 * 2),
                                       interpolation=cv2.INTER_AREA)
        img = Image.fromarray(image_cv1_resized)
        img.save(filter_path)

        # 2 Conv layer ###########################
        padding = np.zeros((1, 11 * 32))
        conv2 = np.reshape(conv2, (11, 11, 64))
        image_cv2_1 = np.reshape(conv2[:, :, 0:32], (11, 11 * 32), order='F')
        image_cv2_2 = np.reshape(conv2[:, :, 32:64], (11, 11 * 32), order='F')
        # image_cv2_3 = np.reshape(conv2[:, :, 16:24], (21, 21 * 8), order='F')
        # image_cv2_4 = np.reshape(conv2[:, :, 24:32], (21, 21 * 8), order='F')
        image_cv2 = np.concatenate(
            (padding, image_cv2_1, image_cv2_2), axis=0)
        # Save filters
        filter_path = root_images + "filters/step_" + \
            str(step) + "/"
        if not os.path.exists(filter_path):
            os.makedirs(filter_path)
        filter_path = filter_path + "conv_2.jpg"

        I = image_cv2
        I8 = (((I - I.min()) / (I.max() - I.min())) * 255.9).astype(np.uint8)
        image_cv2_resized = cv2.resize(I8, (84 * 4, 11 * 2),
                                       interpolation=cv2.INTER_AREA)
        img = Image.fromarray(image_cv2_resized)
        img.save(filter_path)

        # 3 Conv layer ###########################
        padding = np.ones((1, 11 * 32))
        conv3 = np.reshape(conv3, (11, 11, 64))
        image_cv3_1 = np.reshape(conv3[:, :, 0:32], (11, 11 * 32), order='F')
        image_cv3_2 = np.reshape(conv3[:, :, 32:64], (11, 11 * 32), order='F')
        # image_cv2_3 = np.reshape(conv2[:, :, 16:24], (21, 21 * 8), order='F')
        # image_cv2_4 = np.reshape(conv2[:, :, 24:32], (21, 21 * 8), order='F')
        image_cv3 = np.concatenate(
            (padding, image_cv3_1, image_cv3_2), axis=0)
        # Save filters
        filter_path = root_images + "filters/step_" + \
            str(step) + "/"
        if not os.path.exists(filter_path):
            os.makedirs(filter_path)
        filter_path = filter_path + "conv_3.jpg"

        I = image_cv3
        I8 = (((I - I.min()) / (I.max() - I.min())) * 255.9).astype(np.uint8)
        image_cv3_resized = cv2.resize(I8, (84 * 4, 11 * 2),
                                       interpolation=cv2.INTER_AREA)
        img = Image.fromarray(image_cv3_resized)
        img.save(filter_path)

        # Assemble final image ####################

        image_input = np.reshape(image_t, (84, 84 * 4), order='F')
        image_input_resized = cv2.resize(image, (84 * 4, 84),
                                         interpolation=cv2.INTER_AREA)
        final_image = np.concatenate(
            (image_input_resized, image_cv1_resized, image_cv2_resized, image_cv3_resized))

        # Plot the image
        # ax3 = plt.subplot(gs[0, :])

        # but first clear the old one

        # specify greyscale instead of BGR
        ax3 = plt.subplot(2, 1, 1)
        ax3.imshow(final_image, cmap='gray')
        ##########################################
        ##               HISTOGRAM              ##
        ##########################################
        # print np.shape(action_distribution_1)
        # print len(action_distribution_1[0])
        ind = np.arange(len(action_distribution_1[0]))
        # print ind
        width = 0.4
        # fig, ax = plt.subplots()
        # ax2 = plt.subplot(gs[1, :])
        ax2.clear()
        ax2 = plt.subplot(2, 1, 2)
        ax2.set_aspect(3)

        rects = ax2.bar(0.4 + ind, action_distribution_1[0],
                        width=width, color='r', alpha=0.4)
        rects[np.argmax(action_distribution_1)].set_color('g')
        ax2.set_xticks(0.4 + ind + width / 2)
        ax2.set_xticklabels(("left", "right", "forward",
                             "backward", "stop", "descend"))
        for i in ax2.xaxis.get_ticklabels():
            i.set_color("white")

        ax2.set_ylim([0, 1])

        # plt.xticks(fontsize=16)
        # plt.show()

        ##########################################

        # 3 - Send command and wait for translation
        raw_input("Press a button to acquire images...")

        # 4 - Acquire second frame and add to the stack
        if source == 3:
            global _last_image
            _last_image = acquire_frame(video_stream_url)
            time.sleep(1)
        image_t1 = _last_image
        # Save every acquired image
        image_path = root_images + "image_" + str(step) + ".png"
        cv2.imwrite(image_path, image_t1)
        image_t1 = np.expand_dims(image_t1, 2)
        image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)

        # 5 - Forward stack in input to the network
        image_t = image_t1
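
# A minimal sketch (an assumption, not the original implementation) of the
# acquire_frame() helper used above when source == 3: it grabs a single frame
# from the video stream URL, converts it to greyscale and resizes it to the
# 84x84 input the network expects. The original code keeps a `bytes` buffer,
# which suggests manual MJPEG parsing; cv2.VideoCapture is used here only for
# brevity.
import cv2


def acquire_frame(video_stream_url):
    capture = cv2.VideoCapture(video_stream_url)
    ret, frame = capture.read()
    capture.release()
    if not ret:
        return None
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    return cv2.resize(frame, (84, 84), interpolation=cv2.INTER_AREA)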

############################### Example 2 ###############################

def main():
    """
    Main function for testing the trained DQN network on the autonomous landing task.
    """
    # ATTENTION: If you want to restore files from a previous simulation you
    # must pass valid values for these variables:
    policy_weights_path = '/home/pulver/Desktop/simulation_8/checkpoint/episode_20500/policy/policy_checkpoint.ckp'
    summary_folder = ""  # if empty the summary is written in ./log/ + current time
    start_episode = 0  # change to last episode number
    frame_counter = 0  # change to last number of frames
    epsilon = 0.1
    screen_width = 84  # original is 160
    screen_height = 84  # original is 210
    images_stack_size = 4
    # Use only the first 6 actions for this simulation
    # action_list = ['left', 'right', 'forward', 'backward', 'stop', 'descend']
    # 0 (left), 1 (right), 2 (forward), 3 (backward), 4 (stop), 5 (descend)
    tot_actions = 6
    batch_size = 64  # size of the experience batch
    tot_steps = 1000  # finite-horizon simulation
    # high number in the original training script, since training can be stopped at any time
    tot_episodes = 210
    steps_per_episodes = 40  # expressed in number step
    noop_time = 2.0  # pause in seconds between actions

    num_ground_plane = 70
    actual_ground_index = 0
    # episode_per_ground specifies the number of episodes with the same ground plane
    ground_counter = 1
    episode_per_ground = 10

    timer_total_start = time.time()
    rospy.init_node("DRLTestNode")
    rospy.loginfo("----- DRL Test Node -----")

    # Create a subscriber for the greyscale image
    # rospy.Subscriber("/drl/grey_camera", ROSImage, image_callback)#TODO
    # restore default
    rospy.Subscriber(
        "/quadrotor/ardrone/bottom/ardrone/bottom/image_raw", ROSImage, image_callback,queue_size=30)  # Store the last 30 messages before discarding them

    images_stack_size = 4

    r = rospy.Rate(10)  # 10hz
    ground_list = ["asphalt11", "asphalt12", "asphalt13",
                   "brick11", "brick12", "brick13",
                   "grass11", "grass12", "grass13",
                   "pavement11", "pavement12", "pavement13",
                   "sand11", "sand12", "sand13",
                   "snow11", "snow12", "snow13",
                   "soil11", "soil12", "soil13"]

    # Init session and networks
    sess = tf.Session()
    if(summary_folder == ""):
        tf_summary_writer = tf.summary.FileWriter(
            './log/' + str(datetime.datetime.now().time()), sess.graph)
    else:
        tf_summary_writer = tf.summary.FileWriter(
            summary_folder, sess.graph)
    policy_network = QNetwork(sess, tot_actions=tot_actions, image_shape=(
        screen_width, screen_height, images_stack_size), batch_size=batch_size, network_name="policy_net")
    
    init = tf.global_variables_initializer()
    sess.run(init)

    
    if(policy_weights_path != ""):
        print("Loading weights from memory: " + str(policy_weights_path))
        policy_network.load_weights(policy_weights_path)
    else:
        print("The networks path are empty. Choose appropriate weights!")
        sys.exit()

    # start here
    for episode in range(start_episode, tot_episodes):
        # Reset the ground plane every episode_per_ground episodes
        ground_index = episode / episode_per_ground
        if ground_index != actual_ground_index or (episode == 0):
            clean_world(ground_list)
            generate_new_world(ground_index, ground_list)
        actual_ground_index = ground_index

        timer_start = time.time()
        actual_time = rospy.get_rostime()
        rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
        frame_episode = 0
        cumulated_reward = 0
        epsilon_used = 0
        send_action("takeoff")
        print("")
        print("Episode: " + str(episode))
        # 1-Accumulate the first state
        reset_pose()
        print "Reset pose!"
        send_action("stop")
        rospy.sleep(1.0)
#        get_image()
        state = _last_image
        # create a stack of X images
        image_t = np.stack([state] * images_stack_size, axis=2)

        for step in range(tot_steps):
            # 2- Get the action following epsilon-greedy or through policy network.
            # Here image_t is always ready and it can be given as input to
            # the network
            epsilon_used_bool = False
            if(np.random.random_sample(1) < epsilon):
                epsilon_used += 1
                action = get_random_action()
                epsilon_used_bool = True
            # Take the action from the policy network
            else:
                # Here image_t is always ready and it can be given as input to
                # the network
                action_distribution = policy_network.return_action_distribution(
                    input_data=np.reshape(image_t, (1, 84, 84, images_stack_size)), softmax=False)
                action = np.argmax(action_distribution)
                action = convert_action_int_to_str(action)
            #print action
            send_action(action)
            rospy.sleep(noop_time)
            # 3- Get the state_t1 repeating the action obtained at step-2 and add everything
            # in the replay buffer. Then set state_t = state_t1
            #get_image()
            image_t1 = _last_image
            send_action("stop")
            # If the action taken is to land and the UAV is inside the landing BB, done will be calculated accordingly
            done_reward = get_done_reward()
            reward = done_reward.reward
            done = done_reward.done
            if epsilon_used_bool:
                print " Step(" + str(step) + "), Action: " + action + ", Altitude: " + str(done_reward.z) + ", Reward: " + str(reward)
            else:
                print "#Step(" + str(step) + "), Action: " + action + ", Altitude: " + str(done_reward.z) + ", Reward: " + str(reward)
                # ['left', 'right', 'forward', 'backward', 'stop', 'descend']
                print(" left:" + str(action_distribution[0][0][0]) + "; right:" + str(action_distribution[0][0][1]) + "; forward:" + str(action_distribution[0][0][2]) + "; backward:" + str(action_distribution[0][0][3]) + "; stop:" + str(action_distribution[0][0][4])  + "; descend:" + str(action_distribution[0][0][5]))
            image_t1 = np.expand_dims(image_t1, 2)
            # stack the images
            image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
            frame_counter += 1  # To call every time a frame is obtained
            frame_episode += 1
            image_t = image_t1
            get_relative_pose(ground_index, episode, step, reward)
            cumulated_reward += reward
            # At every step, check whether more than 30 seconds have passed since the start.
            # If so, set done = True and end the episode
            timer_stop = time.time()
            # Stop the episode if the number of frame is more than a threshold
            if frame_episode >= steps_per_episodes:
                done = True
            # When the episode is done
            if done:
                timer_stop = time.time()
                actual_time = rospy.get_rostime()
                rospy_stop_time = actual_time.secs + actual_time.nsecs / 1000000000.0
                rospy_time_elapsed = rospy_stop_time - rospy_start_time
                print("Tot Frame counter: " + str(frame_counter))
                print("Time episode: " + str(timer_stop - timer_start) + " seconds")
                print("Ros time episode: " +
                      str(rospy_time_elapsed) + " seconds")
                if cumulated_reward >= 0:
                    rospy.logwarn("Positive reward obtained!")
                print("Cumulated reward: " + str(cumulated_reward))
                print("Episode finished after {} timesteps".format(step + 1))
                print("Epsilon used: " + str(epsilon_used) + " out of " + str(step + 1) + "(" + str(float((epsilon_used * 100.0) / (step + 1.0))) + "%)")
                sys.stdout.flush()
                time.sleep(5)
                break
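
# Minimal sketches (assumptions, not the original helpers) of the two action
# utilities used by the epsilon-greedy branch above: convert_action_int_to_str()
# maps the argmax index returned by the network to the action string expected
# by send_action(), while get_random_action() draws a uniformly random action.
# The list name below is illustrative only.
import random as STDrandom

ACTION_LIST = ['left', 'right', 'forward', 'backward', 'stop', 'descend']


def convert_action_int_to_str(action_index):
    return ACTION_LIST[action_index]


def get_random_action():
    return STDrandom.choice(ACTION_LIST)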

############################### Example 3 ###############################

def main():
    """
    Main function for testing the trained DQN network on the autonomous landing task.
    """

    # ground_list = ["water1",
    #             "water2",
    #             "water3",
    #             "water4",
    #             "water5",
    #             "water6",
    #             "water7",
    #             "water8",
    #             "water9",
    #             "water10"]

    # ground_list = ["water11", "water12", "water13",
    #    "water14", "water15", "water16",
    #    "water17", "water18", "water19"]

    ground_list = [
        "pavement11", "pavement12", "pavement5", "pavement7", "pavement14",
        "pavement15", "pavement16"
    ]
    # NOTE: the ckp path must be modified in the code below
    ckp_list = ["120750"]
    # ATTENTION: If you want to restore files from a previous simulation you
    # must pass valid values for these variables:
    # policy_weights_path =
    # '/home/pulver/Desktop/marine_descending/checkpoint/episode_94500/policy/policy_checkpoint.ckp'
    summary_folder = ""  # if empty the summary is written in ./log/ + current time
    start_episode = 0  # change to last episode number
    frame_counter = 0  # change to last number of frames

    screen_width = 84  # original is 160
    screen_height = 84  # original is 210
    images_stack_size = 4
    # Use only the first 6 actions for this simulation
    # action_list = ['left', 'right', 'forward', 'backward', 'stop', 'descend']
    # 0 (left), 1 (right), 2 (forward), 3 (backward), 4 (stop), 5 (descend)
    tot_actions = 6
    batch_size = 32  # size of the experience batch
    tot_steps = 1000  # finite-horizon simulation
    # high number in the original training script, since training can be stopped at any time

    steps_per_episodes = 40  # expressed in number step
    noop_time = 2.0  # pause in seconds between actions

    num_ground_plane = len(ground_list)
    actual_ground_index = 0
    # episode_per_ground specifies the number of episodes with the same ground
    # plane
    episode_per_ground = 10
    tot_episodes = episode_per_ground * num_ground_plane
    accuracy_ground = 0.0

    timer_total_start = time.time()
    rospy.init_node("DRLTestNode")
    rospy.loginfo("----- DRL Test Node -----")

    # Create a subscriber for the greyscale image
    # rospy.Subscriber("/drl/grey_camera", ROSImage, image_callback)#TODO
    # restore default
    rospy.Subscriber(
        "/quadrotor/ardrone/bottom/ardrone/bottom/image_raw",
        ROSImage,
        image_callback,
        queue_size=30)  # Store the last 30 messages before discarding them

    # Create a publisher for changing light position
    # light_pub = rospy.Publisher('/gazebo/default/light/modify', Light)
    images_stack_size = 4

    # r = rospy.Rate(10)  # 10hz

    # Init session and networks
    sess = tf.Session()
    policy_network = QNetwork(sess,
                              tot_actions=tot_actions,
                              image_shape=(screen_width, screen_height,
                                           images_stack_size),
                              batch_size=batch_size,
                              network_name="policy_net")
    init = tf.global_variables_initializer()
    sess.run(init)

    for i in range(len(ckp_list)):
        ckp = ckp_list[i]
        policy_weights_path = '/home/pulver/Desktop/sim11/episode_' + \
            str(ckp) + '/policy/policy_checkpoint.ckp'

        if (summary_folder == ""):
            tf_summary_writer = tf.summary.FileWriter(
                './' + str(ckp) + '/log/' +
                str(datetime.datetime.now().time()), sess.graph)
        else:
            tf_summary_writer = tf.summary.FileWriter(summary_folder,
                                                      sess.graph)

        if (policy_weights_path != ""):
            print("Loading weights from memory: " + str(policy_weights_path))
            policy_network.load_weights(policy_weights_path)
        else:
            print("The networks path are empty. Choose appropriate weights!")
            sys.exit()

        # start here
        for episode in range(start_episode, tot_episodes):
            # Reset the ground plane every episode_per_ground episodes
            ground_index = episode / episode_per_ground
            print "Current ground index: " + str(ground_index)
            if episode % episode_per_ground == 0 and episode != start_episode:
                with open(
                        "./" + str(ckp) + "/accuracy_ckp" + str(ckp) + ".csv",
                        "a") as myfile:
                    accuracy_ground = accuracy_ground / \
                        (episode_per_ground * 1.0)
                    print "Final accuracy value: " + str(accuracy_ground)
                    string_to_add = ground_list[ground_index - 1] + \
                        "," + str(accuracy_ground) + "\n"
                    myfile.write(string_to_add)

            if ground_index != actual_ground_index or episode == start_episode:
                accuracy_ground = 0.0
                clean_world(ground_list)
                generate_new_world(ground_index, ground_list)

            # Reset the light every 10 episodes
            # if (episode % 10 == 0):
            #     # Change the position of the three point lights
            #     for i in range(0, 3):
            #         model_to_change = "user_point_light_" + str(i)
            #         x = STDrandom.uniform(-5.0, 5.0)
            #         y = STDrandom.uniform(-5.0, 5.0)
            #         z = STDrandom.uniform(1.0, 10.0)

            #         # os.system("rosrun gazebo_ros spawn_model -file ~/.gazebo/models/" + model_to_change +
            #         #           "/model.sdf -sdf -model " + model_to_change + "_plane -x " + x + " -y " + y + " -z " + z + "")

            #         light_msg = Light()
            #         light_msg.pose.x = x
            #         light_msg.pose.y = y
            #         light_msg.pose.z = z
            #         light_msg.name = model_to_change
            #         light_pub.publish(light_msg)
            #         rospy.loginfo("Lights have been resetted!")

            actual_ground_index = ground_index
            timer_start = time.time()
            actual_time = rospy.get_rostime()
            rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
            frame_episode = 0
            cumulated_reward = 0
            epsilon_used = 0
            send_action("takeoff")
            print("")
            print("Episode: " + str(episode))
            # 0-Reset USV's position
            os.system(
                "rosservice call /gazebo/set_model_state '{model_state: { model_name: usv_model, pose: { position: { x: 0.0, y: 0.0 ,z: 5.0 }, orientation: {x: 0, y: 0.0, z: 0, w: 0.0 } }, twist: { linear: {x: 0.0 , y: 0 ,z: 0 } , angular: { x: 0.0 , y: 0 , z: 0.0 } } , reference_frame: world } }'"
            )
            # 1-Accumulate the first state
            reset_pose()
            print "Reset pose!"
            send_action("stop")
            rospy.sleep(1.0)
            #        get_image()
            state = _last_image
            # create a stack of X images
            image_t = np.stack([state] * images_stack_size, axis=2)

            for step in range(tot_steps):
                # 2- Get the action following epsilon-greedy or through policy network.
                # Here image_t is always ready and it can be given as input to
                # the network
                action_distribution = policy_network.return_action_distribution(
                    input_data=np.reshape(image_t,
                                          (1, 84, 84, images_stack_size)),
                    softmax=False)
                # print ""
                # print action_distribution
                action = np.argmax(action_distribution)
                action = convert_action_int_to_str(action)
                # print action
                send_action(action)
                rospy.sleep(noop_time)
                # 3- Get the state_t1 repeating the action obtained at step-2 and add everything
                # in the replay buffer. Then set state_t = state_t1
                # get_image()
                image_t1 = _last_image
                send_action("stop")
                # If the action taken is to land and the UAV is inside the landing
                # BB, done will be calculated accordingly
                done_reward = get_done_reward()
                reward = done_reward.reward
                done = done_reward.done
                image_t1 = np.expand_dims(image_t1, 2)
                # stack the images
                image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
                frame_counter += 1  # To call every time a frame is obtained
                frame_episode += 1
                image_t = image_t1
                get_relative_pose(ground_index, episode, step, reward,
                                  ground_list, ckp)
                cumulated_reward += reward
                # At every step, check whether more than 30 seconds have passed since the start.
                # If so, set done = True and end the episode
                timer_stop = time.time()
                # Stop the episode if the number of frame is more than a
                # threshold
                if frame_episode >= steps_per_episodes:
                    done = True
                # When the episode is done
                if done:
                    timer_stop = time.time()
                    actual_time = rospy.get_rostime()
                    rospy_stop_time = actual_time.secs + actual_time.nsecs / 1000000000.0
                    rospy_time_elapsed = rospy_stop_time - rospy_start_time
                    print("Tot Frame counter: " + str(frame_counter))
                    print("Time episode: " + str(timer_stop - timer_start) +
                          " seconds")
                    print("Ros time episode: " + str(rospy_time_elapsed) +
                          " seconds")
                    if cumulated_reward >= 0:
                        rospy.logwarn("Positive reward obtained!")
                    print("Cumulated reward: " + str(cumulated_reward))
                    print("Episode finished after {} timesteps".format(step +
                                                                       1))
                    sys.stdout.flush()

                    with open(
                            "./" + str(ckp) + "/results_ckp" + str(ckp) + "_" +
                            str(datetime.date.today().strftime("%m%d%Y")) +
                            ".csv", "a") as myfile:
                        boolean_reward = 0
                        if (cumulated_reward > 0):
                            boolean_reward = 1
                            accuracy_ground += 1.0
                        print "Current accuracy value: " + str(accuracy_ground)
                        string_to_add = ground_list[ground_index] + "," + str(
                            episode) + "," + str(step) + "," + str(
                                cumulated_reward) + "," + str(
                                    boolean_reward) + '\n'
                        myfile.write(string_to_add)

                    if episode == tot_episodes - 1:
                        with open(
                                "./" + str(ckp) + "/accuracy_ckp" + str(ckp) +
                                ".csv", "a") as myfile:
                            accuracy_ground = accuracy_ground / \
                                (episode_per_ground * 1.0)
                            print "Final accuracy value: " + str(
                                accuracy_ground)
                            string_to_add = ground_list[actual_ground_index] + \
                                "," + str(accuracy_ground) + "\n"
                            myfile.write(string_to_add)

                    break
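
# A minimal sketch (an assumption, not the original implementation) of the two
# world-management helpers used above: generate_new_world() spawns the ground
# plane selected by ground_index from ~/.gazebo/models through the gazebo_ros
# spawn_model tool (the same mechanism as the commented-out light-reset code),
# and clean_world() removes every previously spawned plane through the
# /gazebo/delete_model service. Model and service names are assumptions.
import os


def generate_new_world(ground_index, ground_list):
    ground = ground_list[int(ground_index)]
    os.system("rosrun gazebo_ros spawn_model -file ~/.gazebo/models/" + ground +
              "/model.sdf -sdf -model " + ground + "_plane -x 0 -y 0 -z 0")


def clean_world(ground_list):
    for ground in ground_list:
        os.system("rosservice call /gazebo/delete_model '{model_name: " +
                  ground + "_plane}'")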

############################### Example 4 ###############################

def main():
    """
    Main function for testing the trained DQN network on the autonomous landing task.
    """
    # ATTENTION: If you want to restore files from a previous simulation you
    # must pass valid values for these variables:
    policy_weights_path = '/home/pulver/Desktop/episode_113250/policy/policy_checkpoint.ckp'
    summary_folder = ""  # if empty the summary is written in ./log/ + current time

    screen_width = 84  # original is 160
    screen_height = 84  # original is 210
    images_stack_size = 4
    # Use only the first 6 actions for this simulation
    # action_list = ['left', 'right', 'forward', 'backward', 'stop', 'descend']
    # 0 (left), 1 (right), 2 (forward), 3 (backward), 4 (stop), 5 (descend)
    tot_actions = 6
    batch_size = 32  # size of the experience batch
    tot_steps = 1000  # finite-horizon simulation
    # high number in the original training script, since training can be stopped at any time

    steps_per_episodes = 40  # expressed in number step
    noop_time = 2.0  # pause in seconds between actions

    timer_total_start = time.time()
    rospy.init_node("DRLTestNode")
    rospy.loginfo("----- DRL Test Node -----")

    # Create a subscriber for the greyscale image
    rospy.Subscriber("/drl/grey_camera", ROSImage, image_callback)  # TODO
    # restore default
    # rospy.Subscriber("/quadrotor/ardrone/bottom/ardrone/bottom/image_raw", ROSImage, image_callback, queue_size=30)  # Store the last 30 messages
    # before discarding them

    images_stack_size = 4

    r = rospy.Rate(10)  # 10hz

    # Init session and networks
    sess = tf.Session()
    if (summary_folder == ""):
        tf_summary_writer = tf.summary.FileWriter(
            './log/' + str(datetime.datetime.now().time()), sess.graph)
    else:
        tf_summary_writer = tf.summary.FileWriter(summary_folder, sess.graph)
    policy_network = QNetwork(sess,
                              tot_actions=tot_actions,
                              image_shape=(screen_width, screen_height,
                                           images_stack_size),
                              batch_size=batch_size,
                              network_name="policy_net")

    init = tf.global_variables_initializer()
    sess.run(init)

    if (policy_weights_path != ""):
        print("Loading weights from memory: " + str(policy_weights_path))
        policy_network.load_weights(policy_weights_path)
    else:
        print("The networks path are empty. Choose appropriate weights!")
        sys.exit()

    ##########################################################################
    ##                                START HERE                              ##
    ##########################################################################
    timer_start = time.time()
    actual_time = rospy.get_rostime()
    rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
    frame_episode = 0
    cumulated_reward = 0
    send_action("takeoff")
    # 1-Accumulate the first state
    send_action("stop")
    rospy.sleep(1.0)
    # acquire image from the bottom camera
    # get_image()
    state = _last_image
    # create a stack of X images
    image_t = np.stack([state] * images_stack_size, axis=2)

    for step in range(tot_steps):
        # 2- Get the action following epsilon-greedy or through policy network.
        # Here image_t is always ready and it can be given as input to
        # the network
        action_distribution = policy_network.return_action_distribution(
            input_data=np.reshape(image_t, (1, 84, 84, images_stack_size)),
            softmax=False)
        # print ""
        # print action_distribution
        action = np.argmax(action_distribution)
        action = convert_action_int_to_str(action)
        # print action
        send_action(action)
        rospy.sleep(noop_time)
        # 3- Get the state_t1 repeating the action obtained at step-2 and add everything
        # in the replay buffer. Then set state_t = state_t1
        # get_image()
        image_t1 = _last_image
        send_action("stop")
        # If the action taken is to land and the UAV is inside the landing
        # BB, done will be calculated accordingly

        # NOTE: in the real implementation there's no way to get done and reward
        # done_reward = get_done_reward()
        # reward = done_reward.reward
        # done = done_reward.done
        done = False  # no done signal on the real platform; rely on the frame threshold below

        image_t1 = np.expand_dims(image_t1, 2)
        # stack the images
        image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
        frame_episode += 1
        image_t = image_t1

        # cumulated_reward += reward

        # At every step, check whether more than 30 seconds have passed since the start.
        # If so, set done = True and end the episode
        timer_stop = time.time()
        # Stop the episode if the number of frame is more than a threshold
        if frame_episode >= steps_per_episodes:
            done = True
        # When the episode is done
        if done:
            timer_stop = time.time()
            actual_time = rospy.get_rostime()
            rospy_stop_time = actual_time.secs + actual_time.nsecs / 1000000000.0
            rospy_time_elapsed = rospy_stop_time - rospy_start_time
            print("Time episode: " + str(timer_stop - timer_start) +
                  " seconds")
            print("Ros time episode: " + str(rospy_time_elapsed) + " seconds")
            # if cumulated_reward >= 0:
            #     rospy.logwarn("Positive reward obtained!")
            # print("Cumulated reward: " + str(cumulated_reward))
            print("Episode finished after {} timesteps".format(step + 1))
            sys.stdout.flush()
            break
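
# A minimal sketch (an assumption, not the original callback) of image_callback():
# it converts the incoming ROS image into an 84x84 greyscale numpy array and
# stores it in the module-level _last_image variable read by the main loop.
# cv_bridge is assumed to be available, as is usual for ROS image handling.
import cv2
from cv_bridge import CvBridge

_bridge = CvBridge()
_last_image = None


def image_callback(img_msg):
    global _last_image
    frame = _bridge.imgmsg_to_cv2(img_msg, desired_encoding="mono8")
    _last_image = cv2.resize(frame, (84, 84), interpolation=cv2.INTER_AREA)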

############################### Example 5 ###############################

def main():
    """
    Main function for testing the trained DQN network on the autonomous landing task.
    """
    # ATTENTION: If you want to restore files from a previous simulation you
    # must pass valid values for these variables:
    policy_weights_path = '/home/pulver/Desktop/DQN-single/episode_36000/policy/policy_checkpoint.ckp'

    ground_list = [
        "asphalt11_small", "asphalt12_small", "asphalt13_small",
        "brick11_small", "brick12_small", "brick13_small", "grass11_small",
        "grass12_small", "grass13_small", "pavement11_small",
        "pavement12_small", "pavement13_small", "sand11_small", "sand12_small",
        "sand13_small", "snow11_small", "snow12_small", "snow13_small",
        "soil11_small", "soil12_small", "soil13_small"
    ]

    summary_folder = ""  # if empty the summary is written in ./log/ + current time
    start_episode = 0  # change to last episode number
    frame_counter = 0  # change to last number of frames

    screen_width = 84  # original is 160
    screen_height = 84  # original is 210
    images_stack_size = 4
    # Use only the first 6 actions for this simulation
    # 0 (left), 1 (right), 2 (forward), 3 (backward), 4 (stop), 5 (land),
    # 6 (ascend), 7 (descend), 8 (rotate_left), 9 (rotate_right)
    tot_actions = 6
    batch_size = 32  # size of the experience batch
    tot_steps = 1000  # finite-horizon simulation
    # high number in the original training script, since training can be stopped at any time
    tot_episodes = 50
    steps_per_episodes = 20  # expressed in number step
    noop_time = 2.0  # pause in seconds between actions

    num_ground_plane = 21
    actual_ground_index = 0
    # episode_per_ground specifies the number of episodes with the same ground
    # plane
    ground_counter = 1
    episode_per_ground = 10

    timer_total_start = time.time()
    rospy.init_node("DeepReinforcedLanding")
    rospy.loginfo("----- Deep Reinforced Landing Node -----")

    rospy.Subscriber(
        "/quadrotor/ardrone/bottom/ardrone/bottom/image_raw",
        ROSImage,
        image_callback,
        queue_size=30)  # Store the last 30 messages before discarding them

    r = rospy.Rate(10)  # 10hz

    # Init session and networks
    sess = tf.Session()
    if (summary_folder == ""):
        tf_summary_writer = tf.summary.FileWriter(
            './log/' + str(datetime.datetime.now().time()), sess.graph)
    else:
        tf_summary_writer = tf.summary.FileWriter(summary_folder, sess.graph)
    policy_network = QNetwork(sess,
                              tot_actions=tot_actions,
                              image_shape=(screen_width, screen_height,
                                           images_stack_size),
                              batch_size=batch_size,
                              network_name="policy_net")
    init = tf.global_variables_initializer()
    sess.run(init)

    # Load Neural Networks weights from memory if a valid checkpoint path is passed
    # if(os.path.isfile(policy_weights_path) == True and
    # os.path.isfile(target_weights_path) == True):
    try:
        print("Loading weights from memory...")
        policy_network.load_weights(policy_weights_path)
    except Exception:
        print("Error while loading the weights!")
        return

    # start here
    for episode in range(start_episode, tot_episodes):
        # Reset the ground plane every episode_per_ground episodes
        ground_index = episode / episode_per_ground
        if ground_index != actual_ground_index or (episode == start_episode):
            clean_world(ground_list)
            generate_new_world(ground_list)
        actual_ground_index = ground_index

        timer_start = time.time()
        actual_time = rospy.get_rostime()
        rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
        cumulated_reward = 0
        print("")
        print("Episode: " + str(episode))
        # 1-Accumulate the first state
        reset_pose()
        print "Reset pose!"
        send_action("stop")
        rospy.sleep(1.0)
        state = _last_image
        # create a stack of X images
        image_t = np.stack([state] * images_stack_size, axis=2)
        frame_episode = 0
        for step in range(tot_steps):
            # 2- Get the action following epsilon-greedy or through policy network.
            # With probability epsilon take random action otherwise it takes
            # an action from the policy network.
            # Take a random action

            # Here image_t is always ready and it can be given as input to
            # the network
            action_distribution = policy_network.return_action_distribution(
                input_data=np.reshape(image_t, (1, 84, 84, images_stack_size)),
                softmax=False)
            action = np.argmax(action_distribution)
            action = convert_action_int_to_str(action)
            # action = get_random_action()
            send_action(action)
            rospy.sleep(noop_time)
            # 3- Get the state_t1 repeating the action obtained at step-2 and add everything
            # in the replay buffer. Then set state_t = state_t1
            # get_image()
            image_t1 = _last_image
            # If the action taken is to land and the UAV is inside the landing
            # BB, done will be calculated accordingly
            done_reward = get_done_reward()
            reward = done_reward.reward
            done = done_reward.done
            image_t1 = np.expand_dims(image_t1, 2)
            # stack the images
            image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
            image_t = image_t1
            cumulated_reward += reward
            send_action("stop")
            frame_episode = frame_episode + 1

            # get_relative_pose(ground_index, episode, step, reward)

            # At every step, check whether more than 30 seconds have passed since the start.
            # If so, set done = True and end the episode
            timer_stop = time.time()
            # Stop the episode if the number of frame is more than a threshold
            if frame_episode >= steps_per_episodes:
                done = True
            # if timer_stop - timer_start >= 30.0: # maximum time allowed
            #     #cumulated_reward += -1
            #     done = True
            # When the episode is done
            if done:
                timer_stop = time.time()
                actual_time = rospy.get_rostime()
                rospy_stop_time = actual_time.secs + actual_time.nsecs / 1000000000.0
                rospy_time_elapsed = rospy_stop_time - rospy_start_time
                print("Tot Frame counter: " + str(frame_counter))
                print("Time episode: " + str(timer_stop - timer_start) +
                      " seconds")
                print("Ros time episode: " + str(rospy_time_elapsed) +
                      " seconds")
                if cumulated_reward >= 0:
                    rospy.logwarn("Positive reward obtained!")
                print("Cumulated reward: " + str(cumulated_reward))
                print("Episode finished after {} timesteps".format(step + 1))
                sys.stdout.flush()
                # time.sleep(5)
                with open(
                        "./results_" +
                        str(datetime.date.today().strftime("%m%d%Y")) + ".csv",
                        "a") as myfile:
                    boolean_reward = 0
                    if (cumulated_reward > 0):
                        boolean_reward = 1
                    ground_list = "mixed"
                    string_to_add = "mixed" + "," + str(episode) + "," + str(
                        step) + "," + str(cumulated_reward) + "," + str(
                            boolean_reward) + '\n'
                    myfile.write(string_to_add)
                    break
                break
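
# A minimal sketch (an assumption, not the original implementation) of
# send_action(), which maps the action strings used above to velocity commands
# plus takeoff/land messages. Topic names and speed values are illustrative;
# publishers are created lazily so they exist only after rospy.init_node().
import rospy
from geometry_msgs.msg import Twist
from std_msgs.msg import Empty

_publishers = {}


def _get_publisher(topic, msg_type):
    if topic not in _publishers:
        _publishers[topic] = rospy.Publisher(topic, msg_type, queue_size=1)
    return _publishers[topic]


def send_action(action, speed=0.5):
    if action == "takeoff":
        _get_publisher("/quadrotor/ardrone/takeoff", Empty).publish(Empty())
        return
    if action == "land":
        _get_publisher("/quadrotor/ardrone/land", Empty).publish(Empty())
        return
    twist = Twist()
    if action == "left":
        twist.linear.y = speed
    elif action == "right":
        twist.linear.y = -speed
    elif action == "forward":
        twist.linear.x = speed
    elif action == "backward":
        twist.linear.x = -speed
    elif action == "descend":
        twist.linear.z = -speed
    # "stop" publishes an all-zero twist to hover in place
    _get_publisher("/quadrotor/cmd_vel", Twist).publish(twist)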

############################### Example 6 ###############################

def main():
    """
    Main function for testing the trained DQN network on the autonomous landing task.
    """
    # ATTENTION: If you want to restore files from a previous simulation you
    # must pass valid values for these variables:
    policy_weights_path = '/home/pulver/Desktop/episode_32400/policy/policy_checkpoint.ckp'

    summary_folder = ""  # if empty the summary is written in ./log/ + current time
    start_episode = 0  # change to last episode number
    frame_counter = 0  # change to last number of frames

    screen_width = 84  # original is 160
    screen_height = 84  # original is 210
    images_stack_size = 4
    # Use only the first 6 actions for this simulation
    # 0 (left), 1 (right), 2 (forward), 3 (backward), 4 (stop), 5 (land),
    # 6 (ascend), 7 (descend), 8 (rotate_left), 9 (rotate_right)
    tot_actions = 6
    batch_size = 32  # size of the experience batch
    tot_steps = 1000  # finite-horizon simulation
    # high number in the original training script, since training can be stopped at any time
    tot_episodes = 1050
    steps_per_episodes = 20  # expressed in number step
    noop_time = 2.0  # pause in seconds between actions

    num_ground_plane = 21
    episodes_per_ground_plane = tot_episodes / num_ground_plane
    actual_ground_index = 0

    timer_total_start = time.time()
    rospy.init_node("DeepReinforcedLanding")
    rospy.loginfo("----- Deep Reinforced Landing Node -----")

    rospy.Subscriber(
        "/quadrotor/ardrone/bottom/ardrone/bottom/image_raw", ROSImage, image_callback, queue_size=30)  # Store the last 30 messages before discarding them

    r = rospy.Rate(10)  # 10hz

    # Init session and networks
    sess = tf.Session()
    if(summary_folder == ""):
        tf_summary_writer = tf.summary.FileWriter(
            './log/' + str(datetime.datetime.now().time()), sess.graph)
    else:
        tf_summary_writer = tf.summary.FileWriter(
            summary_folder, sess.graph)
    policy_network = QNetwork(sess, tot_actions=tot_actions, image_shape=(
        screen_width, screen_height, images_stack_size), batch_size=batch_size, network_name="policy_net")
    init = tf.global_variables_initializer()
    sess.run(init)

    # Load Neural Networks weights from memory if a valid checkpoint path is passed
    # if(os.path.isfile(policy_weights_path) == True and
    # os.path.isfile(target_weights_path) == True):
    try:
        print("Loading weights from memory...")
        policy_network.load_weights(policy_weights_path)
    except Exception:
        print("Error while loading the weights!")
        return

    print "Episode, step, action, reward, action_distribution"
    # start here
    for episode in range(start_episode, tot_episodes):
        # Reset the ground in the environment every 25k frames
        # ground_index = episode / 50
        # if ground_index != actual_ground_index or episode == 0:
        #     generate_new_world(ground_index)
        # actual_ground_index = ground_index
        timer_start = time.time()
        actual_time = rospy.get_rostime()
        rospy_start_time = actual_time.secs + actual_time.nsecs / 1000000000.0
        cumulated_reward = 0   
        # 1-Accumulate the first state
        #reset_pose()
        send_action("stop")
        rospy.sleep(1.0)
        state = _last_image
        # create a stack of X images
        image_t = np.stack([state] * images_stack_size, axis=2)
        frame_episode = 0
        pad_size = 3
        pad_value = 255
        for step in range(tot_steps):
            # 2- Get the action following epsilon-greedy or through policy network.
            # With probability epsilon take random action otherwise it takes
            # an action from the policy network.
            # Take a random action
            
            # Pad each frame of the stack and save them side by side for inspection
            image_stack_padded = [
                np.lib.pad(image_t[:, :, i], (pad_size, pad_size),
                           'constant', constant_values=(pad_value, pad_value))
                for i in range(images_stack_size)
            ]
            image_hstack = np.hstack(image_stack_padded)
            cv2.imwrite("/home/pulver/Desktop/print_network_action_distribution/img_ep" +
                        str(episode) + "_step" + str(step) + ".png", image_hstack)
            # Here image_t is always ready and it can be given as input to
            # the network
            action_distribution = policy_network.return_action_distribution(
                input_data=np.reshape(image_t, (1, 84, 84, images_stack_size)),
                softmax=False)
            
            action = np.argmax(action_distribution)
            action = convert_action_int_to_str(action)
            #action = get_random_action()
            send_action(action)
            rospy.sleep(noop_time)
            # 3- Get the state_t1 repeating the action obtained at step-2 and add everything
            # in the replay buffer. Then set state_t = state_t1
            # get_image()
            image_t1 = _last_image
            # If the action taken is to land and the UAV is inside the landing
            # BB, done will be calculated accordingly
            done_reward = get_done_reward()
            reward = done_reward.reward
            done = done_reward.done
            image_t1 = np.expand_dims(image_t1, 2)
            # stack the images
            image_t1 = np.append(image_t[:, :, 1:], image_t1, axis=2)
            image_t = image_t1
            cumulated_reward += reward
            send_action("stop")
            frame_episode = frame_episode + 1
            #get_relative_pose(ground_index, episode, step, reward)
            # At every step, check whether more than 30 seconds have passed since the start.
            # If so, set done = True and end the episode
            timer_stop = time.time()
            print str(episode) + "," + str(step) + "," + action + "," + str(reward) + "," +str(action_distribution)
            # Stop the episode if the number of frame is more than a threshold
            if frame_episode >= steps_per_episodes:
                done = True
            # if timer_stop - timer_start >= 30.0: # maximum time allowed
            #     #cumulated_reward += -1
            #     done = True
            # When the episode is done
            if done:
                break
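
# A minimal sketch (an assumption, not the original QNetwork class) of the
# policy network queried by every example above. The convolutional stack
# follows the activation shapes that the first example reshapes for
# visualisation (21x21x32, 11x11x64, 11x11x64 on an 84x84x4 input), i.e. the
# standard DQN architecture. Layer sizes and variable names are illustrative,
# and batch_size is accepted only for interface compatibility.
import tensorflow as tf


class QNetworkSketch(object):
    def __init__(self, sess, tot_actions, image_shape, batch_size,
                 network_name="policy_net"):
        self.sess = sess
        with tf.variable_scope(network_name):
            self.input = tf.placeholder(tf.float32, shape=(None,) + image_shape)
            self.conv1 = tf.layers.conv2d(self.input, 32, 8, strides=4,
                                          padding='same', activation=tf.nn.relu)
            self.conv2 = tf.layers.conv2d(self.conv1, 64, 4, strides=2,
                                          padding='same', activation=tf.nn.relu)
            self.conv3 = tf.layers.conv2d(self.conv2, 64, 3, strides=1,
                                          padding='same', activation=tf.nn.relu)
            hidden = tf.layers.dense(tf.layers.flatten(self.conv3), 512,
                                     activation=tf.nn.relu)
            self.q_values = tf.layers.dense(hidden, tot_actions)
            self.softmax_output = tf.nn.softmax(self.q_values)
        self.saver = tf.train.Saver()

    def return_action_distribution(self, input_data, softmax=False):
        output = self.softmax_output if softmax else self.q_values
        # Returned as a one-element list so that indexing such as
        # action_distribution[0][0][action] matches the calling code above.
        return self.sess.run([output], feed_dict={self.input: input_data})

    def return_everything(self, input_data, softmax=False):
        output = self.softmax_output if softmax else self.q_values
        return self.sess.run([output, self.conv1, self.conv2, self.conv3],
                             feed_dict={self.input: input_data})

    def load_weights(self, checkpoint_path):
        self.saver.restore(self.sess, checkpoint_path)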