Example #1
            qlearn.learn(state, action, reward, nextState)  # update the Q-table with this transition

            if not done:
                rospy.logwarn("NOT DONE")
                state = nextState
            else:
                rospy.logwarn("DONE")
                last_time_steps = numpy.append(last_time_steps, [int(i + 1)])
                break
            rospy.logwarn("############### END Step=>" + str(i))
            #raw_input("Next Step...PRESS KEY")
            # rospy.sleep(2.0)
        # convert elapsed wall-clock time to h:mm:ss for the episode log
        m, s = divmod(int(time.time() - start_time), 60)
        h, m = divmod(m, 60)
        rospy.logerr(("EP: " + str(x + 1) + " - [alpha: " + str(round(qlearn.alpha, 2)) + " - gamma: " + str(
            round(qlearn.gamma, 2)) + " - epsilon: " + str(round(qlearn.epsilon, 2)) + "] - Reward: " + str(
            cumulated_reward) + "     Time: %d:%02d:%02d" % (h, m, s)))

    rospy.loginfo(("\n|" + str(nepisodes) + "|" + str(qlearn.alpha) + "|" + str(qlearn.gamma) + "|" + str(
        initial_epsilon) + "*" + str(epsilon_discount) + "|" + str(highest_reward) + "| PICTURE |"))

    # sort the per-episode step counts so the best runs end up at the tail
    last_steps = last_time_steps.tolist()
    last_steps.sort()

    rospy.loginfo("Overall score: {:0.2f}".format(last_time_steps.mean()))
    rospy.loginfo("Best 100 score: {:0.2f}".format(
        sum(last_steps[-100:]) / len(last_steps[-100:])))

    env.close()
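For context, this first excerpt runs inside per-episode and per-step loops that the snippet does not show. Below is a minimal sketch of that enclosing structure, assuming the QLearn helper used in the openai_ros Q-learning tutorials (chooseAction, nepisodes, nsteps, and the string-encoded state are assumptions, not part of the excerpt):

    # Sketch of the loops that surround the excerpt above (assumed, not original).
    for x in range(nepisodes):
        cumulated_reward = 0
        observation = env.reset()
        qlearn.epsilon *= epsilon_discount          # decay exploration each episode
        state = ''.join(map(str, observation))      # encode the observation as a hashable key

        for i in range(nsteps):
            action = qlearn.chooseAction(state)     # epsilon-greedy action selection
            observation, reward, done, info = env.step(action)
            cumulated_reward += reward
            highest_reward = max(highest_reward, cumulated_reward)
            nextState = ''.join(map(str, observation))
            # ...the excerpt above continues from here with qlearn.learn(...)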
Example #2
def main():
    rospy.init_node('movingcube_onedisk_walk_gym',
                    anonymous=True,
                    log_level=rospy.WARN)

    # Set the path where the learned model will be saved
    rospack = rospkg.RosPack()
    pkg_path = rospack.get_path('my_moving_cube_pkg')
    models_dir_path = os.path.join(pkg_path, "models_saved")
    if not os.path.exists(models_dir_path):
        os.makedirs(models_dir_path)

    out_model_file_path = os.path.join(models_dir_path, "movingcube_model.pkl")

    max_timesteps = rospy.get_param("/moving_cube/max_timesteps")
    buffer_size = rospy.get_param("/moving_cube/buffer_size")
    # Convert to float because scientific notation such as 1e-3 is sometimes parsed as a string.
    lr = float(rospy.get_param("/moving_cube/lr"))

    exploration_fraction = rospy.get_param("/moving_cube/exploration_fraction")
    exploration_final_eps = rospy.get_param(
        "/moving_cube/exploration_final_eps")
    print_freq = rospy.get_param("/moving_cube/print_freq")

    reward_task_learned = rospy.get_param("/moving_cube/reward_task_learned")
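The rospy.get_param calls above assume these values are already on the parameter server, typically loaded from a YAML file referenced in the launch file. A hedged sketch of such a file (the key names come from the calls above; every value here is a placeholder, not taken from the original):

    moving_cube:
      task_and_robot_environment_name: 'MovingCubeOneDiskWalk-v0'  # placeholder env id
      max_timesteps: 10000
      buffer_size: 50000
      lr: 1e-3
      exploration_fraction: 0.1
      exploration_final_eps: 0.02
      print_freq: 1
      reward_task_learned: 10000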

    def callback(lcl, _glb):
        # Stop training once the mean return over the last 100 finished
        # episodes reaches reward_task_learned (after at least 100 steps).
        enough_steps = lcl['t'] > 100
        mean_100ep_reward = sum(lcl['episode_rewards'][-101:-1]) / 100
        is_solved = enough_steps and mean_100ep_reward >= reward_task_learned

        rospy.logdebug("enough_steps=" + str(enough_steps))
        rospy.logdebug("mean_100ep_reward=" + str(mean_100ep_reward))
        rospy.logdebug("reward_task_learned=" + str(reward_task_learned))
        rospy.logdebug("IS SOLVED?=" + str(is_solved))

        return is_solved
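In the older baselines API this script targets, deepq.learn calls callback(locals(), globals()) once per timestep and stops training early as soon as the callback returns True; that is why lcl['t'] (the current timestep) and lcl['episode_rewards'] (the list of per-episode returns) are available. A quick sanity check of the stopping rule with hypothetical values, runnable inside main() after the definition above:

    # Hypothetical: 120 finished episodes of return 75.0 each, at timestep 150.
    fake_locals = {'t': 150, 'episode_rewards': [75.0] * 120}
    rospy.logdebug(callback(fake_locals, None))  # True only if 75.0 >= reward_task_learned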

    # Init OpenAI_ROS ENV
    task_and_robot_environment_name = rospy.get_param(
        '/moving_cube/task_and_robot_environment_name')
    env = StartOpenAI_ROS_Environment(task_and_robot_environment_name)
    rospy.loginfo("Starting Learning")

    model = deepq.models.mlp([64])  # Q-network: a single hidden layer of 64 units
    act = deepq.learn(
        env,
        q_func=model,
        lr=lr,
        max_timesteps=max_timesteps,
        buffer_size=buffer_size,
        exploration_fraction=exploration_fraction,
        exploration_final_eps=exploration_final_eps,
        print_freq=print_freq,  # episodes between printed reward/progress summaries
        param_noise=False,
        callback=callback)

    env.close()
    rospy.logwarn("Saving model to movingcube_model.pkl")
    act.save(out_model_file_path)
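Once saved, the pickled policy can be reloaded and replayed without retraining. A minimal sketch, assuming the same older baselines release that pairs act.save with deepq.load; the import path for StartOpenAI_ROS_Environment is assumed from the openai_ros package:

    # Sketch: replay the saved policy (assumes the old baselines deepq API).
    import rospy
    from baselines import deepq
    from openai_ros.openai_ros_common import StartOpenAI_ROS_Environment

    rospy.init_node('movingcube_onedisk_walk_replay', anonymous=True)
    env = StartOpenAI_ROS_Environment(
        rospy.get_param('/moving_cube/task_and_robot_environment_name'))
    act = deepq.load("movingcube_model.pkl")  # path written by the script above

    obs, done = env.reset(), False
    while not done:
        obs, reward, done, _ = env.step(act(obs[None])[0])
    env.close()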