            qlearn.learn(state, action, reward, nextState)

            if not done:
                rospy.logwarn("NOT DONE")
                state = nextState
            else:
                rospy.logwarn("DONE")
                last_time_steps = numpy.append(last_time_steps, [int(i + 1)])
                break
            rospy.logwarn("############### END Step=>" + str(i))
            # raw_input("Next Step...PRESS KEY")
            # rospy.sleep(2.0)

        m, s = divmod(int(time.time() - start_time), 60)
        h, m = divmod(m, 60)
        rospy.logerr("EP: " + str(x + 1) +
                     " - [alpha: " + str(round(qlearn.alpha, 2)) +
                     " - gamma: " + str(round(qlearn.gamma, 2)) +
                     " - epsilon: " + str(round(qlearn.epsilon, 2)) + "]" +
                     " - Reward: " + str(cumulated_reward) +
                     "  Time: %d:%02d:%02d" % (h, m, s))

    rospy.loginfo("\n|" + str(nepisodes) + "|" + str(qlearn.alpha) + "|" +
                  str(qlearn.gamma) + "|" + str(initial_epsilon) + "*" +
                  str(epsilon_discount) + "|" + str(highest_reward) + "| PICTURE |")

    l = last_time_steps.tolist()
    l.sort()

    rospy.loginfo("Overall score: {:0.2f}".format(last_time_steps.mean()))
    rospy.loginfo("Best 100 score: {:0.2f}".format(
        reduce(lambda x, y: x + y, l[-100:]) / len(l[-100:])))

    env.close()
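The loop above calls qlearn.learn(state, action, reward, nextState) and reads qlearn.alpha, qlearn.gamma and qlearn.epsilon, so it assumes a tabular Q-learning class with that interface. The following is a minimal sketch of such a class; the actual qlearn.py shipped with the tutorial package may differ in details, and the names here are only illustrative.

import random

class QLearn:
    def __init__(self, actions, alpha, gamma, epsilon):
        self.q = {}              # Q-table: (state, action) -> value
        self.actions = actions
        self.alpha = alpha       # learning rate
        self.gamma = gamma       # discount factor
        self.epsilon = epsilon   # exploration rate

    def getQ(self, state, action):
        # Unseen state-action pairs default to a value of 0.0
        return self.q.get((state, action), 0.0)

    def chooseAction(self, state):
        # Epsilon-greedy: explore with probability epsilon, else exploit
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        q_values = [self.getQ(state, a) for a in self.actions]
        return self.actions[q_values.index(max(q_values))]

    def learn(self, state, action, reward, nextState):
        # Standard Q-learning update:
        # Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        max_q_next = max([self.getQ(nextState, a) for a in self.actions])
        old_q = self.getQ(state, action)
        self.q[(state, action)] = old_q + self.alpha * (
            reward + self.gamma * max_q_next - old_q)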
def main():
    rospy.init_node('movingcube_onedisk_walk_gym', anonymous=True,
                    log_level=rospy.WARN)

    # Set the path where the learned model will be saved
    rospack = rospkg.RosPack()
    pkg_path = rospack.get_path('my_moving_cube_pkg')
    models_dir_path = os.path.join(pkg_path, "models_saved")
    if not os.path.exists(models_dir_path):
        os.makedirs(models_dir_path)

    out_model_file_path = os.path.join(models_dir_path, "movingcube_model.pkl")

    max_timesteps = rospy.get_param("/moving_cube/max_timesteps")
    buffer_size = rospy.get_param("/moving_cube/buffer_size")
    # Convert to float because, when the value is written in scientific
    # (Ye-X) notation, the parameter server sometimes returns it as a string.
    lr = float(rospy.get_param("/moving_cube/lr"))
    exploration_fraction = rospy.get_param("/moving_cube/exploration_fraction")
    exploration_final_eps = rospy.get_param(
        "/moving_cube/exploration_final_eps")
    print_freq = rospy.get_param("/moving_cube/print_freq")
    reward_task_learned = rospy.get_param("/moving_cube/reward_task_learned")

    def callback(lcl, _glb):
        # Stop training once the mean reward over the last 100 episodes
        # reaches reward_task_learned
        aux1 = lcl['t'] > 100
        aux2 = sum(lcl['episode_rewards'][-101:-1]) / 100
        is_solved = aux1 and aux2 >= reward_task_learned
        rospy.logdebug("aux1=" + str(aux1))
        rospy.logdebug("aux2=" + str(aux2))
        rospy.logdebug("reward_task_learned=" + str(reward_task_learned))
        rospy.logdebug("IS SOLVED?=" + str(is_solved))
        return is_solved

    # Init the OpenAI_ROS environment
    task_and_robot_environment_name = rospy.get_param(
        '/moving_cube/task_and_robot_environment_name')
    env = StartOpenAI_ROS_Environment(task_and_robot_environment_name)

    rospy.loginfo("Starting Learning")
    model = deepq.models.mlp([64])
    act = deepq.learn(
        env,
        q_func=model,
        lr=lr,
        max_timesteps=max_timesteps,
        buffer_size=buffer_size,
        exploration_fraction=exploration_fraction,
        exploration_final_eps=exploration_final_eps,
        print_freq=print_freq,  # how many episodes between printing total rewards and info
        param_noise=False,
        callback=callback)
    env.close()

    rospy.logwarn("Saving model to movingcube_model.pkl")
    act.save(out_model_file_path)
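Once training finishes, the saved movingcube_model.pkl can be loaded back to replay the learned policy. The sketch below assumes the same old-style baselines deepq API used above (the one providing deepq.models.mlp and act.save), where deepq.load returns a callable act wrapper; the node name and script structure here are illustrative, not part of the course code.

#!/usr/bin/env python
import os
import rospy
import rospkg
from baselines import deepq
from openai_ros.openai_ros_common import StartOpenAI_ROS_Environment

def replay():
    rospy.init_node('movingcube_onedisk_walk_replay', anonymous=True)

    # Load the model saved by the training script
    pkg_path = rospkg.RosPack().get_path('my_moving_cube_pkg')
    model_path = os.path.join(pkg_path, "models_saved", "movingcube_model.pkl")
    act = deepq.load(model_path)

    env_name = rospy.get_param('/moving_cube/task_and_robot_environment_name')
    env = StartOpenAI_ROS_Environment(env_name)

    # Run one episode greedily with the trained Q-network
    obs, done = env.reset(), False
    while not done and not rospy.is_shutdown():
        action = act(obs[None])[0]  # act expects a batch of observations
        obs, reward, done, _ = env.step(action)
    env.close()

if __name__ == '__main__':
    replay()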