import argparse

# `su` refers to the project's simulation-utility module (read_config, build_MDP,
# build_agent, build_maze, play_maze, path_csv), assumed to be imported elsewhere
# in this file.


def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument("--algo", help="name of reinforcement algorithm", required=True)
    parser.add_argument("--mazes", help="maze config file", required=True)
    parser.add_argument("-s", "--samples", help="number of samples", default=30, type=int)
    parser.add_argument("--mdp", help="transition file without extension", default="deterministic", type=str)
    parser.add_argument("-a", "--alpha", help="value of learning rate", default=0.5, type=float)
    parser.add_argument("-g", "--gamma", help="value of discount", default=0.8, type=float)
    parser.add_argument("-e", "--epsilon", help="probability of random action", default=0, type=float)
    parser.add_argument("-l", "--learning", help="agent's update function", default=1, type=int)
    parser.add_argument("-b", "--back_cost", help="cost of the backward action", default=10, type=float)
    parser.add_argument("-R", "--reward", help="value of reward", default=10, type=float)
    parser.add_argument("-r", "--reset", help="probability of resetting reward", default=0, type=float)
    parser.add_argument("-d", "--deadend_cost", help="penalty at a dead end", default=0, type=float)
    parser.add_argument("-q", "--Qreset", help="interval at which Q-values are reset", default="", type=str)
    parser.add_argument("-o", "--output", help="path to output file with extension", default=None)
    args = parser.parse_args(argv)

    # Read the maze config file: the mazes to run and the number of episodes per maze
    mazes, episodes = su.read_config(args.mazes)

    # Total number of episodes across all mazes
    total_episodes = sum(episodes)

    # Build MDP and Agent objects
    MDP = su.build_MDP(args.mdp)
    Agent = su.build_agent(args.algo, args.alpha, args.gamma, args.epsilon, args.learning,
                           action_cost={'N': 1, 'E': 1, 'S': args.back_cost, 'W': 1})

    # Determine reset points (episodes at which the Q-values are reset)
    reset_pts = []
    if args.Qreset == 'tri':
        reset_pts = su.tri
    elif args.Qreset == 'trap':
        reset_pts = su.trap
    elif args.Qreset.isdigit():
        reset_pts = range(0, total_episodes - 1, int(args.Qreset))

    # Run the agent through the mazes for `samples` samples of `total_episodes` episodes
    for s in range(args.samples):
        paths = []
        for maze, trial in zip(mazes, episodes):
            Maze = su.build_maze(maze, MDP, args.reward, args.reset, args.deadend_cost)
            Agent.change_maze(Maze)
            for i in range(trial):
                if i in reset_pts:
                    Agent.reset_Qvalues()
                paths.append(su.play_maze(Agent))
        if args.output is not None:
            su.path_csv(s, total_episodes, paths, args.output)
        # Reset learning before the next sample
        Agent.reset_Qvalues()
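# A minimal entry-point sketch, assuming this script is run directly from the
# command line; the guard simply forwards the command-line arguments to main().
# Example invocation (the script name and argument values are placeholders, not
# values taken from the project):
#
#     python run_mazes.py --algo qlearning --mazes mazes.cfg -s 10 -o paths.csv
if __name__ == "__main__":
    import sys
    main(sys.argv[1:])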
import argparse

import pandas as pd


def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument("--algo", help="name of reinforcement algorithm", required=True)
    parser.add_argument("--mazes", help="maze config file", required=True)
    parser.add_argument("--mdp", help="transition file", default="deterministic.mdp", type=str)
    parser.add_argument("-a", "--alpha", help="value of learning rate", default=0.5, type=float)
    parser.add_argument("-g", "--gamma", help="value of discount", default=0.8, type=float)
    parser.add_argument("-e", "--epsilon", help="probability of random action", default=0, type=float)
    parser.add_argument("-l", "--learning", help="agent's update function", default=1, type=int)
    parser.add_argument("-b", "--back_cost", help="cost of the backward action", default=10, type=float)
    parser.add_argument("-R", "--reward", help="value of reward", default=10, type=float)
    parser.add_argument("-r", "--reset", help="probability of resetting reward", default=0, type=float)
    parser.add_argument("-d", "--deadend_cost", help="penalty at a dead end", default=0, type=float)
    parser.add_argument("--input", help="file of input actions", required=True, type=str)
    parser.add_argument("-o", "--output", help="path to output file with extension", default=None)
    args = parser.parse_args(argv)

    # Build MDP and Agent objects
    MDP = su.build_MDP(args.mdp)
    Agent = su.build_agent(args.algo, args.alpha, args.gamma, args.epsilon, args.learning,
                           action_cost={'N': 1, 'E': 1, 'S': args.back_cost, 'W': 1})

    # Read the recorded paths and the maze each path was played in
    input_file = pd.read_csv(args.input)
    paths = input_file["paths"]
    mazes = input_file["mazes"]

    probabilities = list()

    # Loop through episodes/paths, rebuilding the maze only when it changes
    current_maze = None
    for maze, path in zip(mazes, paths):
        if maze != current_maze:
            new_maze = su.build_maze(maze, MDP, args.reward, args.reset, args.deadend_cost)
            Agent.change_maze(new_maze)
            current_maze = maze  # remember the maze so it is not rebuilt every episode
        probabilities.append(su.tether(path, Agent))

    # Write the per-trial probabilities to a .csv file
    if args.output is not None:
        num_trials = len(probabilities)
        data = {"trials": range(num_trials), "probability": probabilities}
        df = pd.DataFrame(data)
        df.set_index("trials", inplace=True)
        df.to_csv(args.output)
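# A minimal sketch of an input file for the script above: the script reads a CSV
# with "mazes" and "paths" columns, so the file is expected to look roughly like
# the frame built below. The maze file names and action strings are illustrative
# placeholders (the action alphabet N/E/S/W is taken from the agent's
# action_cost); the exact path encoding used by the project is an assumption.
import pandas as pd

example = pd.DataFrame({
    "mazes": ["maze_01.txt", "maze_01.txt", "maze_02.txt"],  # maze played in each episode
    "paths": ["NNEE", "NENE", "NNNE"],                       # recorded action sequences (format assumed)
})
example.to_csv("input_actions.csv", index=False)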