Code Example #1
File: learn.py  Project: v2tamprateep/RLSim
import argparse

# "su" is RLSim's simulation-utility module; its import statement is not
# part of this excerpt.


def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument("--algo", help="name of reinforcement algorithm", required=True)
    parser.add_argument("--mazes", help="maze config file", required=True)
    parser.add_argument("-s", "--samples", help="number of samples", default=30, type=int)
    parser.add_argument("--mdp", help="transition file without extension", default="deterministic", type=str)
    parser.add_argument("-a", "--alpha", help="value of learning rate",default=0.5, type=float)
    parser.add_argument("-g", "--gamma", help="value of discount", default=0.8, type=float)
    parser.add_argument("-e", "--epsilon", help="probability of random action", default=0, type=float)
    parser.add_argument("-l", "--learning", help="agent's update function", default=1, type=int)
    parser.add_argument("-b", "--back_cost", help="difficulty of backward action", default=10, type=float)
    parser.add_argument("-R", "--reward", help="value of reward", default=10, type=float)
    parser.add_argument("-r", "--reset", help="probability of resetting reward", default=0, type=float)
    parser.add_argument("-d", "--deadend_cost", help="penalty at a deadend", default=0, type=float)
    parser.add_argument("-q", "--Qreset", help="interval in which qvalues or reset", default="", type=str)
    parser.add_argument("-o", "--output", help="path to output file with extension", default=None)
    args = parser.parse_args(argv)

    # get mazes and episodes
    mazes, episodes = su.read_config(args.mazes)

    # total episodes
    total_episodes = sum(episodes)

    # Build MDP, Agent objects
    MDP = su.build_MDP(args.mdp)
    Agent = su.build_agent(args.algo, args.alpha, args.gamma, args.epsilon, args.learning,
                           action_cost={'N': 1, 'E': 1, 'S': args.back_cost, 'W': 1})

    # Determine reset points (episodes where the Q-values are reset)
    reset_pts = []
    if args.Qreset == 'tri':
        reset_pts = su.tri
    elif args.Qreset == 'trap':
        reset_pts = su.trap
    elif args.Qreset.isdigit():
        reset_pts = range(0, total_episodes - 1, int(args.Qreset))

    # Run agent through maze for s samples of n episodes
    for s in range(args.samples):
        paths = []

        for maze, trial in zip(mazes, episodes):
            Maze = su.build_maze(maze, MDP, args.reward, args.reset, args.deadend_cost)
            Agent.change_maze(Maze)

            for i in range(trial):
                if i in reset_pts:
                    Agent.reset_Qvalues()
                paths.append(su.play_maze(Agent))

        if args.output is not None:
            su.path_csv(s, total_episodes, paths, args.output)

        # reset learning for each sample
        Agent.reset_Qvalues()
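
Since main() takes argv explicitly instead of reading sys.argv itself, the script is easy to drive from another Python module or a test. Below is a minimal sketch of such a call; the algorithm name "qlearn" and the file names are placeholders, since the valid values come from RLSim's su.build_agent and the project's config files, which are not shown here:

import learn

learn.main([
    "--algo", "qlearn",        # placeholder algorithm name
    "--mazes", "mazes.cfg",    # placeholder maze config file
    "--samples", "5",          # 5 samples instead of the default 30
    "--Qreset", "10",          # reset Q-values every 10 episodes
    "--output", "paths.csv",   # write each sample's paths to CSV
])
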
Code Example #2
File: tether.py  Project: v2tamprateep/RLSim
import argparse

import pandas as pd

# As in learn.py, "su" is RLSim's simulation-utility module; its import is
# not part of this excerpt.


def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument("--algo", help="name of reinforcement algorithm", required=True)
    parser.add_argument("--mazes", help="maze config file", required=True)
    parser.add_argument("--mdp", help="transition file", default="deterministic.mdp", type=str)
    parser.add_argument("-a", "--alpha", help="value of learning rate",default=0.5, type=float)
    parser.add_argument("-g", "--gamma", help="value of discount", default=0.8, type=float)
    parser.add_argument("-e", "--epsilon", help="probability of random action", default=0, type=float)
    parser.add_argument("-l", "--learning", help="agent's update function", default=1, type=int)
    parser.add_argument("-b", "--back_cost", help="difficulty of backward action", default=10, type=float)
    parser.add_argument("-R", "--reward", help="value of reward", default=10, type=float)
    parser.add_argument("-r", "--reset", help="probability of resetting reward", default=0, type=float)
    parser.add_argument("-d", "--deadend_cost", help="penalty at a deadend", default=0, type=float)
    parser.add_argument("--input", help="file of input actions", required=True, type=str)
    parser.add_argument("-o", "--output", help="path to output file with extension", default=None)
    args = parser.parse_args(argv)

    # Build MDP, Agent objects
    MDP = su.build_MDP(args.mdp)
    Agent = su.build_agent(args.algo, args.alpha, args.gamma, args.epsilon, args.learning,
                           action_cost={'N': 1, 'E': 1, 'S': args.back_cost, 'W': 1})

    input_file = pd.read_csv(args.input)
    paths = input_file["paths"]
    mazes = input_file["mazes"]
    probabilities = []

    # loop through episodes/paths, rebuilding the maze only when it changes
    current_maze = None
    for maze, path in zip(mazes, paths):
        if maze != current_maze:
            new_maze = su.build_maze(maze, MDP, args.reward, args.reset, args.deadend_cost)
            Agent.change_maze(new_maze)
            current_maze = maze

        probabilities.append(su.tether(path, Agent))

    # write to .csv file
    if args.output is not None:
        num_trials = len(probabilities)
        data = {"trials": range(num_trials), "probability": probabilities}
        df = pd.DataFrame(data)
        df.set_index("trials", inplace=True)
        df.to_csv(args.output)
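
The --input file that tether.py consumes must be a CSV with at least a "mazes" and a "paths" column, one row per episode; consecutive rows naming the same maze reuse the already-built maze. A minimal sketch of generating such a file with pandas follows; the cell values ("maze1", "NNEE", ...) are invented placeholders, since the excerpt does not show RLSim's actual maze names or path encoding:

import pandas as pd

# Hypothetical input for tether.py: column names match what the script
# reads, but the cell values are placeholders.
episodes = pd.DataFrame({
    "mazes": ["maze1", "maze1", "maze2"],  # maze used for each episode
    "paths": ["NNEE", "NESE", "NNWW"],     # action sequence for each episode
})
episodes.to_csv("input_actions.csv", index=False)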