Example #1
def make_experiment(exp_id=1, path="./Results/Experiments/HalfReward/" + getTimeStr() + "/"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    ## Domain:
    maze = os.path.join(GridWorldInter.default_map_dir, '11x11-RoomsSeg.txt') 
    domain = GridWorldInter(maze, noise=0.01)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation  = Tabular(domain, discretization=20)

    ## Policy
    policy = eGreedy(representation, epsilon=0.1)  # TODO: revert; epsilon lowered for now to limit exploration noise

    ## Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                   discount_factor=domain.discount_factor,
                       initial_learn_rate=0.3)
    opt["checks_per_policy"] = 50
    opt["max_steps"] = 12000
    opt["num_policy_checks"] = 20
    # experiment = ExperimentDelayed(**opt)
    experiment = Experiment(**opt)
    return experiment
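
For context, a minimal sketch of how a make_experiment function like this is typically driven, mirroring the __main__ blocks in Examples #8 and #11 (the run/plot/save calls are assumed to follow the same rlpy-style Experiment API used there):

if __name__ == '__main__':
    # Hypothetical driver script; exp_id seeds the RNGs, path receives all logs.
    experiment = make_experiment(exp_id=1)
    experiment.run(visualize_steps=False)  # learn without per-step rendering
    experiment.plot(save=True)             # write learning-curve plots to path
    experiment.save()                      # persist results for later analysis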
Example #2
def run_experiment_params(param_path='./params.yaml'):
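    # load_yaml presumably returns a dict; type() lifts its keys into class
    # attributes so they can be read with dot access (params.max_steps, ...).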
    params = type("Parameters", (), load_yaml(param_path))

    def goalfn(state, goal, radius=0.1):
        # Can be quickly modified to have a new radius per goal element
        position = state[:2]
        return np.linalg.norm(np.array(position) - np.array(goal)) < radius
    # Load domain
    def encode_trial():
        rewards = list(params.domain_params['goalArray'])
        encode = Encoding(rewards, goalfn)
        return encode.strict_encoding

    params.domain_params['goalfn'] = goalfn
    params.domain_params['encodingFunction'] = encode_trial()
    # params.domain_params['goalArray'] = params.domain_params['goalArray'][::4]
    domain = eval(params.domain)(**params.domain_params)
    # domain = eval(params.domain)()

    # Load Representation
    representation = eval(params.representation)(
        domain, **params.representation_params)
    policy = eval(params.policy)(
        representation, **params.policy_params)
    agent = eval(params.agent)(
        policy, representation,
        discount_factor=domain.discount_factor,
        **params.agent_params)

    opt = {}
    opt["exp_id"] = params.exp_id
    opt["path"] = params.results_path + getTimeStr() + "/"
    opt["max_steps"] = params.max_steps
    # opt["max_eps"] = params.max_eps

    opt["num_policy_checks"] = params.num_policy_checks
    opt["checks_per_policy"] = params.checks_per_policy

    opt["domain"] = domain
    opt["agent"] = agent

    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])

    shutil.copy(param_path, opt["path"] + "params.yml")
    shutil.copy(inspect.getfile(eval(params.domain)), opt["path"] + "domain.py")
    shutil.copy(inspect.getfile(inspect.currentframe()), opt["path"] + "exper.py")

    return eval(params.experiment)(**opt)
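
The YAML file itself is not shown in the source. As a hypothetical illustration, the dict that load_yaml would need to return only has to carry the keys the function above reads (class names are resolved with eval, so they are plain strings; the values below are borrowed from Example #1):

# Hypothetical params.yaml contents, shown as the loaded dict.
example_params = {
    "exp_id": 1,
    "results_path": "./Results/",
    "max_steps": 12000,
    "num_policy_checks": 20,
    "checks_per_policy": 50,
    "domain": "GridWorldInter",                 # resolved via eval()
    "domain_params": {"goalArray": [[1, 1], [9, 9]]},
    "representation": "Tabular",
    "representation_params": {"discretization": 20},
    "policy": "eGreedy",
    "policy_params": {"epsilon": 0.1},
    "agent": "Q_Learning",
    "agent_params": {"initial_learn_rate": 0.3},
    "experiment": "Experiment",
}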
Example #3
def run_experiment_params(param_path='./params.yaml'):
    params = type("Parameters", (), load_yaml(param_path))

    def goalfn(s, goal):
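        # Acrobot-style goal test: -cos(s[0]) - cos(s[1] + s[0]) is the height
        # of the second link's tip, so this fires once the tip clears `goal`.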
        return -np.cos(s[0]) - np.cos(s[1] + s[0]) > goal

    def allMarkovReward(ps, ga, sr, gr):
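        # Inferred from usage: ps = state trajectory, ga = goal array,
        # sr = step reward, gr = goal reward. Returns the goal reward when
        # the final state satisfies any goal, else the plain step reward.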
        r = sr
        last_state = ps[-1]
        if any([goalfn(last_state, g) for g in ga]):
            r = gr
        return r

    # Load domain
    def encode_trial():
        rewards = list(params.domain_params['goalArray'])
        print(rewards)
        encode = Encoding(rewards, goalfn)
        return encode.strict_encoding

    params.domain_params['goalfn'] = goalfn
    params.domain_params['rewardFunction'] = allMarkovReward
    params.domain_params['encodingFunction'] = encode_trial()
    # params.domain_params['goalArray'] = params.domain_params['goalArray'][::4]
    domain = eval(params.domain)(**params.domain_params)  # only for Acrobot

    # Load Representation
    representation = eval(params.representation)(
        domain, **params.representation_params)
    policy = eval(params.policy)(representation, **params.policy_params)
    agent = eval(params.agent)(policy,
                               representation,
                               discount_factor=domain.discount_factor,
                               **params.agent_params)

    opt = {}
    opt["exp_id"] = params.exp_id
    opt["path"] = params.results_path + getTimeStr() + "/"
    opt["max_steps"] = params.max_steps
    # opt["max_eps"] = params.max_eps

    opt["num_policy_checks"] = params.num_policy_checks
    opt["checks_per_policy"] = params.checks_per_policy

    opt["domain"] = domain
    opt["agent"] = agent

    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])

    shutil.copy(param_path, opt["path"] + "params.yml")
    shutil.copy(inspect.getfile(eval(params.domain)),
                opt["path"] + "domain.py")
    shutil.copy(inspect.getfile(inspect.currentframe()),
                opt["path"] + "exper.py")

    return eval(params.experiment)(**opt)
Example #4
def run_experiment_params(param_path="./params.yaml"):
    params = type("Parameters", (), load_yaml(param_path))

    def goalfn(s, goal):
        return -np.cos(s[0]) - np.cos(s[1] + s[0]) > goal

    def allMarkovReward(ps, ga, sr, gr):
        r = sr
        last_state = ps[-1]
        if any([goalfn(last_state, g) for g in ga]):
            r = gr
        return r

    # Load domain
    def encode_trial():
        rewards = list(params.domain_params["goalArray"])
        print(rewards)
        encode = Encoding(rewards, goalfn)
        return encode.strict_encoding

    params.domain_params["goalfn"] = goalfn
    params.domain_params["rewardFunction"] = allMarkovReward
    params.domain_params["encodingFunction"] = encode_trial()
    # params.domain_params['goalArray'] = params.domain_params['goalArray'][::4]
    domain = eval(params.domain)(**params.domain_params)  # only for acrobot

    # Load Representation
    representation = eval(params.representation)(domain, **params.representation_params)
    policy = eval(params.policy)(representation, **params.policy_params)
    agent = eval(params.agent)(policy, representation, discount_factor=domain.discount_factor, **params.agent_params)

    opt = {}
    opt["exp_id"] = params.exp_id
    opt["path"] = params.results_path + getTimeStr() + "/"
    opt["max_steps"] = params.max_steps
    # opt["max_eps"] = params.max_eps

    opt["num_policy_checks"] = params.num_policy_checks
    opt["checks_per_policy"] = params.checks_per_policy

    opt["domain"] = domain
    opt["agent"] = agent

    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])

    shutil.copy(param_path, opt["path"] + "params.yml")
    shutil.copy(inspect.getfile(eval(params.domain)), opt["path"] + "domain.py")
    shutil.copy(inspect.getfile(inspect.currentframe()), opt["path"] + "exper.py")

    return eval(params.experiment)(**opt)
Example #5
def run_experiment_params(param_path='./params.yaml'):
    params = type("Parameters", (), load_yaml(param_path))

    def goalfn(state, goal, radius=0.1):
        # Can be quickly modified to have a new radius per goal element
        position = state[:2]
        return (np.linalg.norm(np.array(position) - np.array(goal)) < radius)

    # Load domain
    def encode_trial():
        rewards = list(params.domain_params['goalArray'])
        encode = Encoding(rewards, goalfn)
        return encode.strict_encoding

    params.domain_params['goalfn'] = goalfn
    params.domain_params['encodingFunction'] = encode_trial()
    # params.domain_params['goalArray'] = params.domain_params['goalArray'][::4]
    domain = eval(params.domain)(**params.domain_params)
    # domain = eval(params.domain)()

    # Load Representation
    representation = eval(params.representation)(
        domain, **params.representation_params)
    policy = eval(params.policy)(representation, **params.policy_params)
    agent = eval(params.agent)(policy,
                               representation,
                               discount_factor=domain.discount_factor,
                               **params.agent_params)

    opt = {}
    opt["exp_id"] = params.exp_id
    opt["path"] = params.results_path + getTimeStr() + "/"
    opt["max_steps"] = params.max_steps
    # opt["max_eps"] = params.max_eps

    opt["num_policy_checks"] = params.num_policy_checks
    opt["checks_per_policy"] = params.checks_per_policy

    opt["domain"] = domain
    opt["agent"] = agent

    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])

    shutil.copy(param_path, opt["path"] + "params.yml")
    shutil.copy(inspect.getfile(eval(params.domain)),
                opt["path"] + "domain.py")
    shutil.copy(inspect.getfile(inspect.currentframe()),
                opt["path"] + "exper.py")

    return eval(params.experiment)(**opt)
Example #7
def generate(mappath, goals=2, save=False, savepath="reward_locations"):
    """
    Returns a numpy array of goal positions (without repeats nor ordering).
    Goal positions are picked from empty spots on map.

    :param mappath: Path to GridWorld Map
    :param goals: Number of goals placed
    :param save: Boolean for saving array for repeatability
    """

    map_arr = np.loadtxt(mappath, dtype=np.uint8)
    possibles = np.argwhere(map_arr == 0)
    sample_indices = np.random.choice(len(possibles), goals, replace=False)
    goal_positions = possibles[sample_indices]  # avoid shadowing the `goals` count
    if save:
        if not os.path.exists(savepath):
            os.mkdir(savepath)
        path = os.path.join(savepath, getTimeStr())
        np.savetxt(path, goal_positions, fmt="%1d")
    return goal_positions
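
A quick usage sketch (hypothetical call; the map filename is borrowed from Example #9's eval_map, and any 0/1 GridWorld map file would work):

# Sample three unique goal cells from the empty (0) cells of a map and
# persist them for reproducibility.
goal_array = generate("9x9-2Path0.txt", goals=3, save=True,
                      savepath="reward_locations")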
Example #8
    opt["max_steps"] = 5000
    opt["num_policy_checks"] = 50
    experiment = ExperimentSegment(**opt)
    return experiment


# ## DEBUG
# import ipdb; ipdb.set_trace()
# ########

if __name__ == '__main__':
    dirname, filename = os.path.split(os.path.abspath(__file__))

    experiment = make_experiment(
        1, path=dirname + "/Results/Experiments/9x9_2PathR1/" + getTimeStr() + "/")
    experiment.run(
        visualize_steps=False,  # should each learning step be shown?
        visualize_learning=False,  # show policy / value function?
        saveTrajectories=False)  # show performance runs?

    # visual.saveDomain(experiment, GridWorldInter, maze)
    # import ipdb; ipdb.set_trace()
    experiment.domain.showLearning(experiment.agent.representation)

    # experiment.plotTrials(save=True)
    experiment.saveWeights()
    experiment.plot(save=True)  #, y="reward")
    experiment.save()
Example #9
import numpy as np
import rlpy.Tools.results 
from rlpy.Tools import getTimeStr
import pickle
import json
import os


# TODO:
#  - Replace loads with default commands
#  - Remove pickle, replace with JSON
#  - Save parameters

EPS, ENV_NOISE, SEG_RW, STEP_REW = range(4)
get_AUC = lambda res: np.trapz(res, dx=1)  # dx=1: samples are unit-spaced; the scale does not affect comparisons
DEFAULT_PATH = "./TwoPathExperiments/RESETResults/" + getTimeStr() + "/"
 
def default_params(params=None):
    if params is None:
        params = {}

    if 'max_eps' not in params:
        params['max_eps'] = 400

    if 'num_policy_checks' not in params:
        params['num_policy_checks'] = 40

    # DO NOT CHANGE: the evaluation map is fixed across runs
    params['eval_map'] = "9x9-2Path0.txt"
    return params
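
A brief sketch of how these helpers compose (hypothetical values; assumes get_AUC is applied to a per-check return series produced by an experiment):

# Fill in defaults, overriding only the episode budget.
params = default_params({'max_eps': 200})
assert params['eval_map'] == "9x9-2Path0.txt"

# Area under a learning curve sampled at unit intervals.
returns = np.array([0.0, 0.5, 0.8, 1.0])
auc = get_AUC(returns)  # trapezoidal area with dx=1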
Example #10
    ## Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                   discount_factor=domain.discount_factor,
                       initial_learn_rate=0.3, learn_rate_decay_mode='const')
    opt["max_eps"] = max_eps
    opt["checks_per_policy"] = checks_per_policy
    opt["num_policy_checks"] = num_policy_checks

    experiment = ExperimentSegment(**opt)
    return experiment


if __name__ == '__main__':
    dirname, filename = os.path.split(os.path.abspath(__file__))

    experiment = make_experiment(
        1, path=dirname + "/ResetResults/Experiments/9x9_2PathR1/" + getTimeStr() + "/")
    experiment.run(visualize_steps=False,  # should each learning step be shown?
                   visualize_learning=False,  # show policy / value function?
                   saveTrajectories=False)  # show performance runs?

    # visual.saveDomain(experiment, GridWorldInter, maze)
    # import ipdb; ipdb.set_trace()
    experiment.domain.showLearning(experiment.agent.representation)


    # experiment.plotTrials(save=True)
    # experiment.saveWeights()
    experiment.plot(save=True, x="learning_episode") #, y="reward")
    # experiment.save()

Example #11
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                   discount_factor=domain.discount_factor,
                       initial_learn_rate=0.3)
    opt["checks_per_policy"] = 50
    opt["max_steps"] = 5000
    opt["num_policy_checks"] = 50
    experiment = ExperimentSegment(**opt)
    return experiment


# ## DEBUG
# import ipdb; ipdb.set_trace()
# ########


if __name__ == '__main__':
    experiment = make_experiment(1, path="./Results/Experiments/9x9_2Path0/" + getTimeStr() + "/")
    experiment.run(visualize_steps=False,  # should each learning step be shown?
                   visualize_learning=False,  # show policy / value function?
                   saveTrajectories=False)  # show performance runs?

    # visual.saveDomain(experiment, GridWorldInter, maze)
    # import ipdb; ipdb.set_trace()
    experiment.domain.showLearning(experiment.agent.representation)


    # experiment.plotTrials(save=True)
    experiment.plot(save=True) #, y="reward")
    experiment.save()