Example #1
def _make_experiment(exp_id=1, path="./Results/Tmp/test_SystemAdministrator"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """

    ## Domain:
    domain = SystemAdministrator()

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = IncrementalTabular(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    agent = SARSA(representation=representation,
                  policy=policy,
                  discount_factor=domain.discount_factor,
                  learn_rate=0.1)
    checks_per_policy = 2
    max_steps = 20
    num_policy_checks = 2
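    # Experiment(**locals()) forwards every local name defined above
    # (exp_id, path, domain, agent, max_steps, ...) as keyword arguments.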
    experiment = Experiment(**locals())
    return experiment
Example #2
def make_experiment(exp_id=1, path="."):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    ## Domain:
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = IncrementalTabular(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    opt["agent"] = SARSA(representation=representation,
                         policy=policy,
                         discount_factor=domain.discount_factor,
                         initial_learn_rate=0.1)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
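A minimal driver sketch (not part of the example above; it assumes rlpy's standard Experiment interface with run, plot, and save, plus the visualize_* flags that appear in later examples) would execute such a module like this:

if __name__ == "__main__":
    # Hypothetical driver; the flags mirror the Experiment.run calls in the
    # examples further down.
    experiment = make_experiment(exp_id=1)
    experiment.run(visualize_steps=False,      # no step-by-step domain rendering
                   visualize_learning=False,   # no visualization while learning
                   visualize_performance=1)    # visualize the policy-check runs
    experiment.plot()
    experiment.save()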
Example #3
    def runTIRL(self, N=5, w=2, pruning=0.5):
        opt = deepcopy(self.opt_template)
        dist = self.getIRLDist(N=N)
        ac = self.getTSCWaypoints(N, w, pruning)
        domain = self.createStateDomain(
            waypoints=ac,
            rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.rewardIRL(
                x, y, z, w, dist))
        opt["domain"] = domain
        representation = IncrementalTabular(
            domain, discretization=self.env_template["discretization"])
        policy = eGreedy(representation, epsilon=self.env_template["exp"])
        opt["agent"] = Q_Learning(representation=representation,
                                  policy=policy,
                                  discount_factor=domain.discount_factor,
                                  initial_learn_rate=0.1,
                                  learn_rate_decay_mode="boyan",
                                  boyan_N0=100,
                                  lambda_=0.)

        experiment = Experiment(**opt)
        experiment.run(visualize_steps=False,
                       performance_domain=self.createStateDomain(
                           waypoints=self.env_template["consumable"]),
                       visualize_learning=False,
                       visualize_performance=0)
        experiment.save()

        return np.max(experiment.result["return"]), np.sum(
            experiment.result["return"])
Example #4
    def runIRL(self, N=5):
        opt = deepcopy(self.opt_template)

        dist = self.getIRLDist(N=N)
        bdist = self.getIRLDist(N=N, rand=True)

        #print dist-bdist

        domain = self.createMarkovDomain(
            rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.
            maxEntReward(x, y, z, w, dist - bdist))
        opt["domain"] = domain

        representation = IncrementalTabular(
            domain, discretization=self.env_template["discretization"])
        policy = eGreedy(representation, epsilon=self.env_template["exp"])
        opt["agent"] = Q_Learning(representation=representation,
                                  policy=policy,
                                  discount_factor=domain.discount_factor,
                                  initial_learn_rate=0.1,
                                  learn_rate_decay_mode="boyan",
                                  boyan_N0=100,
                                  lambda_=0.)

        performance_domain = self.createMarkovDomain()

        experiment = Experiment(**opt)
        experiment.run(visualize_steps=False,
                       performance_domain=performance_domain,
                       visualize_learning=False,
                       visualize_performance=0)
        experiment.save()

        return np.max(experiment.result["return"]), np.sum(
            experiment.result["return"])
Example #5
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=120,
                    initial_learn_rate=.06,
                    discretization=50):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1

    domain = IntruderMonitoring()
    opt["domain"] = domain
    representation = IncrementalTabular(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=0.9,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #6
def test_cell_expansion():
    """ Ensure start with 0 cells, add one for each state uniquely. """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    rep = IncrementalTabular(domain, discretization=100)
    assert rep.features_num == 0  # start with 0 cells
    sOrigin = np.array([0, 0])
    s2 = np.array([1, 2])
    terminal = False  # nonterminal state
    a = 1  # arbitrary action

    # Expect to add feats for these newly seen states
    numAdded = rep.pre_discover(sOrigin, terminal, a, s2, terminal)
    assert numAdded == 2
    assert rep.features_num == 2
    phiVecOrigin = rep.phi(sOrigin, terminal)
    phiVec2 = rep.phi(s2, terminal)
    assert sum(phiVecOrigin) == 1
    assert sum(phiVec2) == 1
    phiVecOrigin2 = rep.phi(np.array([0, 0]), terminal=False)
    assert rep.features_num == 2  # didn't duplicate the feature
    assert sum(phiVecOrigin2) == 1

    # Make sure we don't duplicate feats anywhere
    numAdded = rep.pre_discover(np.array([0, 0]), terminal, a, s2, terminal)
    assert numAdded == 0
    assert rep.features_num == 2
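As a standalone sketch of the behaviour this test checks (assuming rlpy's GridWorld.default_map_dir attribute and the pre_discover signature used above), IncrementalTabular only allocates a feature for states it has not seen before:

import os
import numpy as np
from rlpy.Domains import GridWorld
from rlpy.Representations import IncrementalTabular

# Minimal sketch: the feature count grows only when unseen states are discovered.
domain = GridWorld(mapname=os.path.join(GridWorld.default_map_dir, "4x5.txt"))
rep = IncrementalTabular(domain)
for s in (np.array([0, 0]), np.array([1, 2]), np.array([0, 0])):
    rep.pre_discover(s, False, 0, s, False)  # revisiting [0, 0] adds nothing
print(rep.features_num)  # 2: one feature per unique state seen so far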
Example #7
def make_experiment(exp_id=1, path="./results/ITab"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    # Domain:
    domain = hack_domain.HackDomain()
    opt["domain"] = domain

    # Representation
    global representation
    representation = IncrementalTabular(domain, discretization=20)
    representation = representation_pickle(representation, action=1)
    opt["path"] = "./results/ITab"
    """
    representation = RBF(domain, num_rbfs=int(206.),
                         resolution_max=25., resolution_min=25.,
                         const_feature=False, normalize=True, seed=exp_id)
    opt["path"] = "./results/RBF"
    """

    # Policy
    policy = eGreedy(representation, epsilon=0.2)

    # Agent
    global agent
    agent = SARSA(representation=representation,
                  policy=policy,
                  discount_factor=domain.discount_factor,
                  initial_learn_rate=0.1,
                  learn_rate_decay_mode="boyan",
                  boyan_N0=100,
                  lambda_=0.4)
    agent = agent_pickle(agent, action=1)
    opt["agent"] = agent
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 5000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
Example #8
    def getIRLTDist(self, waypoints, N=5, rand=False):
        sd = self.createStateDomain(self.env_template["consumable"])
        representation = IncrementalTabular(
            sd, discretization=self.env_template["discretization"])
        policy = eGreedy(representation, epsilon=self.env_template["exp"])

        if not rand:
            d = GoalPathPlanner(sd,
                                representation,
                                policy,
                                steps=self.opt_template["max_steps"])
        else:
            d = GoalPathPlanner(sd, representation, policy, steps=100)

        trajs = d.generateTrajectories(N=N)
        return calculateStateTemporalDist(np.shape(sd.map), trajs, waypoints)
Example #9
def generate_multinomial_experiment(exp_id,
                                    agent_paths,
                                    path,
                                    unique=True,
                                    expdomain=None):
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path

    opt["exp_id"] = exp_id
    opt["max_steps"] = 8000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1

    agents = load_all_agents(agent_paths,
                             pretrained=True,
                             load_confidence=True)

    for a in agents:
        assert type(a.policy).__name__ == "DynamicsConfidenceGreedy"
        a.policy.epsilon = 0

    if expdomain:
        domain = expdomain(mapname=mapname, noise=0.1)
    else:
        domain = GridWorldMixed(mapname=mapname, noise=0.1)
    representation = IncrementalTabular(domain)
    policy = MAgentMultinomial(representation, agents)  # , tau=.1)

    print "$" * 10
    print "You are currently running {}".format(policy.__class__)
    opt['agent'] = NoopAgent(representation=representation, policy=policy)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])

    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))

    return experiment
Example #10
def grid_world1_sliding(exp_id=3, path="./Results/gridworld1"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20
    noise = 0.1
    exp = 0.3
    discretization = 400

    maze = os.path.join(ConsumableGridWorld.default_map_dir,
                        '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.
        slidingWindowEncoding(x, 3),
        noise=noise)

    opt["domain"] = domain

    # Representation
    representation = IncrementalTabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=exp)

    # Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
Example #11
def make_experiment(agent_paths=["./"],
                    sublearning=False,
                    exp_id=3,
                    path="./Results/Confidence2/",
                    temp=0.10517212721767522,
                    discount_factor=0.7,
                    lambda_=0.0,
                    init_state=None):
    opt = {}
    opt["path"] = os.path.join(path, get_time_str())
    opt["exp_id"] = exp_id
    opt["max_steps"] = 40000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 20
    # start_at = np.array([4, 6])

    agents = load_all_agents(agent_paths,
                             pretrained=True,
                             load_confidence=True)

    for i, a in enumerate(agents):
        # import ipdb; ipdb.set_trace()
        a.policy.epsilon = i * 0.02 + 0.1
        # a.learn_rate_decay_mode = 'boyan'
        # a.learn_rate = a.initial_learn_rate = 0.9
        # a.boyan_N0 = 3000
        a.learn_rate_decay_mode = 'dabney'

    domain = RCCarModified(noise=0.1, init_state=(-2, 0.8, 0, 2.5))
    representation = IncrementalTabular(domain)
    policy = MultiAgentConfidence(representation, agents, tau=.1)
    print "$" * 10
    print "You are currently running {}".format(policy.__class__)
    opt['agent'] = MetaAgent(representation=representation,
                             policy=policy,
                             learn_rate_decay_mode="const")
    opt['domain'] = domain
    experiment = Experiment(**opt)
    return experiment
Example #12
    def getTSCWaypoints(self, N=5, w=2, pruning=0.5):
        sd = self.createStateDomain(self.env_template["consumable"])
        representation = IncrementalTabular(
            sd, discretization=self.env_template["discretization"])
        policy = eGreedy(representation, epsilon=self.env_template["exp"])
        d = GoalPathPlanner(sd,
                            representation,
                            policy,
                            steps=self.opt_template["max_steps"])
        trajs = d.generateTrajectories(N=N)
        a = TransitionStateClustering(window_size=w)

        for t in trajs:
            N = len(t)
            demo = np.zeros((N, 2))
            for i in range(0, N):
                demo[i, :] = t[i][0:2]
            a.addDemonstration(demo)

        a.fit(normalize=False, pruning=pruning)
        dist = calculateStateDist(np.shape(sd.map), trajs)
        return discrete2DClustersToPoints(a.model, dist, radius=1)
Example #13
    def runSliding(self, k=3):
        opt = deepcopy(self.opt_template)
        domain = self.createSlidingDomain(k)
        opt["domain"] = domain
        representation = IncrementalTabular(
            domain, discretization=self.env_template["discretization"])
        policy = eGreedy(representation, epsilon=self.env_template["exp"])
        opt["agent"] = Q_Learning(representation=representation,
                                  policy=policy,
                                  discount_factor=domain.discount_factor,
                                  initial_learn_rate=0.1,
                                  learn_rate_decay_mode="boyan",
                                  boyan_N0=100,
                                  lambda_=0.)

        experiment = Experiment(**opt)
        experiment.run(visualize_steps=False,
                       visualize_learning=False,
                       visualize_performance=0)
        experiment.save()

        return np.max(experiment.result["return"]), np.sum(
            experiment.result["return"])
Example #14
def grid_world1_trb(exp_id=6, path="./Results/gridworld1"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20
    noise = 0.1
    exp = 0.3
    discretization = 20

    # Domain:
    maze = os.path.join(ConsumableGridWorld.default_map_dir,
                        '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        binary=True,
        noise=noise)
    #domain = Pinball(noise=0.3)

    # Representation
    representation = Tabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=0.3)

    d = GoalPathPlanner(domain, representation, policy)
    trajs = d.generateTrajectories(N=5)
    a = TransitionStateClustering(window_size=2)
    for t in trajs:
        N = len(t)
        demo = np.zeros((N, 2))
        for i in range(0, N):
            demo[i, :] = t[i][0:2]
        a.addDemonstration(demo)
    a.fit(normalize=False, pruning=0.5)
    dist = calculateStateDist((10, 7), trajs)
    ac = discrete2DClustersToPoints(a.model, dist, radius=1)

    #ac = [(round(a.means_[0][0]),round(a.means_[0][1])) for a in a.model]

    print(ac)

    #reinitialize
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, ac),
        noise=noise,
        binary=True)
    representation = IncrementalTabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.3)
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    opt["domain"] = domain

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
Example #15
def make_experiment(agent_paths=["./"],
                    pretrained=False,
                    sublearning=False,
                    yaml_file=None,
                    exp_id=3,
                    path="./Results/TestVoting/",
                    init_state=None):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 80000
    opt["num_policy_checks"] = 40
    opt["checks_per_policy"] = 10
    # start_at = np.array([4, 6])

    # Logging

    # Domain:
    # MAZE                = '/Domains/GridWorldMaps/1x3.txt'
    domain = RCCarModified(noise=0.1,
                           # random_start=True,
                           # init_state=init_state
                           )

    # agent_1 = loadagent("QL") # most likely preloaded
    # agent_2 = loadagent("SARSA")
    # agent_3 = loadagent("NAC")
    # agents = [agent_1, agent_2, agent_3]
    agents = load_all_agents(agent_paths,
                             pretrained=pretrained,
                             yaml_file=yaml_file)

    for i, a in enumerate(agents):
        # import ipdb; ipdb.set_trace()
        a.policy.epsilon = i * 0.03 + 0.1

    # import ipdb; ipdb.set_trace()
    representation = IncrementalTabular(domain)  #This doesn't matter
    policy = MultiAgentVoting(representation, agents, tau=.7)
    opt['agent'] = MetaAgent(
        representation=representation,
        policy=policy,
    )
    opt['domain'] = domain
    experiment = Experiment(**opt)

    #Seeding to match standard experiment
    for agent in agents:
        agent.random_state = np.random.RandomState(
            experiment.randomSeeds[experiment.exp_id - 1])
        agent.init_randomization()
        # agent.representation.random_state = np.random.RandomState(
        #     experiment.randomSeeds[experiment.exp_id - 1])
        # agent.representation.init_randomization() #init_randomization is called on instantiation
        agent.policy.random_state = np.random.RandomState(
            experiment.randomSeeds[experiment.exp_id - 1])
        agent.policy.init_randomization()

    for i, a in enumerate(agents):
        # import ipdb; ipdb.set_trace()
        print(a.policy.epsilon)

    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])

    param_path = os.path.join(agent_paths[0], yaml_file)
    shutil.copy(param_path, path_join("params.yml"))
    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))

    # import ipdb; ipdb.set_trace()

    return experiment