Example #1
0
    def testValueBased(self):
        """ Test a value-based learner. """
        market = SmartMarket(self.case)
        experiment = MarketExperiment([], [], market)
        n_states, n_actions = 10, 10
        for generator in self.case.generators:
            # One discrete environment/task/agent triple per generator asset.
            environment = DiscreteMarketEnvironment([generator], market)
            experiment.tasks.append(
                ProfitTask(environment, n_states, n_actions))
            # Tabular action-value function, optimistically initialised.
            value_table = ActionValueTable(n_states, n_actions)
            value_table.initialize(1.0)
            # Alternative learners: Q(), QLambda(); the explorer defaults
            # to epsilon-greedy.
            experiment.agents.append(LearningAgent(value_table, SARSA()))
        for _ in range(1000):
            # Interact with the environment in batch mode, then let each
            # agent learn from its accumulated history.
            experiment.doInteractions(24)
            for agent in experiment.agents:
                agent.learn()
                agent.reset()
Example #2
0
 def testValueBased(self):
     """ Test a value-based (tabular SARSA) learner on the market.
     """
     mkt = SmartMarket(self.case)
     exp = MarketExperiment([], [], mkt)
     # One environment/task/agent triple per generator in the case.
     for g in self.case.generators:
         env = DiscreteMarketEnvironment([g], mkt)
         dim_state, num_actions = (10, 10)
         exp.tasks.append(ProfitTask(env, dim_state, num_actions))
         # Tabular action-value function, optimistically initialised to 1.0.
         module = ActionValueTable(dim_state, num_actions)
         module.initialize(1.0)
         #            module = ActionValueNetwork(dimState=1, numActions=4)
         learner = SARSA()  #Q() QLambda()
         #            learner.explorer = BoltzmannExplorer() # default is e-greedy.
         exp.agents.append(LearningAgent(module, learner))
     # Run 1000 batches of 24 interactions, learning after each batch.
     for _ in range(1000):
         exp.doInteractions(24)  # interact with the env in batch mode
         for agent in exp.agents:
             agent.learn()
             agent.reset()
Example #3
0
File: rl.py — Project: ZiiCee/pylon
    """ Create an environment for each agent with an asset and a market. """
    env = ParticipantEnvironment(g, mkt, n_offbids=2)

    """ Create a task for the agent to achieve. """
    task = ProfitTask(env)

    """ Build an artificial neural network for the agent. """
    net = buildNetwork(task.outdim, task.indim, bias=False, outputbias=False)
    #    net._setParameters(array([9]))

    """ Create a learning agent with a learning algorithm. """
    agent = LearningAgent(module=net, learner=ENAC())
    """ Initialize parameters (variance). """
    #    agent.setSigma([-1.5])
    """ Set learning options. """
    agent.learner.alpha = 2.0
    # agent.learner.rprop = True
    agent.actaspg = False
    #    agent.disableLearning()

    agents.append(agent)
    tasks.append(task)

""" The Experiment will coordintate the interaction of the given agents and
their associated tasks. """
experiment = MarketExperiment(tasks, agents, mkt)
experiment.setRenderer(ExperimentRenderer())

""" Instruct the experiment to coordinate a set number of interactions. """
experiment.doInteractions(3)
Example #4
0
#    for t in ("state", "action", "reward"):
#        fileName = "%s-%s.table" % (a.name, t)
#        tmp_name = join(tableDir, fileName)
##        tmpfd, tmpName = tempfile.mkstemp(".table", prefix, tableDir)
##        os.close(tmpfd) # gets deleted
#        fd = file(tmpName, "w+b")
#        fd.write("# %s %s data - %s\n" % (a.name, t, timestr))
#        fd.close()
#        tableMap[t][a.name] = tmpName

# Execute interactions with the environment in batch mode.
t0 = time.time()  # wall-clock start, presumably for timing the run
x = 0             # interaction counter
batch = 2         # interactions per doInteractions() call
# NOTE(review): no ``x`` increment is visible in this chunk — the loop body
# is truncated here; confirm the counter advances further down.
while x <= 1000:
    experiment.doInteractions(batch)

    for i, agent in enumerate(experiment.agents):
        # Most recent (state, action, reward) sequence from agent history.
        s, a, r = agent.history.getSequence(agent.history.getNumSequences() -
                                            1)

        # Plot mean action and mean reward for this agent at step x.
        pl.addData(i, x, scipy.mean(a))
        pl2.addData(i, x, scipy.mean(r))

        # Append the new samples to the per-agent accumulated arrays.
        action, reward = agentMap[agent.name]
        agentMap[agent.name] = (scipy.r_[action,
                                         a.flatten()], scipy.r_[reward,
                                                                r.flatten()])

        #        for n, seq in (("state", s), ("action", a), ("reward", r)):
        #            tmpName = tableMap[n][agent.name]
Example #5
0
File: rl.py — Project: oosterden/pylon
# Construct a market for the case.
mkt = SmartMarket(case)

agents = []
tasks = []
for g in bus1.generators:
    # Create an environment for each agent with an asset and a market.
    env = ParticipantEnvironment(g, mkt, n_offbids=2)
    # Create a task for the agent to achieve.
    task = ProfitTask(env)
    # Build an artificial neural network for the agent, sized from the
    # task's observation/action dimensions.
    net = buildNetwork(task.outdim, task.indim, bias=False, outputbias=False)
    #    net._setParameters(array([9]))
    # Create a learning agent with a learning algorithm.
    agent = LearningAgent(module=net, learner=ENAC())
    # Initialize parameters (variance).
    #    agent.setSigma([-1.5])
    # Set learning options.
    agent.learner.alpha = 2.0
    # agent.learner.rprop = True
    agent.actaspg = False
    #    agent.disableLearning()

    agents.append(agent)
    tasks.append(task)

# The Experiment will coordinate the interaction of the given agents and
# their associated tasks.  (Real comments instead of no-op bare-string
# statements; also fixes the "coordintate" typo.)
experiment = MarketExperiment(tasks, agents, mkt)
experiment.setRenderer(ExperimentRenderer())

# Instruct the experiment to coordinate a set number of interactions.
experiment.doInteractions(3)
Example #6
0
File: auction.py — Project: Waqquas/pylon
#    for t in ("state", "action", "reward"):
#        fileName = "%s-%s.table" % (a.name, t)
#        tmp_name = join(tableDir, fileName)
##        tmpfd, tmpName = tempfile.mkstemp(".table", prefix, tableDir)
##        os.close(tmpfd) # gets deleted
#        fd = file(tmpName, "w+b")
#        fd.write("# %s %s data - %s\n" % (a.name, t, timestr))
#        fd.close()
#        tableMap[t][a.name] = tmpName

# Execute interactions with the environment in batch mode.
t0 = time.time()  # wall-clock start, presumably for timing the run
x = 0             # interaction counter
batch = 2         # interactions per doInteractions() call
# NOTE(review): no ``x`` increment is visible in this chunk — the loop body
# is truncated here; confirm the counter advances further down.
while x <= 1000:
    experiment.doInteractions(batch)

    for i, agent in enumerate(experiment.agents):
        # Most recent (state, action, reward) sequence from agent history.
        s,a,r = agent.history.getSequence(agent.history.getNumSequences() - 1)

        # Plot mean action and mean reward for this agent at step x.
        pl.addData(i, x, scipy.mean(a))
        pl2.addData(i, x, scipy.mean(r))

        # Append the new samples to the per-agent accumulated arrays.
        action, reward = agentMap[agent.name]
        agentMap[agent.name] = (scipy.r_[action, a.flatten()],
                                scipy.r_[reward, r.flatten()])

#        for n, seq in (("state", s), ("action", a), ("reward", r)):
#            tmpName = tableMap[n][agent.name]
#            fd = file(tmpName, "a+b")
#            for i in range(batch):