def testValueBased(self):
    """ Test value-based learner. """
    mkt = SmartMarket(self.case)
    exp = MarketExperiment([], [], mkt)
    for g in self.case.generators:
        env = DiscreteMarketEnvironment([g], mkt)
        dim_state, num_actions = (10, 10)
        exp.tasks.append(ProfitTask(env, dim_state, num_actions))
        module = ActionValueTable(dim_state, num_actions)
        module.initialize(1.0)
#        module = ActionValueNetwork(dimState=1, numActions=4)
        learner = SARSA() #Q() QLambda()
#        learner.explorer = BoltzmannExplorer() # default is e-greedy.
        exp.agents.append(LearningAgent(module, learner))

    for _ in range(1000):
        exp.doInteractions(24) # interact with the env in batch mode
        for agent in exp.agents:
            agent.learn()
            agent.reset()
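# A minimal sketch of the imports the test above relies on. The PyBrain
# paths are the standard ones; where the Pyreto market classes (SmartMarket,
# MarketExperiment, DiscreteMarketEnvironment, ProfitTask) live is
# repository-specific and not assumed here.
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import SARSA
from pybrain.rl.learners.valuebased import ActionValueTable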
""" Create an environment for each agent with an asset and a market. """ env = ParticipantEnvironment(g, mkt, n_offbids=2) """ Create a task for the agent to achieve. """ task = ProfitTask(env) """ Build an artificial neural network for the agent. """ net = buildNetwork(task.outdim, task.indim, bias=False, outputbias=False) # net._setParameters(array([9])) """ Create a learning agent with a learning algorithm. """ agent = LearningAgent(module=net, learner=ENAC()) """ Initialize parameters (variance). """ # agent.setSigma([-1.5]) """ Set learning options. """ agent.learner.alpha = 2.0 # agent.learner.rprop = True agent.actaspg = False # agent.disableLearning() agents.append(agent) tasks.append(task) """ The Experiment will coordintate the interaction of the given agents and their associated tasks. """ experiment = MarketExperiment(tasks, agents, mkt) experiment.setRenderer(ExperimentRenderer()) """ Instruct the experiment to coordinate a set number of interactions. """ experiment.doInteractions(3)
mkt = SmartMarket(case)

agents = []
tasks = []
for g in bus1.generators:
    """ Create an environment for each agent with an asset and a market. """
    env = ParticipantEnvironment(g, mkt, n_offbids=2)

    """ Create a task for the agent to achieve. """
    task = ProfitTask(env)

    """ Build an artificial neural network for the agent. """
    net = buildNetwork(task.outdim, task.indim,
                       bias=False, outputbias=False)
#    net._setParameters(array([9]))

    """ Create a learning agent with a learning algorithm. """
    agent = LearningAgent(module=net, learner=ENAC())

    """ Initialize parameters (variance). """
#    agent.setSigma([-1.5])

    """ Set learning options. """
    agent.learner.alpha = 2.0
#    agent.learner.rprop = True
    agent.actaspg = False
#    agent.disableLearning()

    agents.append(agent)
    tasks.append(task)

""" The Experiment will coordinate the interaction of the given agents and
    their associated tasks. """
experiment = MarketExperiment(tasks, agents, mkt)
experiment.setRenderer(ExperimentRenderer())

""" Instruct the experiment to coordinate a set number of interactions. """
experiment.doInteractions(3)
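# A minimal sketch of the imports the policy-gradient example above relies
# on. The PyBrain paths are the standard ones; the locations of the Pyreto
# classes (SmartMarket, ParticipantEnvironment, ProfitTask, MarketExperiment,
# ExperimentRenderer) are repository-specific and not assumed here.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import ENAC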
#    for t in ("state", "action", "reward"):
#        fileName = "%s-%s.table" % (a.name, t)
#        tmp_name = join(tableDir, fileName)
##        tmpfd, tmpName = tempfile.mkstemp(".table", prefix, tableDir)
##        os.close(tmpfd) # gets deleted
#        fd = file(tmpName, "w+b")
#        fd.write("# %s %s data - %s\n" % (a.name, t, timestr))
#        fd.close()
#        tableMap[t][a.name] = tmpName

# Execute interactions with the environment in batch mode.
t0 = time.time()
x = 0
batch = 2
while x <= 1000:
    experiment.doInteractions(batch)
    x += batch # advance the interaction count so the loop terminates

    for i, agent in enumerate(experiment.agents):
        s, a, r = agent.history.getSequence(
            agent.history.getNumSequences() - 1)
        pl.addData(i, x, scipy.mean(a))
        pl2.addData(i, x, scipy.mean(r))

        action, reward = agentMap[agent.name]
        agentMap[agent.name] = (scipy.r_[action, a.flatten()],
                                scipy.r_[reward, r.flatten()])

#        for n, seq in (("state", s), ("action", a), ("reward", r)):
#            tmpName = tableMap[n][agent.name]
#            fd = file(tmpName, "a+b")
#            for i in range(batch):
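# A minimal sketch, assuming the batch loop above has run to completion, of
# how the elapsed time recorded in t0 and the per-agent series accumulated
# in agentMap might be summarised; the report format is illustrative only.
elapsed = time.time() - t0
print("Completed %d interactions in %.1fs." % (x, elapsed))
for name, (action, reward) in agentMap.items():
    print("%s: mean action %.3f, mean reward %.3f" %
          (name, scipy.mean(action), scipy.mean(reward)))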