def build(self, direction, x, y):
    """Create a new Tako at (x, y) facing *direction* and wire up its brain.

    Each gene in ``strand_1`` is expressed by reading it against the
    gene at the same position in ``strand_2``, which populates the new
    Tako's neural net.  The net is then finalised and attached to an
    ENAC learning agent.

    Returns the fully-initialised Tako.
    """
    new_tako = tako.Tako(direction, x, y, self)
    # Express the genome: pair each gene in strand 1 with its partner in
    # strand 2.  The strands are parallel (same length by construction);
    # zip replaces the old index-based loop over range(len(strand_1)).
    for gene_1, gene_2 in zip(self.strand_1, self.strand_2):
        gene_1.read(gene_2, new_tako)
    # Take care of net & make agent: sortModules() finalises the network
    # topology before it can be used by the learner.
    new_tako.net.sortModules()
    learner = ENAC()
    new_tako.agent = LearningAgent(new_tako.net, learner)
    return new_tako
def createStupidAnimat(self, x, y):
    """Spawn a StupidAnimat near (x, y) driven by a pre-trained network.

    The network is unpickled from ``neuro.net``, wrapped in a
    BrainController with an ENAC learner, and the resulting animat plus
    its InteractTask are registered on this world's bookkeeping lists.
    """
    # NOTE(review): unpickling a file from disk — pickle is only safe on
    # trusted files.  Binary mode ('rb') is required for pickle data
    # (the old 'r' text mode corrupts it on Windows), and the context
    # manager fixes the original's never-closed file handle.
    with open('neuro.net', 'rb') as f:
        trained_net = pickle.load(f)
    learner = ENAC()
    learner._setLearningRate(0.03)
    brain = BrainController(trained_net)
    # Jitter the spawn point by up to 3 cells in each direction, clamped
    # to the 0..79 grid (replaces the original if/elif clamp chains).
    new_x = max(0, min(79, x + random.randint(-3, 3)))
    new_y = max(0, min(79, y + random.randint(-3, 3)))
    sa = StupidAnimat(new_x, new_y, brain, learner, self)
    sa.brain.validate_net()
    world = World(self)
    task = InteractTask(world, sa)
    self.stupid_animats.append(sa)
    self.tasks.append(task)
# Create a case environment specifying the load profile. env = pyreto.CaseEnvironment(case, p1h) # Create an episodic cost minimisation task. task = pyreto.MinimiseCostTask(env) # Create a network for approximating the agent's policy function that maps # system demand to generator set-points.. nb = len([bus for bus in case.buses if bus.type == pylon.PQ]) ng = len([g for g in case.online_generators if g.bus.type != pylon.REFERENCE]) net = buildNetwork(nb, ng, bias=False) # Create an agent and select an episodic learner. #learner = Reinforce() learner = ENAC() #learner.gd.rprop = True ## only relevant for RP #learner.gd.deltamin = 0.0001 ##agent.learner.gd.deltanull = 0.05 ## only relevant for BP #learner.gd.alpha = 0.01 #learner.gd.momentum = 0.9 agent = LearningAgent(net, learner) # Adjust some parameters of the NormalExplorer. sigma = [50.0] * ng learner.explorer.sigma = sigma #learner.explorer.epsilon = 0.01 # default: 0.3 #learner.learningRate = 0.01 # (0.1-0.001, down to 1e-7 for RNNs)
if plotting: from pylab import draw, ion, title, plot, figure, clf #@UnresolvedImport ion() # create environment env = SimpleEnvironment() env.setNoise(0.9) # create task task = MinimizeTask(env) # create controller network (flat network) net = buildNetwork(1, 1, bias=False) net._setParameters(array([-11.])) # create agent with controller and learner agent = PolicyGradientAgent(net, ENAC()) # initialize parameters (variance) agent.setSigma([-2.]) # learning options agent.learner.alpha = 2. # agent.learner.rprop = True agent.actaspg = False experiment = EpisodicExperiment(task, agent) plots = zeros((1000, agent.module.paramdim+1), float) for updates in range(1000): agent.reset() # training step experiment.doEpisodes(10)
from pybrain.rl.learners import ENAC
from pybrain.rl.experiments import EpisodicExperiment

batch=50  #number of samples per learning step
prnts=4   #number of learning steps after results are printed
epis=int(4000/batch/prnts) #number of rollouts
numbExp=10 #number of experiments
et = ExTools(batch, prnts, kind = "learner") #tool for printing and plotting

# Repeat the whole experiment numbExp times to average out run-to-run noise.
for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()
    # create task
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network: 4 state inputs -> 1 action output.
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options)
    agent = LearningAgent(net, ENAC())
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    #Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        # Report the reward of the most recent episode in the dataset.
        state, action, reward = agent.learner.dataset.getSequence(agent.learner.dataset.getNumSequences()-1)
        et.printResults(reward.sum(), runs, updates)
    et.addExps()
et.showExps()
case = Case(name="1Bus", buses=[bus1])

""" The market will clear submitted offers/bids and return dispatch info. """
mkt = SmartMarket(case)

# One (agent, task) pair per generator on the bus.
agents = []
tasks = []
for g in bus1.generators:
    """ Create an environment for each agent with an asset and a market. """
    env = ParticipantEnvironment(g, mkt, n_offbids=2)

    """ Create a task for the agent to achieve. """
    task = ProfitTask(env)

    """ Build an artificial neural network for the agent. """
    # Network maps task observations (outdim) to task actions (indim).
    net = buildNetwork(task.outdim, task.indim, bias=False, outputbias=False)
#    net._setParameters(array([9]))

    """ Create a learning agent with a learning algorithm. """
    agent = LearningAgent(module=net, learner=ENAC())

    """ Initialize parameters (variance). """
#    agent.setSigma([-1.5])

    """ Set learning options. """
    agent.learner.alpha = 2.0
#    agent.learner.rprop = True
    agent.actaspg = False

#    agent.disableLearning()

    agents.append(agent)
    tasks.append(task)

""" The Experiment will coordintate the interaction of the given agents and
their associated tasks. """
experiment = MarketExperiment(tasks, agents, mkt)
experiment.setRenderer(ExperimentRenderer())

""" Instruct the experiment to coordinate a set number of interactions. """