def get_enac_experiment(case, minor=1):
    gen = case.generators

    profile = array([1.0, 1.0])
    maxSteps = len(profile)
    initialSigma = 0.0
    sigmaOffset = -4.0

    if minor == 1:
        decay = 0.999
        learningRate = 0.01 # (0.1-0.001, down to 1e-7 for RNNs, default: 0.1)
    elif minor == 2:
        decay = 0.997
        learningRate = 0.005
    elif minor == 3:
        decay = 0.999
        learningRate = 0.05
    elif minor == 4:
        decay = 0.999
        learningRate = 0.005
    else:
        raise ValueError("Invalid minor version: %d" % minor)

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType)

    experiment = pyreto.continuous.MarketExperiment([], [], market, profile)

    for g in gen[0:2]:
        learner = ENAC()
        # learner = Reinforce()

        # learner.gd.rprop = False
        # only relevant for BP
        learner.learningRate = learningRate
        # learner.gd.alpha = 0.0001
        # learner.gd.alphadecay = 0.9
        # learner.gd.momentum = 0.9
        # only relevant for RP
        # learner.gd.deltamin = 0.0001

        task, agent = get_continuous_task_agent([g], market, nOffer, markupMax,
                                                withholdMax, maxSteps, learner)

        learner.explorer = ManualNormalExplorer(agent.module.outdim,
                                                initialSigma, decay,
                                                sigmaOffset)

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    # Passive agent.
    task, agent = get_neg_one_task_agent(gen[2:3], market, nOffer, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
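# Hypothetical usage sketch for the factory above (not part of the original
# source). It assumes `case` is a pylon.Case and that pyreto's
# MarketExperiment follows PyBrain's EpisodicExperiment interface
# (doEpisodes); the number of training passes is an arbitrary choice.
experiment = get_enac_experiment(case, minor=2)
for _ in range(100):
    experiment.doEpisodes(1)        # one pass over the two-step profile
    for agent in experiment.agents:
        agent.learn()               # apply the accumulated ENAC update
        agent.reset()               # clear the stored episode history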
def build(self, direction, x, y):
    new_tako = tako.Tako(direction, x, y, self)
    for gen in range(len(self.strand_1)):
        self.strand_1[gen].read(self.strand_2[gen], new_tako)
    # take care of net & make agent
    new_tako.net.sortModules()
    learner = ENAC()
    new_tako.agent = LearningAgent(new_tako.net, learner)
    return new_tako
def get_enac_experiment(case):
    locAdj = "ac"
    initialSigma = 0.0
    sigmaOffset = -5.0
    decay = 0.995
    learningRate = 0.005

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType,
                                locationalAdjustment=locAdj)

    experiment = \
        pyreto.continuous.MarketExperiment([], [], market, branchOutages=None)

    portfolios, sync_cond = get_portfolios3()

    for gidx in portfolios:
        g = [case.generators[i] for i in gidx]

        learner = ENAC()
        learner.learningRate = learningRate

        task, agent = get_continuous_task_agent(g, market, nOffer, markupMax,
                                                withholdMax, maxSteps, learner)

        learner.explorer = ManualNormalExplorer(agent.module.outdim,
                                                initialSigma, decay,
                                                sigmaOffset)

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    # Have an agent bid at marginal cost (0.0) for the sync cond.
    passive = [case.generators[i] for i in sync_cond]
    passive[0].p_min = 0.001 # Avoid invalid offer withholding.
    passive[0].p_max = 0.002

    task, agent = get_neg_one_task_agent(passive, market, 1, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
def createStupidAnimat(self, x, y):
    f = open('neuro.net', 'r')
    trained_net = pickle.load(f)
    learner = ENAC()
    learner._setLearningRate(0.03)
    brain = BrainController(trained_net)

    # Place the new animat within 3 cells of (x, y), clipped to the 80x80 grid.
    new_x = x + random.randint(-3, 3)
    if new_x > 79:
        new_x = 79
    elif new_x < 0:
        new_x = 0
    new_y = y + random.randint(-3, 3)
    if new_y > 79:
        new_y = 79
    elif new_y < 0:
        new_y = 0

    sa = StupidAnimat(new_x, new_y, brain, learner, self)
    sa.brain.validate_net()
    world = World(self)
    task = InteractTask(world, sa)
    self.stupid_animats.append(sa)
    self.tasks.append(task)
# Create a case environment specifying the load profile.
env = pyreto.CaseEnvironment(case, p1h)

# Create an episodic cost minimisation task.
task = pyreto.MinimiseCostTask(env)

# Create a network for approximating the agent's policy function that maps
# system demand to generator set-points.
nb = len([bus for bus in case.buses if bus.type == pylon.PQ])
ng = len([g for g in case.online_generators if g.bus.type != pylon.REFERENCE])
net = buildNetwork(nb, ng, bias=False)

# Create an agent and select an episodic learner.
#learner = Reinforce()
learner = ENAC()

#learner.gd.rprop = True
## only relevant for RP
#learner.gd.deltamin = 0.0001
##agent.learner.gd.deltanull = 0.05
## only relevant for BP
#learner.gd.alpha = 0.01
#learner.gd.momentum = 0.9

agent = LearningAgent(net, learner)

# Adjust some parameters of the NormalExplorer.
sigma = [50.0] * ng
learner.explorer.sigma = sigma
#learner.explorer.epsilon = 0.01 # default: 0.3
#learner.learningRate = 0.01 # (0.1-0.001, down to 1e-7 for RNNs)
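# The excerpt above ends once the agent is configured. A minimal, hypothetical
# continuation, assuming PyBrain's EpisodicExperiment and the usual
# doEpisodes()/learn()/reset() cycle (episode and update counts chosen
# arbitrarily), might look like this:
from pybrain.rl.experiments import EpisodicExperiment

experiment = EpisodicExperiment(task, agent)
for _ in range(100):
    experiment.doEpisodes(10)   # roll out a batch of episodes
    agent.learn()               # apply the ENAC gradient estimate
    agent.reset()               # clear the stored episode history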
if plotting:
    from pylab import draw, ion, title, plot, figure, clf #@UnresolvedImport
    ion()

# create environment
env = SimpleEnvironment()
env.setNoise(0.9)

# create task
task = MinimizeTask(env)

# create controller network (flat network)
net = buildNetwork(1, 1, bias=False)
net._setParameters(array([-11.]))

# create agent with controller and learner
agent = PolicyGradientAgent(net, ENAC())

# initialize parameters (variance)
agent.setSigma([-2.])

# learning options
agent.learner.alpha = 2.
# agent.learner.rprop = True
agent.actaspg = False

experiment = EpisodicExperiment(task, agent)

plots = zeros((1000, agent.module.paramdim + 1), float)

for updates in range(1000):
    agent.reset()

    # training step
    experiment.doEpisodes(10)
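    # The original excerpt cuts off here, inside the update loop. A
    # hypothetical continuation, assuming the standard PyBrain agent cycle
    # and that `plots` is meant to record the controller parameters after
    # each ENAC update:
    agent.learn()                               # apply the ENAC policy update
    plots[updates, :-1] = agent.module.params   # log the current parameters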
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import ENAC
import pickle
import time

# Create environment
sub_env = Environment(20, 20)
world = World(sub_env)

# Brain for the animat; the network has already been trained.
f = open('neuro.net', 'r')
trained_net = pickle.load(f)
brain = BrainController(trained_net)

# Learning method we use
#learner = PolicyGradientLearner()
learner = ENAC()
learner._setLearningRate(0.2)

# Create an animat
animat = StupidAnimat(trained_net, learner, sub_env)

# Establish a task
task = InteractTask(world, animat)
brain.validate_net()

experiment = Experiment(task, animat)

while True:
    experiment.doInteractions(10000)
    animat.learn()
    animat.reset()
    brain.validate_net()
    time.sleep(3)
from pybrain.rl.learners import ENAC
from pybrain.rl.experiments import EpisodicExperiment

batch = 50                        # number of samples per learning step
prnts = 4                         # number of learning steps after which results are printed
epis = int(4000 / batch / prnts)  # number of rollouts
numbExp = 10                      # number of experiments
et = ExTools(batch, prnts, kind="learner")  # tool for printing and plotting

for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()
    # create task
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options)
    agent = LearningAgent(net, ENAC())
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    # Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        state, action, reward = agent.learner.dataset.getSequence(
            agent.learner.dataset.getNumSequences() - 1)
        et.printResults(reward.sum(), runs, updates)
    et.addExps()
et.showExps()
case = Case(name="1Bus", buses=[bus1])

""" The market will clear submitted offers/bids and return dispatch info. """
mkt = SmartMarket(case)

agents = []
tasks = []
for g in bus1.generators:
    """ Create an environment for each agent with an asset and a market. """
    env = ParticipantEnvironment(g, mkt, n_offbids=2)

    """ Create a task for the agent to achieve. """
    task = ProfitTask(env)

    """ Build an artificial neural network for the agent. """
    net = buildNetwork(task.outdim, task.indim, bias=False, outputbias=False)
    # net._setParameters(array([9]))

    """ Create a learning agent with a learning algorithm. """
    agent = LearningAgent(module=net, learner=ENAC())

    """ Initialize parameters (variance). """
    # agent.setSigma([-1.5])

    """ Set learning options. """
    agent.learner.alpha = 2.0
    # agent.learner.rprop = True
    agent.actaspg = False

    # agent.disableLearning()

    agents.append(agent)
    tasks.append(task)

""" The Experiment will coordinate the interaction of the given agents and
    their associated tasks. """
experiment = MarketExperiment(tasks, agents, mkt)
experiment.setRenderer(ExperimentRenderer())

""" Instruct the experiment to coordinate a set number of interactions. """
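# The original excerpt ends at the docstring above, before the call is made.
# A minimal hypothetical completion, assuming MarketExperiment follows
# PyBrain's Experiment interface and using an arbitrary interaction count:
experiment.doInteractions(number=24)    # each interaction: offers submitted,
                                        # market cleared, rewards returned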