Example #1
def get_enac_experiment(case, minor=1):
    gen = case.generators

    profile = array([1.0, 1.0])
    maxSteps = len(profile)
    initialSigma = 0.0
    sigmaOffset = -4.0

    if minor == 1:
        decay = 0.999
        learningRate = 0.01 # (0.1-0.001, down to 1e-7 for RNNs, default: 0.1)
    elif minor == 2:
        decay = 0.997
        learningRate = 0.005
    elif minor == 3:
        decay = 0.999
        learningRate = 0.05
    elif minor == 4:
        decay = 0.999
        learningRate = 0.005
    else:
        raise ValueError("Invalid minor version: %d" % minor)

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType)
    experiment = pyreto.continuous.MarketExperiment([], [], market, profile)

    for g in gen[0:2]:
        learner = ENAC()
        #learner = Reinforce()
        #learner.gd.rprop = False
        # only relevant for BP
        learner.learningRate = learningRate
        #learner.gd.alpha = 0.0001
        #learner.gd.alphadecay = 0.9
        #learner.gd.momentum = 0.9
        # only relevant for RP
        #learner.gd.deltamin = 0.0001

        task, agent = get_continuous_task_agent([g], market, nOffer, markupMax,
                                                withholdMax, maxSteps, learner)

        learner.explorer = ManualNormalExplorer(agent.module.outdim,
                                                initialSigma, decay,
                                                sigmaOffset)

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    # Passive agent.
    task, agent = get_neg_one_task_agent(gen[2:3], market, nOffer, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
Example #2
 def build(self, direction, x, y):
     new_tako = tako.Tako(direction, x, y, self)
     for gen in range(len(self.strand_1)):
         self.strand_1[gen].read(self.strand_2[gen], new_tako)
     # Take care of the net and make the learning agent.
     new_tako.net.sortModules()
     learner = ENAC()
     new_tako.agent = LearningAgent(new_tako.net, learner)
     return new_tako
Example #3
def get_enac_experiment(case):

    locAdj = "ac"
    initialSigma = 0.0
    sigmaOffset = -5.0
    decay = 0.995
    learningRate = 0.005

    market = pyreto.SmartMarket(case, priceCap=cap, decommit=decommit,
                                auctionType=auctionType,
                                locationalAdjustment=locAdj)

    experiment = \
        pyreto.continuous.MarketExperiment([], [], market, branchOutages=None)

    portfolios, sync_cond = get_portfolios3()

    for gidx in portfolios:
        g = [case.generators[i] for i in gidx]

        learner = ENAC()
        learner.learningRate = learningRate

        task, agent = get_continuous_task_agent(g, market, nOffer, markupMax,
                                                withholdMax, maxSteps, learner)

        learner.explorer = ManualNormalExplorer(agent.module.outdim,
                                                initialSigma, decay,
                                                sigmaOffset)

        experiment.tasks.append(task)
        experiment.agents.append(agent)

    # Have an agent bid at marginal cost (0.0) for the sync cond.
    passive = [case.generators[i] for i in sync_cond]
    passive[0].p_min = 0.001 # Avoid invalid offer withholding.
    passive[0].p_max = 0.002
    task, agent = get_neg_one_task_agent(passive, market, 1, maxSteps)
    experiment.tasks.append(task)
    experiment.agents.append(agent)

    return experiment
Example #4
 def createStupidAnimat(self, x, y):
     # Load the pre-trained network (pickle files should be opened in binary mode).
     with open('neuro.net', 'rb') as f:
         trained_net = pickle.load(f)
     learner = ENAC()
     learner._setLearningRate(0.03)
     brain = BrainController(trained_net)
     new_x = x + random.randint(-3, 3)
     if new_x > 79:
         new_x = 79
     elif new_x < 0:
         new_x = 0
     new_y = y + random.randint(-3, 3)
     if new_y > 79:
         new_y = 79
     elif new_y < 0:
         new_y = 0
     sa = StupidAnimat(new_x, new_y, brain, learner, self)
     sa.brain.validate_net()
     world = World(self)
     task = InteractTask(world, sa)
     self.stupid_animats.append(sa)
     self.tasks.append(task)
Example #5
# Create a case environment specifying the load profile.
env = pyreto.CaseEnvironment(case, p1h)

# Create an episodic cost minimisation task.
task = pyreto.MinimiseCostTask(env)

# Create a network for approximating the agent's policy function that maps
# system demand to generator set-points.
nb = len([bus for bus in case.buses if bus.type == pylon.PQ])
ng = len([g for g in case.online_generators if g.bus.type != pylon.REFERENCE])
net = buildNetwork(nb, ng, bias=False)

# Create an agent and select an episodic learner.
#learner = Reinforce()
learner = ENAC()
#learner.gd.rprop = True
## only relevant for RP
#learner.gd.deltamin = 0.0001
##agent.learner.gd.deltanull = 0.05
## only relevant for BP
#learner.gd.alpha = 0.01
#learner.gd.momentum = 0.9

agent = LearningAgent(net, learner)

# Adjust some parameters of the NormalExplorer.
sigma = [50.0] * ng
learner.explorer.sigma = sigma
#learner.explorer.epsilon = 0.01 # default: 0.3
#learner.learningRate = 0.01 # (0.1-0.001, down to 1e-7 for RNNs)
Example #6

if plotting:
    from pylab import draw, ion, title, plot, figure, clf #@UnresolvedImport
    ion()   

# create environment
env = SimpleEnvironment()
env.setNoise(0.9)
# create task
task = MinimizeTask(env)
# create controller network (flat network)
net = buildNetwork(1, 1, bias=False)
net._setParameters(array([-11.]))
# create agent with controller and learner
agent = PolicyGradientAgent(net, ENAC())
# initialize parameters (variance)
agent.setSigma([-2.])
# learning options
agent.learner.alpha = 2.
# agent.learner.rprop = True
agent.actaspg = False
experiment = EpisodicExperiment(task, agent)


plots = zeros((1000, agent.module.paramdim+1), float)

for updates in range(1000):
    agent.reset()
    # training step
    experiment.doEpisodes(10)
Example #7
from pybrain.rl.agents import LearningAgent
import pickle
import time

# Create environment
sub_env = Environment(20, 20)
world = World(sub_env)

# Brain for the animat; the network has already been trained.
# Load it from disk (pickle files should be opened in binary mode).
with open('neuro.net', 'rb') as f:
    trained_net = pickle.load(f)
brain = BrainController(trained_net)

# Learning method we use
#learner = PolicyGradientLearner()
learner = ENAC()
learner._setLearningRate(0.2)
# Create an animat
animat = StupidAnimat(trained_net, learner, sub_env)

# Establish a task
task = InteractTask(world, animat)

brain.validate_net()
experiment = Experiment(task, animat)
while True:
    experiment.doInteractions(10000)
    animat.learn()
    animat.reset()
    brain.validate_net()
    time.sleep(3)
Example #8
from pybrain.rl.learners import ENAC
from pybrain.rl.experiments import EpisodicExperiment

batch = 50    # number of samples per learning step
prnts = 4     # number of learning steps after which results are printed
epis = int(4000/batch/prnts)  # number of rollouts
numbExp = 10  # number of experiments
et = ExTools(batch, prnts, kind="learner")  # tool for printing and plotting

for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()    
    # create task
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options)
    agent = LearningAgent(net, ENAC())
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    #Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        state, action, reward = agent.learner.dataset.getSequence(
            agent.learner.dataset.getNumSequences() - 1)
        et.printResults(reward.sum(), runs, updates)
    et.addExps()
et.showExps()
Example #9
case = Case(name="1Bus", buses=[bus1])
""" The market will clear submitted offers/bids and return dispatch info. """
mkt = SmartMarket(case)

agents = []
tasks = []
for g in bus1.generators:
    """ Create an environment for each agent with an asset and a market. """
    env = ParticipantEnvironment(g, mkt, n_offbids=2)
    """ Create a task for the agent to achieve. """
    task = ProfitTask(env)
    """ Build an artificial neural network for the agent. """
    net = buildNetwork(task.outdim, task.indim, bias=False, outputbias=False)
    #    net._setParameters(array([9]))
    """ Create a learning agent with a learning algorithm. """
    agent = LearningAgent(module=net, learner=ENAC())
    """ Initialize parameters (variance). """
    #    agent.setSigma([-1.5])
    """ Set learning options. """
    agent.learner.alpha = 2.0
    # agent.learner.rprop = True
    agent.actaspg = False
    #    agent.disableLearning()

    agents.append(agent)
    tasks.append(task)
""" The Experiment will coordintate the interaction of the given agents and
their associated tasks. """
experiment = MarketExperiment(tasks, agents, mkt)
experiment.setRenderer(ExperimentRenderer())
""" Instruct the experiment to coordinate a set number of interactions. """