Example #1
 def initGraphics(self, ymax=10, xmax=-1):
     """ initialize the interactive graphics output window, and return a handle to the plot """
     if xmax<0:
         xmax = self.maxepochs
     figure(figsize=[12,8])
     ion()
     draw()
     #self.Graph = MultilinePlotter(autoscale=1.2 ) #xlim=[0, self.maxepochs], ylim=[0, ymax])
     self.Graph = MultilinePlotter(xlim=[0, xmax], ylim=[0, ymax])
     self.Graph.setLineStyle([0, 1], linewidth=2)
     return self.Graph
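A minimal usage sketch for the returned handle (the `trainer` object and its `trainEpoch` method are hypothetical; only `addData` and `update` are assumed from PyBrain's MultilinePlotter):

graph = trainer.initGraphics(ymax=5)
for epoch in range(trainer.maxepochs):
    trainErr, valErr = trainer.trainEpoch()  # hypothetical training step
    graph.addData(0, epoch, trainErr)        # line 0: training error
    graph.addData(1, epoch, valErr)          # line 1: validation error
    graph.update()                           # redraw the interactive window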
Example #2
learner.explorer.sigma = sigma
#learner.explorer.epsilon = 0.01 # default: 0.3
#learner.learningRate = 0.01 # (0.1-0.001, down to 1e-7 for RNNs)

# Alternatively, use blackbox optimisation.
#learner = HillClimber(storeAllEvaluations=True)
##learner = CMAES(storeAllEvaluations=True)
##learner = FEM(storeAllEvaluations=True)
##learner = ExactNES(storeAllEvaluations=True)
##learner = PGPE(storeAllEvaluations=True)
#agent = OptimizationAgent(net, learner)

# Prepare for plotting.
pylab.figure()  # figsize=(16, 8)
pylab.ion()
plot = MultilinePlotter(autoscale=1.1, xlim=[0, nf], ylim=[0, 1])

# Read ideal system cost and set-point values determined using OPF.
f_dc = scipy.io.mmread("../data/fDC.mtx").flatten()
f_ac = scipy.io.mmread("../data/fAC.mtx").flatten()
Pg_dc = scipy.io.mmread("../data/PgDC.mtx")
Pg_ac = scipy.io.mmread("../data/PgAC.mtx")
Qg_ac = scipy.io.mmread("../data/QgAC.mtx")

rday = range(nf)
for i in range(len(case.online_generators)):
    plot.setData(i, rday, numpy.zeros(nf))
plot.setData(3, rday, f_dc[:nf])
plot.setData(4, rday, f_ac[:nf])
plot.setData(5, rday, numpy.zeros(nf))  # reward
#plot.setData(6, rday, Pg_ac[:nf] * 10)
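The loop that fills these lines is not part of the excerpt; a minimal sketch, assuming an `agent` and an EpisodicExperiment-style `experiment` created earlier in the script:

for day in range(nf):
    experiment.doEpisodes(1)  # one episode per simulated day
    reward = numpy.mean(agent.history.getSumOverSequences('reward'))
    plot.addData(5, day, reward)  # append to the reward line seeded above
    plot.update()
    agent.learn()
    agent.reset()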
Example #3
learner.gd.momentum = 0.9

agent = LearningAgent(net, learner)
agent.actaspg = False

# create experiment
experiment = EpisodicExperiment(task, agent)

# print weights at beginning
print(agent.module.params)

rewards = []
if useGraphics:
    figure()
    ion()
    pl = MultilinePlotter(autoscale=1.2, xlim=[0, 50], ylim=[0, 1])
    pl.setLineStyle(linewidth=2)

# queued version
# experiment._fillQueue(30)
# while True:
#     experiment._stepQueueLoop()
#     # rewards.append(mean(agent.history.getSumOverSequences('reward')))
#     print(agent.module.getParameters(), end=' ')
#     print(mean(agent.history.getSumOverSequences('reward')))
#     clf()
#     plot(rewards)

# episodic version
x = 0
batch = 30  # number of samples per gradient estimate (was: 20; more here due to stochastic setting)
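The excerpt stops just before the training loop; a minimal continuation sketch, assuming the objects defined above:

while x < 50:
    experiment.doEpisodes(batch)  # collect `batch` episodes per gradient estimate
    agent.learn()
    r = mean(agent.history.getSumOverSequences('reward'))
    rewards.append(r)
    agent.reset()
    if useGraphics:
        pl.addData(0, x, r)  # append the running reward to line 0
        pl.update()
    x += 1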
Example #4
    module.initialize(1.0)
    # learner = SARSA(gamma=0.9)
    learner = Q()
    # learner = QLambda()
    # learner.explorer = BoltzmannExplorer()  # default is epsilon-greedy
    agent = LearningAgent(module, learner)

    agent.name = g.name
    experiment.tasks.append(task)
    experiment.agents.append(agent)

# Prepare for plotting.
pylab.figure(1)  # figsize=(16, 8)
pylab.ion()
pl = MultilinePlotter(autoscale=1.1,
                      xlim=[0, 24],
                      ylim=[0, 1],
                      maxLines=len(experiment.agents))
pl.setLineStyle(linewidth=2)
pl.setLegend([a.name for a in experiment.agents], loc='upper left')

pylab.figure(2)
pylab.ion()
pl2 = MultilinePlotter(autoscale=1.1,
                       xlim=[0, 24],
                       ylim=[0, 1],
                       maxLines=len(experiment.agents))
pl2.setLineStyle(linewidth=2)

pylab.figure(3)
pylab.ion()
plc = MultilinePlotter(autoscale=1.1,
                       xlim=[0, 24],
                       ylim=[0, 1],
                       maxLines=len(experiment.agents))
plc.setLineStyle(linewidth=2)
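The excerpt is truncated here; a minimal sketch of how such plotters might be fed, assuming one line per agent and an experiment that advances the market one interaction at a time (the `doInteractions` call is an assumption, since the experiment class is not shown):

for hour in range(24):
    experiment.doInteractions(1)  # assumed API: advance one interaction
    for i, agent in enumerate(experiment.agents):
        r = pylab.mean(agent.history.getSumOverSequences('reward'))
        pl.addData(i, hour, r)  # one line per agent in figure 1
    pl.update()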