def initGraphics(self, ymax=10, xmax=-1):
    """Open the interactive graphics window and return the plot handle.

    :param ymax: upper limit of the y axis (default 10).
    :param xmax: upper limit of the x axis; any negative value (the
        default sentinel -1) means "use self.maxepochs".
    :return: the MultilinePlotter instance, also stored as self.Graph.
    """
    # Resolve the sentinel before building the window.
    xlimit = xmax if xmax >= 0 else self.maxepochs
    figure(figsize=[12, 8])
    ion()
    draw()
    graph = MultilinePlotter(xlim=[0, xlimit], ylim=[0, ymax])
    graph.setLineStyle([0, 1], linewidth=2)
    self.Graph = graph
    return self.Graph
learner.explorer.sigma = sigma #learner.explorer.epsilon = 0.01 # default: 0.3 #learner.learningRate = 0.01 # (0.1-0.001, down to 1e-7 for RNNs) # Alternatively, use blackbox optimisation. #learner = HillClimber(storeAllEvaluations=True) ##learner = CMAES(storeAllEvaluations=True) ##learner = FEM(storeAllEvaluations=True) ##learner = ExactNES(storeAllEvaluations=True) ##learner = PGPE(storeAllEvaluations=True) #agent = OptimizationAgent(net, learner) # Prepare for plotting. pylab.figure() #figsize=(16,8)) pylab.ion() plot = MultilinePlotter(autoscale=1.1, xlim=[0, nf], ylim=[0, 1]) # Read ideal system cost and set-point values determined using OPF. f_dc = scipy.io.mmread("../data/fDC.mtx").flatten() f_ac = scipy.io.mmread("../data/fAC.mtx").flatten() Pg_dc = scipy.io.mmread("../data/PgDC.mtx") Pg_ac = scipy.io.mmread("../data/PgAC.mtx") Qg_ac = scipy.io.mmread("../data/QgAC.mtx") rday = range(nf) for i in range(len(case.online_generators)): plot.setData(i, rday, numpy.zeros(nf)) plot.setData(3, rday, f_dc[:nf]) plot.setData(4, rday, f_ac[:nf]) plot.setData(5, rday, numpy.zeros(nf)) # reward #plot.setData(6, rday, Pg_ac[:nf] * 10)
# Momentum term for the learner's gradient-descent module.
learner.gd.momentum = 0.9

# Wrap the network in a learning agent; actaspg=False means actions are
# taken from the module directly rather than via the PG wrapper.
agent = LearningAgent(net, learner)
agent.actaspg = False

# create experiment
experiment = EpisodicExperiment(task, agent)

# print weights at beginning
print(agent.module.params)

# Collected per-batch mean rewards; optional live plot of the learning curve.
rewards = []
if useGraphics:
    figure()
    ion()
    pl = MultilinePlotter(autoscale=1.2, xlim=[0, 50], ylim=[0, 1])
    pl.setLineStyle(linewidth=2)

# queued version
# experiment._fillQueue(30)
# while True:
#     experiment._stepQueueLoop()
#     # rewards.append(mean(agent.history.getSumOverSequences('reward')))
#     print agent.module.getParameters(),
#     print mean(agent.history.getSumOverSequences('reward'))
#     clf()
#     plot(rewards)

# episodic version
x = 0
batch = 30 #number of samples per gradient estimate (was: 20; more here due to stochastic setting)
module.initialize(1.0) # learner = SARSA(gamma=0.9) learner = Q() # learner = QLambda() # learner.explorer = BoltzmannExplorer() # default is e-greedy. agent = LearningAgent(module, learner) agent.name = g.name experiment.tasks.append(task) experiment.agents.append(agent) # Prepare for plotting. pylab.figure(1) #figsize=(16,8)) pylab.ion() pl = MultilinePlotter(autoscale=1.1, xlim=[0, 24], ylim=[0, 1], maxLines=len(experiment.agents)) pl.setLineStyle(linewidth=2) pl.setLegend([a.name for a in experiment.agents], loc='upper left') pylab.figure(2) pylab.ion() pl2 = MultilinePlotter(autoscale=1.1, xlim=[0, 24], ylim=[0, 1], maxLines=len(experiment.agents)) pl2.setLineStyle(linewidth=2) pylab.figure(3) pylab.ion() plc = MultilinePlotter(autoscale=1.1,