# Configure the learner's step sizes, then run the episodic learning loop and
# log the step count and first baseline entry to a randomly named data file.
# NOTE(review): `agent`, `task`, `runs`, `useGraphics` and `pl` are not defined
# in this fragment; they are presumably set up earlier in the script (`runs`
# looks like the index of an enclosing experiment loop) -- confirm.
agent.learner.gd.alpha = 0.3  # step size of \mu adaption
agent.learner.gdSig.alpha = 0.15  # step size of \sigma adaption
agent.learner.gd.momentum = 0.0
batch = 2  # number of samples per gradient estimate (was: 2; more here due to stochastic setting)

# create experiment
experiment = EpisodicExperiment(task, agent)
prnts = 1  # frequency of console output
# Floor division keeps the update count an integer, which range() requires
# regardless of the interpreter's division semantics.
epis = 2000 // batch // prnts

# actual roll outs
filename = "dataSPLA08NoRew" + repr(int(random.random() * 1000000.0)) + ".dat"
# The context manager closes the data file even when an episode raises.
with open(filename, 'wb') as wf:
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)  # execute #batch episodes
            agent.learn()  # learn from the gathered experience
            agent.reset()  # reset agent and environment

        # print out related data
        stp = (updates + 1) * batch * prnts
        # One pre-formatted argument reproduces the spacing of the former
        # multi-item print statement and parses on Python 2 and 3 alike.
        print("Step:  %s / %s Best:  %s Base:  %s Reward:  %s" % (
            runs,
            stp,
            agent.learner.best,
            agent.learner.baseline,
            agent.learner.reward,
        ))
        # The file is opened in binary mode, so hand it explicit ASCII bytes.
        wf.write((repr(stp) + "\n").encode("ascii"))
        wf.write((repr(agent.learner.baseline[0]) + "\n").encode("ascii"))
        if useGraphics:
            pl.addData(0, float(stp), agent.learner.baseline)
            pl.addData(1, float(stp), agent.learner.best)
            pl.update()

        #if updates/100 == float(updates)/100.0:
        #    saveWeights("walk.wgt", agent.learner.original)
# Seed the plot with initial series, style its lines, then create the
# experiment and start the weekly learning loop.
# NOTE(review): `case`, `plot`, `rday`, `nf`, `f_dc`, `f_ac`, `task` and
# `agent` are defined outside this fragment.

# One all-zero series of length nf per online generator.
for gen_idx in range(len(case.online_generators)):
    plot.setData(gen_idx, rday, numpy.zeros(nf))

plot.setData(3, rday, f_dc[:nf])
plot.setData(4, rday, f_ac[:nf])
plot.setData(5, rday, numpy.zeros(nf))  # reward
#plot.setData(6, rday, Pg_ac[:nf] * 10)

# Line colours, listed in series-index order (0..5).
for series, colour in enumerate(
        ("red", "green", "blue", "black", "gray", "orange")):
    plot.setLineStyle(series, color=colour)
#plot.setLineStyle(6, color="black")
plot.setLineStyle(linewidth=2)
plot.update()

# Give the agent its task in an experiment.
#experiment = EpisodicExperiment(task, agent)
experiment = pyreto.rlopf.OPFExperiment(task, agent)

weeks = 52 * 2
days = 5  # number of samples per gradient estimate
for week in range(weeks):
    all_rewards = experiment.doEpisodes(number=days)
    tot_reward = numpy.mean(agent.history.getSumOverSequences('reward'))
    # print learner._allEvaluations#[-:-1]

    # Plot the reward at each period averaged over the week.
    # Rewards are negated and arranged as one row per day, nf columns.
    # NOTE(review): the loop body appears to continue beyond this fragment.
    r = -1.0 * numpy.array(all_rewards).reshape(days, nf)
# Learning loop: roll out `batch` episodes at a time until the episode counter
# `x` reaches 5000, reporting the scaled mean reward after every batch.
# NOTE(review): `x`, `batch`, `experiment`, `agent`, `task`, `useGraphics`,
# `pl` and `mean` are defined outside this fragment.
while x < 5000:
    #while True:
    experiment.doEpisodes(batch)
    x += batch

    # Average of the history's 'reward' sums, scaled by the task's rewardscale.
    episode_returns = agent.history.getSumOverSequences('reward')
    reward = mean(episode_returns) * task.rewardscale
    if useGraphics:
        pl.addData(0, x, reward)
    print(agent.module.params)
    print(reward)
    #if reward > 3:
    #    pass

    agent.learn()
    agent.reset()
    if useGraphics:
        pl.update()

# NOTE(review): the history is saved only when more than one command-line
# argument is given, yet only the first one is used as the target path --
# confirm this is intended.
if len(sys.argv) > 2:
    agent.history.saveToFile(sys.argv[1], protocol=-1, arraysonly=True)
if useGraphics:
    pl.show(popup=True)

# To view what the simulation is doing at the moment, set the environment
# with True, go to pybrain/rl/environments/ode/ and start viewer.py
# (python-openGL must be installed, see PyBrain documentation).

## performance:
## experiment.doEpisodes(5) * 100 without weave:
##    real 2m39.683s
##    user 2m33.358s
##    sys  0m5.960s
## experiment.doEpisodes(5) * 100 with weave:
##    real 2m41.275s
# Seed the plot with initial series, style its lines, then create the
# experiment and start the weekly learning loop.
# NOTE(review): `case`, `plot`, `rday`, `nf`, `f_dc`, `f_ac`, `task` and
# `agent` are defined outside this fragment.

# One all-zero series of length nf per online generator.
for gen_idx in range(len(case.online_generators)):
    plot.setData(gen_idx, rday, numpy.zeros(nf))

plot.setData(3, rday, f_dc[:nf])
plot.setData(4, rday, f_ac[:nf])
plot.setData(5, rday, numpy.zeros(nf))  # reward
#plot.setData(6, rday, Pg_ac[:nf] * 10)

# Line colours, listed in series-index order (0..5).
for series, colour in enumerate(
        ("red", "green", "blue", "black", "gray", "orange")):
    plot.setLineStyle(series, color=colour)
#plot.setLineStyle(6, color="black")
plot.setLineStyle(linewidth=2)
plot.update()

# Give the agent its task in an experiment.
#experiment = EpisodicExperiment(task, agent)
experiment = pyreto.rlopf.OPFExperiment(task, agent)

weeks = 52 * 2
days = 5  # number of samples per gradient estimate
for week in range(weeks):
    all_rewards = experiment.doEpisodes(number=days)
    tot_reward = numpy.mean(agent.history.getSumOverSequences('reward'))
    # print learner._allEvaluations#[-:-1]

    # Plot the reward at each period averaged over the week.
    # Rewards are negated and arranged as one row per day, nf columns.
    # NOTE(review): the loop body appears to continue beyond this fragment.
    r = -1.0 * numpy.array(all_rewards).reshape(days, nf)
# Refresh the generator cost plot for every task's asset, redraw the three
# plotters, advance the counter and log the elapsed time.
# NOTE(review): `experiment`, `case`, `plc`, `pl`, `pl2`, `x`, `batch`,
# `logger` and `t0` are defined outside this fragment.
for j, task in enumerate(experiment.tasks):
    g = task.env.asset
    assert g.pcost_model == pylon.PW_LINEAR

    # Split the cost points (pairs) into separate coordinate lists.
    cost_x = [px for px, _ in g.p_cost]
    cost_y = [py for _, py in g.p_cost]
    plc.setData(j + len(case.generators), cost_x, cost_y)

    # Two-point series at x = g.p, running from 0.0 up to the total cost.
    marker_x = [g.p, g.p]
    marker_y = [0.0, g.total_cost()]
    plc.setData(j + 2 * len(case.generators), marker_x, marker_y)

# plotGenCost(case.generators)

# Select each figure in turn and redraw the plotter paired with it.
for fig_num, plotter in ((1, pl), (2, pl2), (3, plc)):
    pylab.figure(fig_num)
    plotter.update()

# NOTE(review): this increment presumably belongs to an enclosing loop whose
# header is not visible in this fragment -- confirm the nesting.
x += batch

logger.info("Example completed in %.3fs" % (time.time() - t0))

#from pyreto.util import sparklineData
#sparklineData(agent_map, "auctiondata.txt")

#tableZip = zipfile.ZipFile("%s.zip" % timestr, "w")
#for a in experiment.agents:
#    for t in ("state", "action", "reward"):
#        tmpName = tableMap[t][a.name]
# Refresh the generator cost plot for every task's asset, redraw the three
# plotters, advance the counter and log the elapsed time.
# NOTE(review): `experiment`, `case`, `plc`, `pl`, `pl2`, `x`, `batch`,
# `logger` and `t0` are defined outside this fragment.
for j, task in enumerate(experiment.tasks):
    g = task.env.asset
    assert g.pcost_model == pylon.PW_LINEAR

    # Split the cost points (pairs) into separate coordinate lists.
    cost_x = [px for px, _ in g.p_cost]
    cost_y = [py for _, py in g.p_cost]
    plc.setData(j + len(case.generators), cost_x, cost_y)

    # Two-point series at x = g.p, running from 0.0 up to the total cost.
    marker_x = [g.p, g.p]
    marker_y = [0.0, g.total_cost()]
    plc.setData(j + 2 * len(case.generators), marker_x, marker_y)

# plotGenCost(case.generators)

# Select each figure in turn and redraw the plotter paired with it.
for fig_num, plotter in ((1, pl), (2, pl2), (3, plc)):
    pylab.figure(fig_num)
    plotter.update()

# NOTE(review): this increment presumably belongs to an enclosing loop whose
# header is not visible in this fragment -- confirm the nesting.
x += batch

logger.info("Example completed in %.3fs" % (time.time() - t0))

#from pyreto.util import sparklineData
#sparklineData(agent_map, "auctiondata.txt")

#tableZip = zipfile.ZipFile("%s.zip" % timestr, "w")
#for a in experiment.agents:
#    for t in ("state", "action", "reward"):
#        tmpName = tableMap[t][a.name]
def _load_csv_samples(path):
    """Read a headered CSV file into feature rows and zero-based targets.

    Every column except the last is parsed as a float feature. The last
    column is parsed as an integer class label and shifted from one-based to
    zero-based.

    Args:
        path: Path of the CSV file; its first row supplies the column names.

    Returns:
        A ``(inputs, targets)`` pair: ``inputs`` is a list of float lists and
        ``targets`` is a list of single-element int lists, in file order.

    Raises:
        IOError/OSError: If the file cannot be opened.
        ValueError: If a cell cannot be parsed as a number.
    """
    # Read everything while the handle is open; it is closed on exit even if
    # parsing fails.
    with open(path, 'r') as handle:
        reader = csv.DictReader(handle)
        rows = [[record[name] for name in reader.fieldnames]
                for record in reader]
    inputs = [[float(cell) for cell in row[:-1]] for row in rows]
    targets = [[int(row[-1]) - 1] for row in rows]
    return inputs, targets


def runNN():
    """Train a softmax feed-forward classifier on CSV data and log the run.

    Loads the training and test CSV files, trains a network with one hidden
    layer for a fixed number of epochs using back-propagation, plots and
    prints the train/test error after every epoch, and finally writes the
    timings, configuration and error curves to a CSV log file.

    Returns:
        None.
    """
    # data set CSV
    _trncsv = '../ml_pybrain/case1_send1/inc_attr_30_train.csv'
    _tstcsv = '../ml_pybrain/case1_send1/inc_attr_30_test.csv'
    #_attrnum = 30  # the input dimension is derived from the data below
    _classnum = 2
    # num of hidden layer
    _hidden = 12
    # max epochs
    _maxepochs = 100
    # learn val
    _learningrate = 0.013  # default 0.01
    _momentum = 0.03  # default 0.0, tutorial 0.1
    _lrdecay = 1.0  # default 1.0
    _weightdecay = 0.01  # default 0.0, tutorial 0.01
    # save training log path
    _logpath = 'att30class2_2.log'
    # graph
    _graphymax = 15

    # Alternative (disabled) synthetic three-class data set:
    #   means = [(-1,0),(2,4),(3,1)]
    #   cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])]
    #   alldata = ClassificationDataSet(2, 1, nb_classes=3)
    #   for n in xrange(400):
    #       for klass in range(3):
    #           input = multivariate_normal(means[klass],cov[klass])
    #           alldata.addSample(input, [klass])
    #   #split 25% test, 75% train
    #   tstdata, trndata = alldata.splitWithProportion(0.25)

    # read csv (timed: covers parsing and data set construction)
    with Timer() as readt:
        # read train data
        train_inputs, train_targets = _load_csv_samples(_trncsv)
        # get input dim from the first training row
        _attrnum = len(train_inputs[0])
        trndata = ClassificationDataSet(_attrnum, 1, nb_classes=_classnum)
        trndata.setField('input', train_inputs)
        trndata.setField('target', train_targets)

        # read test data
        test_inputs, test_targets = _load_csv_samples(_tstcsv)
        tstdata = ClassificationDataSet(_attrnum, 1, nb_classes=_classnum)
        tstdata.setField('input', test_inputs)
        tstdata.setField('target', test_targets)

    # 1-of-k representation
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()

    # Each print passes one pre-formatted string, which reproduces the
    # spacing of the former print statements and parses on Python 2 and 3.
    print("Number of training patterns:  %s" % len(trndata))
    print("Input and output dimensions:  %s %s"
          % (trndata.indim, trndata.outdim))
    print("First sample (input, target, class):")
    print("%s %s %s" % (trndata['input'][0],
                        trndata['target'][0],
                        trndata['class'][0]))

    # build network and trainer
    fnn = buildNetwork(trndata.indim, _hidden, trndata.outdim,
                       outclass=SoftmaxLayer)
    #trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
    trainer = BackpropTrainer(fnn, dataset=trndata, verbose=True,
                              learningrate=_learningrate,
                              momentum=_momentum,
                              lrdecay=_lrdecay,
                              weightdecay=_weightdecay)

    # setup graph
    xmax = _maxepochs
    ymax = _graphymax
    figure(figsize=[12, 8])
    ion()
    draw()
    graph = MultilinePlotter(xlim=[1, xmax], ylim=[0, ymax])
    graph.setLineStyle([0, 1], linewidth=2)
    graph.setLabels(x='epoch', y='error %')
    graph.setLegend(['training', 'test'], loc='upper right')
    graph.update()
    draw()

    # setup storage training curve
    trainx = []
    trny = []
    tsty = []

    # start training
    with Timer() as traint:
        for i in range(_maxepochs):
            epoch = i + 1
            # train
            trainer.trainEpochs(1)
            # test by train/test
            trnresult = percentError(trainer.testOnClassData(),
                                     trndata['class'])
            tstresult = percentError(
                trainer.testOnClassData(dataset=tstdata), tstdata['class'])
            print("epoch: %4d   train error: %5.2f%%   test error: %5.2f%%"
                  % (trainer.totalepochs, trnresult, tstresult))
            # store curve
            trainx.append(epoch)
            trny.append(trnresult)
            tsty.append(tstresult)
            # draw graph
            graph.addData(0, epoch, trnresult)
            graph.addData(1, epoch, tstresult)
            graph.update()
            draw()

    # save log; the context manager flushes and closes the file
    with open(_logpath, 'w') as log_file:
        log = csv.writer(log_file)
        # timer
        log.writerow(['read', readt.secs])
        log.writerow(['training and test(sec)', traint.secs])
        # data prop
        log.writerow(['train data num', len(trndata)])
        log.writerow(['test data num', len(tstdata)])
        log.writerow(['in / out dim', trndata.indim, trndata.outdim])
        # config
        log.writerow(['hidden', _hidden])
        log.writerow(['maxepochs', _maxepochs])
        log.writerow(['learningrate', _learningrate])
        log.writerow(['momentum', _momentum])
        log.writerow(['lrdecay', _lrdecay])
        log.writerow(['weightdecay', _weightdecay])
        # curve: one row per epoch
        log.writerow(['epoch', 'train_err', 'test_err'])
        log.writerows(zip(trainx, trny, tsty))