# print agent.module.getParameters(),
# print mean(agent.history.getSumOverSequences('reward'))
# clf()
# plot(rewards)

# episodic version
x = 0
batch = 30  # number of samples per gradient estimate (was: 20; more here due to the stochastic setting)
while x < 5000:  # or simply: while True
    experiment.doEpisodes(batch)
    x += batch
    reward = mean(agent.history.getSumOverSequences('reward')) * task.rewardscale
    if useGraphics:
        pl.addData(0, x, reward)
    print(agent.module.params)
    print(reward)
    agent.learn()
    agent.reset()
    if useGraphics:
        pl.update()

if len(sys.argv) > 2:
    agent.history.saveToFile(sys.argv[1], protocol=-1, arraysonly=True)

if useGraphics:
    pl.show(popup=True)

# To view what the simulation is doing at the moment, set the environment's
# render flag to True, go to pybrain/rl/environments/ode/ and start viewer.py
# (python-opengl must be installed; see the PyBrain documentation).
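# The loop above assumes `task`, `agent`, and `experiment` were built in the
# elided setup. A minimal sketch of the usual PyBrain wiring follows; the ENAC
# learner and the bias-free network are assumptions, not necessarily what the
# original script used:
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import ENAC
from pybrain.rl.experiments import EpisodicExperiment

net = buildNetwork(task.outdim, task.indim, bias=False)  # maps observations to actions
agent = LearningAgent(net, ENAC())
experiment = EpisodicExperiment(task, agent)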
agent.learner.gd.alpha = 0.3      # step size of \mu adaptation
agent.learner.gdSig.alpha = 0.15  # step size of \sigma adaptation
agent.learner.gd.momentum = 0.0

batch = 2  # number of samples per gradient estimate

# create experiment
experiment = EpisodicExperiment(task, agent)
prnts = 1  # frequency of console output
epis = 2000 / batch / prnts  # actual rollouts

filename = "dataSPLA08NoRew" + repr(int(random.random() * 1000000.0)) + ".dat"
wf = open(filename, 'wb')

for updates in range(epis):
    for i in range(prnts):
        experiment.doEpisodes(batch)  # execute `batch` episodes
    agent.learn()  # learn from the gathered experience
    agent.reset()  # reset agent and environment

    # print out related data
    stp = (updates + 1) * batch * prnts
    print "Step: ", runs, "/", stp, "Best: ", agent.learner.best, \
        "Base: ", agent.learner.baseline, "Reward: ", agent.learner.reward
    wf.write(repr(stp) + "\n")
    wf.write(repr(agent.learner.baseline[0]) + "\n")
    if useGraphics:
        pl.addData(0, float(stp), agent.learner.baseline)
        pl.addData(1, float(stp), agent.learner.best)
        pl.update()
    # if updates % 100 == 0:
    #     saveWeights("walk.wgt", agent.learner.original)

wf.close()
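# A small helper, not part of the original script, showing how to read back the
# alternating step/baseline records that the loop above writes to `filename`:
def read_progress(path):
    values = [line.strip() for line in open(path) if line.strip()]
    steps = [int(v) for v in values[0::2]]
    baselines = [float(v) for v in values[1::2]]
    return steps, baselines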
# fd.write("# %s %s data - %s\n" % (a.name, t, timestr)) # fd.close() # tableMap[t][a.name] = tmpName # Execute interactions with the environment in batch mode. t0 = time.time() x = 0 batch = 2 while x <= 1000: experiment.doInteractions(batch) for i, agent in enumerate(experiment.agents): s, a, r = agent.history.getSequence(agent.history.getNumSequences() - 1) pl.addData(i, x, scipy.mean(a)) pl2.addData(i, x, scipy.mean(r)) action, reward = agentMap[agent.name] agentMap[agent.name] = (scipy.r_[action, a.flatten()], scipy.r_[reward, r.flatten()]) # for n, seq in (("state", s), ("action", a), ("reward", r)): # tmpName = tableMap[n][agent.name] # fd = file(tmpName, "a+b") # for i in range(batch): # fd.write("%.1f %.5f\n" % (x + i, seq[i])) # fd.close() agent.learn()
# Solve an initial OPF.
OPF(case, market.locationalAdjustment == 'dc', opt={"verbose": False}).solve()

weeks = 208  # number of rollouts
days = 7     # number of samples per learning step

for week in range(weeks):
    experiment.doEpisodes(days)

    if manual_sigma:
        # sigma = [sig - abs(sig * 0.05) - 0.1 for sig in sigma]
        sigma = [sig - 1.0 for sig in sigma]
        print "SIGMA:", sigma

    reward = experiment.agents[0].history["reward"]
    plot.addData(0, week, scipy.mean(reward))

    for i, agent in enumerate(experiment.agents):
        agent.learn()
        agent.reset()
        # if manual_sigma and hasattr(agent, "learner"):
        #     agent.learner.explorer.sigma = sigma

    print "ALPHA:", experiment.agents[0].learner.gd.alpha
    print "PARAMS:", experiment.agents[0].module.params

    plot.update()

pylab.savefig("/tmp/pyreto.png")
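# The linear schedule above eventually drives sigma negative; a multiplicative
# decay with a floor is a common alternative. This sketch is not from the
# original script, and the 0.95 rate and 0.1 floor are assumed values:
def decay_sigma(sigma, rate=0.95, floor=0.1):
    return [max(sig * rate, floor) for sig in sigma]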
# fd = file(tmpName, "w+b")
# fd.write("# %s %s data - %s\n" % (a.name, t, timestr))
# fd.close()
# tableMap[t][a.name] = tmpName

# Execute interactions with the environment in batch mode.
t0 = time.time()
x = 0
batch = 2
while x <= 1000:
    experiment.doInteractions(batch)
    x += batch

    for i, agent in enumerate(experiment.agents):
        s, a, r = agent.history.getSequence(agent.history.getNumSequences() - 1)

        pl.addData(i, x, scipy.mean(a))
        pl2.addData(i, x, scipy.mean(r))

        action, reward = agentMap[agent.name]
        agentMap[agent.name] = (scipy.r_[action, a.flatten()],
                                scipy.r_[reward, r.flatten()])

        # for n, seq in (("state", s), ("action", a), ("reward", r)):
        #     tmpName = tableMap[n][agent.name]
        #     fd = file(tmpName, "a+b")
        #     for i in range(batch):
        #         fd.write("%.1f %.5f\n" % (x + i, seq[i]))
        #     fd.close()

        agent.learn()
        agent.reset()
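# A minimal sketch, not in the original, of persisting the per-agent traces
# accumulated in `agentMap` once the loop finishes; the file-name pattern is
# an assumption:
import numpy
for name, (action, reward) in agentMap.items():
    numpy.savetxt("%s_action.dat" % name, action)
    numpy.savetxt("%s_reward.dat" % name, reward)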
def runNN():
    # data set CSV
    _trncsv = '../ml_pybrain/case1_send1/inc_attr_30_train.csv'
    _tstcsv = '../ml_pybrain/case1_send1/inc_attr_30_test.csv'
    #_attrnum = 30  # attrnum is the input dim, obtained automatically later
    _classnum = 2
    # number of hidden units
    _hidden = 12
    # max epochs
    _maxepochs = 100
    # learning parameters
    _learningrate = 0.013  # default 0.01
    _momentum = 0.03       # default 0.0, tutorial 0.1
    _lrdecay = 1.0         # default 1.0
    _weightdecay = 0.01    # default 0.0, tutorial 0.01
    # path for the training log
    _logpath = 'att30class2_2.log'
    # graph
    _graphymax = 15

    '''# build 3 classes of synthetic data (disabled)
    means = [(-1, 0), (2, 4), (3, 1)]
    cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    alldata = ClassificationDataSet(2, 1, nb_classes=3)
    for n in xrange(400):
        for klass in range(3):
            input = multivariate_normal(means[klass], cov[klass])
            alldata.addSample(input, [klass])
    # split 25% test, 75% train
    tstdata, trndata = alldata.splitWithProportion(0.25)
    '''

    # read CSV
    with Timer() as readt:
        # read training data
        dictdata = csv.DictReader(open(_trncsv, 'r'))
        data = [[row[f] for f in dictdata.fieldnames] for row in dictdata]
        # columns 0 .. last-1 are the input attributes
        train = [[float(elm) for elm in row[0:-1]] for row in data]
        # the last column is the target class; convert from one-based to zero-based
        target = [[int(row[-1]) - 1] for row in data]
        # get the input dimension
        _attrnum = len(train[0])
        # build the training DataSet
        trndata = ClassificationDataSet(_attrnum, 1, nb_classes=_classnum)
        trndata.setField('input', train)
        trndata.setField('target', target)

        # read test data
        dictdata = csv.DictReader(open(_tstcsv, 'r'))
        data = [[row[f] for f in dictdata.fieldnames] for row in dictdata]
        train = [[float(elm) for elm in row[0:-1]] for row in data]
        target = [[int(row[-1]) - 1] for row in data]
        # build the test DataSet
        tstdata = ClassificationDataSet(_attrnum, 1, nb_classes=_classnum)
        tstdata.setField('input', train)
        tstdata.setField('target', target)

    # 1-of-k representation
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()

    print "Number of training patterns: ", len(trndata)
    print "Input and output dimensions: ", trndata.indim, trndata.outdim
    print "First sample (input, target, class):"
    print trndata['input'][0], trndata['target'][0], trndata['class'][0]

    # build network and trainer
    fnn = buildNetwork(trndata.indim, _hidden, trndata.outdim, outclass=SoftmaxLayer)
    #trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
    trainer = BackpropTrainer(fnn, dataset=trndata, verbose=True,
                              learningrate=_learningrate,
                              momentum=_momentum,
                              lrdecay=_lrdecay,
                              weightdecay=_weightdecay)

    # set up graph
    xmax = _maxepochs
    ymax = _graphymax
    figure(figsize=[12, 8])
    ion()
    draw()
    graph = MultilinePlotter(xlim=[1, xmax], ylim=[0, ymax])
    graph.setLineStyle([0, 1], linewidth=2)
    graph.setLabels(x='epoch', y='error %')
    graph.setLegend(['training', 'test'], loc='upper right')
    graph.update()
    draw()

    # storage for the training curve
    trainx = []
    trny = []
    tsty = []

    # start training
    with Timer() as traint:
        for i in range(_maxepochs):
            # train for one epoch
            trainer.trainEpochs(1)
            # evaluate on the training and test sets
            trnresult = percentError(trainer.testOnClassData(), trndata['class'])
            tstresult = percentError(trainer.testOnClassData(dataset=tstdata),
                                     tstdata['class'])
            print "epoch: %4d" % trainer.totalepochs, \
                  " train error: %5.2f%%" % trnresult, \
                  " test error: %5.2f%%" % tstresult
            # store the curve
            trainx.append(i + 1)
            trny.append(trnresult)
            tsty.append(tstresult)
            # draw the graph
            graph.addData(0, i + 1, trnresult)
            graph.addData(1, i + 1, tstresult)
            graph.update()
            draw()

    # save log
    f = csv.writer(open(_logpath, 'w'))
    # timings
    f.writerow(['read (sec)', readt.secs])
    f.writerow(['training and test (sec)', traint.secs])
    # data properties
    f.writerow(['train data num', len(trndata)])
    f.writerow(['test data num', len(tstdata)])
    f.writerow(['in / out dim', trndata.indim, trndata.outdim])
    # config
    f.writerow(['hidden', _hidden])
    f.writerow(['maxepochs', _maxepochs])
    f.writerow(['learningrate', _learningrate])
    f.writerow(['momentum', _momentum])
    f.writerow(['lrdecay', _lrdecay])
    f.writerow(['weightdecay', _weightdecay])
    # curve
    f.writerow(['epoch', 'train_err', 'test_err'])
    f.writerows([[trainx[r], trny[r], tsty[r]] for r in range(len(trainx))])
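# runNN() depends on a Timer context manager that is not defined in this
# fragment; a minimal sketch compatible with the `readt.secs` / `traint.secs`
# usage above:
import time

class Timer(object):
    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, *args):
        self.secs = time.time() - self.start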