Example #1
    agent.learner.gd.alpha = 0.3 # step size of \mu adaptation
    agent.learner.gdSig.alpha = 0.15 # step size of \sigma adaptation
    agent.learner.gd.momentum = 0.0
    batch = 2 # number of samples per gradient estimate (larger here due to the stochastic setting)
    #create experiment
    experiment = EpisodicExperiment(task, agent)
    prnts = 1 # frequency of console output
    epis = 2000/batch/prnts # number of learning updates (2000 episodes in total)
    
    # actual rollouts
    filename = "dataSPLA08NoRew" + repr(int(random.random()*1000000.0)) + ".dat"
    wf = open(filename, 'wb')
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch) # execute `batch` episodes
            agent.learn() # learn from the gathered experience
            agent.reset() # reset agent and environment
        # print out progress data
        stp = (updates+1)*batch*prnts
        print "Step:", stp, "Best:", agent.learner.best, "Base:", agent.learner.baseline, "Reward:", agent.learner.reward
        wf.write(repr(stp)+"\n")
        wf.write(repr(agent.learner.baseline[0])+"\n")
        if useGraphics:
            pl.addData(0, float(stp), agent.learner.baseline)
            pl.addData(1, float(stp), agent.learner.best)
            pl.update()

        #if updates % 100 == 0: # periodically save the learned weights
        #    saveWeights("walk.wgt", agent.learner.original)
    wf.close()
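The snippet starts mid-script, so `task` and `agent` already exist when it runs. A minimal sketch of the omitted setup, modeled on the old PyBrain finite-difference examples (the import paths, the SPLA learner location, and the stand-in environment/task are assumptions from PyBrain 0.2-era code and may differ in other versions):

from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents.finitedifference import FiniteDifferenceAgent
from pybrain.rl.learners.finitedifference.spla import SPLA
from pybrain.rl.experiments import EpisodicExperiment

env = CartPoleEnvironment()   # stand-in environment; the original's is not shown
task = BalanceTask(env, 200)  # stand-in episodic task
net = buildNetwork(task.outdim, task.indim) # observation in, action out
agent = FiniteDifferenceAgent(net, SPLA())  # exposes the gd/gdSig knobs tuned above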
Example #2
for i in range(len(case.online_generators)):
    plot.setData(i, rday, numpy.zeros(nf))
plot.setData(3, rday, f_dc[:nf])
plot.setData(4, rday, f_ac[:nf])
plot.setData(5, rday, numpy.zeros(nf))  # reward
#plot.setData(6, rday, Pg_ac[:nf] * 10)

plot.setLineStyle(0, color="red")
plot.setLineStyle(1, color="green")
plot.setLineStyle(2, color="blue")
plot.setLineStyle(3, color="black")
plot.setLineStyle(4, color="gray")
plot.setLineStyle(5, color="orange")
#plot.setLineStyle(6, color="black")
plot.setLineStyle(linewidth=2)
plot.update()

# Give the agent its task in an experiment.
#experiment = EpisodicExperiment(task, agent)
experiment = pyreto.rlopf.OPFExperiment(task, agent)

weeks = 52 * 2 # two years of simulated weeks
days = 5 # number of samples per gradient estimate
for week in range(weeks):
    all_rewards = experiment.doEpisodes(number=days)
    tot_reward = numpy.mean(agent.history.getSumOverSequences('reward'))

    # Plot the reward at each period averaged over the week.
    r = -1.0 * numpy.array(all_rewards).reshape(days, nf)
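    # Hypothetical continuation (the original snippet ends above): average
    # the per-period rewards over the week's episodes and push the result
    # to the reward curve (index 5) set up earlier.
    plot.setData(5, rday, r.mean(axis=0)[:nf])
    plot.update()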
Example #3
while x < 5000:
    #while True:
    experiment.doEpisodes(batch)
    x += batch
    reward = mean(
        agent.history.getSumOverSequences('reward')) * task.rewardscale
    if useGraphics:
        pl.addData(0, x, reward)
    print(agent.module.params)
    print(reward)
    #if reward > 3:
    #    pass
    agent.learn()
    agent.reset()
    if useGraphics:
        pl.update()

if len(sys.argv) > 1:
    agent.history.saveToFile(sys.argv[1], protocol=-1, arraysonly=True)
if useGraphics:
    pl.show(popup=True)

# To watch what the simulation is doing, create the environment with rendering enabled (True), go to pybrain/rl/environments/ode/ and start viewer.py (Python-OpenGL must be installed; see the PyBrain documentation).

## Performance:
## experiment.doEpisodes(5) * 100 without weave:
##    real    2m39.683s
##    user    2m33.358s
##    sys     0m5.960s
## experiment.doEpisodes(5) * 100 with weave:
##    real    2m41.275s
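For reference, the workload behind these timings was presumably a loop like the following (a sketch; the actual timing harness is not part of the snippet, and the run was likely wrapped in the Unix `time` command):

for _ in range(100):
    experiment.doEpisodes(5)
    agent.learn()
    agent.reset()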
Example #4
    for j, task in enumerate(experiment.tasks):
        g = task.env.asset
        assert g.pcost_model == pylon.PW_LINEAR
        xx = [x for x, _ in g.p_cost] # breakpoint powers
        yy = [y for _, y in g.p_cost] # breakpoint costs

        plc.setData(j + len(case.generators), xx, yy)

        # vertical marker at the generator's current set-point
        xa = [g.p, g.p]
        yb = [0.0, g.total_cost()]
        plc.setData(j + 2 * len(case.generators), xa, yb)

#    plotGenCost(case.generators)

    # refresh each of the three figures
    pylab.figure(1)
    pl.update()
    pylab.figure(2)
    pl2.update()
    pylab.figure(3)
    plc.update()
    x += batch

logger.info("Example completed in %.3fs" % (time.time() - t0))

#from pyreto.util import sparklineData
#sparklineData(agent_map, "auctiondata.txt")

#tableZip = zipfile.ZipFile("%s.zip" % timestr, "w")
#for a in experiment.agents:
#    for t in ("state", "action", "reward"):
#        tmpName = tableMap[t][a.name]
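As an aside, the two breakpoint comprehensions at the top of this example can be collapsed into a single unzip with identical behavior:

xx, yy = zip(*g.p_cost) # unzip the (power, cost) breakpoints in one step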
Example #5
def runNN():
    # data set CSVs
    _trncsv = '../ml_pybrain/case1_send1/inc_attr_30_train.csv'
    _tstcsv = '../ml_pybrain/case1_send1/inc_attr_30_test.csv'
    #_attrnum = 30 # attrnum is the input dimension; determined automatically later
    _classnum = 2
    # number of hidden units
    _hidden = 12
    # max epochs
    _maxepochs = 100
    # learning parameters
    _learningrate = 0.013 # default 0.01
    _momentum = 0.03 # default 0.0, tutorial 0.1
    _lrdecay = 1.0 # default 1.0
    _weightdecay = 0.01 # default 0.0, tutorial 0.01
    # path for saving the training log
    _logpath = 'att30class2_2.log'
    # graph y-axis limit
    _graphymax = 15

    '''#build 3 class
    means = [(-1,0),(2,4),(3,1)]
    cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])]
    alldata = ClassificationDataSet(2, 1, nb_classes=3)
    for n in xrange(400):
        for klass in range(3):
            input = multivariate_normal(means[klass],cov[klass])
            alldata.addSample(input, [klass])

    #split 25% test, 75% train
    tstdata, trndata = alldata.splitWithProportion(0.25)


    ''' #read csv
    with Timer() as readt:
        # read training data
        dictdata = csv.DictReader(open(_trncsv, 'r'))
        data = [[row[f] for f in dictdata.fieldnames] for row in dictdata]
        # all but the last column are the input attributes
        train = [[float(elm) for elm in row[0:-1]] for row in data]
        # the last column is the target class; convert from 1-based to 0-based labels
        target = [[int(row[-1])-1] for row in data]
        # get the input dimension
        _attrnum = len(train[0])
        # build the DataSet
        trndata = ClassificationDataSet(_attrnum, 1, nb_classes=_classnum)
        trndata.setField('input', train)
        trndata.setField('target', target)

        # read test data (same procedure as above)
        dictdata = csv.DictReader(open(_tstcsv, 'r'))
        data = [[row[f] for f in dictdata.fieldnames] for row in dictdata]
        train = [[float(elm) for elm in row[0:-1]] for row in data]
        target = [[int(row[-1])-1] for row in data]
        # build the DataSet
        tstdata = ClassificationDataSet(_attrnum, 1, nb_classes=_classnum)
        tstdata.setField('input', train)
        tstdata.setField('target', target)

    #'''
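    # Note: the train and test blocks above are identical except for the
    # file path. A small helper (hypothetical, not part of the original)
    # would remove the duplication, e.g.
    #   trndata = _loadCSV(_trncsv, _classnum)
    #   tstdata = _loadCSV(_tstcsv, _classnum)
    def _loadCSV(path, classnum):
        dictdata = csv.DictReader(open(path, 'r'))
        data = [[row[f] for f in dictdata.fieldnames] for row in dictdata]
        inputs = [[float(elm) for elm in row[:-1]] for row in data]
        targets = [[int(row[-1]) - 1] for row in data] # 1-based -> 0-based
        ds = ClassificationDataSet(len(inputs[0]), 1, nb_classes=classnum)
        ds.setField('input', inputs)
        ds.setField('target', targets)
        return ds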

    # convert targets to a 1-of-k (one-hot) representation
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()

    print "Number of training patterns: ", len(trndata)
    print "Input and output dimensions: ", trndata.indim, trndata.outdim
    print "First sample (input, target, class):"
    print trndata['input'][0], trndata['target'][0], trndata['class'][0]

    # build the network and trainer
    fnn = buildNetwork(trndata.indim, _hidden, trndata.outdim, outclass=SoftmaxLayer)
    #trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
    trainer = BackpropTrainer(fnn, dataset=trndata, verbose=True,
                              learningrate=_learningrate,
                              momentum=_momentum,
                              lrdecay=_lrdecay,
                              weightdecay=_weightdecay)

    # setup graph
    xmax = _maxepochs
    ymax = _graphymax
    figure(figsize=[12,8])
    ion()
    draw()
    graph = MultilinePlotter(xlim=[1, xmax], ylim=[0, ymax])
    graph.setLineStyle([0,1], linewidth=2)
    graph.setLabels(x='epoch', y='error %')
    graph.setLegend(['training', 'test'], loc='upper right')
    graph.update()
    draw()

    # storage for the training curves
    trainx = []
    trny = []
    tsty = []

    # start training
    with Timer() as traint:
        for i in range(_maxepochs):
            # train
            trainer.trainEpochs(1)
            # evaluate on the training and test sets
            trnresult = percentError(trainer.testOnClassData(), trndata['class'])
            tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
            print "epoch: %4d" % trainer.totalepochs, \
                "  train error: %5.2f%%" % trnresult, \
                "  test error: %5.2f%%" % tstresult
            # store curve
            trainx.append(i+1)
            trny.append(trnresult)
            tsty.append(tstresult)

            # draw graph
            graph.addData(0, i+1, trnresult)
            graph.addData(1, i+1, tstresult)
            graph.update()
            draw()

    # save log
    f = csv.writer(open(_logpath, 'w'))
    # timings
    f.writerow(['read', readt.secs])
    f.writerow(['training and test(sec)', traint.secs])
    # data properties
    f.writerow(['train data num', len(trndata)])
    f.writerow(['test data num', len(tstdata)])
    f.writerow(['in / out dim', trndata.indim, trndata.outdim])
    # config
    f.writerow(['hidden', _hidden])
    f.writerow(['maxepochs', _maxepochs])
    f.writerow(['learningrate', _learningrate])
    f.writerow(['momentum', _momentum])
    f.writerow(['lrdecay', _lrdecay])
    f.writerow(['weightdecay', _weightdecay])
    # curve
    f.writerow(['epoch', 'train_err', 'test_err'])
    f.writerows([[trainx[r], trny[r], tsty[r]] for r in range(len(trainx))])
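
The function above is never invoked in the snippet; a minimal entry point (an assumption, not shown in the original) would be:

if __name__ == '__main__':
    runNN()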