Example #1
#     print agent.module.getParameters(),
#     print mean(agent.history.getSumOverSequences('reward'))
#     clf()
#     plot(rewards)

# episodic version
x = 0
batch = 30  #number of samples per gradient estimate (was: 20; more here due to stochastic setting)
while x < 5000:
    #while True:
    experiment.doEpisodes(batch)
    x += batch
    reward = mean(
        agent.history.getSumOverSequences('reward')) * task.rewardscale
    if useGraphics:
        pl.addData(0, x, reward)
    print(agent.module.params)
    print(reward)
    #if reward > 3:
    #    pass
    agent.learn()
    agent.reset()
    if useGraphics:
        pl.update()

if len(sys.argv) > 2:
    agent.history.saveToFile(sys.argv[1], protocol=-1, arraysonly=True)
if useGraphics:
    pl.show(popup=True)

#To view what the simulation is doing at the moment, create the environment with rendering enabled (True), go to pybrain/rl/environments/ode/ and start viewer.py (python-opengl must be installed; see the PyBrain documentation)
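
The loop above assumes the script has already built a complete PyBrain episodic RL setup (environment, task, network, agent, experiment), none of which appears in the snippet. A minimal sketch of such a setup, using the cart-pole balancing task purely as a stand-in for the original ODE environment; all names and parameter values below are illustrative assumptions, not the original code:

import sys
from scipy import mean

from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import ENAC
from pybrain.rl.experiments import EpisodicExperiment

env = CartPoleEnvironment()
task = BalanceTask(env, 200)  # 200 steps per episode
task.rewardscale = 1.0        # the original ODE task defines rewardscale; stand-in value here
net = buildNetwork(task.outdim, task.indim, bias=False)  # linear controller
agent = LearningAgent(net, ENAC())  # episodic policy-gradient learner
experiment = EpisodicExperiment(task, agent)
useGraphics = False  # set True and create the plotter `pl` to visualise progress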
Example #2
    agent.learner.gd.alpha = 0.3  # step size of \mu adaptation
    agent.learner.gdSig.alpha = 0.15  # step size of \sigma adaptation
    agent.learner.gd.momentum = 0.0
    batch = 2  # number of samples per gradient estimate
    #create experiment
    experiment = EpisodicExperiment(task, agent)
    prnts = 1  # frequency of console output
    epis = 2000 / batch / prnts
    
    # actual rollouts
    filename="dataSPLA08NoRew"+repr(int(random.random()*1000000.0))+".dat"
    wf = open(filename, 'wb')
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)  # execute `batch` episodes
            agent.learn()  # learn from the gathered experience
            agent.reset()  # reset agent and environment
        #print out related data
        stp = (updates+1)*batch*prnts
        print "Step: ", runs, "/", stp, "Best: ", agent.learner.best, "Base: ", agent.learner.baseline, "Reward: ", agent.learner.reward   
        wf.write(repr(stp)+"\n") 
        wf.write(repr(agent.learner.baseline[0])+"\n") 
        if useGraphics:
            pl.addData(0,float(stp),agent.learner.baseline)
            pl.addData(1,float(stp),agent.learner.best)
            pl.update()

        #if updates/100 == float(updates)/100.0:
        #    saveWeights("walk.wgt", agent.learner.original)  
    wf.close()      
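
The data file written above alternates one line containing the step count with one containing the learner's baseline reward. A minimal sketch for reading such a file back for offline inspection; the filename is a hypothetical example of what a run produces:

# parse the alternating step / baseline lines written by the training loop
steps, baselines = [], []
with open("dataSPLA08NoRew123456.dat") as f:  # hypothetical filename
    lines = [line.strip() for line in f if line.strip()]
for i in range(0, len(lines) - 1, 2):
    steps.append(int(lines[i]))
    baselines.append(float(lines[i + 1]))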
Example #3
#        fd.write("# %s %s data - %s\n" % (a.name, t, timestr))
#        fd.close()
#        tableMap[t][a.name] = tmpName

# Execute interactions with the environment in batch mode.
t0 = time.time()
x = 0
batch = 2
while x <= 1000:
    experiment.doInteractions(batch)
    x += batch  # advance the interaction counter so the loop terminates

    for i, agent in enumerate(experiment.agents):
        s, a, r = agent.history.getSequence(agent.history.getNumSequences() - 1)

        pl.addData(i, x, scipy.mean(a))
        pl2.addData(i, x, scipy.mean(r))

        action, reward = agentMap[agent.name]
        agentMap[agent.name] = (scipy.r_[action, a.flatten()],
                                scipy.r_[reward, r.flatten()])

        #        for n, seq in (("state", s), ("action", a), ("reward", r)):
        #            tmpName = tableMap[n][agent.name]
        #            fd = file(tmpName, "a+b")
        #            for i in range(batch):
        #                fd.write("%.1f %.5f\n" % (x + i, seq[i]))
        #            fd.close()

        agent.learn()
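
The loop assumes that `agentMap` was initialised earlier with one (action history, reward history) pair per agent; that initialisation is not part of the snippet. A minimal sketch of the assumed structure:

import scipy

# one pair of growing 1-D arrays (actions, rewards) per agent name
agentMap = dict((agent.name, (scipy.array([]), scipy.array([])))
                for agent in experiment.agents)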
Example #4
#     # rewards.append(mean(agent.history.getSumOverSequences('reward')))
#     print agent.module.getParameters(),
#     print mean(agent.history.getSumOverSequences('reward'))
#     clf()
#     plot(rewards)

# episodic version
x = 0
batch = 30 #number of samples per gradient estimate (was: 20; more here due to stochastic setting)
while x < 5000:
#while True:
    experiment.doEpisodes(batch)
    x += batch
    reward = mean(agent.history.getSumOverSequences('reward')) * task.rewardscale
    if useGraphics:
        pl.addData(0, x, reward)
    print(agent.module.params)
    print(reward)
    #if reward > 3:
    #    pass
    agent.learn()
    agent.reset()
    if useGraphics:
        pl.update()


if len(sys.argv) > 2:
    agent.history.saveToFile(sys.argv[1], protocol=-1, arraysonly=True)
if useGraphics:
    pl.show(popup=True)
Example #5
# Solve an initial OPF.
OPF(case, market.locationalAdjustment=='dc', opt={"verbose": False}).solve()

weeks = 208  # number of rollouts
days = 7  # number of samples per learning step
for week in range(weeks):
    experiment.doEpisodes(days)

    if manual_sigma:
#        sigma = [sig - abs(sig * 0.05) - 0.1 for sig in sigma]
        sigma = [sig - 1.0 for sig in sigma]
        print "SIGMA:", sigma

    reward = experiment.agents[0].history["reward"]
    plot.addData(0, week, scipy.mean(reward))

    for i, agent in enumerate(experiment.agents):
        agent.learn()
        agent.reset()

#        if manual_sigma and hasattr(agent, "learner"):
#            agent.learner.explorer.sigma = sigma

    print "ALPHA:", experiment.agents[0].learner.gd.alpha
    print "PARAMS:", experiment.agents[0].module.params

    plot.update()

pylab.savefig("/tmp/pyreto.png")
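
The `plot` object used above is created earlier in the original script and is not shown here. A minimal sketch of one plausible setup using PyBrain's MultilinePlotter (an assumption, not the original code; axis limits are guesses):

import pylab
from pybrain.tools.plotting import MultilinePlotter

pylab.ion()
plot = MultilinePlotter(xlim=[0, weeks], ylim=[-1.0, 1.0])  # one line: mean reward per week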
Example #6
#        fd = file(tmpName, "w+b")
#        fd.write("# %s %s data - %s\n" % (a.name, t, timestr))
#        fd.close()
#        tableMap[t][a.name] = tmpName

# Execute interactions with the environment in batch mode.
t0 = time.time()
x = 0
batch = 2
while x <= 1000:
    experiment.doInteractions(batch)
    x += batch  # advance the interaction counter so the loop terminates

    for i, agent in enumerate(experiment.agents):
        s, a, r = agent.history.getSequence(agent.history.getNumSequences() - 1)

        pl.addData(i, x, scipy.mean(a))
        pl2.addData(i, x, scipy.mean(r))

        action, reward = agentMap[agent.name]
        agentMap[agent.name] = (scipy.r_[action, a.flatten()],
                                scipy.r_[reward, r.flatten()])

#        for n, seq in (("state", s), ("action", a), ("reward", r)):
#            tmpName = tableMap[n][agent.name]
#            fd = file(tmpName, "a+b")
#            for i in range(batch):
#                fd.write("%.1f %.5f\n" % (x + i, seq[i]))
#            fd.close()

        agent.learn()
        agent.reset()
Example #7
def runNN():
    # data set CSV
    _trncsv = '../ml_pybrain/case1_send1/inc_attr_30_train.csv'
    _tstcsv = '../ml_pybrain/case1_send1/inc_attr_30_test.csv'
    #_attrnum = 30 # input dim, determined automatically from the data below
    _classnum = 2
    # number of hidden units
    _hidden = 12
    # max epochs
    _maxepochs = 100
    # learning parameters
    _learningrate = 0.013 # default 0.01
    _momentum = 0.03 # default 0.0, tutorial 0.1
    _lrdecay = 1.0 # default 1.0
    _weightdecay = 0.01 # default 0.0, tutorial 0.01
    # path for saving the training log
    _logpath = 'att30class2_2.log'
    # y-axis maximum for the graph
    _graphymax = 15

    '''#build 3 class
    means = [(-1,0),(2,4),(3,1)]
    cov = [diag([1,1]), diag([0.5,1.2]), diag([1.5,0.7])]
    alldata = ClassificationDataSet(2, 1, nb_classes=3)
    for n in xrange(400):
        for klass in range(3):
            input = multivariate_normal(means[klass],cov[klass])
            alldata.addSample(input, [klass])

    #split 25% test, 75% train
    tstdata, trndata = alldata.splitWithProportion(0.25)


    ''' #read csv
    with Timer() as readt:
        # read train data
        dictdata = csv.DictReader(open(_trncsv, 'r'))
        data = [[row[f] for f in dictdata.fieldnames] for row in dictdata]
        # all columns except the last are the input features
        train = [[float(elm) for elm in row[0:-1]] for row in data]
        # the last column is the target; convert labels from one-based to zero-based
        target = [[int(row[-1])-1] for row in data]
        # get input dim
        _attrnum = len(train[0])
        # set DataSet
        trndata = ClassificationDataSet(_attrnum, 1, nb_classes=_classnum)
        trndata.setField('input', train)
        trndata.setField('target', target)

        # read test data
        dictdata = None
        dictdata = csv.DictReader(open(_tstcsv, 'r'))
        data = [[row[f] for f in dictdata.fieldnames] for row in dictdata]
        # all columns except the last are the input features
        train = [[float(elm) for elm in row[0:-1]] for row in data]
        # the last column is the target; convert labels from one-based to zero-based
        target = [[int(row[-1])-1] for row in data]
        # set DataSet
        tstdata = ClassificationDataSet(_attrnum, 1, nb_classes=_classnum)
        tstdata.setField('input', train)
        tstdata.setField('target', target)

    #'''

    # 1-of-k representation
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()

    print "Number of training patterns: ", len(trndata)
    print "Input and output dimensions: ", trndata.indim, trndata.outdim
    print "First sample (input, target, class):"
    print trndata['input'][0], trndata['target'][0], trndata['class'][0]

    # build network and trainer
    fnn = buildNetwork(trndata.indim, _hidden, trndata.outdim, outclass=SoftmaxLayer)
    #trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1, verbose=True, weightdecay=0.01)
    trainer = BackpropTrainer(fnn, dataset=trndata, verbose=True,
                              learningrate=_learningrate,
                              momentum=_momentum,
                              lrdecay=_lrdecay,
                              weightdecay=_weightdecay)

    # setup graph
    xmax = _maxepochs
    ymax = _graphymax
    figure(figsize=[12,8])
    ion()
    draw()
    graph = MultilinePlotter(xlim=[1, xmax], ylim=[0, ymax])
    graph.setLineStyle([0,1], linewidth=2)
    graph.setLabels(x='epoch', y='error %')
    graph.setLegend(['training', 'test'], loc='upper right')
    graph.update()
    draw()

    # storage for the training curves
    trainx = []
    trny = []
    tsty = []

    # start training
    with Timer() as traint:
        for i in range(_maxepochs):
            # train
            trainer.trainEpochs(1)
            # test by train/test
            trnresult = percentError(trainer.testOnClassData(), trndata['class'])
            tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])
            print "epoch: %4d" % trainer.totalepochs, \
                "  train error: %5.2f%%" % trnresult, \
                "  test error: %5.2f%%" % tstresult
            # store curve
            trainx.append(i+1)
            trny.append(trnresult)
            tsty.append(tstresult)

            # draw graph
            graph.addData(0, i+1, trnresult)
            graph.addData(1, i+1, tstresult)
            graph.update()
            draw()

    # save log
    f = csv.writer(open(_logpath, 'w'))
    # timer
    f.writerow(['read', readt.secs])
    f.writerow(['training and test(sec)', traint.secs])
    # data prop
    f.writerow(['train data num', len(trndata)])
    f.writerow(['test data num', len(tstdata)])
    f.writerow(['in / out dim', trndata.indim, trndata.outdim])
    # config
    f.writerow(['hidden', _hidden])
    f.writerow(['maxepochs', _maxepochs])
    f.writerow(['learningrate', _learningrate])
    f.writerow(['momentum', _momentum])
    f.writerow(['lrdecay', _lrdecay])
    f.writerow(['weightdecay', _weightdecay])
    # curve
    f.writerow(['epoch', 'train_err', 'test_err'])
    f.writerows([[trainx[r], trny[r], tsty[r]] for r in range(len(trainx))])
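
The function above is presumably invoked from a main guard at the bottom of the original script, which the snippet omits; a minimal sketch:

if __name__ == '__main__':
    runNN()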