def pongMaker():
    """Return a fresh Pong environment (20 balls per episode, sequence length 1)."""
    return pong_env(seqLength=1, nBallsEpisode=20, useSeqLength=False)


def netMaker():
    """Build the Pong A3C network and copy pre-trained parameters into it.

    The network is four stride-2 3x3 convolutions (32 channels each, ELU after
    every convolution), a flatten, a 256-unit LSTM cell, another ELU and an
    A3C output head with 3 policy outputs.

    Returns:
        The initialized network with parameters copied from the stored
        checkpoint.
    """
    model = mxT.a3cHybridSequential(useInitStates=True)

    # The convolutional stack is four identical layers that differ only in
    # their parameter prefix.
    for conv_prefix in ("c1", "c2", "c3", "c4"):
        model.add(
            mx.gluon.nn.Conv2D(
                channels=32,
                kernel_size=(3, 3),
                strides=(2, 2),
                padding=(1, 1),
                activation=None,
                prefix=conv_prefix,
            )
        )
        model.add(mx.gluon.nn.ELU())

    model.add(mx.gluon.nn.Flatten())
    model.add(mxT.a3cLSTM(mx.gluon.rnn.LSTMCell(256, prefix='lstm_')))
    model.add(mx.gluon.nn.ELU())
    model.add(mxT.a3cOutput(n_policy=3, prefix=""))
    model.initialize(init=mx.initializer.Xavier(magnitude=0.1), ctx=mx.cpu())

    # Set the initial parameters from a pre-trained model.
    # NOTE(review): the checkpoint paths are absolute and machine-specific.
    pretrained = mx.gluon.nn.SymbolBlock.imports(
        symbol_file="/home/markus/Documents/Nerding/python/a3c/test/pong/array/1200/net-symbol.json",
        param_file="/home/markus/Documents/Nerding/python/a3c/test/pong/array/1200/net-0001.params",
        input_names=['data'],
    )
    model.copyParams(fromNet=pretrained)
    return model


# Launch training as soon as the script is executed.
mainThread = mT(
    netMaker=netMaker,
    envMaker=pongMaker,
    configFile='a3c/test/pong/pong.cfg',
)
mainThread.run()
import mxnetTools as mxT
import mxnet as mx
from mainThread import mainThread as mT


def cartpoleMaker():
    """Return a fresh CartPole environment."""
    return cartpole_env()


def netMaker():
    """Build the small CartPole A3C network.

    The network is a 32-unit LSTM over 4 input features, a ReLU activation
    and an A3C output head with 2 policy outputs, initialized with Xavier.

    Returns:
        The initialized network.
    """
    model = mxT.a3cHybridSequential()
    layers = (
        mx.gluon.rnn.LSTM(hidden_size=32, input_size=4, prefix="lstm_"),
        mx.gluon.nn.Activation("relu"),
        mxT.a3cOutput(n_policy=2, prefix=""),
    )
    for layer in layers:
        model.add(layer)
    # NOTE(review): add() is called without a block here; this looks like a
    # leftover -- confirm against a3cHybridSequential.add before removing.
    model.add()
    model.initialize(init=mx.initializer.Xavier())
    return model


# Train, then persist the model and its performance plots.
mainThread = mT(
    netMaker=netMaker,
    envMaker=cartpoleMaker,
    configFile='a3c/test/cartpole/cartpole.cfg',
    verbose=False,
)
mainThread.run()
mainThread.save("a3c/test/cartpole/smallModel", overwrite=True, savePlots=True)
#after = mainThread.module.get_params()[0]['fullyconnected0_weight'].asnumpy()
mainThread.getPerformancePlots("test", overwrite=True)
def netMaker():
    """Build the dinner A3C network and copy pre-trained parameters into it.

    The network is one Conv1D layer (64 channels, kernel size and stride 270),
    an ELU, a flatten, a 128-unit LSTM cell, another ELU and an A3C output
    head with 50 policy outputs.

    Returns:
        The initialized network with parameters copied from the stored
        checkpoint.
    """
    # NOTE(review): the checkpoint paths are absolute and machine-specific.
    symbol_path = "/home/markus/Documents/Nerding/python/a3c/test/dinner/test50noPadnoIntoleranceNoValidRestriction_continued/final/net-symbol.json"
    param_path = "/home/markus/Documents/Nerding/python/a3c/test/dinner/test50noPadnoIntoleranceNoValidRestriction_continued/final/net-0001.params"

    model = mxT.a3cHybridSequential(useInitStates=True)
    conv = mx.gluon.nn.Conv1D(
        channels=64,
        kernel_size=270,
        strides=270,
        activation=None,
        prefix="c1",
    )
    model.add(conv)
    model.add(mx.gluon.nn.ELU())
    model.add(mx.gluon.nn.Flatten())
    model.add(mxT.a3cLSTM(mx.gluon.rnn.LSTMCell(128, prefix='lstm1')))
    model.add(mx.gluon.nn.ELU())
    model.add(mxT.a3cOutput(n_policy=50, prefix=""))
    model.initialize(init=mx.initializer.Xavier(magnitude=0.1), ctx=mx.cpu())

    # Set the initial parameters from a pre-trained model.
    pretrained = mx.gluon.nn.SymbolBlock.imports(
        symbol_file=symbol_path,
        param_file=param_path,
        input_names=['data'],
    )
    model.copyParams(fromNet=pretrained)
    return model


# Launch training as soon as the script is executed.
mainThread = mT(
    netMaker=netMaker,
    envMaker=dinnerMaker,
    configFile='a3c/test/dinner/dinner.cfg',
)
mainThread.run()
def run():
    """Create the main thread for the dinner_simple configuration and run it.

    Uses the `netMaker` and `dinnerMaker` factories that are in scope at call
    time.
    """
    trainer = mT(
        netMaker=netMaker,
        envMaker=dinnerMaker,
        configFile='a3c/test/dinner_simple/dinner_simple.cfg',
    )
    trainer.run()
# Scratch script: build the dinner environment, train on a small symbolic MLP
# and then experiment with binding the A3C output in a module.

dinnerAssigned = assigner.assignDinnerCourses()
# Plain decimal literals: leading-zero forms such as `07` are a SyntaxError on
# Python 3 (and octal on Python 2, where 07 == 7 and 01 == 1).
dinnerTime = datetime(2018, 7, 1, 20)
environment = state(data=dinnerAssigned,
                    dinnerTime=dinnerTime,
                    travelMode='simple')

# Symbolic network: two 100-unit fully connected layers, each followed by
# dropout (p=0.2, then p=0.1) and a ReLU.
tmp = mx.sym.Variable('data')
tmp = mx.sym.FullyConnected(data=tmp, num_hidden=100)
tmp = mx.sym.Dropout(data=tmp, p=0.2)
tmp = mx.sym.Activation(data=tmp, act_type='relu')
tmp = mx.sym.FullyConnected(data=tmp, num_hidden=100)
tmp = mx.sym.Dropout(data=tmp, p=0.1)
tmp = mx.sym.Activation(data=tmp, act_type='relu')

mainThread = mT(tmp, environment, 'a3c/test/a3c.cfg', verbose=True)
mainThread.run()

# Attach the A3C output head, sized by the number of rewards in the environment.
tmp = mxT.a3cOutput(tmp, environment.getRewards().size)

# NOTE(review): the meaning of 13500 is not visible here -- presumably an
# input size; confirm against a3cModule.
mod = a3cModule(tmp, 13500)
data = mxT.state2a3cInput(environment.state)
mod.forward(data)

# Rebind `mod` to a plain MXNet module over the same symbol.
mod = mx.mod.Module(tmp)
mod.bind(data_shapes=[('data', (1, 10))],
         label_shapes=[('valueLabel', (1, 1)),
                       ('advantageLabel', (1, environment.getRewards().size))],
         grad_req='add')