def objective(args):
    """Hyperparameter-search objective for a distributional DQN on LunarLander-v2.

    Trains one agent per noise-reduction run at the given learning rate,
    scores each run as the mean of its 4 best test results, and returns
    the NEGATED total so that a minimizer (e.g. hyperopt's fmin) maximizes
    performance.

    Args:
        args: the learning rate to evaluate (a single float).

    Returns:
        float: negated sum of per-run performance scores.
    """
    NUM_TESTS_FOR_NOISE = 1  # repeated runs to average out training noise
    env = gym.make('LunarLander-v2')
    learningRate = args
    totalResult = 0
    for _ in range(NUM_TESTS_FOR_NOISE):
        # Fresh session per run so each agent trains from scratch. The
        # context manager closes the session and releases its TF resources
        # when the run ends (the original leaked one session per run).
        with tf.Session() as sess:
            a = Agent(
                sess=sess,
                env=env,
                numAvailableActions=4,
                numObservations=8,
                rewardsMovingAverageSampleLength=20,
                gamma=1,
                nStepUpdate=1,
                includeIntermediatePairs=False,
                maxRunningMinutes=30,

                # test parameters
                episodesPerTest=1,
                numTestPeriods=40000,
                numTestsPerTestPeriod=30,
                episodeStepLimit=1024,
                intermediateTests=False,
                render=False,
                showGraph=False,

                # hyperparameters
                valueMin=-400.0,
                valueMax=300.0,
                numAtoms=14,
                maxMemoryLength=100000,
                batchSize=256,
                networkSize=[128, 128, 256],
                learningRate=learningRate,
                priorityExponent=0,
                epsilonInitial=2,
                epsilonDecay=.9987,
                minFramesForTraining=2048,
                noisyLayers=False,
                maxGradientNorm=4,
                minExploration=.15,
            )
            testResults = np.array(a.execute())
            # Performance = mean of the 4 best test scores; robust to a few
            # bad evaluation episodes. Equivalent to (but clearer than) the
            # original argpartition-based top-4 selection.
            performance = np.mean(np.sort(testResults)[-4:])
        totalResult = totalResult + performance
        print(str(learningRate) + "," + str(performance))
    return -totalResult
# agentName="agent_842763505", # hyperparameters rewardScaling=rewardScaling, nStepReturns=1, maxMemoryLength=int(1e6), batchSize=64, learningRate=6.25e-4, priorityExponent=0, epsilonInitial=1, epsilonDecay=.999975, minExploration=.01, maxExploration=1.0, minFramesForTraining=2048, maxGradientNorm=5, preNetworkSize=[128, 128], postNetworkSize=[256], numQuantiles=8, embeddingDimension=16, kappa=1.0, trainingIterations=3, tau=0.001) performance = a.execute()[0] cur = db.cursor() cur.execute( "insert into experiments (label, x1, x2, x3, x4, y) values ('{0}', '{1}', '{2}', '{3}', '{4}', '{5}')" .format(experimentName, rewardScaling, 0, 0, 0, performance)) db.commit() cur.close() db.close()
intermediateTests=False, render=False, showGraph=True, saveModel=True, loadModel=False, disableRandomActions=False, disableTraining=False, # agentName="agent_223120799", # hyperparameters rewardScaling=pow(10, -.75), nStepReturns=1, maxMemoryLength=int(1e6), batchSize=64, learningRate=6.25e-4, priorityExponent=0, epsilonInitial=1, epsilonDecay=.999, minExploration=.01, maxExploration=1.0, minFramesForTraining=2048, maxGradientNorm=5, preNetworkSize=[256, 256], postNetworkSize=[512], numQuantiles=32, embeddingDimension=64, kappa=1.0, trainingIterations=3, tau=0.001) testResults = a.execute()