コード例 #1
0
    priorityExponent=0.0,
    batchSize=64,
    maxEpisodes=4096,
    trainSteps=1024,
    minStepsBeforeTraining=4096,
    rewardScaling=(10.0 ** -0.75),
    actionShift=0.0,
    stepsPerUpdate=1,
    render=True,
    showGraphs=False,
    saveModel=False,
    saveModelToS3=False,
    restoreModel=True,
    train=False,
    testSteps=1024,
    maxMinutes=360,
    targetEntropy=-4.0,
    maxGradientNorm=5.0,
    meanRegularizationConstant=0.0,
    varianceRegularizationConstant=0.0,
    randomStartSteps=0,
    gradientSteps=1,
    initialExtraNoise=0,
    extraNoiseDecay=0,
    evaluationEvery=25,
    numFinalEvaluations=10,
    maxTrainSteps = 1000000
)

# Run the agent built above and print the value execute() returns after the label.
print("Total Reward: {!s}".format(agent.execute()))
コード例 #2
0
              batchSize=64,
              nStep=3,
              frameSkip=2,
              maxEpisodes=4096,
              trainSteps=1024,
              maxTrainSteps=6000000,
              minStepsBeforeTraining=10000,
              rewardScaling=(10.0**-0.75),
              actionShift=0.0,
              stepsPerUpdate=1,
              render=False,
              showGraphs=True,
              saveModel=True,
              saveModelToS3=False,
              restoreModel=False,
              train=True,
              testSteps=1024,
              maxMinutes=360,
              targetEntropy=-4.0,
              maxGradientNorm=5.0,
              meanRegularizationConstant=0.0,
              varianceRegularizationConstant=0.0,
              randomStartSteps=10000,
              gradientSteps=1,
              initialExtraNoise=0,
              extraNoiseDecay=0,
              evaluationEvery=25,
              numFinalEvaluations=10)

# Run the agent built above and keep whatever execute() returns.
results = agent.execute()
コード例 #3
0
                  qNetworkSize=[256, 256],
                  valueNetworkSize=[256, 256],
                  entropyCoefficient=entropyCoefficient,
                  valueNetworkLearningRate=learningRate,
                  policyNetworkLearningRate=learningRate,
                  qNetworkLearningRate=learningRate,
                  tau=0.005,
                  gamma=0.99,
                  maxMemoryLength=int(1e6),
                  priorityExponent=0,
                  batchSize=256,
                  maxGradientNorm=5,
                  maxEpisodes=1024,
                  trainSteps=1024,
                  minStepsBeforeTraining=4096,
                  rewardScaling=rewardScaling,
                  actionScaling=actionScaling,
                  actionShift=0.0,
                  stepsPerUpdate=1,
                  render=True,
                  showGraphs=True,
                  meanRegularizationConstant=weightRegularizationConstant,
                  varianceRegularizationConstant=weightRegularizationConstant,
                  testSteps=1024,
                  maxMinutes=600)

    result = agent.execute()
    print("Result: " + result)
except:
    print("Error evaluating parameters")
    result = -20000