TRAIN_EPISODES = 200
TEST_EPISODES = 1
EPOCHS = 100
NOISE_STD = 0.1
NOISE_STD_DECAY = 0.99
TAU = 0.005

memory = CebLinear(maxSize=50000, sampleWeight='same')
metrics = {}
for epoch in range(EPOCHS):
    print('Start of epoch %d. Noise std: %.3f' % (epoch, NOISE_STD))
    ##################
    print('Testing...')
    scores = Utils.testAgent(
        RawActionAgent(model, processor=addNoise(NOISE_STD)),
        memory, TEST_EPISODES, env=RawPendulumEnvironment
    )
    Utils.trackScores(scores, metrics)
    ##################
    # train model
    lossesActor = []
    lossesCritic = []
    for _ in range(TRAIN_EPISODES):
        states, actions, rewards, nextStates, nextStateScoreMultiplier = memory.sampleBatch(BATCH_SIZE)
        # discount the bootstrapped next-state value
        nextStateScoreMultiplier = tf.convert_to_tensor(nextStateScoreMultiplier * GAMMA, dtype=tf.float32)
        rewards = tf.convert_to_tensor(rewards, dtype=tf.float32)
        lossCritic, lossActor = model.fit(
            states, actions, rewards, nextStates,
            nextStateScoreMultiplier  # assumed final argument; the call is cut off in the source
        )
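`addNoise` isn't defined in this excerpt; a minimal sketch of such a processor, assuming it perturbs the raw action with zero-mean Gaussian noise and clips it to the Pendulum action range (the signature and the bounds are assumptions, not the source's code):

import numpy as np

def addNoise(std, low=-1.0, high=1.0):
    """Hypothetical sketch: returns a post-processor that adds zero-mean
    Gaussian exploration noise to an action and clips it to a valid range."""
    def _processor(action):
        noisy = action + np.random.normal(0.0, std, size=np.shape(action))
        return np.clip(noisy, low, high)
    return _processor

Because the noise is injected only through this processor, annealing exploration reduces to shrinking NOISE_STD by NOISE_STD_DECAY between epochs.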
for epoch in range(EPOCHS):
    print('Start of epoch %d. Explore rate: %.3f' % (epoch, EXPLORE_RATE))
    # freeze a copy of the network so the targets stay stable during the epoch
    modelClone.set_weights(model.get_weights())
    lossSum = 0
    for _ in range(TRAIN_EPISODES):
        states, actions, rewards, nextStates, nextStateScoreMultiplier = memory.sampleBatch(
            batch_size=BATCH_SIZE, maxSamplesFromEpisode=16
        )
        actions = ACTIONS.toIndex(actions)
        # Q-learning target: r + gamma * max_a' Q(s', a')
        futureScores = modelClone.predict(nextStates).max(axis=-1) * nextStateScoreMultiplier
        targets = modelClone.predict(states)
        targets[np.arange(len(targets)), actions] = rewards + futureScores * GAMMA
        lossSum += model.fit(states, targets, epochs=1, verbose=0).history['loss'][0]
    print('Avg. train loss: %.4f' % (lossSum / TRAIN_EPISODES))
    ##################
    print('Testing...')
    scores = Utils.testAgent(
        DQNAgent(model, actions=ACTIONS, exploreRate=EXPLORE_RATE),
        memory, TEST_EPISODES
    )
    Utils.trackScores(scores, metrics)
    ##################
    if (epoch % 10) == 0:  # debug: watch a fully greedy rollout every 10 epochs
        Utils.showAgentPlay(DQNAgent(model, actions=ACTIONS, exploreRate=0))
    ##################
    EXPLORE_RATE = max(0.001, EXPLORE_RATE * EXPLORE_RATE_DECAY)

plotData2file(metrics, 'chart.jpg')
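`DQNAgent` itself isn't shown; a minimal sketch of its likely action selection, assuming plain epsilon-greedy over the Q-network's outputs (the class name, `actions`, and `exploreRate` come from the source, everything else is an assumption):

import numpy as np

class DQNAgent:
    """Hypothetical sketch: epsilon-greedy wrapper around a Q-network."""
    def __init__(self, model, actions, exploreRate):
        self._model = model
        self._actions = actions
        self._exploreRate = exploreRate

    def act(self, state):
        if np.random.random() < self._exploreRate:
            # explore: pick a random action index
            return np.random.randint(self._actions.N)
        # exploit: pick the action with the highest predicted Q-value
        qValues = self._model.predict(state[None])[0]
        return int(np.argmax(qValues))

This is why exploreRate=0 in the debug rollout above yields purely greedy play.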
STEPS_PER_EPISODE = 200
BOOTSTRAPPED_STEPS = 10
metrics = {}

env = PendulumEnvironment()
memory = EB.CebLinear(
    maxSize=10 * TEST_EPISODES * STEPS_PER_EPISODE,
    sampleWeight='abs'
)
curiosityModel = CCuriosityIRWatched(CCuriosityIR(layersSizes=[10, 10, 10]))
processor = replayProcessor(
    curiosityModel,
    rewardScale=1.0 / BOOTSTRAPPED_STEPS,
    normalize=True
)
# collect random experience
for episodeN in range(2):
    Utils.testAgent(
        RandomAgent(low=-1, high=1),
        memory, episodes=100, processor=processor
    )
print('random experience collected')
####################
model = createFatModel(input_shape=(3,), output_size=ACTIONS.N)
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=1e-4),  # `lr` is deprecated in TF 2.x
    loss=tf.keras.losses.Huber(delta=1.0)
)
ghostNetwork = GhostNetwork(model, mixer='hard')

for epoch in range(EPOCHS):
    print('Start of epoch %d. Explore rate: %.3f' % (epoch, EXPLORE_RATE))
    ##################
    # Training
    ghostNetwork.update()
    trainLoss = train(ghostNetwork, memory, {
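`GhostNetwork` and its `mixer` argument aren't defined in this excerpt; a minimal sketch of what a 'hard' mixer plausibly does (wholesale weight copy, as opposed to a soft Polyak average like the TAU = 0.005 update in the DDPG example above), assuming the class name, `update()`, and `predict()` are the only parts of the interface used here:

import tensorflow as tf

class GhostNetwork:
    """Hypothetical sketch: keeps a frozen 'ghost' copy of a Keras model
    for target computation and refreshes it from the live weights."""
    def __init__(self, model, mixer='hard', tau=0.005):
        self._model = model
        self._ghost = tf.keras.models.clone_model(model)
        self._ghost.set_weights(model.get_weights())
        self._mixer = mixer
        self._tau = tau

    def update(self):
        if self._mixer == 'hard':
            # hard mix: overwrite the ghost with the live weights
            self._ghost.set_weights(self._model.get_weights())
        else:
            # soft mix: Polyak-average the ghost toward the live weights
            mixed = [
                (1.0 - self._tau) * g + self._tau * w
                for g, w in zip(self._ghost.get_weights(), self._model.get_weights())
            ]
            self._ghost.set_weights(mixed)

    def predict(self, x):
        # targets are always computed from the frozen copy
        return self._ghost.predict(x)

Calling ghostNetwork.update() once per epoch, as in the loop above, then plays the same stabilizing role as the modelClone.set_weights(...) copy in the DQN example.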