import gym import Agent import random import numpy as np def RandomActionDiscrete(ActionProp): ActionSize = len(ActionProp) Action = np.random.choice(np.arange(ActionSize), p=ActionProp) ActionOut = np.zeros(ActionSize) ActionOut[Action] = 1 return Action, ActionOut env = gym.make("CartPole-v1") Ag = Agent.ACPGAg(2, 4) EpisodeTime = 200 CurrentEpisode = 0 DoneTimes = 0 while True: Observation = env.reset() O_array = [] R_array = [] A_array = [] AccReward = 0 CurrentEpisode += 1 if CurrentEpisode % 50 == 0: print(DoneTimes) DoneTimes = 0 for t in range(EpisodeTime):
import random
import numpy as np


def RandomActionDiscrete(ActionProp):
    """Sample a discrete action index from a probability vector.

    A single uniform draw in [0, 1] is compared against the running
    (cumulative) sum of the probabilities, so action ``i`` is selected with
    probability ``ActionProp[i]``.

    Args:
        ActionProp: Sequence of per-action probabilities, expected to sum
            to 1.

    Returns:
        A tuple ``(index, one_hot)``: the sampled action index and a NumPy
        array of ``len(ActionProp)`` zeros with a 1 at that index.
    """
    ActionSize = len(ActionProp)
    ActionOut = np.zeros(ActionSize)
    rand = random.uniform(0, 1)
    # Compare against the cumulative probability, not each individual
    # probability; the latter skews the distribution toward later actions.
    Cumulative = 0.0
    for ActionIndex, Prob in enumerate(ActionProp):
        Cumulative += Prob
        if rand < Cumulative:
            ActionOut[ActionIndex] = 1
            return ActionIndex, ActionOut
    # Floating-point rounding can leave the total slightly below the draw;
    # fall back to the last action in that case.
    ActionOut[ActionSize - 1] = 1
    return ActionSize - 1, ActionOut


env = gym.make("Acrobot-v1")
# presumably (number of actions, observation size) -- confirm against Agent.ACPGAg
Ag = Agent.ACPGAg(3, 6)
CurrentEpisode = 0
while True:
    # Start a new episode with empty per-episode buffers and counters.
    Observation = env.reset()
    O_array = []
    R_array = []
    A_array = []
    CurrentEpisode += 1
    rememberTimeStep = 0
    StepTime = 0
    score = 0
    while True:
        StepTime += 1
        # Render only every 100th episode.
        if CurrentEpisode % 100 == 0:
            env.render()