def allTests(logger):
    '''run the replay-buffer tests

    Generates episodes with an epsilon-greedy random policy and checks
    that the replay buffer accumulates the results.
    '''

    try:
        cfg = json.load(open('../config/modules/tests.json'))['params']

        agent = rA.randomDiscreteActor((37,), 4)
        rAgent = rA.randomDiscreteActor((37,), 4)

        egPolicy = policy.epsGreedyPolicy(agent, rAgent)

        # At any point this policy can be changed ...
        policy1 = lambda states: egPolicy.act(states, 0.1)

        memoryBuffer = RB.SimpleReplayBuffer(1000)

        print('Starting to generate memories ...')
        print('----------------------------------------')
        with envUnity.Env(cfg['policyParams']['binaryFile'], showEnv=False) as env:
            for _ in range(10):
                print('[Generating Memories] ', end='', flush=True)
                allResults = env.episode(policy1, maxSteps=1000)
                memoryBuffer.appendAllAgentResults(allResults)
                print('Memory Buffer lengths: {}'.format(memoryBuffer.shape))

    except Exception as e:
        logger.error(f'Unable to finish Memory Buffer tests: {e}')

    return
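# A minimal sampling sketch (not part of the original tests): once the
# buffer above is filled, minibatches can be drawn for learning. It
# assumes SimpleReplayBuffer.sample(n) returns a list of
# (state, action, reward, nextState, done) tuples, matching how the
# buffer is consumed in testAllAgents below.
def sampleBufferSketch(memoryBuffer, nSamples=32):
    memories = memoryBuffer.sample(nSamples)
    s, a, r, ns, d = zip(*memories)
    # states are stacked into arrays for batched network input
    return np.array(s), a, r, np.array(ns), d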
def allTests(logger):
    '''run the Unity environment tests

    Plays one episode with a random policy read from the config file and
    prints the rewards and done flags for every agent.
    '''

    try:
        cfg = json.load(open('../config/modules/tests.json'))['params']

        policy = lambda m: eval(cfg['UnityParams']['randomAction'])

        with envUnity.Env(cfg['UnityParams']['binaryFile'], showEnv=True) as env:
            results = env.episode(policy, maxSteps=1000)
            for i in range(env.num_agents):
                print(f'For agent {i}:')
                s, a, r, ns, d = zip(*results[i])
                print(f'Rewards: {r}')
                print(f'Dones: {d}')

    except Exception as e:
        logger.error(f'Unable to finish Unity tests: {e}')

    return
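# For reference, an inline random policy equivalent to the eval'd
# `randomAction` string above (a sketch: the 4-action discrete space and
# one-action-per-agent return shape are assumptions based on the other
# tests in this module, not on the config file itself).
def randomPolicySketch(states):
    # one random discrete action per agent state
    return [np.random.randint(4) for _ in states]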
def doSomething(logger):
    '''play one episode and print the rewards

    Loads the play configuration, creates a replay buffer, runs a single
    episode in the Unity environment, and prints the rewards collected.

    Parameters
    ----------
    logger : {logging.Logger}
        The logger used for logging error information
    '''

    cConfig = json.load(open('../config/modules/playAgent.json'))['params']
    print(cConfig)

    memoryBuffer = RB.SimpleReplayBuffer(cConfig['memorySize'])

    # NOTE: `policy` was undefined in this scope in the original code. A
    # random 4-action policy is assumed here so the function can run;
    # substitute the trained agent's policy as needed.
    policy = lambda m: [np.random.randint(4)]

    with envUnity.Env(cConfig['binary'], showEnv=True, trainMode=False) as env:
        results = env.episode(policy, cConfig['maxSteps'])[0]
        s, a, r, ns, f = zip(*results)
        print(r)

    return
def trainAgentUnity(logger, configAgent):

    try:
        functionMaps = {
            'relu': F.relu, 'relu6': F.relu6, 'elu': F.elu,
            'celu': F.celu, 'selu': F.selu, 'prelu': F.prelu,
            'rrelu': F.rrelu, 'glu': F.glu, 'tanh': torch.tanh,
            'hardtanh': F.hardtanh
        }

        # Config parameters
        # --------------------------
        memorySize = configAgent['memorySize']
        binary = configAgent['binary']
        nIterations = configAgent['nIterations']
        initMemoryIterations = configAgent['initMemoryIterations']
        eps0 = configAgent['eps0']
        epsDecay = configAgent['epsDecay']
        minEps = configAgent['minEps']
        maxSteps = configAgent['maxSteps']
        nSamples = configAgent['nSamples']
        Tau = configAgent['Tau']
        inpSize = configAgent['inpSize']
        outSize = configAgent['outSize']
        hiddenSizes = configAgent['hiddenSizes']
        hiddenActivations = configAgent['hiddenActivations']
        lr = configAgent['lr']

        hiddenActivations = [functionMaps[m] for m in hiddenActivations]

        slidingScore = deque(maxlen=100)
        allResults = {"scores": [], "slidingScores": []}

        QNslow = qN.qNetworkDiscrete(
            inpSize, outSize, hiddenSizes,
            activations=hiddenActivations, lr=lr)
        QNfast = qN.qNetworkDiscrete(
            inpSize, outSize, hiddenSizes,
            activations=hiddenActivations, lr=lr)

        with envUnity.Env(binary, showEnv=False) as env:

            memoryBuffer = RB.SimpleReplayBuffer(memorySize)
            agent = dqn.Agent_DQN(
                env, memoryBuffer, QNslow, QNfast,
                numActions=4, gamma=1, device='cuda:0')
            agent.eval()

            policy = lambda m: [agent.randomAction(m)]

            print('Generating some initial memory ...')
            for i in tqdm(range(initMemoryIterations)):
                score = agent.memoryUpdateEpisode(
                    policy, maxSteps=maxSteps, minScoreToAdd=1)
                tqdm.write(f'score = {score}')

            eps = eps0
            print('Optimizing model ...')
            for i in tqdm(range(nIterations)):

                eps = max(minEps, epsDecay * eps)  # decrease epsilon
                policy = lambda m: [agent.epsGreedyAction(m, eps)]

                agent.memoryUpdateEpisode(policy, maxSteps=maxSteps)
                agent.step(nSamples=nSamples)
                agent.softUpdate(Tau)

                # Calculate Score
                results = env.episode(
                    lambda m: [agent.maxAction(m)], maxSteps)[0]
                s, a, r, ns, f = zip(*results)
                score = sum(r)
                slidingScore.append(score)

                tqdm.write(
                    'score = {}, max = {}, sliding score = {}, eps = {}'.format(
                        score, max(r), np.mean(slidingScore), eps))

                allResults['scores'].append(score)
                allResults['slidingScores'].append(np.mean(slidingScore))

            # env.env.close()

        return allResults

    except Exception as e:
        logger.error(f'Unable to train the agent: {e}')

    return
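# Hypothetical invocation of trainAgentUnity. Every value below is a
# placeholder chosen to match the shapes used elsewhere in this module
# (37-dimensional states, 4 discrete actions); none of it comes from the
# original config files.
exampleConfigAgent = {
    'memorySize': 10000, 'binary': 'path/to/unity/binary',
    'nIterations': 500, 'initMemoryIterations': 10,
    'eps0': 1.0, 'epsDecay': 0.995, 'minEps': 0.01,
    'maxSteps': 1000, 'nSamples': 64, 'Tau': 0.1,
    'inpSize': 37, 'outSize': 4,
    'hiddenSizes': [50, 30, 10],
    'hiddenActivations': ['tanh', 'tanh', 'tanh'],
    'lr': 1e-3,
}
# allResults = trainAgentUnity(logger, exampleConfigAgent)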
def testAllAgents(logger):
    '''exercise the DQN agent end to end

    Runs the DQN agent through memory generation, memory sampling, the
    three action-selection modes, model save/load, and a soft update.

    Parameters
    ----------
    logger : {logging.Logger}
        The logger used for logging error information
    '''

    try:
        cfg = json.load(open('../config/modules/testAgents.json'))['params']
        policy = lambda m: eval(cfg['agentParams']['randomAction'])

        memoryBuffer = RB.SimpleReplayBuffer(1000)
        QNslow = qN.qNetworkDiscrete(
            37, 4, [50, 30, 10],
            activations=[F.tanh, F.tanh, F.tanh])
        QNfast = qN.qNetworkDiscrete(
            37, 4, [50, 30, 10],
            activations=[F.tanh, F.tanh, F.tanh])

        with envUnity.Env(cfg['agentParams']['binaryFile'], showEnv=False) as env:

            agent = dqn.Agent_DQN(env, memoryBuffer, QNslow, QNfast, 4, 1)
            agent.eval()

            eps = 0.999
            policy = lambda m: [agent.epsGreedyAction(m, eps)]

            print('Starting to generate memories ...')
            print('----------------------------------------')
            for _ in range(3):
                print('[Generating Memories] ', end='', flush=True)
                score = agent.memoryUpdateEpisode(policy, maxSteps=1000)
                print('Memory Buffer lengths: {}\nScore: {}'.format(
                    agent.memory.shape, score))

            print('Sampling from the memory:')
            memories = agent.memory.sample(20)
            s, a, r, ns, f = zip(*memories)
            s = np.array(s)
            print('Sampled some states of size {}'.format(s.shape))

            print('Finding the maxAction ....')
            s = torch.as_tensor(s.astype(np.float32))
            result1 = agent.randomAction(s)
            result2 = agent.maxAction(s)
            result3 = agent.epsGreedyAction(s, 0.5)

            print('Random Action stuff ......')
            results4 = env.episode(lambda m: [agent.randomAction(m)], 10)[0]
            # s, a, r, ns, f = zip(*results4)
            # print(s)

            print('Max Action stuff ......')
            results4 = env.episode(lambda m: [agent.maxAction(m)], 10)
            # print(len(results4))

            print('epsGreedy Action stuff ......')
            results4 = env.episode(lambda m: [agent.epsGreedyAction(m, 1)], 10)

            print('Load/Save a model')
            agent.save('../models', 'someName')
            agent.load('../models', 'someName')

            print('Doing a soft update')
            agent.step(nSamples=100)
            agent.softUpdate(0.2)
            print('Finished a soft update')

            # agent.step(nSamples = 10)

    except Exception as e:
        logger.error(f'Unable to test all agents: {e}')

    return
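# For reference, the conventional Polyak soft update that a Tau-weighted
# target update usually denotes:
#     theta_slow <- Tau * theta_fast + (1 - Tau) * theta_slow
# This sketch is an assumption about what Agent_DQN.softUpdate(Tau)
# computes, not a copy of its actual implementation.
def softUpdateSketch(slowNet, fastNet, tau):
    for pSlow, pFast in zip(slowNet.parameters(), fastNet.parameters()):
        # blend the fast (online) weights into the slow (target) weights
        pSlow.data.copy_(tau * pFast.data + (1 - tau) * pSlow.data)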