예제 #1
0
def allTests(logger):
    '''Smoke-test the replay buffer by filling it from Unity episodes.

    Reads the ``params`` section of ``../config/modules/tests.json``,
    builds ``policy.epsGreedyPolicy`` from two ``rA.randomDiscreteActor``
    instances, then runs ten episodes (at most 1000 steps each) in the
    Unity environment named by ``cfg['policyParams']['binaryFile']``.
    The results of every episode are appended to a
    ``RB.SimpleReplayBuffer(1000)`` and the buffer's ``shape`` is printed
    after each episode.

    Any exception raised along the way is caught and reported through
    ``logger.error``; nothing is re-raised.

    Parameters
    ----------
    logger : {logging.Logger}
        The logger used for logging error information
    '''

    try:
        # A context manager guarantees that the config file handle is
        # closed, even when the JSON turns out to be malformed.
        with open('../config/modules/tests.json') as cfgFile:
            cfg = json.load(cfgFile)['params']

        agent = rA.randomDiscreteActor((37,), 4)
        rAgent = rA.randomDiscreteActor((37,), 4)
        egPolicy = policy.epsGreedyPolicy(agent, rAgent)

        # At any point this policy can be changed ...
        # (0.1 is presumably the exploration rate -- confirm against
        # epsGreedyPolicy.act)
        policy1 = lambda states: egPolicy.act(states, 0.1)
        memoryBuffer = RB.SimpleReplayBuffer(1000)

        print('Starting to generate memories ...')
        print('----------------------------------------')
        with envUnity.Env(cfg['policyParams']['binaryFile'], showEnv=False) as env:

            for _ in range(10):
                print('[Generating Memories] ', end='', flush=True)
                allResults = env.episode(policy1, maxSteps=1000)
                memoryBuffer.appendAllAgentResults(allResults)

                print('Memory Buffer lengths: {}'.format(memoryBuffer.shape))

    except Exception as e:
        logger.error(f'Unable to finish Memory Buffer tests: {e}')

    return
예제 #2
0
def allTests(logger):
    '''Smoke-test the Unity environment wrapper with one episode.

    Reads the ``params`` section of ``../config/modules/tests.json``,
    opens the Unity environment named by
    ``cfg['UnityParams']['binaryFile']`` with rendering enabled, runs a
    single episode of at most 1000 steps and prints, for every agent
    index below ``env.num_agents``, the rewards and done flags found in
    that agent's results.

    Any exception raised along the way is caught and reported through
    ``logger.error``; nothing is re-raised.

    Parameters
    ----------
    logger : {logging.Logger}
        The logger used for logging error information
    '''

    try:
        # A context manager guarantees that the config file handle is
        # closed, even when the JSON turns out to be malformed.
        with open('../config/modules/tests.json') as cfgFile:
            cfg = json.load(cfgFile)['params']

        # SECURITY NOTE(review): the action expression comes straight
        # from the config file and is passed to eval() on every call.
        # This is only acceptable while the config file is trusted; the
        # expression may refer to the lambda argument ``m``.
        policy = lambda m: eval(cfg['UnityParams']['randomAction'])

        with envUnity.Env(cfg['UnityParams']['binaryFile'], showEnv=True) as env:

            results = env.episode(policy, maxSteps=1000)
            for i in range(env.num_agents):
                print(f'For agent {i}:')
                # Each entry is unpacked as a 5-tuple; only the third
                # (printed as rewards) and fifth (printed as dones)
                # columns are used here.
                _, _, r, _, d = zip(*results[i])
                print(f'Rewards: {r}')
                print(f'Donnes: {d}')

    except Exception as e:
        logger.error(f'Unable to finish Unity tests: {e}')

    return
예제 #3
0
def doSomething(logger):
    '''Play one episode in the Unity environment and print its rewards.

    Loads the ``params`` section of ``../config/modules/playAgent.json``
    and prints it, creates a ``RB.SimpleReplayBuffer`` of
    ``memorySize`` entries, opens the Unity environment named by
    ``binary`` with ``showEnv=True`` and ``trainMode=False``, runs one
    episode limited by ``maxSteps`` and prints the third column of the
    first entry of the episode results (named ``r`` in the code).

    Exceptions are not caught here; they propagate to the caller.

    Parameters
    ----------
    logger : {logging.Logger}
        The logger used for logging error information. It is currently
        not used inside this function.
    '''

    # A context manager guarantees that the config file handle is closed,
    # even when the JSON turns out to be malformed.
    with open('../config/modules/playAgent.json') as cfgFile:
        cConfig = json.load(cfgFile)['params']
    print(cConfig)

    # NOTE(review): the buffer is constructed but never used afterwards;
    # it is kept so a missing 'memorySize' key still fails as before.
    memoryBuffer = RB.SimpleReplayBuffer(cConfig['memorySize'])

    with envUnity.Env(cConfig['binary'], showEnv=True, trainMode=False) as env:

        # NOTE(review): `policy` is not defined in this function, so it
        # is resolved as a module-level name -- confirm that it is a
        # callable policy at the point where this function runs.
        results = env.episode(policy, cConfig['maxSteps'])[0]
        _, _, r, _, _ = zip(*results)
        print(r)

    return
예제 #4
0
def trainAgentUnity(logger, configAgent):
    '''Train a DQN agent in the Unity environment and collect its scores.

    The function builds two ``qN.qNetworkDiscrete`` networks with the
    same arguments, wraps them in a ``dqn.Agent_DQN`` together with a
    ``RB.SimpleReplayBuffer``, fills the buffer with episodes driven by
    ``agent.randomAction`` and then runs the optimisation loop. Each
    iteration of that loop decays epsilon, plays one episode with
    ``agent.epsGreedyAction``, calls ``agent.step`` and
    ``agent.softUpdate``, and finally scores one episode played with
    ``agent.maxAction``.

    Any exception is caught and reported through ``logger.error``; in
    that case the function returns ``None``.

    Parameters
    ----------
    logger : {logging.Logger}
        The logger used for logging error information
    configAgent : {dict}
        Training configuration. The keys read are ``memorySize``,
        ``binary``, ``nIterations``, ``initMemoryIterations``, ``eps0``,
        ``epsDecay``, ``minEps``, ``maxSteps``, ``nSamples``, ``Tau``,
        ``inpSize``, ``outSize``, ``hiddenSizes``, ``hiddenActivations``
        and ``lr``. ``hiddenActivations`` holds activation names that
        must be keys of the lookup table defined at the top of the body.

    Returns
    -------
    dict or None
        ``{"scores": [...], "slidingScores": [...]}`` with one entry per
        optimisation iteration, where the sliding score is the mean of
        the last (at most) 100 scores; ``None`` when an error was logged.
    '''

    try:

        # Activation names accepted in configAgent['hiddenActivations']
        activationLookup = {
            'relu': F.relu,
            'relu6': F.relu6,
            'elu': F.elu,
            'celu': F.celu,
            'selu': F.selu,
            'prelu': F.prelu,
            'rrelu': F.rrelu,
            'glu': F.glu,
            'tanh': torch.tanh,
            'hardtanh': F.hardtanh
        }

        # Config parameters
        # --------------------------
        # Every key is read up front, in this order, so that a missing
        # key is reported before any network or environment is created.
        requiredKeys = (
            'memorySize', 'binary', 'nIterations', 'initMemoryIterations',
            'eps0', 'epsDecay', 'minEps', 'maxSteps', 'nSamples', 'Tau',
            'inpSize', 'outSize', 'hiddenSizes', 'hiddenActivations', 'lr',
        )
        settings = {}
        for key in requiredKeys:
            settings[key] = configAgent[key]

        activations = [
            activationLookup[name] for name in settings['hiddenActivations']
        ]

        recentScores = deque(maxlen=100)
        history = {"scores": [], "slidingScores": []}

        # Both networks are built from exactly the same arguments.
        netArgs = (settings['inpSize'], settings['outSize'],
                   settings['hiddenSizes'])
        netKwargs = {'activations': activations, 'lr': settings['lr']}
        QNslow = qN.qNetworkDiscrete(*netArgs, **netKwargs)
        QNfast = qN.qNetworkDiscrete(*netArgs, **netKwargs)

        maxSteps = settings['maxSteps']

        with envUnity.Env(settings['binary'], showEnv=False) as env:
            replayBuffer = RB.SimpleReplayBuffer(settings['memorySize'])
            agent = dqn.Agent_DQN(env,
                                  replayBuffer,
                                  QNslow,
                                  QNfast,
                                  numActions=4,
                                  gamma=1,
                                  device='cuda:0')
            agent.eval()

            randomPolicy = lambda m: [agent.randomAction(m)]

            print('Generating some initial memory ...')
            for _ in tqdm(range(settings['initMemoryIterations'])):
                warmupScore = agent.memoryUpdateEpisode(randomPolicy,
                                                        maxSteps=maxSteps,
                                                        minScoreToAdd=1)
                tqdm.write(f'score = {warmupScore}')

            print('Optimizing model ...')
            eps = settings['eps0']
            for _ in tqdm(range(settings['nIterations'])):

                # decrease epsilon, but never below the configured floor
                eps = max(settings['minEps'], settings['epsDecay'] * eps)

                exploringPolicy = lambda m: [agent.epsGreedyAction(m, eps)]
                agent.memoryUpdateEpisode(exploringPolicy, maxSteps=maxSteps)

                agent.step(nSamples=settings['nSamples'])
                agent.softUpdate(settings['Tau'])

                # Calculate Score
                episode = env.episode(lambda m: [agent.maxAction(m)],
                                      maxSteps)[0]
                _, _, rewards, _, _ = zip(*episode)
                score = sum(rewards)
                recentScores.append(score)
                slidingMean = np.mean(recentScores)
                tqdm.write(
                    'score = {}, max = {}, sliding score = {}, eps = {}'.
                    format(score, max(rewards), slidingMean, eps))

                history['scores'].append(score)
                history['slidingScores'].append(slidingMean)

        return history

    except Exception as e:
        logger.error(f'Unable to train the agent: {e}')

    return
예제 #5
0
def testAllAgents(logger):
    '''print a line
    
    This function simply prints a single line
    
    Parameters
    ----------
    logger : {logging.Logger}
        The logger used for logging error information
    '''

    try:

        cfg = json.load(open('../config/modules/testAgents.json'))['params']
        policy = lambda m: eval(cfg['agentParams']['randomAction'])
        memoryBuffer = RB.SimpleReplayBuffer(1000)
        QNslow = qN.qNetworkDiscrete(37,
                                     4, [50, 30, 10],
                                     activations=[F.tanh, F.tanh, F.tanh])
        QNfast = qN.qNetworkDiscrete(37,
                                     4, [50, 30, 10],
                                     activations=[F.tanh, F.tanh, F.tanh])

        with envUnity.Env(cfg['agentParams']['binaryFile'],
                          showEnv=False) as env:

            agent = dqn.Agent_DQN(env, memoryBuffer, QNslow, QNfast, 4, 1)
            agent.eval()
            eps = 0.999
            policy = lambda m: [agent.epsGreedyAction(m, eps)]

            print('Starting to generate memories ...')
            print('----------------------------------------')
            for _ in range(3):
                print('[Generating Memories] ', end='', flush=True)
                score = agent.memoryUpdateEpisode(policy, maxSteps=1000)
                print('Memory Buffer lengths: {}\nScore: {}'.format(
                    agent.memory.shape, score))

            print('Sampling from the memory:')
            memories = agent.memory.sample(20)
            s, a, r, ns, f = zip(*memories)
            s = np.array(s)

            print('Sampled some states of size {}'.format(s.shape))
            print('Finding the maxAction ....')
            s = torch.as_tensor(s.astype(np.float32))
            result1 = agent.randomAction(s)
            result2 = agent.maxAction(s)
            result3 = agent.epsGreedyAction(s, 0.5)

            print('Random Actioon stuff ......')
            results4 = env.episode(lambda m: [agent.randomAction(m)], 10)[0]
            # s, a, r, ns, f = zip(*results4)
            # print(s)

            print('Max Actioon stuff ......')
            results4 = env.episode(lambda m: [agent.maxAction(m)], 10)
            # print(len(results4))

            print('epsGreedy Actioon stuff ......')
            results4 = env.episode(lambda m: [agent.epsGreedyAction(m, 1)], 10)

            print('Load/Save a model')
            agent.save('../models', 'someName')
            agent.load('../models', 'someName')

            print('Doing a soft update')
            agent.step(nSamples=100)
            agent.softUpdate(0.2)
            print('Finished a soft update')

            # agent.step(nSamples = 10)

    except Exception as e:
        logger.error(f'Unable to test all agents: {e}')

    return