Example #1
    def __init__(self, config, localNet, env, globalNets, globalOptimizer, netLossFunc, nbAction, rank,
                 globalEpisodeCount, globalEpisodeReward, globalRunningAvgReward, resultQueue, logFolder,
                 stateProcessor=None, lock=None):

        self.globalPolicyNet = globalNets[0]
        self.globalTargetNet = globalNets[1]
        self.rank = rank
        self.globalOptimizer = globalOptimizer
        self.localNet = localNet

        mp.Process.__init__(self)
        DQNAgent.__init__(self, config, localNet, None, None, netLossFunc, nbAction, stateProcessor)


        self.totalStep = 0
        self.updateGlobalFrequency = 10
        if 'updateGlobalFrequency' in self.config:
            self.updateGlobalFrequency = self.config['updateGlobalFrequency']


        self.globalEpisodeCount = globalEpisodeCount
        self.globalEpisodeReward = globalEpisodeReward
        self.globalRunningAvgReward = globalRunningAvgReward
        self.resultQueue = resultQueue
        self.dirName = logFolder

        self.randomSeed = 1 + self.rank
        if 'randomSeed' in self.config:
            self.randomSeed = self.config['randomSeed'] + self.rank
        torch.manual_seed(self.randomSeed)

        self.nStepForward = 1
        if 'nStepForward' in self.config:
            self.nStepForward = self.config['nStepForward']
        self.targetNetUpdateEpisode = 10
        if 'targetNetUpdateEpisode' in self.config:
            self.targetNetUpdateEpisode = self.config['targetNetUpdateEpisode']

        self.nStepBuffer = []

        # only use vanilla replay memory
        self.memory = ReplayMemory(self.memoryCapacity)

        self.priorityMemoryOption = False

        # whether to use a synchronization lock when updating the global nets
        self.synchLock = False
        if 'synchLock' in self.config:
            self.synchLock = self.config['synchLock']

        self.lock = lock

        self.device = 'cpu'
        if 'device' in self.config and torch.cuda.is_available():
            self.device = self.config['device']
            torch.cuda.manual_seed(self.randomSeed)
            self.localNet = self.localNet.cuda()
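The shared counters, reward trackers, and result queue passed into this constructor are typically created with torch.multiprocessing and handed to one worker per process. A minimal launch sketch, assuming hypothetical names (Worker stands in for the class above; buildLocalNet and numWorkers are helpers the excerpt does not show):

import torch.multiprocessing as mp

if __name__ == '__main__':
    globalEpisodeCount = mp.Value('i', 0)        # shared episode counter
    globalEpisodeReward = mp.Value('d', 0.0)     # reward of the latest finished episode
    globalRunningAvgReward = mp.Value('d', 0.0)  # running average across workers
    resultQueue = mp.Queue()                     # workers push per-episode results here

    workers = [Worker(config, buildLocalNet(), env, (globalPolicyNet, globalTargetNet),
                      globalOptimizer, netLossFunc, nbAction, rank,
                      globalEpisodeCount, globalEpisodeReward, globalRunningAvgReward,
                      resultQueue, logFolder)
               for rank in range(numWorkers)]
    for w in workers:
        w.start()   # each worker runs its own training loop in a child process
    for w in workers:
        w.join()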
Example #2
    def init_memory(self):

        if self.priorityMemoryOption:
            self.memory = PrioritizedReplayMemory(self.memoryCapacity, self.config)
        else:
            # most common case: plain experience replay memory
            if self.memoryOption == 'natural':
                self.memory = ReplayMemory(self.memoryCapacity)
            elif self.memoryOption == 'reward':
                self.memory = ReplayMemoryReward(self.memoryCapacity, self.config['rewardMemoryBackupStep'],
                                                 self.gamma, self.config['rewardMemoryTerminalRatio'])

    def init_memory(self):
        self.memory = ReplayMemory(self.memoryCapacity)
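For reference, here is a minimal ReplayMemory consistent with how it is used in these examples (bounded capacity, push a Transition, uniform sampling). This is a sketch under those assumptions, not the repository's actual implementation:

import random
from collections import namedtuple

Transition = namedtuple('Transition', ('state', 'action', 'nextState', 'reward'))

class ReplayMemory:
    '''A bounded buffer of transitions with uniform random sampling.'''

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, transition):
        # overwrite the oldest entry once the buffer is full
        if len(self.memory) < self.capacity:
            self.memory.append(transition)
        else:
            self.memory[self.position] = transition
        self.position = (self.position + 1) % self.capacity

    def sample(self, batchSize):
        return random.sample(self.memory, batchSize)

    def __len__(self):
        return len(self.memory)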
Example #4
from Agents.Core.ReplayMemory import ReplayMemory, Transition
#from ..Agents.Core.ReplayMemory import ReplayMemory, Transition
import torch
import numpy as np
import pickle

state1 = np.random.rand(5, 5)
state2 = np.random.rand(5, 5)
state3 = np.random.rand(5, 5)
state4 = np.random.rand(5, 5)

tran1 = Transition(state1, 1, state2, 1)
tran2 = Transition(state3, 2, state4, 2)
memory = ReplayMemory(10)
memory.push(tran1)
memory.push(tran2)
print(memory)

with open('memory.pickle', 'wb') as file:
    pickle.dump(memory, file)

with open('memory.pickle', 'rb') as file:
    memory2 = pickle.load(file)

print(memory2)
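Since the memory pickles cleanly, torch.save and torch.load (which delegate to pickle for arbitrary Python objects) give an equivalent round trip:

torch.save(memory, 'memory.pt')
# weights_only=False is needed on recent PyTorch when loading arbitrary
# objects rather than plain tensors; omit it on versions before 1.13
memory3 = torch.load('memory.pt', weights_only=False)
print(memory3)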
    def init_memory(self):
        self.memories = [ReplayMemory(self.memoryCapacity) for _ in range(self.episodeLength)]

# the head of this call is truncated in the source; the surviving arguments
# suggest the DQN agent's constructor, so the first line here is an assumption
agent = DQNAgent(policyNet,
                 env,
                 optimizer,
                 torch.nn.MSELoss(reduction='none'),
                 N_A,
                 config=config)

xSet = np.linspace(-1, 1, 100)
policy = np.zeros_like(xSet)
for i, x in enumerate(xSet):
    policy[i] = agent.getPolicy(np.array([x]))

np.savetxt('StabilizerPolicyBeforeTrain.txt', policy, fmt='%d')

#agent.perform_random_exploration(10)
agent.train()
storeMemory = ReplayMemory(100000)
agent.testPolicyNet(100, storeMemory)
storeMemory.write_to_text('testPolicyMemory.txt')


def customPolicy(state):
    x = state[0]
    # move towards negative
    if x > 0.1:
        action = 2
    # move towards positive
    elif x < -0.1:
        action = 1
    # do not move
    else:
        action = 0
    return action
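For comparison with the learned policy saved above, the same grid sweep can be run through this hand-written controller (the output filename here is arbitrary):

# evaluate the hand-written controller on the same grid as the learned policy
customActions = np.zeros_like(xSet)
for i, x in enumerate(xSet):
    customActions[i] = customPolicy(np.array([x]))
np.savetxt('StabilizerCustomPolicy.txt', customActions, fmt='%d')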
Example #7
    def init_memory(self):
        '''
        initialize replay memory
        '''
        self.memory = ReplayMemory(self.memoryCapacity)

    def init_memory(self):

        if self.memoryOption != 'natural':
            raise NotImplementedError

        self.memory = ReplayMemory(self.memoryCapacity)
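Taken together, the variants above are branches of one config-driven dispatch. A sketch of the combined pattern, using only the option names that appear in these examples:

    def init_memory(self):
        '''
        initialize replay memory according to the config
        '''
        if self.priorityMemoryOption:
            self.memory = PrioritizedReplayMemory(self.memoryCapacity, self.config)
        elif self.memoryOption == 'natural':
            self.memory = ReplayMemory(self.memoryCapacity)
        elif self.memoryOption == 'reward':
            self.memory = ReplayMemoryReward(self.memoryCapacity,
                                             self.config['rewardMemoryBackupStep'],
                                             self.gamma,
                                             self.config['rewardMemoryTerminalRatio'])
        else:
            raise NotImplementedError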