Python Memory.getLastMemory Examples

Programming Language: Python

Namespace/Package Name: memory

Class/Type: Memory

Method/Function: getLastMemory

Examples at hotexamples.com: 1

Python Memory.getLastMemory - 1 examples found. These are the top rated real world Python examples of memory.Memory.getLastMemory extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Memory(30)

add(30)

append(16)

batch_update(10)

clear(9)

__init__(8)

clear_stmemory(5)

clear_memory(4)

addMemory(4)

add_single(3)

ai_features(3)

add_sample(3)

newLocalMemory(2)

append_memory(2)

getIP(2)

freeLocalMemory(2)

allocate(2)

alloc(2)

forget(2)

addprocess(2)

load_cartridge(2)

add_to_memory(2)

deleteFile(2)

choosepagevm(2)

awakeLocalMemory(2)

defragment(2)

addReminder(2)

test(2)

sleepLocalMemory(2)

setLocalMemory(2)

setGlobalMemory(2)

addIP(2)

tofile(1)

symbol(1)

get_int_value(1)

setupIO(1)

writeMemory(1)

get_float_value(1)

get_byte(1)

get_bool_value(1)

getLastMemory(1)

getByte(1)

dump_db(1)

use_memory(1)

getArea(1)

get_pointer_value(1)

disasm(1)

symbolXML(1)

syscall(1)

end_memorizing(1)

Example #1

Show file

File: deepq.py Project: vyraun/deep-q-learning

class DeepQ:
    def __init__(self, environment, inputs):
        self.environment = environment
        self.state_size = inputs
        self.nr_actions = environment.action_space.n
        self.memory = Memory(30000)
        self.discountFactor = 0.975
        self.predictionModels = []
   
    def initImaginationNetworks(self):
        for t in xrange(self.nr_actions):
            self.predictionModels.insert(t, self.createModel(self.state_size, self.state_size, [self.state_size, self.state_size, self.state_size], "relu", 0.01))

    def initRewardNetwork(self):
        self.rewardModel = self.createModel(self.state_size, 1, [self.state_size, self.state_size, self.state_size], "relu", 0.01)

    def createModel(self, inputs, outputs, hiddenLayers, activationType, learningRate):
        model = Sequential()
        if len(hiddenLayers) == 0: 
            model.add(Dense(outputs, input_shape=(inputs,), init='lecun_uniform'))
            model.add(Activation("linear"))
        else :
            model.add(Dense(hiddenLayers[0], input_shape=(inputs,), init='lecun_uniform'))
            
            if (activationType == "LeakyReLU") :
                model.add(LeakyReLU(alpha=0.01))
            else :
                model.add(Activation(activationType))
            
            for index in range(1, len(hiddenLayers)-1):
                layerSize = hiddenLayers[index]
                model.add(Dense(layerSize, init='lecun_uniform'))
                if (activationType == "LeakyReLU") :
                    model.add(LeakyReLU(alpha=0.01))
                else :
                    model.add(Activation(activationType))
            model.add(Dense(outputs, init='lecun_uniform'))
            model.add(Activation("linear"))
        optimizer = optimizers.RMSprop(lr=learningRate, rho=0.9, epsilon=1e-06)
        model.compile(loss="mse", optimizer=optimizer)
        return model

    def backupNetwork(self, model, backup):
        weightMatrix = []
        for layer in self.model.layers:
            weights = layer.get_weights()
            weightMatrix.append(weights)
        i = 0
        for layer in self.secondBrain.layers:
            weights = weightMatrix[i]
            layer.set_weights(weights)
            i += 1

    def getStatePrediction(self, state, action):
        predicted = self.predictionModels[action].predict(state.reshape(1,len(state)))
        return predicted[0]

    def getPredictedStates(self, state):
        predictedStates = []
        for a in xrange(self.nr_actions):
            predictedStates.insert(a, self.getStatePrediction(state, a))
        return predictedStates

    def getStateValuePrediction(self, state):
        predictedReward = self.rewardModel.predict(state.reshape(1,len(state)))
        return predictedReward[0][0]

    def getPredictedActionValues(self, state):
        predictedActionValues = []
        for a in xrange(self.nr_actions):
            predictedActionValues.insert(a, self.getStateValuePrediction(self.getStatePrediction(state, a)))
        return predictedActionValues

    def getMaxValue(self, array):
        return np.max(array)

    def getMaxIndex(self, array):
        return np.argmax(array)

    def getTarget(self, state, reward, isFinal):
        if isFinal:
            return reward
        else:
            predictedActionValues = self.getPredictedActionValues(state)
            # return reward + self.discountFactor * (sum(predictedActionValues)/len(predictedActionValues))
            return reward + self.discountFactor * np.max(predictedActionValues)


    def printStatePredictionTree(self, state):
        root = Tree()
        # first layer
        predicted1 = self.getPredictedStates(state)
        root.data = state
        root.left = Tree()
        root.left.data = predicted1[0]
        root.right = Tree()
        root.right.data = predicted1[1]

        # second layer
        predicted2left = self.getPredictedStates(predicted1[0])
        root.left.left = Tree()
        root.left.left.data = predicted2left[0]
        root.left.right = Tree()
        root.left.right.data = predicted2left[1]
        predicted2right = self.getPredictedStates(predicted1[1])
        root.right.left = Tree()
        root.right.left.data = predicted2right[0]
        root.right.right = Tree()
        root.right.right.data = predicted2right[1]

        print ""
        print "\t\t\t\t\t\t\t\t\t\t",root.data
        print "\t\t\t\t",root.left.data,"\t\t\t\t\t\t\t",root.right.data
        print root.left.left.data,"\t",root.left.right.data,"\t",root.right.left.data,"\t",root.right.right.data

    def printStateValueTree(self, state):
        root = Tree()
        # first layer
        predicted1 = self.getPredictedStates(state)
        root.data = state
        root.left = Tree()
        root.left.data = predicted1[0]
        root.right = Tree()
        root.right.data = predicted1[1]

        # second layer
        predicted2left = self.getPredictedStates(predicted1[0])
        root.left.left = Tree()
        root.left.left.data = predicted2left[0]
        root.left.right = Tree()
        root.left.right.data = predicted2left[1]
        predicted2right = self.getPredictedStates(predicted1[1])
        root.right.left = Tree()
        root.right.left.data = predicted2right[0]
        root.right.right = Tree()
        root.right.right.data = predicted2right[1]

        print ""
        print "\t\t\t\t\t\t\t\t\t\t",self.getStateValuePrediction(root.data)
        print "\t\t\t\t",self.getStateValuePrediction(root.left.data),"\t\t\t\t\t\t\t\t\t\t\t",self.getStateValuePrediction(root.right.data)
        print self.getStateValuePrediction(root.left.left.data),"\t\t\t\t\t",self.getStateValuePrediction(root.left.right.data),"\t\t\t\t\t",self.getStateValuePrediction(root.right.left.data),"\t\t\t\t\t",self.getStateValuePrediction(root.right.right.data)

    # select the action with the highest Q value
    def selectAction(self, state, explorationRate):
        rand = random.random()
        if rand < explorationRate :
            action = np.random.randint(0, self.nr_actions)
        else :
            action = self.getMaxIndex(self.getPredictedActionValues(state))
        return action

    def selectActionStepsForward(self, state, depth):
        root = Tree()
        # first layer
        predicted1 = self.getPredictedStates(state)
        leftMax = self.getStateValuePrediction(predicted1[0])
        rightMax = self.getStateValuePrediction(predicted1[1])

        predicted2left = self.getPredictedActionValues(predicted1[0])
        leftMax = max(leftMax, np.max(self.getStateValuePrediction(predicted1[0])))

        predicted2right = self.getPredictedStates(predicted1[1])
        rightMax = max(rightMax, np.max(self.getStateValuePrediction(predicted1[1])))

        if rightMax > leftMax:
            return 1
        else:
            return 0


    def addMemory(self, state, action, reward, newState, isFinal):
        self.memory.addMemory(state, action, reward, newState, isFinal)

    def trainStatePredictionOnLastState(self):
        X_batch = np.empty((0,self.state_size), dtype = np.float64)
        Y_batch = np.empty((0,self.state_size), dtype = np.float64)

        lastMemory = self.memory.getLastMemory()
        isFinal = lastMemory['isFinal']
        state = lastMemory['state']
        action = lastMemory['action']
        reward = lastMemory['reward']
        newState = lastMemory['newState']

        X_batch = np.append(X_batch, [state], axis=0)
        Y_batch = np.append(Y_batch, [newState], axis=0)

        self.predictionModels[action].fit(X_batch, Y_batch, batch_size = len(X_batch), verbose = 0)


    def trainStatePreditions(self, miniBatchSize): 
        X_batches = []
        Y_batches = []
        for t in xrange(self.nr_actions):
            X_batches.append(np.empty((0,self.state_size), dtype = np.float64))
            Y_batches.append(np.empty((0,self.state_size), dtype = np.float64))
        miniBatch = self.memory.getMiniBatch(miniBatchSize)
        for sample in miniBatch:
            isFinal = sample['isFinal']
            state = sample['state']
            action = sample['action']
            reward = sample['reward']
            newState = sample['newState']

            inputValues = state.copy()
            targetValues = newState.copy()

            X_batches[action] = np.append(X_batches[action], np.array([inputValues]), axis=0)
            Y_batches[action] = np.append(Y_batches[action], np.array([targetValues]), axis=0)

        for a in xrange(self.nr_actions):
            if len(X_batches[action]) > 0:
                self.predictionModels[action].fit(X_batches[action].reshape(len(X_batches[action]),4), Y_batches[action], batch_size = len(X_batches[action]), verbose = 0)

    def trainRewardModel(self, miniBatchSize): 
        miniBatch = self.memory.getMiniBatch(miniBatchSize)
        X_batch = np.empty((0,self.state_size), dtype = np.float64)
        Y_batch = np.empty((0,1), dtype = np.float64)
        for sample in miniBatch:
            isFinal = sample['isFinal']
            state = sample['state']
            action = sample['action']
            reward = sample['reward']
            newState = sample['newState']

            inputValues = newState.copy()
            targetValue = [self.getTarget(newState, reward, isFinal)]

            X_batch = np.append(X_batch, np.array([inputValues]), axis=0)
            Y_batch = np.append(Y_batch, [targetValue], axis=0)
        self.rewardModel.fit(X_batch, Y_batch, batch_size = len(miniBatch), verbose = 0)