import math
import os

import numpy
import tensorflow as tf
import keras
from keras import backend as K
from keras.constraints import maxnorm
from keras.layers import (BatchNormalization, Conv2D, Dense, Dropout, Flatten,
                          Input, LSTM, MaxPooling2D)
from keras.models import Sequential

# createDiscreteActionsSquare and createDiscreteActionsCircle are project
# helpers; they are assumed to be importable from the surrounding codebase.


class Network(object):
    def __init__(self, parameters, modelName=None):
        self.parameters = parameters
        if parameters.SQUARE_ACTIONS:
            self.actions = createDiscreteActionsSquare(
                self.parameters.NUM_ACTIONS, self.parameters.ENABLE_SPLIT,
                self.parameters.ENABLE_EJECT)
        else:
            self.actions = createDiscreteActionsCircle(
                self.parameters.NUM_ACTIONS, self.parameters.ENABLE_SPLIT,
                self.parameters.ENABLE_EJECT)
        self.num_actions = len(self.actions)
        self.loadedModelName = None
        self.gpus = self.parameters.GPUS

        # Q-learning
        self.discount = self.parameters.DISCOUNT
        self.epsilon = self.parameters.EPSILON
        self.frameSkipRate = self.parameters.FRAME_SKIP_RATE
        self.gridSquaresPerFov = self.parameters.GRID_SQUARES_PER_FOV

        # CNN
        if self.parameters.CNN_REPR:
            # Each CNN_Lx is a (kernelSize, stride, filterNum) tuple.
            self.kernel_1 = self.parameters.CNN_L1
            self.kernel_2 = self.parameters.CNN_L2
            self.kernel_3 = self.parameters.CNN_L3
            if self.parameters.CNN_USE_L1:
                self.stateReprLen = self.parameters.CNN_INPUT_DIM_1
            elif self.parameters.CNN_USE_L2:
                self.stateReprLen = self.parameters.CNN_INPUT_DIM_2
            else:
                self.stateReprLen = self.parameters.CNN_INPUT_DIM_3
        else:
            self.stateReprLen = self.parameters.STATE_REPR_LEN

        # ANN
        self.learningRate = self.parameters.ALPHA
        self.optimizer = self.parameters.OPTIMIZER
        if self.parameters.ACTIVATION_FUNC_HIDDEN == "elu":
            # ELU is added as a separate layer below, so the Dense layers
            # themselves stay linear.
            self.activationFuncHidden = "linear"
        else:
            self.activationFuncHidden = self.parameters.ACTIVATION_FUNC_HIDDEN
        self.activationFuncLSTM = self.parameters.ACTIVATION_FUNC_LSTM
        self.activationFuncOutput = self.parameters.ACTIVATION_FUNC_OUTPUT

        self.layers = parameters.Q_LAYERS
        # The LSTM branch and the getters below use three hidden-layer sizes,
        # which the original never assigned; assume Q_LAYERS provides them
        # (padded with 0 when shorter).
        sizes = list(self.layers) + [0, 0, 0]
        self.hiddenLayer1, self.hiddenLayer2, self.hiddenLayer3 = sizes[:3]

        if self.parameters.USE_ACTION_AS_INPUT:
            inputDim = self.stateReprLen + 4
            outputDim = 1
        else:
            inputDim = self.stateReprLen
            outputDim = self.num_actions

        if self.parameters.EXP_REPLAY_ENABLED:
            input_shape_lstm = (self.parameters.MEMORY_TRACE_LEN, inputDim)
            stateful_training = False
            self.batch_len = self.parameters.MEMORY_BATCH_LEN
        else:
            input_shape_lstm = (1, inputDim)
            stateful_training = True
            self.batch_len = 1

        if self.parameters.INITIALIZER == "glorot_uniform":
            initializer = keras.initializers.glorot_uniform()
        elif self.parameters.INITIALIZER == "glorot_normal":
            initializer = keras.initializers.glorot_normal()
        else:
            weight_initializer_range = math.sqrt(
                6 / (self.stateReprLen + self.num_actions))
            initializer = keras.initializers.RandomUniform(
                minval=-weight_initializer_range,
                maxval=weight_initializer_range,
                seed=None)

        # CNN
        if self.parameters.CNN_REPR:
            if self.parameters.CNN_P_REPR:
                if self.parameters.CNN_P_INCEPTION:
                    self.input = Input(shape=(self.stateReprLen,
                                              self.stateReprLen, 3))
                    tower_1 = Conv2D(self.kernel_2[2], (1, 1), padding='same',
                                     activation='relu')(self.input)
                    tower_1 = Conv2D(self.kernel_2[2], (3, 3), padding='same',
                                     activation='relu')(tower_1)
                    tower_2 = Conv2D(self.kernel_2[2], (1, 1), padding='same',
                                     activation='relu')(self.input)
                    tower_2 = Conv2D(self.kernel_2[2], (5, 5), padding='same',
                                     activation='relu')(tower_2)
                    tower_3 = MaxPooling2D((3, 3), strides=(1, 1),
                                           padding='same')(self.input)
                    tower_3 = Conv2D(self.kernel_2[2], (1, 1), padding='same',
                                     activation='relu')(tower_3)
                    self.valueNetwork = keras.layers.concatenate(
                        [tower_1, tower_2, tower_3], axis=3)
                    self.valueNetwork = Flatten()(self.valueNetwork)
                # DQN approach
                else:
                    if self.parameters.CNN_P_RGB:
                        channels = 3  # RGB
                    else:
                        channels = 1  # grayscale
                    if self.parameters.CNN_LAST_GRID:
                        channels = channels * 2
                    if self.parameters.COORDCONV:
                        channels += 2
                    self.input = Input(shape=(self.stateReprLen,
                                              self.stateReprLen, channels))
                    conv = self.input
                    if self.parameters.CNN_USE_L1:
                        conv = Conv2D(self.kernel_1[2],
                                      kernel_size=(self.kernel_1[0],
                                                   self.kernel_1[0]),
                                      strides=(self.kernel_1[1],
                                               self.kernel_1[1]),
                                      activation='relu',
                                      data_format='channels_last')(conv)
                    if self.parameters.CNN_USE_L2:
                        conv = Conv2D(self.kernel_2[2],
                                      kernel_size=(self.kernel_2[0],
                                                   self.kernel_2[0]),
                                      strides=(self.kernel_2[1],
                                               self.kernel_2[1]),
                                      activation='relu',
                                      data_format='channels_last')(conv)
                    if self.parameters.CNN_USE_L3:
                        conv = Conv2D(self.kernel_3[2],
                                      kernel_size=(self.kernel_3[0],
                                                   self.kernel_3[0]),
                                      strides=(self.kernel_3[1],
                                               self.kernel_3[1]),
                                      activation='relu',
                                      data_format='channels_last')(conv)
                    self.valueNetwork = Flatten()(conv)
            # Not pixel input
            else:
                if self.parameters.CNN_TOWER:
                    # One small conv tower per vision grid, merged afterwards.
                    tower = []
                    self.input = []
                    self.towerModel = []
                    for grid in range(self.parameters.NUM_OF_GRIDS):
                        self.input.append(Input(shape=(1, self.stateReprLen,
                                                       self.stateReprLen)))
                        if self.parameters.CNN_USE_L1:
                            tower.append(Conv2D(
                                self.kernel_1[2],
                                kernel_size=(self.kernel_1[0],
                                             self.kernel_1[0]),
                                strides=(self.kernel_1[1], self.kernel_1[1]),
                                activation='relu',
                                data_format='channels_first')(self.input[grid]))
                        if self.parameters.CNN_USE_L2:
                            if self.parameters.CNN_USE_L1:
                                tower[grid] = Conv2D(
                                    self.kernel_2[2],
                                    kernel_size=(self.kernel_2[0],
                                                 self.kernel_2[0]),
                                    strides=(self.kernel_2[1],
                                             self.kernel_2[1]),
                                    activation='relu',
                                    data_format='channels_first')(tower[grid])
                            else:
                                tower.append(Conv2D(
                                    self.kernel_2[2],
                                    kernel_size=(self.kernel_2[0],
                                                 self.kernel_2[0]),
                                    strides=(self.kernel_2[1],
                                             self.kernel_2[1]),
                                    activation='relu',
                                    data_format='channels_first')(self.input[grid]))
                        if self.parameters.CNN_USE_L3:
                            if self.parameters.CNN_USE_L2:
                                tower[grid] = Conv2D(
                                    self.kernel_3[2],
                                    kernel_size=(self.kernel_3[0],
                                                 self.kernel_3[0]),
                                    strides=(self.kernel_3[1],
                                             self.kernel_3[1]),
                                    activation='relu',
                                    data_format='channels_first')(tower[grid])
                            else:
                                tower.append(Conv2D(
                                    self.kernel_3[2],
                                    kernel_size=(self.kernel_3[0],
                                                 self.kernel_3[0]),
                                    strides=(self.kernel_3[1],
                                             self.kernel_3[1]),
                                    activation='relu',
                                    data_format='channels_first')(self.input[grid]))
                        tower[grid] = Flatten()(tower[grid])
                    self.valueNetwork = keras.layers.concatenate(tower, axis=1)
                # Vision grid merging
                else:
                    self.input = Input(shape=(self.parameters.NUM_OF_GRIDS,
                                              self.stateReprLen,
                                              self.stateReprLen))
                    conv = self.input
                    if self.parameters.CNN_USE_L1:
                        conv = Conv2D(self.kernel_1[2],
                                      kernel_size=(self.kernel_1[0],
                                                   self.kernel_1[0]),
                                      strides=(self.kernel_1[1],
                                               self.kernel_1[1]),
                                      activation='relu',
                                      data_format='channels_first')(conv)
                    if self.parameters.CNN_USE_L2:
                        conv = Conv2D(self.kernel_2[2],
                                      kernel_size=(self.kernel_2[0],
                                                   self.kernel_2[0]),
                                      strides=(self.kernel_2[1],
                                               self.kernel_2[1]),
                                      activation='relu',
                                      data_format='channels_first')(conv)
                    if self.parameters.CNN_USE_L3:
                        conv = Conv2D(self.kernel_3[2],
                                      kernel_size=(self.kernel_3[0],
                                                   self.kernel_3[0]),
                                      strides=(self.kernel_3[1],
                                               self.kernel_3[1]),
                                      activation='relu',
                                      data_format='channels_first')(conv)
                    self.valueNetwork = Flatten()(conv)

        # Fully connected layers
        if self.parameters.NEURON_TYPE == "MLP":
            layerIterable = iter(self.layers)
            regularizer = keras.regularizers.l2(self.parameters.Q_WEIGHT_DECAY)
            if self.parameters.DROPOUT:
                constraint = maxnorm(self.parameters.MAXNORM)
            else:
                constraint = None

            if parameters.CNN_REPR:
                # Continue from the flattened conv output. (The original set
                # previousLayer = self.input here, which skipped the conv
                # stack whenever there was no extra input.)
                previousLayer = self.valueNetwork
                extraInputSize = self.parameters.EXTRA_INPUT
                if extraInputSize > 0:
                    extraInput = Input(shape=(extraInputSize, ))
                    self.input = [self.input, extraInput]
                    denseInput = keras.layers.concatenate(
                        [self.valueNetwork, extraInput])
                    previousLayer = Dense(
                        next(layerIterable),
                        activation=self.activationFuncHidden,
                        bias_initializer=initializer,
                        kernel_initializer=initializer,
                        kernel_regularizer=regularizer)(denseInput)
            else:
                self.input = Input(shape=(inputDim, ))
                previousLayer = self.input

            for layer in layerIterable:
                if layer > 0:
                    if self.parameters.DROPOUT:
                        previousLayer = Dropout(
                            self.parameters.DROPOUT)(previousLayer)
                    previousLayer = Dense(
                        layer,
                        activation=self.activationFuncHidden,
                        bias_initializer=initializer,
                        kernel_initializer=initializer,
                        kernel_regularizer=regularizer,
                        kernel_constraint=constraint)(previousLayer)
                    if self.parameters.ACTIVATION_FUNC_HIDDEN == "elu":
                        previousLayer = keras.layers.ELU(
                            alpha=self.parameters.ELU_ALPHA)(previousLayer)
                    if self.parameters.BATCHNORM:
                        previousLayer = BatchNormalization()(previousLayer)
            if self.parameters.DROPOUT:
                previousLayer = Dropout(self.parameters.DROPOUT)(previousLayer)
            output = Dense(outputDim,
                           activation=self.activationFuncOutput,
                           bias_initializer=initializer,
                           kernel_initializer=initializer,
                           kernel_regularizer=regularizer,
                           kernel_constraint=constraint)(previousLayer)
            self.valueNetwork = keras.models.Model(inputs=self.input,
                                                   outputs=output)

        elif self.parameters.NEURON_TYPE == "LSTM":
            # TODO: Use CNN with LSTM
            # The original called self.valueNetwork.add() without ever
            # creating the Sequential model; create it here.
            self.valueNetwork = Sequential()
            # Hidden layer 1
            hidden1 = LSTM(self.hiddenLayer1,
                           input_shape=input_shape_lstm,
                           return_sequences=True,
                           stateful=stateful_training,
                           batch_size=self.batch_len,
                           bias_initializer=initializer,
                           kernel_initializer=initializer)
            self.valueNetwork.add(hidden1)
            # Hidden layer 2
            if self.hiddenLayer2 > 0:
                hidden2 = LSTM(self.hiddenLayer2,
                               return_sequences=True,
                               stateful=stateful_training,
                               batch_size=self.batch_len,
                               bias_initializer=initializer,
                               kernel_initializer=initializer)
                self.valueNetwork.add(hidden2)
            # Hidden layer 3
            if self.hiddenLayer3 > 0:
                hidden3 = LSTM(self.hiddenLayer3,
                               return_sequences=True,
                               stateful=stateful_training,
                               batch_size=self.batch_len,
                               bias_initializer=initializer,
                               kernel_initializer=initializer)
                self.valueNetwork.add(hidden3)
            # Output layer
            output = LSTM(outputDim,
                          activation=self.activationFuncOutput,
                          return_sequences=True,
                          stateful=stateful_training,
                          batch_size=self.batch_len,
                          bias_initializer=initializer,
                          kernel_initializer=initializer)
            self.valueNetwork.add(output)

        # Create target network
        self.targetNetwork = keras.models.clone_model(self.valueNetwork)
        self.targetNetwork.set_weights(self.valueNetwork.get_weights())

        if self.parameters.OPTIMIZER == "Adam":
            if self.parameters.GRADIENT_CLIP_NORM:
                optimizer = keras.optimizers.Adam(
                    lr=self.learningRate,
                    clipnorm=self.parameters.GRADIENT_CLIP_NORM,
                    amsgrad=self.parameters.AMSGRAD)
            elif self.parameters.GRADIENT_CLIP:
                optimizer = keras.optimizers.Adam(
                    lr=self.learningRate,
                    clipvalue=self.parameters.GRADIENT_CLIP,
                    amsgrad=self.parameters.AMSGRAD)
            else:
                optimizer = keras.optimizers.Adam(
                    lr=self.learningRate, amsgrad=self.parameters.AMSGRAD)
        elif self.parameters.OPTIMIZER == "Nadam":
            optimizer = keras.optimizers.Nadam(lr=self.learningRate)
        elif self.parameters.OPTIMIZER == "Adamax":
            optimizer = keras.optimizers.Adamax(lr=self.learningRate)
        elif self.parameters.OPTIMIZER == "SGD":
            if self.parameters.NESTEROV:
                optimizer = keras.optimizers.SGD(
                    lr=self.learningRate,
                    momentum=self.parameters.NESTEROV,
                    nesterov=True)
            else:
                optimizer = keras.optimizers.SGD(lr=self.learningRate)
        self.optimizer = optimizer

        self.valueNetwork.compile(loss='mse', optimizer=optimizer)
        self.targetNetwork.compile(loss='mse', optimizer=optimizer)
        self.model = self.valueNetwork

        if self.parameters.NEURON_TYPE == "LSTM":
            # The action network predicts from a single state at a time.
            input_shape_lstm = (1, self.stateReprLen)
            self.actionNetwork = Sequential()
            hidden1 = LSTM(self.hiddenLayer1,
                           input_shape=input_shape_lstm,
                           return_sequences=True,
                           stateful=True,
                           batch_size=1,
                           bias_initializer=initializer,
                           kernel_initializer=initializer)
            self.actionNetwork.add(hidden1)
            if self.hiddenLayer2 > 0:
                hidden2 = LSTM(self.hiddenLayer2,
                               return_sequences=True,
                               stateful=True,
                               batch_size=self.batch_len,
                               bias_initializer=initializer,
                               kernel_initializer=initializer)
                self.actionNetwork.add(hidden2)
            if self.hiddenLayer3 > 0:
                hidden3 = LSTM(self.hiddenLayer3,
                               return_sequences=True,
                               stateful=True,
                               batch_size=self.batch_len,
                               bias_initializer=initializer,
                               kernel_initializer=initializer)
                self.actionNetwork.add(hidden3)
            self.actionNetwork.add(
                LSTM(self.num_actions,
                     activation=self.activationFuncOutput,
                     return_sequences=False,
                     stateful=True,
                     batch_size=self.batch_len,
                     bias_initializer=initializer,
                     kernel_initializer=initializer))
            self.actionNetwork.compile(loss='mse', optimizer=optimizer)

        # summary() prints the architecture itself; wrapping it in print()
        # would only add a stray "None".
        self.valueNetwork.summary()
        print("\n")

        if modelName is not None:
            self.load(modelName)

    def reset_general(self, model):
        session = K.get_session()
        for layer in model.layers:
            for v in layer.__dict__:
                v_arg = getattr(layer, v)
                if hasattr(v_arg, 'initializer'):
                    initializer_method = getattr(v_arg, 'initializer')
                    initializer_method.run(session=session)
                    print('reinitializing layer {}.{}'.format(layer.name, v))

    def reset_weights(self):
        self.reset_general(self.valueNetwork)
        self.reset_general(self.targetNetwork)

    def reset_hidden_states(self):
        self.actionNetwork.reset_states()
        self.valueNetwork.reset_states()
        self.targetNetwork.reset_states()

    def load(self, modelName):
        path = modelName
        self.loadedModelName = modelName
        self.valueNetwork = keras.models.load_model(path + "model.h5")
        self.targetNetwork = keras.models.load_model(path + "model.h5")

    def trainOnBatch(self, inputs, targets, importance_weights):
        if self.parameters.NEURON_TYPE == "LSTM":
            if self.parameters.EXP_REPLAY_ENABLED:
                if self.parameters.PRIORITIZED_EXP_REPLAY_ENABLED:
                    return self.valueNetwork.train_on_batch(
                        inputs, targets, sample_weight=importance_weights)
                else:
                    return self.valueNetwork.train_on_batch(inputs, targets)
            else:
                return self.valueNetwork.train_on_batch(
                    numpy.array([numpy.array([inputs])]),
                    numpy.array([numpy.array([targets])]))
        else:
            if self.parameters.PRIORITIZED_EXP_REPLAY_ENABLED:
                return self.valueNetwork.train_on_batch(
                    inputs, targets, sample_weight=importance_weights)
            else:
                return self.valueNetwork.train_on_batch(inputs, targets)

    def updateActionNetwork(self):
        self.actionNetwork.set_weights(self.valueNetwork.get_weights())

    def updateTargetNetwork(self):
        self.targetNetwork.set_weights(self.valueNetwork.get_weights())

    def predict(self, state, batch_len=1):
        if self.parameters.NEURON_TYPE == "LSTM":
            if self.parameters.EXP_REPLAY_ENABLED:
                return self.valueNetwork.predict(state, batch_size=batch_len)
            else:
                return self.valueNetwork.predict(
                    numpy.array([numpy.array([state])]))[0][0]
        if self.parameters.CNN_REPR:
            if self.parameters.CNN_TOWER:
                # One batch of one single-channel frame per grid tower.
                stateRepr = numpy.zeros(
                    (len(state), 1, 1, len(state[0]), len(state[0])))
                for gridIdx, grid in enumerate(state):
                    stateRepr[gridIdx][0][0] = grid
                state = list(stateRepr)
            else:
                if len(state) == 2:
                    grid = numpy.array([state[0]])
                    extra = numpy.array([state[1]])
                    state = [grid, extra]
                else:
                    state = numpy.array([state])
        return self.valueNetwork.predict(state)[0]

    def predictTargetQValues(self, state):
        if self.parameters.USE_ACTION_AS_INPUT:
            return [
                self.predict_target_network(
                    numpy.array([numpy.concatenate((state[0], act))]))[0]
                for act in self.actions
            ]
        else:
            return self.predict_target_network(state)

    def predict_target_network(self, state, batch_len=1):
        if self.parameters.NEURON_TYPE == "LSTM":
            if self.parameters.EXP_REPLAY_ENABLED:
                return self.targetNetwork.predict(state, batch_size=batch_len)
            else:
                return self.targetNetwork.predict(
                    numpy.array([numpy.array([state])]))[0][0]
        if self.parameters.CNN_REPR:
            if self.parameters.CNN_TOWER:
                stateRepr = numpy.zeros(
                    (len(state), 1, 1, len(state[0]), len(state[0])))
                for gridIdx, grid in enumerate(state):
                    stateRepr[gridIdx][0][0] = grid
                stateRepr = list(stateRepr)
                return self.targetNetwork.predict(stateRepr)[0]
            else:
                if len(state) == 2:
                    grid = numpy.array([state[0]])
                    extra = numpy.array([state[1]])
                    state = [grid, extra]
                else:
                    state = numpy.array([state])
                return self.targetNetwork.predict(state)[0]
        else:
            return self.targetNetwork.predict(state)[0]

    def predict_action_network(self, trace):
        return self.actionNetwork.predict(
            numpy.array([numpy.array([trace])]))[0]

    def predict_action(self, state):
        if self.parameters.USE_ACTION_AS_INPUT:
            return [
                self.predict(
                    numpy.array([numpy.concatenate((state[0], act))]))[0]
                for act in self.actions
            ]
        else:
            if self.parameters.NEURON_TYPE == "MLP":
                return self.predict(state)
            else:
                return self.predict_action_network(state)

    def saveModel(self, path, name=""):
        self.targetNetwork.set_weights(self.valueNetwork.get_weights())
        self.targetNetwork.save(path + name + "model.h5")

    def setEpsilon(self, val):
        self.epsilon = val

    def setFrameSkipRate(self, value):
        self.frameSkipRate = value

    def getParameters(self):
        return self.parameters

    def getNumOfActions(self):
        return self.num_actions

    def getEpsilon(self):
        return self.epsilon

    def getDiscount(self):
        return self.discount

    def getFrameSkipRate(self):
        return self.frameSkipRate

    def getGridSquaresPerFov(self):
        return self.gridSquaresPerFov

    def getTargetNetworkMaxSteps(self):
        # Note: targetNetworkMaxSteps is never assigned in this revision.
        return self.targetNetworkMaxSteps

    def getStateReprLen(self):
        return self.stateReprLen

    def getHiddenLayer1(self):
        return self.hiddenLayer1

    def getHiddenLayer2(self):
        return self.hiddenLayer2

    def getHiddenLayer3(self):
        return self.hiddenLayer3

    def getNumActions(self):
        return self.num_actions

    def getLearningRate(self):
        return self.learningRate

    def getActivationFuncHidden(self):
        return self.activationFuncHidden

    def getActivationFuncOutput(self):
        return self.activationFuncOutput

    def getOptimizer(self):
        return self.optimizer

    def getLoadedModelName(self):
        return self.loadedModelName

    def getActions(self):
        return self.actions

    def getTargetNetwork(self):
        return self.targetNetwork

    def getValueNetwork(self):
        return self.valueNetwork
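# ---------------------------------------------------------------------------
# Usage sketch (not part of the class): one DQN update step written against
# the API above. It assumes a plain MLP state representation (CNN_REPR
# disabled), that `net` is an already-constructed Network, and that `batch`
# is a tuple of equal-length sequences sampled by the surrounding project's
# replay memory. The helper name `dqn_train_step` is hypothetical.
def dqn_train_step(net, batch, importance_weights=None):
    states, actions, rewards, next_states, terminals = batch
    gamma = net.getDiscount()
    # Current Q-value estimates, shape (batch, num_actions).
    q_values = numpy.array([net.predict(numpy.array([s])) for s in states])
    for i in range(len(states)):
        target = rewards[i]
        if not terminals[i]:
            # Bootstrap from the frozen target network (standard DQN).
            target += gamma * numpy.max(
                net.predictTargetQValues(numpy.array([next_states[i]])))
        q_values[i][actions[i]] = target
    return net.trainOnBatch(numpy.array(states), q_values, importance_weights)
# ---------------------------------------------------------------------------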
# A second, generalized revision of the Network class: it also supports
# OpenAI Gym environments and multiprocess/threaded prediction.
class Network(object):
    def __init__(self, parameters, modelName=None):
        self.parameters = parameters
        self.gpus = self.parameters.NUM_GPUS

        # Q-learning
        self.discount = self.parameters.DISCOUNT
        self.epsilon = self.parameters.EPSILON
        self.frameSkipRate = self.parameters.FRAME_SKIP_RATE

        if self.parameters.GAME_NAME == "Agar.io":
            self.gridSquaresPerFov = self.parameters.GRID_SQUARES_PER_FOV
            # CNN
            if self.parameters.CNN_REPR:
                # Each CNN_Lx is a (kernelSize, stride, filterNum) tuple.
                self.kernel_1 = self.parameters.CNN_L1
                self.kernel_2 = self.parameters.CNN_L2
                self.kernel_3 = self.parameters.CNN_L3
                if self.parameters.CNN_USE_L1:
                    self.stateReprLen = self.parameters.CNN_INPUT_DIM_1
                elif self.parameters.CNN_USE_L2:
                    self.stateReprLen = self.parameters.CNN_INPUT_DIM_2
                else:
                    self.stateReprLen = self.parameters.CNN_INPUT_DIM_3
            else:
                self.stateReprLen = self.parameters.STATE_REPR_LEN

            if parameters.SQUARE_ACTIONS:
                self.actions = createDiscreteActionsSquare(
                    self.parameters.NUM_ACTIONS, self.parameters.ENABLE_SPLIT,
                    self.parameters.ENABLE_EJECT)
            else:
                self.actions = createDiscreteActionsCircle(
                    self.parameters.NUM_ACTIONS, self.parameters.ENABLE_SPLIT,
                    self.parameters.ENABLE_EJECT)
            self.num_actions = len(self.actions)
        else:
            import gym
            env = gym.make(self.parameters.GAME_NAME)
            if self.parameters.CNN_REPR:
                pass  # pixel input for Gym environments is not implemented
            else:
                self.stateReprLen = env.observation_space.shape[0]
            self.num_actions = env.action_space.n
            self.actions = list(range(self.num_actions))

        # ANN
        self.learningRate = self.parameters.ALPHA
        self.optimizer = self.parameters.OPTIMIZER
        if self.parameters.ACTIVATION_FUNC_HIDDEN == "elu":
            # ELU is added as a separate layer below, so the Dense layers
            # themselves stay linear.
            self.activationFuncHidden = "linear"
        else:
            self.activationFuncHidden = self.parameters.ACTIVATION_FUNC_HIDDEN
        self.activationFuncLSTM = self.parameters.ACTIVATION_FUNC_LSTM
        self.activationFuncOutput = self.parameters.ACTIVATION_FUNC_OUTPUT

        self.layers = parameters.Q_LAYERS
        # The LSTM branches use three hidden-layer sizes, which the original
        # never assigned; assume Q_LAYERS provides them (padded with 0 when
        # shorter).
        sizes = list(self.layers) + [0, 0, 0]
        self.hiddenLayer1, self.hiddenLayer2, self.hiddenLayer3 = sizes[:3]

        if self.parameters.USE_ACTION_AS_INPUT:
            inputDim = self.stateReprLen + 4
            outputDim = 1
        else:
            inputDim = self.stateReprLen
            outputDim = self.num_actions

        if self.parameters.EXP_REPLAY_ENABLED:
            input_shape_lstm = (self.parameters.MEMORY_TRACE_LEN, inputDim)
            stateful_training = False
            self.batch_len = self.parameters.MEMORY_BATCH_LEN
        else:
            input_shape_lstm = (1, inputDim)
            stateful_training = True
            self.batch_len = 1

        if self.parameters.INITIALIZER == "glorot_uniform":
            initializer = keras.initializers.glorot_uniform()
        elif self.parameters.INITIALIZER == "glorot_normal":
            initializer = keras.initializers.glorot_normal()
        else:
            weight_initializer_range = math.sqrt(
                6 / (self.stateReprLen + self.num_actions))
            initializer = keras.initializers.RandomUniform(
                minval=-weight_initializer_range,
                maxval=weight_initializer_range,
                seed=None)

        # CNN
        if self.parameters.CNN_REPR:
            if self.parameters.CNN_P_REPR:
                if self.parameters.CNN_P_RGB:
                    channels = 3  # RGB
                else:
                    channels = 1  # grayscale
                if self.parameters.CNN_LAST_GRID:
                    channels = channels * 2
                self.input = Input(shape=(self.stateReprLen,
                                          self.stateReprLen, channels))
                conv = self.input
                if self.parameters.CNN_USE_L1:
                    conv = Conv2D(self.kernel_1[2],
                                  kernel_size=(self.kernel_1[0],
                                               self.kernel_1[0]),
                                  strides=(self.kernel_1[1], self.kernel_1[1]),
                                  activation='relu',
                                  data_format='channels_last')(conv)
                if self.parameters.CNN_USE_L2:
                    conv = Conv2D(self.kernel_2[2],
                                  kernel_size=(self.kernel_2[0],
                                               self.kernel_2[0]),
                                  strides=(self.kernel_2[1], self.kernel_2[1]),
                                  activation='relu',
                                  data_format='channels_last')(conv)
                if self.parameters.CNN_USE_L3:
                    conv = Conv2D(self.kernel_3[2],
                                  kernel_size=(self.kernel_3[0],
                                               self.kernel_3[0]),
                                  strides=(self.kernel_3[1], self.kernel_3[1]),
                                  activation='relu',
                                  data_format='channels_last')(conv)
                self.valueNetwork = Flatten()(conv)
            # Not pixel input: vision grid merging
            else:
                self.input = Input(shape=(self.parameters.NUM_OF_GRIDS,
                                          self.stateReprLen,
                                          self.stateReprLen))
                conv = self.input
                if self.parameters.CNN_USE_L1:
                    conv = Conv2D(self.kernel_1[2],
                                  kernel_size=(self.kernel_1[0],
                                               self.kernel_1[0]),
                                  strides=(self.kernel_1[1], self.kernel_1[1]),
                                  activation='relu',
                                  data_format='channels_first')(conv)
                if self.parameters.CNN_USE_L2:
                    conv = Conv2D(self.kernel_2[2],
                                  kernel_size=(self.kernel_2[0],
                                               self.kernel_2[0]),
                                  strides=(self.kernel_2[1], self.kernel_2[1]),
                                  activation='relu',
                                  data_format='channels_first')(conv)
                if self.parameters.CNN_USE_L3:
                    conv = Conv2D(self.kernel_3[2],
                                  kernel_size=(self.kernel_3[0],
                                               self.kernel_3[0]),
                                  strides=(self.kernel_3[1], self.kernel_3[1]),
                                  activation='relu',
                                  data_format='channels_first')(conv)
                self.valueNetwork = Flatten()(conv)

        # Fully connected layers
        if self.parameters.NEURON_TYPE == "MLP":
            layerIterable = iter(self.layers)
            regularizer = keras.regularizers.l2(self.parameters.Q_WEIGHT_DECAY)
            if self.parameters.DROPOUT:
                constraint = maxnorm(self.parameters.MAXNORM)
            else:
                constraint = None

            if parameters.CNN_REPR:
                previousLayer = self.valueNetwork
            else:
                self.input = Input(shape=(inputDim, ))
                previousLayer = self.input

            for layer in layerIterable:
                if layer > 0:
                    if self.parameters.DROPOUT:
                        previousLayer = Dropout(
                            self.parameters.DROPOUT)(previousLayer)
                    previousLayer = Dense(
                        layer,
                        activation=self.activationFuncHidden,
                        bias_initializer=initializer,
                        kernel_initializer=initializer,
                        kernel_regularizer=regularizer,
                        kernel_constraint=constraint)(previousLayer)
                    if self.parameters.ACTIVATION_FUNC_HIDDEN == "elu":
                        previousLayer = keras.layers.ELU(
                            alpha=self.parameters.ELU_ALPHA)(previousLayer)
                    if self.parameters.BATCHNORM:
                        previousLayer = BatchNormalization()(previousLayer)
            if self.parameters.DROPOUT:
                previousLayer = Dropout(self.parameters.DROPOUT)(previousLayer)
            output = Dense(outputDim,
                           activation=self.activationFuncOutput,
                           bias_initializer=initializer,
                           kernel_initializer=initializer,
                           kernel_regularizer=regularizer,
                           kernel_constraint=constraint)(previousLayer)
            self.valueNetwork = keras.models.Model(inputs=self.input,
                                                   outputs=output)

        elif self.parameters.NEURON_TYPE == "LSTM":
            # TODO: Use CNN with LSTM
            if self.parameters.CNN_REPR:
                previousLayer = self.valueNetwork
            else:
                # The original never created an input tensor in this path;
                # a stateful LSTM needs a fixed batch size, hence batch_shape.
                self.input = Input(
                    batch_shape=(self.batch_len, ) + input_shape_lstm)
                previousLayer = self.input
            # Hidden layer 1. The original applied every LSTM layer to
            # self.valueNetwork, which silently dropped all but the last
            # layer; chain each layer on the previous one instead.
            previousLayer = LSTM(self.hiddenLayer1,
                                 return_sequences=True,
                                 stateful=stateful_training,
                                 bias_initializer=initializer,
                                 kernel_initializer=initializer)(previousLayer)
            # Hidden layer 2
            if self.hiddenLayer2 > 0:
                previousLayer = LSTM(
                    self.hiddenLayer2,
                    return_sequences=True,
                    stateful=stateful_training,
                    bias_initializer=initializer,
                    kernel_initializer=initializer)(previousLayer)
            # Hidden layer 3
            if self.hiddenLayer3 > 0:
                previousLayer = LSTM(
                    self.hiddenLayer3,
                    return_sequences=True,
                    stateful=stateful_training,
                    bias_initializer=initializer,
                    kernel_initializer=initializer)(previousLayer)
            # Output layer
            output = LSTM(outputDim,
                          activation=self.activationFuncOutput,
                          return_sequences=True,
                          stateful=stateful_training,
                          bias_initializer=initializer,
                          kernel_initializer=initializer)(previousLayer)
            self.valueNetwork = keras.models.Model(inputs=self.input,
                                                   outputs=output)

        # Create target network. _make_predict_function() forces Keras to
        # build its prediction function up front, which is needed before the
        # model is shared with worker processes/threads.
        self.valueNetwork._make_predict_function()
        self.targetNetwork = keras.models.clone_model(self.valueNetwork)
        self.targetNetwork.set_weights(self.valueNetwork.get_weights())

        if self.parameters.OPTIMIZER == "Adam":
            if self.parameters.GRADIENT_CLIP_NORM:
                optimizer = keras.optimizers.Adam(
                    lr=self.learningRate,
                    clipnorm=self.parameters.GRADIENT_CLIP_NORM,
                    amsgrad=self.parameters.AMSGRAD)
            elif self.parameters.GRADIENT_CLIP:
                optimizer = keras.optimizers.Adam(
                    lr=self.learningRate,
                    clipvalue=self.parameters.GRADIENT_CLIP,
                    amsgrad=self.parameters.AMSGRAD)
            else:
                optimizer = keras.optimizers.Adam(
                    lr=self.learningRate, amsgrad=self.parameters.AMSGRAD)
        elif self.parameters.OPTIMIZER == "Nadam":
            optimizer = keras.optimizers.Nadam(lr=self.learningRate)
        elif self.parameters.OPTIMIZER == "Adamax":
            optimizer = keras.optimizers.Adamax(lr=self.learningRate)
        elif self.parameters.OPTIMIZER == "SGD":
            if self.parameters.NESTEROV:
                optimizer = keras.optimizers.SGD(
                    lr=self.learningRate,
                    momentum=self.parameters.NESTEROV,
                    nesterov=True)
            else:
                optimizer = keras.optimizers.SGD(lr=self.learningRate)
        self.optimizer = optimizer

        self.valueNetwork.compile(loss='mse', optimizer=optimizer)
        self.targetNetwork.compile(loss='mse', optimizer=optimizer)
        self.model = self.valueNetwork

        if self.parameters.NEURON_TYPE == "LSTM":
            # The action network predicts from a single state at a time.
            input_shape_lstm = (1, self.stateReprLen)
            self.actionNetwork = Sequential()
            hidden1 = LSTM(self.hiddenLayer1,
                           input_shape=input_shape_lstm,
                           return_sequences=True,
                           stateful=True,
                           batch_size=1,
                           bias_initializer=initializer,
                           kernel_initializer=initializer)
            self.actionNetwork.add(hidden1)
            if self.hiddenLayer2 > 0:
                hidden2 = LSTM(self.hiddenLayer2,
                               return_sequences=True,
                               stateful=True,
                               batch_size=self.batch_len,
                               bias_initializer=initializer,
                               kernel_initializer=initializer)
                self.actionNetwork.add(hidden2)
            if self.hiddenLayer3 > 0:
                hidden3 = LSTM(self.hiddenLayer3,
                               return_sequences=True,
                               stateful=True,
                               batch_size=self.batch_len,
                               bias_initializer=initializer,
                               kernel_initializer=initializer)
                self.actionNetwork.add(hidden3)
            self.actionNetwork.add(
                LSTM(self.num_actions,
                     activation=self.activationFuncOutput,
                     return_sequences=False,
                     stateful=True,
                     batch_size=self.batch_len,
                     bias_initializer=initializer,
                     kernel_initializer=initializer))
            self.actionNetwork.compile(loss='mse', optimizer=optimizer)

        # if __debug__: print(self.valueNetwork.summary())
        # print("\n")

        if modelName is not None:
            self.load(modelName)

        self.targetNetwork._make_predict_function()
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        # Keep a handle on the graph so worker threads/processes can predict.
        self.graph = tf.get_default_graph()

    # Necessary for multiprocessing: warms up the network.
    def dummy_prediction(self):
        if self.parameters.CNN_REPR:
            # predict() adds the batch dimension for CNN input itself (the
            # original wrapped the dummy twice, yielding a 5-D tensor).
            dummy_input = numpy.zeros((self.parameters.NUM_OF_GRIDS,
                                       self.stateReprLen, self.stateReprLen))
        else:
            dummy_input = numpy.array([numpy.zeros((self.stateReprLen, ))])
        self.predict(dummy_input)

    def reset_general(self, model):
        session = K.get_session()
        for layer in model.layers:
            for v in layer.__dict__:
                v_arg = getattr(layer, v)
                if hasattr(v_arg, 'initializer'):
                    initializer_method = getattr(v_arg, 'initializer')
                    initializer_method.run(session=session)
                    print('reinitializing layer {}.{}'.format(layer.name, v))

    def reset_weights(self):
        self.reset_general(self.valueNetwork)
        self.reset_general(self.targetNetwork)

    def reset_hidden_states(self):
        self.actionNetwork.reset_states()
        self.valueNetwork.reset_states()
        self.targetNetwork.reset_states()

    def load(self, modelName):
        path = modelName + "model.h5"
        self.valueNetwork = keras.models.load_model(path)
        self.targetNetwork = keras.models.load_model(path)

    def setWeights(self, weights):
        self.valueNetwork.set_weights(weights)

    def trainOnBatch(self, inputs, targets, importance_weights):
        if self.parameters.NEURON_TYPE == "LSTM":
            if self.parameters.EXP_REPLAY_ENABLED:
                if self.parameters.PRIORITIZED_EXP_REPLAY_ENABLED:
                    return self.valueNetwork.train_on_batch(
                        inputs, targets, sample_weight=importance_weights)
                else:
                    return self.valueNetwork.train_on_batch(inputs, targets)
            else:
                return self.valueNetwork.train_on_batch(
                    numpy.array([numpy.array([inputs])]),
                    numpy.array([numpy.array([targets])]))
        else:
            if self.parameters.PRIORITIZED_EXP_REPLAY_ENABLED:
                return self.valueNetwork.train_on_batch(
                    inputs, targets, sample_weight=importance_weights)
            else:
                return self.valueNetwork.train_on_batch(inputs, targets)

    def updateActionNetwork(self):
        self.actionNetwork.set_weights(self.valueNetwork.get_weights())

    def updateTargetNetwork(self):
        self.targetNetwork.set_weights(self.valueNetwork.get_weights())
        if __debug__:
            print("Target Network updated.")

    def predict(self, state, batch_len=1):
        if self.parameters.NEURON_TYPE == "LSTM":
            if self.parameters.EXP_REPLAY_ENABLED:
                return self.valueNetwork.predict(state, batch_size=batch_len)
            else:
                return self.valueNetwork.predict(
                    numpy.array([numpy.array([state])]))[0][0]
        if self.parameters.CNN_REPR:
            state = numpy.array([state])
        # Predict inside the graph captured at construction time so that
        # calls from worker threads/processes find the right TF graph.
        with self.graph.as_default():
            prediction = self.valueNetwork.predict(state)[0]
        return prediction

    def predictTargetQValues(self, state):
        if self.parameters.USE_ACTION_AS_INPUT:
            return [
                self.predict_target_network(
                    numpy.array([numpy.concatenate((state[0], act))]))[0]
                for act in self.actions
            ]
        else:
            return self.predict_target_network(state)

    def predict_target_network(self, state, batch_len=1):
        if self.parameters.NEURON_TYPE == "LSTM":
            if self.parameters.EXP_REPLAY_ENABLED:
                return self.targetNetwork.predict(state, batch_size=batch_len)
            else:
                return self.targetNetwork.predict(
                    numpy.array([numpy.array([state])]))[0][0]
        if self.parameters.CNN_REPR:
            state = numpy.array([state])
        return self.targetNetwork.predict(state)[0]

    def predict_action_network(self, trace):
        return self.actionNetwork.predict(
            numpy.array([numpy.array([trace])]))[0]

    def predict_action(self, state):
        if self.parameters.USE_ACTION_AS_INPUT:
            return [
                self.predict(
                    numpy.array([numpy.concatenate((state[0], act))]))[0]
                for act in self.actions
            ]
        else:
            if self.parameters.NEURON_TYPE == "MLP":
                return self.predict(state)
            else:
                return self.predict_action_network(state)

    def saveModel(self, path, name=""):
        if not os.path.exists(path + "models/"):
            os.mkdir(path + "models/")
        self.targetNetwork.set_weights(self.valueNetwork.get_weights())
        complete = False
        while not complete:
            try:
                self.targetNetwork.save(path + "models/" + name + "model.h5")
                complete = True
            except Exception:
                print("Error saving network. ########################")
                print("Trying to save again...")

    def setEpsilon(self, val):
        self.epsilon = val

    def setFrameSkipRate(self, value):
        self.frameSkipRate = value

    def getParameters(self):
        return self.parameters

    def getNumOfActions(self):
        return self.num_actions

    def getEpsilon(self):
        return self.epsilon

    def getDiscount(self):
        return self.discount

    def getFrameSkipRate(self):
        return self.frameSkipRate

    def getGridSquaresPerFov(self):
        return self.gridSquaresPerFov

    def getStateReprLen(self):
        return self.stateReprLen

    def getNumActions(self):
        return self.num_actions

    def getLearningRate(self):
        return self.learningRate

    def getActivationFuncHidden(self):
        return self.activationFuncHidden

    def getActivationFuncOutput(self):
        return self.activationFuncOutput

    def getOptimizer(self):
        return self.optimizer

    def getActions(self):
        return self.actions

    def getTargetNetwork(self):
        return self.targetNetwork

    def getValueNetwork(self):
        return self.valueNetwork