import numpy as np
from keras import backend as K
from keras import regularizers
from keras.layers import (Input, Embedding, Dropout, Lambda, Bidirectional,
                          LSTM, Dense, Conv2D, Flatten)
from keras.models import Model
from keras.optimizers import RMSprop


def base_lstm_subword(vocabulary_size: int, embedding_size: int,
                      max_char_length: int, max_seq_length: int,
                      embedding_matrix: np.ndarray, y_dictionary: dict) -> Model:
    # Character-level input plus a mask that aggregates characters into subwords
    input = Input(shape=(max_char_length,), name='main_input')
    mask = Input(shape=(max_seq_length, max_char_length), name='mask_input')

    # Trainable embedding initialised from a pre-trained matrix, lightly L2-regularised
    embedding = Embedding(input_dim=vocabulary_size,
                          output_dim=embedding_size,
                          weights=[embedding_matrix],
                          input_length=max_char_length,
                          trainable=True,
                          embeddings_regularizer=regularizers.l2(0.000001))(input)
    embedding = Dropout(0.4)(embedding)

    # Pool character embeddings into subword representations:
    # (batch, max_seq_length, max_char_length) x (batch, max_char_length, embedding_size)
    # -> (batch, max_seq_length, embedding_size)
    model = Lambda(lambda values: K.batch_dot(values[0], values[1]))([mask, embedding])
    #model = Dropout(0.4)(model)
    model = Bidirectional(LSTM(256))(model)
    model = Dense(len(y_dictionary), activation='softmax')(model)

    model = Model([input, mask], model)
    optimizer = RMSprop(lr=0.001, decay=0.00005)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model
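
# A minimal usage sketch for base_lstm_subword. All shapes and data here are
# hypothetical (random embedding matrix, random inputs, a made-up label set);
# they are not from the source and only illustrate how the two inputs line up.
vocab_size, emb_size = 100, 50
max_chars, max_seq = 40, 10
y_dict = {'LOC': 0, 'PER': 1, 'O': 2}  # hypothetical label dictionary

emb_matrix = np.random.randn(vocab_size, emb_size).astype('float32')
lstm_model = base_lstm_subword(vocab_size, emb_size, max_chars, max_seq,
                               emb_matrix, y_dict)

chars = np.random.randint(0, vocab_size, size=(8, max_chars))
# Binary mask assigning each of the max_seq subwords its character positions
masks = np.random.randint(0, 2, size=(8, max_seq, max_chars)).astype('float32')
labels = np.eye(len(y_dict))[np.random.randint(0, len(y_dict), size=8)]
lstm_model.fit([chars, masks], labels, epochs=1, batch_size=8)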
def __build_model__(self):
    # The input of the NN will be the stacked frames' dimensions, i.e.:
    # - Height: the image/state's height
    # - Width: the image/state's width
    # - Depth: the number of stacked frames (3 by default)
    inputs = Input(shape=(self.state_size_h, self.state_size_w, self.stack_size),
                   name="main_input")

    # Four convolutional layers are applied to the stacked-frame input
    model = Conv2D(filters=32, kernel_size=(8, 8), strides=(4, 4),
                   activation="relu", padding="valid",
                   kernel_initializer=self.kernel_initializer, name="conv1")(inputs)
    model = Conv2D(filters=64, kernel_size=(4, 4), strides=(2, 2),
                   activation="relu", padding="valid",
                   kernel_initializer=self.kernel_initializer, name="conv2")(model)
    model = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                   activation="relu", padding="valid",
                   kernel_initializer=self.kernel_initializer, name="conv3")(model)
    model = Conv2D(filters=self.final_conv_layer_size, kernel_size=(7, 7),
                   strides=(1, 1), activation="relu", padding="valid",
                   kernel_initializer=self.kernel_initializer, name="conv4")(model)

    # Dueling DQN
    # We split the final convolutional layer into an advantage stream and a
    # value stream. The value function measures how well off you are in a
    # given state (e.g. being off the road); the advantage measures how much
    # better off you end up after performing one action in that state.
    # Q is the value of a state after a given action:
    #   Advantage(state, action) = Q(state, action) - Value(state)
    # which makes the Q-values easier to compute.
    # Each stream gets half of the final Conv2D output:
    #   Advantage stream (AC): 0 -> final_conv_layer_size // 2
    #   Value stream (VC): final_conv_layer_size // 2 -> final_conv_layer_size
    stream_AC = Lambda(lambda layer: layer[:, :, :, :self.final_conv_layer_size // 2],
                       name="advantage")(model)
    stream_VC = Lambda(lambda layer: layer[:, :, :, self.final_conv_layer_size // 2:],
                       name="value")(model)

    # Flatten the advantage and value streams so their depth becomes 1
    stream_AC = Flatten(name="advantage_flatten")(stream_AC)
    stream_VC = Flatten(name="value_flatten")(stream_VC)

    # Dense layers whose weights are trained so the matmul matches the expected
    # value and advantage observed during play. The advantage has one output
    # per action; the value is a single scalar for the state.
    advantage_layer = Dense(len(self.actions), name="advantage_final")(stream_AC)
    value_layer = Dense(1, name="value_final")(stream_VC)

    # To get the Q output we need to combine the value and the advantage,
    # but adding them directly is not correct: given Q alone we could not
    # recover A(s, a) and V(s), so Q(s, a) != V(s) + A(s, a).
    # We solve this by subtracting the mean of the advantage from A(s, a):
    #   Q(s, a) = V(s) + (A(s, a) - mean_a(A(s, a)))
    model = Lambda(lambda val_adv: val_adv[0]
                   + (val_adv[1] - K.mean(val_adv[1], axis=1, keepdims=True)),
                   name="final_out")([value_layer, advantage_layer])

    model = Model(inputs, model)
    model.compile(self.optimizer, self.loss_function)
    model.optimizer.lr = self.learning_rate
    return model
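
# A small numeric sketch (illustrative values only, not from the source) of the
# dueling aggregation used in the "final_out" Lambda above: subtracting the
# per-state mean of the advantages makes the V/A decomposition identifiable
# while leaving the argmax over actions unchanged.
value = np.array([[2.0]])                 # V(s): one scalar for the state
advantage = np.array([[1.0, -1.0, 0.0]])  # A(s, a): one entry per action

q = value + (advantage - advantage.mean(axis=1, keepdims=True))
print(q)                 # [[3. 1. 2.]] -> Q(s, a) per action
print(q.argmax(axis=1))  # [0], the same best action as advantage.argmax(axis=1)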