def __init__(self, h=8, d_k=64, d_v=64, d_model=512, d_inner_hid=2048):
    super(EncoderLayer, self).__init__()
    self._mha = MultiHeadAttention(h=h, d_k=d_k, d_v=d_v, d_model=d_model)
    self._ln_a = LayerNormalization()
    self._psfw = PositionWiseFeedForward(d_model=d_model, d_ff=d_inner_hid)
    self._ln_b = LayerNormalization()
    self._add_a = Add()
    self._add_b = Add()
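# A minimal sketch (assumed, not part of the source) of how these sub-layers are
# typically wired in the encoder's forward pass: residual self-attention followed by
# a residual position-wise feed-forward block, each with post-layer normalization.
# The call convention of this custom MultiHeadAttention is an assumption.
def __call__(self, x):
    attn = self._mha(x, x, x)                 # self-attention over the input sequence
    x = self._ln_a(self._add_a([x, attn]))    # residual connection + layer norm
    ff = self._psfw(x)                        # position-wise feed-forward
    return self._ln_b(self._add_b([x, ff]))   # residual connection + layer norm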
def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
    super(TransformerBlock, self).__init__()
    self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
    self.ffn = Sequential(
        [Dense(ff_dim, activation="relu"), Dense(embed_dim)])
    self.layernorm1 = LayerNormalization(epsilon=1e-6)
    self.layernorm2 = LayerNormalization(epsilon=1e-6)
    self.dropout1 = Dropout(rate)
    self.dropout2 = Dropout(rate)
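# A minimal sketch (assumed, not part of the source) of the matching call() method:
# self-attention and feed-forward sub-layers, each followed by dropout, a residual
# connection, and post-layer normalization, as in the standard Keras transformer-block example.
def call(self, inputs, training=False):
    attn_output = self.att(inputs, inputs)
    attn_output = self.dropout1(attn_output, training=training)
    out1 = self.layernorm1(inputs + attn_output)
    ffn_output = self.ffn(out1)
    ffn_output = self.dropout2(ffn_output, training=training)
    return self.layernorm2(out1 + ffn_output)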
def __init__(self, ff_units, embedding_dim, num_words, n_heads, dropout_rate=0):
    super(EncoderBlock, self).__init__()
    self.ff = FeedForward(units=ff_units, input_dim=embedding_dim,
                          dropout_rate=dropout_rate)
    # Multi-head attention lets the transformer model how each word depends on the other
    # words in the sentence. A single attention head tends to weight a word's own position
    # too heavily and reveals little about the rest of the sequence, so the layer runs
    # n_heads attention heads in parallel, each with its own learned projections, and
    # combines their outputs into several complementary views of the sentence.
    self.attention = MultiHeadAttention(num_heads=n_heads, key_dim=num_words)
    self.norm1, self.norm2 = LayerNormalization(), LayerNormalization()
    self.dropout1, self.dropout2 = Dropout(dropout_rate), Dropout(dropout_rate)
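# A minimal sketch (assumed, not from the source) of how this encoder block's call()
# could wire the layers prepared above: residual self-attention, then the residual
# feed-forward sub-layer, each followed by dropout and layer normalization.
def call(self, inputs, training=False):
    attn = self.attention(inputs, inputs)
    out1 = self.norm1(inputs + self.dropout1(attn, training=training))
    ff = self.ff(out1)
    return self.norm2(out1 + self.dropout2(ff, training=training))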
def build_discriminator(n_var):
    model = tf.keras.Sequential()
    model.add(Dense(n_var*5, use_bias=True))
    model.add(LayerNormalization())
    model.add(LeakyReLU())
    model.add(Dropout(0.2))
    model.add(Dense(n_var*15))
    model.add(LayerNormalization())
    model.add(LeakyReLU())
    model.add(Flatten())
    model.add(Dense(1))
    return model
def __init__(self, d_model, num_heads, dff, rate=0.1):
    super(DecoderLayer, self).__init__()
    self.mmha1 = MultiHeadAttentionLayer(d_model, num_heads)
    self.mha2 = MultiHeadAttentionLayer(d_model, num_heads)
    self.ffn = self.point_wise_feed_forward_network(d_model, dff)
    self.norm_layer1 = LayerNormalization(epsilon=1e-6)
    self.norm_layer2 = LayerNormalization(epsilon=1e-6)
    self.norm_layer3 = LayerNormalization(epsilon=1e-6)
    self.drop_layer1 = Dropout(rate)
    self.drop_layer2 = Dropout(rate)
    self.drop_layer3 = Dropout(rate)
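# A minimal sketch (assumed, not from the source) of the usual decoder-layer call():
# masked self-attention, encoder-decoder attention, and a feed-forward sub-layer,
# each with dropout, a residual connection, and layer normalization. The exact
# signature and return value of the custom MultiHeadAttentionLayer are assumptions.
def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
    attn1 = self.mmha1(x, x, x, look_ahead_mask)                    # masked self-attention
    out1 = self.norm_layer1(x + self.drop_layer1(attn1, training=training))
    attn2 = self.mha2(enc_output, enc_output, out1, padding_mask)   # cross-attention
    out2 = self.norm_layer2(out1 + self.drop_layer2(attn2, training=training))
    ffn_output = self.ffn(out2)
    return self.norm_layer3(out2 + self.drop_layer3(ffn_output, training=training))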
def LSTM_Multi(n_timesteps, rate, time=None, dropout=None, cater=None):
    model = Sequential()
    opt = Adagrad(learning_rate=rate)  # how we change the weights
    # opt = tf.keras.optimizers.Adam(learning_rate=rate)
    loss = 'sparse_categorical_crossentropy'
    if cater is True:
        loss = 'categorical_crossentropy'  # how wrong we are
    if time is False:
        # model.add(LSTM(64, input_shape=(n_timesteps, 1), return_sequences=True))
        model.add(LSTM(64, input_shape=(n_timesteps, 1)))
    if time is True:
        model.add(LSTM(64, input_shape=(n_timesteps, 2)))
    # model.add(Dense(100, activation='relu'))
    # model.add(BatchNormalization())
    if dropout is True:
        model.add(layers.Dropout(0.5))
    model.add(LayerNormalization())
    # model.add(LSTM(64, return_sequences=True))
    # model.add(LSTM(32, return_sequences=True))
    # model.add(LSTM(32))
    model.add(Dense(5, activation='softmax'))  # final layer with 5 output classes
    model.compile(loss=loss, optimizer=opt, metrics=['accuracy'])
    model.summary()
    return model
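# A minimal usage sketch (assumed, not part of the source): a univariate 5-class model
# with integer labels, Adagrad learning rate 0.01, and dropout enabled. The value of
# n_timesteps is illustrative.
model = LSTM_Multi(n_timesteps=50, rate=0.01, time=False, dropout=True, cater=False)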
def create_model(self):
    Z = Input(shape=(self.samples, self.nmic), dtype=tf.float32)  # shape = (nbatch, nsamples, nmic)
    R = Input(shape=(self.samples,), dtype=tf.float32)            # shape = (nbatch, nsamples)
    pid = Input(shape=(1,), dtype=tf.int32)                       # shape = (nbatch,)

    # statistic adaption
    X = Adaption(kernel_size=self.wlen, ndoa=self.ndoa)([Z, pid[:, 0]])  # shape = (nbatch, nsamples, nmic)

    # fast convolution
    X = Lambda(self.forward)(X)                         # shape = (nbatch, nfram, wlen*nmic)
    H = Dense(units=self.nbin, activation='linear')(X)  # shape = (nbatch, nfram, nbin)

    # beamforming
    X = LayerNormalization()(H)
    X = Bidirectional(
        LSTM(units=self.nbin, activation='tanh', return_sequences=True))(X)
    X = Dense(units=self.nbin, activation='tanh')(X)    # shape = (nbatch, nfram, nbin)
    G = Dense(units=self.nbin, activation='linear')(X)  # shape = (nbatch, nfram, nbin)
    Y, Py = Lambda(self.bf_filter)([H, G])

    # fast deconvolution
    Y = Dense(units=self.wlen, activation='linear')(Y)  # shape = (nbatch, nfram, wlen)
    Y = Lambda(self.inverse)(Y)                         # shape = (nbatch, samples)

    cost = Lambda(self.cost)([R, Y])
    self.model = Model(inputs=[Z, R, pid], outputs=[Py, Y, cost])
def build_discriminator(n_var):
    model = tf.keras.Sequential()
    model.add(Dense(5, input_shape=(20, n_var, 1)))
    model.add(Flatten())
    model.add(LayerNormalization())
    model.add(LeakyReLU())
    model.add(Dropout(0.2))
    model.add(Dense(5 * 2 * 16))
    model.add(LayerNormalization())
    model.add(LeakyReLU())
    model.add(Dropout(0.2))
    model.add(Dense(5 * 2 * 16))
    model.add(Dense(1))
    return model
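# A minimal usage sketch (assumed, not part of the source): build the discriminator for
# n_var = 10 features and push a dummy batch through it to confirm the single-logit output.
disc = build_discriminator(n_var=10)
dummy = tf.random.normal((4, 20, 10, 1))  # (batch, 20, n_var, 1)
print(disc(dummy).shape)                  # expected: (4, 1)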
def build_Embedded(self, action_space=6, dueling=True):
    self.network_size = 256
    X_input = Input(shape=(self.REM_STEP*99))
    # X_input = Input(shape=(self.REM_STEP*7,))
    input_reshape = (self.REM_STEP, 99)
    X = X_input
    truncatedn_init = initializers.TruncatedNormal(0, 1e-2)
    x_init = "he_uniform"
    y_init = initializers.glorot_uniform()
    const_init = initializers.constant(1e-2)

    X_reshaped = Reshape(input_reshape)(X_input)
    X = LayerNormalization(axis=2)(X_reshaped)
    X = LayerNormalization(axis=1)(X)
    X = Reshape((2, -1))(X)
    X = Dense(self.network_size*2, kernel_initializer=y_init, activation="relu")(X)
    X = Dense(256, kernel_initializer=y_init, activation="relu")(X)
    X = TimeDistributed(Dense(self.network_size/4, kernel_initializer=y_init,
                              activation="relu"))(X)
    X = Flatten()(X)

    if dueling:
        state_value = Dense(1, activation="softmax")(X)
        state_value = Lambda(lambda s: K.expand_dims(s[:, 0], -1),
                             output_shape=(action_space,))(state_value)
        action_advantage = Dense(action_space, activation="linear")(X)
        action_advantage = Lambda(lambda a: a[:, :] - K.mean(a[:, :], keepdims=True),
                                  output_shape=(action_space,))(action_advantage)
        X = Add()([state_value, action_advantage])
    else:
        # Output Layer with # of actions: 2 nodes (left, right)
        X = Dense(action_space, activation="relu", kernel_initializer='he_uniform')(X)

    model = Model(inputs=X_input, outputs=X, name='build_Embedded')
    model.compile(loss=huber_loss, optimizer=Adam(lr=self.learning_rate),
                  metrics=["accuracy"])
    # model.compile(loss="mean_squared_error", optimizer=Adam(lr=0.00025, epsilon=0.01), metrics=["accuracy"])
    model.summary()
    return model
def __init__(self, ff_units, input_length, embedding_dim, num_chars, n_heads, dropout_rate):
    super(DecoderBlock, self).__init__()
    # The main focus of the __init__ function is to prepare all of the layers to be
    # called on the input.
    self.ff = FeedForward(units=ff_units, input_dim=embedding_dim,
                          dropout_rate=dropout_rate)
    # Using the mask_attention function from before, self.mask holds an
    # input_length x input_length array to be used by the masked attention layer.
    self.mask = mask_attention(input_length)
    self.masked_attention = MultiHeadAttention(num_heads=n_heads, key_dim=num_chars)
    self.attention = MultiHeadAttention(num_heads=n_heads, key_dim=num_chars)
    self.norm1, self.norm2, self.norm3 = LayerNormalization(), LayerNormalization(), LayerNormalization()
    # The constant dropouts used here help reduce overfitting, which decoders are
    # especially prone to.
    self.dropout1, self.dropout2, self.dropout3 = Dropout(dropout_rate), Dropout(dropout_rate), Dropout(dropout_rate)
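# A minimal sketch (assumed, not from the source) of the decoder block's call():
# masked self-attention over the target sequence, attention over the encoder output,
# and the feed-forward sub-layer, each with dropout, a residual connection, and
# layer normalization.
def call(self, inputs, encoder_output, training=False):
    attn1 = self.masked_attention(inputs, inputs, attention_mask=self.mask)
    out1 = self.norm1(inputs + self.dropout1(attn1, training=training))
    attn2 = self.attention(out1, encoder_output)
    out2 = self.norm2(out1 + self.dropout2(attn2, training=training))
    ff = self.ff(out2)
    return self.norm3(out2 + self.dropout3(ff, training=training))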
def get_model(train=True):
    """
    Parameters
    ----------
    train : bool
        If False, load the saved model; otherwise retrain it.
    """
    if not train:
        return load_model(
            MODEL_PATH,
            custom_objects={'LayerNormalization': LayerNormalization})

    training_generator = DataGenerator(DATASET_PATH, CLIP_LEN, STRIDE, DIM,
                                       BATCH_SIZE, N_CHANNELS, SHUFFLE)

    seq = Sequential()
    seq.add(
        TimeDistributed(Conv2D(16, (11, 11), strides=4, padding="same"),
                        batch_input_shape=(None, *DIM, N_CHANNELS)))
    seq.add(LayerNormalization())
    seq.add(TimeDistributed(Conv2D(8, (8, 8), strides=2, padding="same")))
    seq.add(LayerNormalization())
    ######
    seq.add(ConvLSTM2D(8, (3, 3), padding="same", return_sequences=True))
    seq.add(LayerNormalization())
    seq.add(ConvLSTM2D(4, (3, 3), padding="same", return_sequences=True))
    seq.add(LayerNormalization())
    seq.add(ConvLSTM2D(8, (3, 3), padding="same", return_sequences=True))
    seq.add(LayerNormalization())
    ######
    seq.add(
        TimeDistributed(Conv2DTranspose(8, (8, 8), strides=2, padding="same")))
    seq.add(LayerNormalization())
    seq.add(
        TimeDistributed(
            Conv2DTranspose(16, (11, 11), strides=4, padding="same")))
    seq.add(LayerNormalization())
    seq.add(
        TimeDistributed(
            Conv2D(1, (11, 11), activation="sigmoid", padding="same")))
    print(seq.summary())

    seq.compile(loss='mse',
                optimizer=keras.optimizers.Adam(lr=1e-4, decay=1e-5, epsilon=1e-6))
    seq.fit(x=training_generator,
            epochs=EPOCHS,
            verbose=True,
            workers=0,
            use_multiprocessing=False)
    seq.save(MODEL_PATH)
    return seq
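# A minimal usage sketch (assumed, not part of the source): reload the saved autoencoder
# for inference and reconstruct a pre-processed batch of clips shaped (batch, *DIM, N_CHANNELS).
# 'clips' is a hypothetical array prepared the same way as the training data.
model = get_model(train=False)
reconstruction = model.predict(clips)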
def residualConv(self, X, kernel_size, dilation_rate=1, activation='softplus'):
    Z = Conv1D(filters=X.shape[-1],
               kernel_size=kernel_size,
               activation=activation,
               padding='same',
               dilation_rate=dilation_rate)(X)
    Z = LayerNormalization()(Z) + X
    return Z
def create_model(self):
    Py = Input(shape=(None, self.nbin), dtype=tf.float32)  # shape = (nbatch, nfram, nbin)
    sid = Input(shape=(1,), dtype=tf.int32)                # shape = (nbatch, 1)

    # identification
    X = Dense(units=self.ndim*2, activation='softplus')(Py)
    X = LayerNormalization(axis=1, center=False, scale=False)(X)
    X = self.residualConv(X, kernel_size=10, dilation_rate=1)
    X = self.residualConv(X, kernel_size=10, dilation_rate=2)
    X = self.residualConv(X, kernel_size=10, dilation_rate=4)
    X = self.residualConv(X, kernel_size=10, dilation_rate=8)
    X = self.residualConv(X, kernel_size=10, dilation_rate=16)
    X = self.residualConv(X, kernel_size=10, dilation_rate=32)
    X = Lambda(self.average_pool)(X)
    E = Dense(units=self.ndim, activation='linear')(X)     # shape = (nbatch, ndim)

    cost = Lambda(self.cost)([E, sid])
    self.model = Model(inputs=[Py, sid], outputs=[E, cost])
def ANN(self):
    # self.dataset = pd.read_csv(self.fileName, sep=";", decimal=",")
    x = self.dataset.iloc[:, :7].values
    y = self.dataset.iloc[:, 7].values
    self.X = x

    # Normalization
    sc = MinMaxScaler()
    x = sc.fit_transform(x)
    y = y.reshape(-1, 1)
    y = sc.fit_transform(y)

    # Splitting the dataset into the Training set and Test set
    x_train, x_test, y_train, self.y_test = train_test_split(x, y, test_size=0.10, random_state=4)
    x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.05, random_state=4)

    # Build Keras sequential model
    model = Sequential()
    # Add layer normalization
    model.add(LayerNormalization())
    # Adding the input layer:
    model.add(Dense(7, input_dim=x_train.shape[1], activation='relu'))
    # The first hidden layer:
    model.add(Dense(12, activation='relu'))
    # Adding the output layer
    model.add(Dense(1, activation='sigmoid'))

    # Compiling the ANN
    model.compile(optimizer='adam', loss='mean_squared_error',
                  metrics=['mse', 'mae', rmse, r_square])

    # Enable early stopping based on mean_squared_error
    earlystopping = EarlyStopping(monitor="mse", patience=40, verbose=1, mode='auto')

    # Fit model
    self.result = model.fit(x_train, y_train, batch_size=100, epochs=1000,
                            validation_data=(x_test, self.y_test),
                            callbacks=[earlystopping])
    # result = model.fit(x_train, y_train, batch_size=100, epochs=1000,
    #                    validation_data=(x_test, y_test), callbacks=[earlystopping])

    # Get predictions
    self.y_pred = model.predict(x_test)
    self.y_x_pred = model.predict(x)
    self.Y = sc.inverse_transform(self.y_x_pred)

    # -----------------------------------------------------------------------------
    # Print statistical figures of merit
    # -----------------------------------------------------------------------------
    import sklearn.metrics, math
    self.textBrowser_2.setText(
        f"Mean absolute error (MAE): {sklearn.metrics.mean_absolute_error(self.y_test, self.y_pred)}")
    self.textBrowser_2.append(
        f"Mean squared error (MSE): {sklearn.metrics.mean_squared_error(self.y_test, self.y_pred)}")
    self.textBrowser_2.append(
        f"Root mean squared error (RMSE): {math.sqrt(sklearn.metrics.mean_squared_error(self.y_test, self.y_pred))}")
    self.textBrowser_2.append(
        f"R square (R^2): {sklearn.metrics.r2_score(self.y_test, self.y_pred)}")

    self.dataset['Thwo_Predict'] = self.Y
    self.newFile = "newCsvFile.xlsx"
    self.dataset.to_excel(self.newFile, index=False)
def define_generator(n_blocks):
    model_list = list()
    # input
    ly0 = Input(shape=(1, 100, 2))
    featured = Conv2D(128, (1, 2), strides=(1, 2), padding='valid',
                      kernel_initializer='he_normal')(ly0)

    # block 1: deconvolution
    g_1 = Conv2DTranspose(4, (2, 1), strides=(2, 1), padding='valid',
                          kernel_initializer='he_normal')(featured)
    g_1 = Dense(8)(g_1)
    g_1 = Conv2DTranspose(16, (2, 1), strides=(2, 1), padding='valid',
                          kernel_initializer='he_normal')(g_1)
    g_1 = Dense(32)(g_1)
    g_1 = Conv2DTranspose(64, (1, 15), strides=(1, 15), padding='valid',
                          kernel_initializer='he_normal')(g_1)
    op_1 = Dense(128)(g_1)

    # block 2: deconvolution
    g_2 = Conv2DTranspose(4, (2, 1), strides=(2, 1), padding='valid',
                          kernel_initializer='he_normal')(featured)
    g_2 = Dense(8)(g_2)
    g_2 = Conv2DTranspose(16, (2, 1), strides=(2, 1), padding='valid',
                          kernel_initializer='he_normal')(g_2)
    g_2 = Dense(32)(g_2)
    g_2 = Conv2DTranspose(64, (1, 15), strides=(1, 15), padding='valid',
                          kernel_initializer='he_normal')(g_2)
    op_2 = Dense(128)(g_2)

    # block 3: deconvolution
    g_3 = Conv2DTranspose(4, (2, 1), strides=(2, 1), padding='valid',
                          kernel_initializer='he_normal')(featured)
    g_3 = Dense(8)(g_3)
    g_3 = Conv2DTranspose(16, (2, 1), strides=(2, 1), padding='valid',
                          kernel_initializer='he_normal')(g_3)
    g_3 = Dense(32)(g_3)
    g_3 = Conv2DTranspose(64, (1, 15), strides=(1, 15), padding='valid',
                          kernel_initializer='he_normal')(g_3)
    op_3 = Dense(128)(g_3)

    # block 4: deconvolution
    g_4 = Conv2DTranspose(4, (2, 1), strides=(2, 1), padding='valid',
                          kernel_initializer='he_normal')(featured)
    g_4 = Dense(8)(g_4)
    g_4 = Conv2DTranspose(16, (2, 1), strides=(2, 1), padding='valid',
                          kernel_initializer='he_normal')(g_4)
    g_4 = Dense(32)(g_4)
    g_4 = Conv2DTranspose(64, (1, 15), strides=(1, 15), padding='valid',
                          kernel_initializer='he_normal')(g_4)
    op_4 = Dense(128)(g_4)

    # combine channels
    sumarized_blocks = Add()([op_1, op_2, op_3, op_4])
    sumarized_blocks = Dense(256)(sumarized_blocks)
    sumarized_blocks = Dense(64)(sumarized_blocks)
    sumarized_blocks = Dense(8)(sumarized_blocks)
    sumarized_blocks = Dense(2)(sumarized_blocks)
    wls = LayerNormalization(axis=[1, 2, 3])(sumarized_blocks)
    model = Model(ly0, wls)
    # store model
    model_list.append([model, model])

    # create submodels
    for i in range(1, n_blocks):
        # get prior model without the fade-in
        old_model = model_list[i - 1][0]
        # create new model for next resolution
        models = add_generator_block(old_model)
        # store model
        model_list.append(models)
    return model_list
def build_1CNNBase(self, action_space=6, dueling=True):
    self.network_size = 256
    X_input = Input(shape=(self.REM_STEP*90))
    # X_input = Input(shape=(self.REM_STEP*7,))
    input_reshape = (self.REM_STEP, 90)
    X = X_input
    truncatedn_init = initializers.TruncatedNormal(0, 1e-2)
    x_init = initializers.GlorotNormal()
    y_init = initializers.glorot_uniform()
    const_init = initializers.constant(1e-2)

    X_reshaped = Reshape(input_reshape)(X_input)
    # slice2 = Permute((2,1))(slice2)

    # normalisation for each layer
    normlayer_0 = LayerNormalization(
        axis=-1, trainable=True, epsilon=0.0001, center=False, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")
    normlayer_1 = LayerNormalization(
        axis=-1, trainable=True, epsilon=0.001, center=True, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")
    normlayer_2 = LayerNormalization(
        axis=-1, trainable=True, epsilon=0.001, center=True, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")
    normlayer_3 = LayerNormalization(
        axis=-1, trainable=True, epsilon=0.001, center=True, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")

    X = normlayer_0(X_reshaped)
    # X = TimeDistributed(Dense(128, activation="relu", kernel_initializer='he_uniform'))(X)
    # cnn2 = TimeDistributed(Dense(64, kernel_initializer='he_uniform'))(X)
    # cnn2 = LeakyReLU(.4)(cnn2)
    # cnn2 = MaxPooling1D(2)(cnn2)
    # cnn2 = Flatten()(cnn2)
    # cnn2 = LocallyConnected1D(filters=64, kernel_initializer='he_uniform', kernel_size=2)(X)
    # cnn2 = LeakyReLU(0.3)(cnn2)
    # cnn2 = LocallyConnected1D(filters=64, kernel_initializer='he_uniform', kernel_size=2)(X)
    # cnn2 = Dense(128, activation="relu", kernel_initializer='he_uniform')(cnn2)
    # cnn2 = Flatten()(cnn2)
    # X = Dense(32, kernel_initializer='he_uniform', activation="relu")(X)
    # X = normlayer_3(X)
    # X = Dense(64, kernel_initializer='he_uniform', activation=LeakyReLU(.3))(X)

    X = Flatten()(X)
    X = Dense(512, activation=LeakyReLU(.1), kernel_initializer=x_init)(X)
    X = Dense(256, activation=LeakyReLU(.1), kernel_initializer=x_init)(X)

    if dueling:
        state_value = Dense(1, kernel_initializer=x_init, activation="softmax")(X)
        state_value = Lambda(lambda s: K.expand_dims(s[:, 0], -1),
                             output_shape=(action_space,))(state_value)
        action_advantage = Dense(action_space, kernel_initializer=x_init,
                                 activation="linear")(X)
        action_advantage = Lambda(lambda a: a[:, :] - K.mean(a[:, :], keepdims=True),
                                  output_shape=(action_space,))(action_advantage)
        X = Add()([state_value, action_advantage])
    else:
        # Output Layer with # of actions: 2 nodes (left, right)
        X = Dense(action_space, activation="relu", kernel_initializer='he_uniform')(X)

    model = Model(inputs=X_input, outputs=X, name='build_TMaxpoolin_3')
    model.compile(loss=tf.keras.losses.Huber(delta=2),
                  metrics=['mean_absolute_error', 'accuracy'],
                  optimizer=Adam(lr=self.learning_rate))
    # model.compile(loss="mean_squared_error", optimizer=Adam(lr=0.00025, epsilon=0.01), metrics=["accuracy"])
    model.summary()
    return model
def add_generator_block(old_model):
    # get the end of the last block
    block_end = old_model.layers[-2].output
    # upsample, and define new block
    upsampling = UpSampling2D()(block_end)
    featured = Conv2D(128, (2, 2), strides=(2, 2), padding='valid',
                      kernel_initializer='he_normal')(upsampling)

    # block 1: deconvolution
    g_1 = Conv2DTranspose(16, (2, 1), strides=(2, 1), padding='valid',
                          kernel_initializer='he_normal')(featured)
    g_1 = Dense(32)(g_1)
    g_1 = Conv2DTranspose(64, (1, 2), strides=(1, 2), padding='valid',
                          kernel_initializer='he_normal')(g_1)
    op_1 = Dense(128)(g_1)

    # block 2: deconvolution
    g_2 = Conv2DTranspose(16, (2, 1), strides=(2, 1), padding='valid',
                          kernel_initializer='he_normal')(featured)
    g_2 = Dense(32)(g_2)
    g_2 = Conv2DTranspose(64, (1, 2), strides=(1, 2), padding='valid',
                          kernel_initializer='he_normal')(g_2)
    op_2 = Dense(128)(g_2)

    # block 3: deconvolution
    g_3 = Conv2DTranspose(16, (2, 1), strides=(2, 1), padding='valid',
                          kernel_initializer='he_normal')(featured)
    g_3 = Dense(32)(g_3)
    g_3 = Conv2DTranspose(64, (1, 2), strides=(1, 2), padding='valid',
                          kernel_initializer='he_normal')(g_3)
    op_3 = Dense(128)(g_3)

    # block 4: deconvolution
    g_4 = Conv2DTranspose(16, (2, 1), strides=(2, 1), padding='valid',
                          kernel_initializer='he_normal')(featured)
    g_4 = Dense(32)(g_4)
    g_4 = Conv2DTranspose(64, (1, 2), strides=(1, 2), padding='valid',
                          kernel_initializer='he_normal')(g_4)
    op_4 = Dense(128)(g_4)

    # block 5: deconvolution
    g_5 = Conv2DTranspose(16, (2, 1), strides=(2, 1), padding='valid',
                          kernel_initializer='he_normal')(featured)
    g_5 = Dense(32)(g_5)
    g_5 = Conv2DTranspose(64, (1, 2), strides=(1, 2), padding='valid',
                          kernel_initializer='he_normal')(g_5)
    op_5 = Dense(128)(g_5)

    # block 6: deconvolution
    g_6 = Conv2DTranspose(16, (2, 1), strides=(2, 1), padding='valid',
                          kernel_initializer='he_normal')(featured)
    g_6 = Dense(32)(g_6)
    g_6 = Conv2DTranspose(64, (1, 2), strides=(1, 2), padding='valid',
                          kernel_initializer='he_normal')(g_6)
    op_6 = Dense(128)(g_6)

    # summarize
    sumarized_blocks = Add()([op_1, op_2, op_3, op_4, op_5, op_6])
    sumarized_blocks = Dense(256)(sumarized_blocks)
    sumarized_blocks = Dense(64)(sumarized_blocks)
    sumarized_blocks = Dense(8)(sumarized_blocks)
    for_sum_layer = Dense(2)(sumarized_blocks)
    out_image = LayerNormalization(axis=[1, 2, 3])(for_sum_layer)
    # define model
    model1 = Model(old_model.input, out_image)
    # define new output image as the weighted sum of the old and new models
    merged = WeightedSum()([upsampling, for_sum_layer])
    output_2 = LayerNormalization(axis=[1, 2, 3])(merged)
    # define model
    model2 = Model(old_model.input, output_2)
    return [model1, model2]
def FCTime_distributed_model(self, action_space=6, dueling=True):
    self.network_size = 256
    X_input = Input(shape=(self.REM_STEP*62))
    # X_input = Input(shape=(self.REM_STEP*7,))
    input_reshape = (self.REM_STEP, 62)
    X = X_input
    truncatedn_init = initializers.TruncatedNormal(0, 1e-2)
    x_init = "he_uniform"
    y_init = initializers.glorot_uniform()
    const_init = initializers.constant(1e-2)

    X_reshaped = Reshape(input_reshape)(X_input)

    slice2 = tf.slice(X_reshaped, [0, 0, 0], [-1, 2, 8])
    slice2 = Reshape((2, -1, 1))(slice2)
    # slice2 = Permute((2,1))(slice2)
    # normalisation for each dimension
    normlayer_1 = LayerNormalization(
        axis=2, trainable=True, epsilon=0.0001, center=True, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")
    normlayer_2 = LayerNormalization(
        axis=1, trainable=True, epsilon=0.0001, center=True, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")
    TD1 = (normlayer_1)(slice2)
    # TD1 = (normlayer_2)(TD1)
    # TD1 = TimeDistributed(Dense(32, activation="tanh", bias_initializer=const_init,
    #                             kernel_initializer=y_init, use_bias=True))(TD1)
    # TD1 = Reshape((2,-1))(TD1)
    TD1 = Dense(4, activation="relu", kernel_initializer=y_init,
                bias_initializer=const_init, use_bias=True)(TD1)
    TD1 = Dense(4, activation="relu", kernel_initializer=y_init,
                bias_initializer=const_init, use_bias=True)(TD1)
    TD1 = Flatten()(TD1)

    slice3 = tf.slice(X_reshaped, [0, 0, 8], [-1, 2, 4])
    slice3 = Reshape((2, -1, 4))(slice3)
    # slice3 = Permute((2,1))(slice3)
    normlayer_1 = LayerNormalization(
        axis=2, trainable=True, epsilon=0.0001, center=True, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")
    normlayer_2 = LayerNormalization(
        axis=1, trainable=True, epsilon=0.0001, center=True, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")
    TD2 = (normlayer_1)(slice3)
    # TD2 = (normlayer_2)(TD2)
    # TD2 = TimeDistributed(Dense(32, activation=PReLU(), bias_initializer=const_init,
    #                             kernel_initializer=y_init, use_bias=True))(TD2)
    # TD2 = Reshape((2,-1))(TD2)
    TD2 = Dense(4, activation="relu", kernel_initializer=y_init,
                bias_initializer=const_init, use_bias=True)(TD2)
    TD2 = Dense(2, activation="relu", kernel_initializer=y_init,
                bias_initializer=const_init, use_bias=True)(TD2)
    TD2 = Dense(1, activation="relu", kernel_initializer=y_init,
                bias_initializer=const_init, use_bias=True)(TD2)
    TD2 = Flatten()(TD2)

    slice4 = tf.slice(X_reshaped, [0, 0, 12], [-1, 2, 20])
    slice4 = Reshape((2, -1, 4))(slice4)
    # slice4 = Permute((2,1))(slice4)
    normlayer_1 = LayerNormalization(
        axis=2, trainable=True, epsilon=0.0001, center=True, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")
    normlayer_2 = LayerNormalization(
        axis=1, trainable=True, epsilon=0.0001, center=True, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")
    TD3 = (normlayer_1)(slice4)
    # TD3 = (normlayer_2)(TD3)
    # TD3 = TimeDistributed(Dense(32, activation=PReLU(), kernel_initializer=y_init,
    #                             bias_initializer=const_init, use_bias=True))(TD3)
    # TD3 = Reshape((2,-1))(TD3)
    TD3 = Dense(4, activation="relu", kernel_initializer=y_init,
                bias_initializer=const_init, use_bias=True)(TD3)
    TD3 = Dense(2, activation="relu", kernel_initializer=y_init,
                bias_initializer=const_init, use_bias=True)(TD3)
    TD3 = Dense(1, activation="relu", kernel_initializer=y_init,
                bias_initializer=const_init, use_bias=True)(TD3)
    TD3 = Flatten()(TD3)

    slice5 = tf.slice(X_reshaped, [0, 0, 32], [-1, 2, -1])
    slice5 = Reshape((2, -1, 2))(slice5)
    # slice5 = Permute((2,1))(slice5)
    normlayer_1 = LayerNormalization(
        axis=2, trainable=True, epsilon=0.0001, center=True, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")
    normlayer_2 = LayerNormalization(
        axis=1, trainable=True, epsilon=0.0001, center=True, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")
    TD4 = (normlayer_1)(slice5)
    # TD4 = (normlayer_2)(TD4)
    # TD4 = TimeDistributed(Dense(32, activation=PReLU(), kernel_initializer=y_init,
    #                             bias_initializer=const_init, use_bias=True))(TD4)
    # TD4 = Reshape((2,-1))(TD4)
    TD4 = Dense(2, activation="relu", kernel_initializer=y_init,
                bias_initializer=const_init, use_bias=True)(TD4)
    TD4 = Dense(1, activation="relu", kernel_initializer=y_init,
                bias_initializer=const_init, use_bias=True)(TD4)
    TD4 = Flatten()(TD4)

    # R1 = LSTM(64)(X_reshaped)
    # cnn1 = Conv1D(filters=64, kernel_size=(2), activation=LeakyReLU(.4),
    #               kernel_initializer=y_init, use_bias=True)(cnn1)
    concatenated = Concatenate()([TD4, TD3, TD2, TD1])
    cnn1 = (concatenated)
    cnn1 = Dense(512, kernel_initializer=y_init, activation="relu",
                 bias_initializer=const_init, use_bias=True)(cnn1)
    cnn1 = Dense(256, kernel_initializer=y_init, activation="relu",
                 bias_initializer=const_init, use_bias=True)(cnn1)
    X = cnn1
    # X = Conv2D(64, 4, strides=(2), padding="valid", activation="elu",
    #            kernel_initializer=x_init, data_format="channels_first")(X)
    # X = Conv2D(128, 4, strides=(2), padding="valid", activation="elu",
    #            kernel_initializer=x_init, data_format="channels_first")(X)
    # 'Dense' is the basic form of a neural network layer
    # Input Layer of state size(4) and Hidden Layer with 512 nodes
    # X = Dense(256, activation="relu", kernel_initializer=x_init)(X)
    # X = Dense(64, activation="relu", kernel_initializer=x_init)(X)
    # X = Dense(64, activation="relu", kernel_initializer=x_init)(X)
    # X = Dense(256, activation="relu", kernel_initializer=const_init, use_bias=True,
    #           bias_initializer=truncatedn_init)(X)
    # Hidden layer with 256 nodes
    # Hidden layer with 64 nodes
    # X = Dense(64, activation="relu", kernel_initializer=truncatedn_init,
    #           bias_initializer=const_init)(X)

    if dueling:
        state_value = Dense(1)(X)
        state_value = Lambda(lambda s: K.expand_dims(s[:, 0], -1),
                             output_shape=(action_space,))(state_value)
        action_advantage = Dense(action_space)(X)
        action_advantage = Lambda(lambda a: a[:, :] - K.mean(a[:, :], keepdims=True),
                                  output_shape=(action_space,))(action_advantage)
        X = Add()([state_value, action_advantage])
    else:
        # Output Layer with # of actions: 2 nodes (left, right)
        X = Dense(action_space, activation="relu", kernel_initializer='he_uniform')(X)

    model = Model(inputs=X_input, outputs=X, name='Hydra-4')
    model.compile(loss=tf.keras.losses.Huber(delta=3),
                  metrics=['mean_absolute_error', 'accuracy'],
                  optimizer=Adam(lr=self.learning_rate))
    # model.compile(loss="mean_squared_error", optimizer=Adam(lr=0.00025, epsilon=0.01), metrics=["accuracy"])
    model.summary()
    return model
def build_Parrallel_64(self):
    self.network_size = 256
    X_input = Input(shape=(self.REM_STEP*86))
    # X_input = Input(shape=(self.REM_STEP*7,))
    input_reshape = (self.REM_STEP, 86)
    X = X_input
    truncatedn_init = initializers.TruncatedNormal(0, 1e-2)
    x_init = "he_uniform"
    y_init = initializers.glorot_uniform()
    const_init = initializers.constant(1e-2)

    X_reshaped = Reshape(input_reshape)(X_input)
    # slice2 = Permute((2,1))(slice2)

    # normalisation for each dimension
    normlayer_1 = LayerNormalization(
        axis=-1, trainable=True, epsilon=0.0001, center=True, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")
    normlayer_2 = LayerNormalization(
        axis=-1, trainable=True, epsilon=0.0001, center=True, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")

    x = normlayer_2(X_input)
    x = Dense(90, activation=LeakyReLU(0.1),
              kernel_initializer=initializers.RandomNormal(stddev=2),
              bias_initializer=truncatedn_init, use_bias=True)(x)
    out_a = (x)

    x = normlayer_1(X_reshaped)
    x = TimeDistributed(Dense(90, activation=LeakyReLU(0.1),
                              kernel_initializer=initializers.RandomNormal(stddev=2),
                              bias_initializer=truncatedn_init, use_bias=True))(x)
    x = MaxPooling1D((2))(x)
    x = Flatten()(x)
    out_b = (x)

    # x = Conv2D(4, (1, 2), strides=(1, 1), padding="valid", activation="softmax",
    #            kernel_initializer=x_init, data_format="channels_first")(t)
    # x = MaxPooling2D((4, 2))(x)
    # x = LocallyConnected2D(8, (2, 2), strides=(1, 1), padding="valid", activation="relu",
    #                        kernel_initializer=x_init, data_format="channels_first")(x)
    # x = MaxPooling2D((1, 2))(x)
    # x = Dropout(0.3)(x)
    # x = Flatten()(x)
    # x = Dense(64, activation="relu")(x)
    # x = Dense(64, activation="relu")(x)
    # out_c = Dense(64, activation='relu', kernel_initializer='he_uniform')(x)
    # out_c = (x)

    concatenated = concatenate([out_a, out_b])
    # model_final.add(Reshape((4, 11, 2), input_shape=(88,)))
    # model_final.add(concatted)
    # model_final.add(Flatten())
    # model_final.add(Dense(256, activation='relu', kernel_initializer='he_uniform'))
    # model_final.add(Dense(64, activation='relu', kernel_initializer='he_uniform'))
    # out_d = Dropout(0.4)(concatenated)
    # out_d = MaxPooling2D((8, 1))(out_d)
    out_d = (concatenated)
    out_d = Dense(512, activation='relu',
                  kernel_initializer=initializers.RandomNormal(stddev=2))(out_d)
    out_d = Dense(256, activation='relu',
                  kernel_initializer=initializers.RandomNormal(stddev=2))(out_d)

    state_value = Dense(1, kernel_initializer='he_uniform')(out_d)
    state_value = Lambda(lambda s: K.expand_dims(s[:, 0], -1),
                         output_shape=(self.action_space,))(state_value)
    action_advantage = Dense(self.action_space, activation='linear',
                             kernel_initializer='he_uniform')(out_d)
    action_advantage = Lambda(lambda a: a[:, :] - K.mean(a[:, :], keepdims=True),
                              output_shape=(self.action_space,))(action_advantage)
    out = Add()([state_value, action_advantage])

    model_final = Model([X_input], out, name='Pa12_model')
    model_final.compile(optimizer=Adam(lr=self.learning_rate),
                        loss=tf.keras.losses.Huber(delta=2.35),
                        metrics=['mean_absolute_error', 'accuracy'])
    # RMSprop(lr=self.learning_rate, rho=0.95, decay=25e-5, epsilon=self.epsilon), metrics=["accuracy"])
    print(model_final.summary())
    return model_final
def build_modelPar1(self, dueling=True):
    self.network_size = 256
    X_input = Input(shape=(self.REM_STEP*46))
    # X_input = Input(shape=(self.REM_STEP*7,))
    input_reshape = (self.REM_STEP, 1, 46)
    X = X_input
    truncatedn_init = initializers.TruncatedNormal(0, 1e-2)
    x_init = initializers.GlorotNormal()
    y_init = initializers.glorot_uniform()
    const_init = initializers.constant(1e-2)

    X_reshaped = Reshape((2, -1))(X_input)
    normlayer_0 = LayerNormalization(
        axis=-1, trainable=True, epsilon=0.0001, center=True, scale=True,
        beta_initializer="zeros")
    normlayer_1 = LayerNormalization(
        axis=-1, trainable=True, epsilon=0.0001, center=True, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")
    normlayer_2 = LayerNormalization(
        axis=-1, trainable=True, epsilon=0.0001, center=True, scale=True,
        beta_initializer="zeros", gamma_initializer="ones")
    const_init = initializers.constant(1e-2)

    t = Reshape(input_reshape)(X)
    cnn2 = (Conv2D(filters=32, activation="relu", kernel_initializer=x_init,
                   kernel_size=(1), strides=(2, 1), padding="valid"))(t)
    cnn2 = normlayer_0(cnn2)
    cnn2 = Reshape((1, -1,))(cnn2)
    cnn2 = (LocallyConnected1D(filters=64, activation="relu", kernel_initializer=x_init,
                               kernel_size=(1), strides=(1), padding="valid"))(cnn2)
    # cnn2 = Flatten()(cnn2)

    cnn1 = TimeDistributed(Dense(64, activation="tanh",
                                 kernel_initializer=x_init))(X_reshaped)
    cnn1 = normlayer_1(cnn1)
    cnn1 = TimeDistributed(Dense(32, activation="tanh",
                                 kernel_initializer=x_init))(cnn1)
    # cnn1 = Flatten()(cnn1)
    cnn1 = Reshape((1, -1,))(cnn1)

    conc = concatenate([cnn1, cnn2])
    f = Flatten()(conc)
    w = Dense(512, activation="relu", kernel_initializer=x_init)(f)
    w = Dense(256, activation="relu", kernel_initializer=x_init)(w)

    state_value = Dense(1, kernel_initializer=x_init)(w)
    state_value = Lambda(lambda s: K.expand_dims(s[:, 0], -1),
                         output_shape=(self.action_space,))(state_value)
    action_advantage = Dense(self.action_space, activation='linear',
                             kernel_initializer=x_init)(w)
    action_advantage = Lambda(lambda a: a[:, :] - K.mean(a[:, :], keepdims=True),
                              output_shape=(self.action_space,))(action_advantage)
    out = Add()([state_value, action_advantage])

    model = Model([X_input], out, name='TCnn-model_1')

    if self.optimizer_model == 'Adam':
        optimizer = Adam(lr=self.learning_rate, clipnorm=2.)
    elif self.optimizer_model == 'RMSProp':
        optimizer = RMSprop(self.learning_rate, 0.99, 0.0, 1e-6)
    else:
        print('Invalid optimizer!')

    model.compile(loss=tf.keras.losses.Huber(delta=10),
                  metrics=['mean_absolute_error', 'accuracy'],
                  optimizer=optimizer)
    model.summary()
    return model