def build_model(state_size, action_size): # seed_nb = 14 # np.random.seed(seed_nb) # tf.random.set_seed(seed_nb) learning_rate = 0.01 # model = Sequential() # model.add(Dense(64, input_dim = state_size, activation = 'relu', kernel_initializer = initializers.glorot_uniform(seed = seed_nb))) # model.add(Dense(64, activation = 'relu', kernel_initializer = initializers.glorot_uniform(seed = seed_nb))) # model.add(Dense(action_size, activation = 'linear', kernel_initializer = initializers.glorot_uniform(seed = seed_nb))) # model.compile(loss = 'mse', optimizer = Adam(lr = learning_rate)) model = Sequential() model.add( Dense(64, input_dim=state_size, activation='sigmoid', kernel_initializer=initializers.Ones())) # model.add(Dropout(0.3)) model.add( Dense(64, activation='sigmoid', kernel_initializer=initializers.Ones())) # model.add(Dropout(0.3)) model.add( Dense(action_size, activation='linear', kernel_initializer=initializers.Ones())) model.compile(loss='mse', optimizer=Adam(lr=learning_rate)) # model = Sequential() # model.add(Dense(64, input_dim = state_size, activation = 'relu', kernel_initializer = initializers.Ones())) # model.add(Dense(64, activation = 'relu', kernel_initializer = initializers.Ones())) # model.add(Dense(action_size, activation = 'linear', kernel_initializer = initializers.Ones())) # model.compile(loss = 'mse', optimizer = Adam(lr = learning_rate)) return model
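# A minimal, self-contained sketch (not part of the original code) showing how a
# Q-network shaped like build_model() above might be exercised; state_size=4 and
# action_size=2 are assumed example values.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras import initializers

state_size, action_size = 4, 2
q_net = Sequential([
    Dense(64, input_dim=state_size, activation='sigmoid',
          kernel_initializer=initializers.Ones()),
    Dense(64, activation='sigmoid', kernel_initializer=initializers.Ones()),
    Dense(action_size, activation='linear',
          kernel_initializer=initializers.Ones()),
])
q_net.compile(loss='mse', optimizer=Adam(lr=0.01))

state = np.random.rand(1, state_size)        # one observation
q_values = q_net.predict(state)              # one Q-value per action
greedy_action = int(np.argmax(q_values))     # an epsilon-greedy policy would add exploration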
def ResidualBlockGenerator(x, channels_in, channels_out, stepChanger=False): #stepChanger is for reducing size of the feature map like 56 x 56 to 28 x 28 if stepChanger: shortcut = x group_size = (channels_in) // (cardinality) groups = [] for i in range(cardinality): groupsElements = layers.Conv2D(group_size, kernel_size=(1, 1), strides=(2, 2), padding='same')(x) groupsElements = AddCommonLayers(groupsElements) groupsElements = layers.Conv2D(group_size, kernel_size=(3, 3), padding='same')(groupsElements) groupsElements = AddCommonLayers(groupsElements) groups.append(groupsElements) x = layers.concatenate(groups) x = layers.Conv2D(channels_out, kernel_size=(1, 1))(x) x = layers.BatchNormalization()(x) layer = layers.Conv2D(channels_out, kernel_size=(2, 2), strides=(2, 2), use_bias=False, kernel_initializer=initializers.Ones()) layer.trainable = False shortcut = layer(shortcut) shortcut = layers.BatchNormalization()(shortcut) x = layers.add([shortcut, x]) x = layers.LeakyReLU(alpha=0.)(x) else: shortcut = x group_size = (channels_in) // (cardinality) groups = [] for i in range(cardinality): groupsElements = layers.Conv2D(group_size, kernel_size=(1, 1))(x) groupsElements = AddCommonLayers(groupsElements) groupsElements = layers.Conv2D(group_size, kernel_size=(3, 3), padding='same')(groupsElements) groupsElements = AddCommonLayers(groupsElements) groups.append(groupsElements) x = layers.concatenate(groups) x = layers.Conv2D(channels_out, kernel_size=(1, 1))(x) x = layers.BatchNormalization()(x) if shortcut.shape[3] != x.shape[3]: layer = layers.Conv2D(channels_out, kernel_size=(1, 1), use_bias=False, kernel_initializer=initializers.Ones()) layer.trainable = False shortcut = layer(shortcut) shortcut = layers.BatchNormalization()(shortcut) x = layers.add([shortcut, x]) x = layers.LeakyReLU(alpha=0.)(x) return x
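# ResidualBlockGenerator() above relies on a module-level `cardinality` and an
# AddCommonLayers() helper that are not shown in this snippet. The sketch below
# is an assumption: AddCommonLayers is taken to be batch norm followed by a
# LeakyReLU, which is a common choice in ResNeXt-style blocks, and cardinality=8
# is only an example value.
from keras import layers, Input, Model
from keras import initializers

cardinality = 8  # assumed group count

def AddCommonLayers(y):
    y = layers.BatchNormalization()(y)
    y = layers.LeakyReLU()(y)
    return y

inputs = Input(shape=(56, 56, 64))
x = ResidualBlockGenerator(inputs, channels_in=64, channels_out=128,
                           stepChanger=True)   # 56x56 -> 28x28
model = Model(inputs, x)
model.summary()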
def semiconv_model(function=None): inputs = Input(shape=(256,256,3)) x = SemiConv2D(3, 3, padding='same', kernel_initializer=initializers.Ones(), function=function, normalized_position=False)(inputs) return Model(inputs, x)
def build(self, inputs_shape): ''' self.w_linear = self.add_weight(name='s2s_w_linear', shape=(inputs_shape[-1], self.output_dim), initializer=self.kernel_initializer) self.b_linear = self.add_weight(name='s2s_b_linear', shape=(self.output_dim,), initializer=initializers.Zeros()) ''' self.w_recurrent = self.add_weight( name='s2s_w_recurrent', shape=(self.output_dim * 2, self.output_dim * 4), initializer=self.recurrent_initializer) self.b_recurrent_a = self.add_weight(name='s2s_b_recurrent_a', shape=(self.output_dim * 1, ), initializer=initializers.Zeros()) self.b_recurrent_b = self.add_weight(name='s2s_b_recurrent_b', shape=(self.output_dim * 1, ), initializer=initializers.Ones()) self.b_recurrent_c = self.add_weight(name='s2s_b_recurrent_c', shape=(self.output_dim * 2, ), initializer=initializers.Zeros()) super(Set2SetS, self).build(inputs_shape)
def bias_initializer(shape, *args, **kwargs): return K.concatenate([ self.bias_initializer((self.units, ), *args, **kwargs), initializers.Ones()((self.units, ), *args, **kwargs), self.bias_initializer((self.units * 3, ), *args, **kwargs), ])
def regressor_tunning(kernel_initializer='he_uniform', bias_initializer=initializers.Ones()): model = Sequential() if n_hidden == 0: model.add( LSTM(units=units, input_shape=(steps, features_num), kernel_initializer=kernel_initializer, bias_initializer=bias_initializer)) model.add(LeakyReLU(alpha=0.2)) model.add(Dropout(0.2)) else: model.add( LSTM(units=units, input_shape=(steps, features_num), return_sequences=True, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer)) model.add(LeakyReLU(alpha=0.2)) model.add(Dropout(0.2)) model.add( LSTM(units=units, input_shape=(steps, features_num), kernel_initializer=kernel_initializer, bias_initializer=bias_initializer)) model.add(LeakyReLU(alpha=0.2)) model.add(Dropout(0.2)) model.add(Dense(1, activation='linear')) optimizer = optimizers.RMSprop() model.compile(loss='mse', metrics=['mse', 'mae'], optimizer=optimizer) return model
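# regressor_tunning() above depends on module-level globals (n_hidden, units,
# steps, features_num). A hedged usage sketch with assumed values; the random
# data only illustrates the expected input shape (samples, steps, features).
import numpy as np

n_hidden, units, steps, features_num = 1, 32, 10, 4   # assumed settings
model = regressor_tunning()                            # he_uniform kernels, Ones biases
X = np.random.rand(16, steps, features_num)
y = np.random.rand(16, 1)
model.fit(X, y, epochs=1, batch_size=4, verbose=0)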
def build(self, input_shape): self.kernel = self.add_weight(name='kernel', shape=(input_shape, ), initializer=initializers.Ones(), trainable=True) self.output_dim = input_shape super(balanceGrad, self).build(input_shape)
def bias_initializer(_, *args, **kwargs): return kb.concatenate([self.bias_initializer( (self.n_hidden,), *args, **kwargs), initializers.Ones()((self.n_hidden,), *args, **kwargs), self.bias_initializer( (self.n_hidden * 2,), *args, **kwargs)])
def bias_initializer(_, *args, **kwargs): return K.concatenate([ self.bias_initializer((self.filters, ), *args, **kwargs), initializers.Ones()((self.filters, ), *args, **kwargs), self.bias_initializer((self.filters * 2, ), *args, **kwargs), ])
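# The bias_initializer closures above all implement the same "unit forget-gate
# bias" trick: the concatenated bias vector starts at zero for the input gate,
# one for the forget gate, and zero for the remaining gates. A standalone sketch
# with an assumed size of 8 units per gate:
from keras import backend as K
from keras import initializers

units = 8
base_initializer = initializers.Zeros()

def unit_forget_bias(shape, *args, **kwargs):
    # `shape` is ignored; the concatenated result has length 4 * units.
    return K.concatenate([
        base_initializer((units,), *args, **kwargs),         # input gate
        initializers.Ones()((units,), *args, **kwargs),      # forget gate starts open
        base_initializer((units * 2,), *args, **kwargs),     # cell and output gates
    ])

bias = unit_forget_bias((units * 4,))   # eight zeros, eight ones, sixteen zeros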
def build(self, input_shape): shape = input_shape[2:] # accumulators self._mean = self.add_weight('mean', shape, initializer=initializers.Zeros(), trainable=False) self._var = self.add_weight('var', shape, initializer=initializers.Ones(), trainable=False) self._count = self.add_weight('count', (1,), initializer=initializers.Zeros(), trainable=False) self._std = K.sqrt(self._var) super(RunningMeanStd, self).build(input_shape)
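# The build() above only allocates the running-statistics accumulators; the
# update rule is not shown in this snippet. A hedged NumPy sketch of the standard
# parallel mean/variance combination that such a layer typically applies per batch:
import numpy as np

def update_running_stats(mean, var, count, batch):
    batch_mean = batch.mean(axis=0)
    batch_var = batch.var(axis=0)
    batch_count = batch.shape[0]

    delta = batch_mean - mean
    total = count + batch_count
    new_mean = mean + delta * batch_count / total
    m2 = var * count + batch_var * batch_count + delta ** 2 * count * batch_count / total
    return new_mean, m2 / total, total

mean, var, count = np.zeros(3), np.ones(3), 1e-4
mean, var, count = update_running_stats(mean, var, count, np.random.randn(32, 3))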
def build(self, input_shape): self.input_spec = [InputSpec(shape=input_shape)] output_shape = (int(input_shape[self.axis]), ) gamma_initializer = initializers.Ones() beta_initializer = initializers.Zeros() self.gamma = K.variable(gamma_initializer(output_shape)) self.beta = K.variable(beta_initializer(output_shape)) self.trainable_weights = [self.gamma, self.beta]
def build(self, input_shape): self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:], initializer=initializers.Ones(), trainable=True) self.beta = self.add_weight(name='beta', shape=input_shape[-1:], initializer=initializers.Zeros(), trainable=True) super(LayerNormalization, self).build(input_shape)
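# A sketch of the call() that usually pairs with a build() like the one above:
# normalize over the last axis, then scale by gamma and shift by beta. The
# epsilon value is an assumed hyper-parameter, not taken from the original layer.
from keras import backend as K
from keras.layers import Layer
from keras import initializers

class LayerNormalizationSketch(Layer):
    def __init__(self, epsilon=1e-6, **kwargs):
        self.epsilon = epsilon
        super(LayerNormalizationSketch, self).__init__(**kwargs)

    def build(self, input_shape):
        self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:],
                                     initializer=initializers.Ones(),
                                     trainable=True)
        self.beta = self.add_weight(name='beta', shape=input_shape[-1:],
                                    initializer=initializers.Zeros(),
                                    trainable=True)
        super(LayerNormalizationSketch, self).build(input_shape)

    def call(self, x):
        mean = K.mean(x, axis=-1, keepdims=True)
        std = K.std(x, axis=-1, keepdims=True)
        return self.gamma * (x - mean) / (std + self.epsilon) + self.beta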
def controller_initializer(shape, *args, **kwargs): return K.concatenate([ initializers.Zeros()((self.batch_size, self.memory.shape[0]), *args, **kwargs), initializers.Ones()((self.batch_size, self.memory.shape[0]), *args, **kwargs), initializers.Zeros()((self.batch_size, self.memory.shape[0]), *args, **kwargs), initializers.Zeros()((self.batch_size, self.memory.shape[0]), *args, **kwargs), ])
def tp1_node_update(graph_node_embs, node_rel, node_rel_weight, max_nodes, max_bi_relations, embed_dim, label): """ graph_node_embs has shape (batch_size, max_nodes per graph, embed_dim feats). """ dense_dim = embed_dim x = gather_layer([graph_node_embs, node_rel]) logging.debug('After gather3 shape: {0}'.format(x.shape)) x = Reshape((max_nodes * max_bi_relations, 2 * embed_dim))(x) x = TimeDistributed( Dense( dense_dim, kernel_initializer=initializers.Ones(), bias_initializer=initializers.Zeros(), name=label + '_dense1'))(x) # TODO: re-enable the batch normalization. # x = BatchNormalization(axis=2, name=label + '_bn1')(x) x = Activation('relu')(x) x = TimeDistributed( Dense( dense_dim, kernel_initializer=initializers.Ones(), bias_initializer=initializers.Zeros(), name=label + '_dense2'))(x) # x = BatchNormalization(axis=2, name=label + '_bn2')(x) x = Activation('relu')(x) normalizer = Reshape((max_nodes * max_bi_relations,))(node_rel_weight) normalizer = RepeatVector(dense_dim)(normalizer) normalizer = Permute((2, 1))(normalizer) x = Multiply()([x, normalizer]) x = Reshape((max_nodes, max_bi_relations, dense_dim))(x) x = Lambda( lambda xin: K.sum(xin, axis=2), output_shape=(None, max_nodes * max_bi_relations, dense_dim), name=label + '_integrate')(x) return x
def build(self, input_shape): self.num_layers = input_shape[1] self.W = self.add_weight(shape=(self.num_layers, ), initializer=initializers.Zeros(), regularizer=regularizers.get( regularizers.l2(self.l2_coef)), name='{}_w'.format(self.name)) if self.scale: self.gamma = self.add_weight(shape=(1, ), initializer=initializers.Ones(), name='{}_gamma'.format(self.name)) super(WeightedAverage, self).build(input_shape)
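# The build() above creates per-layer mixing weights W and an optional scalar
# gamma, which matches an ELMo-style scalar mix. The original call() is not
# shown; the helper below is one plausible companion: softmax-normalize W over
# the layer axis, take the weighted sum, and optionally scale by gamma.
from keras import backend as K

def scalar_mix(inputs, W, gamma=None):
    # inputs: (batch, num_layers, ..., features); W: (num_layers,)
    w = K.softmax(W)
    w = K.reshape(w, (1, K.int_shape(W)[0]) + (1,) * (K.ndim(inputs) - 2))
    out = K.sum(inputs * w, axis=1)
    if gamma is not None:
        out = out * gamma
    return out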
def build(self, input_shape):
    # input_shape is (None, 40)
    print(input_shape)
    input_dim = input_shape[1]

    if self.H == 'Glorot':
        self.H = np.float32(np.sqrt(1.5 / (int(input_dim) + self.units)))
        # print('Glorot H: {}'.format(self.H))
    if self.w_lr_multiplier == 'Glorot':
        self.w_lr_multiplier = np.float32(
            1. / np.sqrt(1.5 / (int(input_dim) + self.units)))
        # print('Glorot learning rate multiplier: {}'.format(self.kernel_lr_multiplier))

    self.w_constraint = Clip(-self.H, self.H)
    # self.w_initializer = initializers.RandomUniform(-self.H, self.H)
    self.w_initializer = initializers.Ones()
    self.w_regularizer = regularizers.l2(0.01)
    self.w = self.add_weight(shape=(input_dim, self.units),
                             initializer=self.w_initializer,
                             name='w',
                             regularizer=self.w_regularizer,
                             constraint=self.w_constraint)
    # self.bw = self.add_weight(shape=(input_dim, self.units),
    #                           initializer=self.w_initializer,
    #                           name='bw',
    #                           regularizer=self.w_regularizer,
    #                           constraint=self.w_constraint)
    # self.bw = binarize(self.w, H=self.H)

    if self.use_bias:
        self.lr_multipliers = [self.w_lr_multiplier, self.bias_lr_multiplier]
        self.bias = self.add_weight(
            shape=(self.units, ),  # is this shape right???
            # Assume for now that each of these bias weights is different!
            initializer=self.w_initializer,
            name='bias')
        # regularizer=self.bias_regularizer,
        # constraint=self.bias_constraint)
    else:
        self.lr_multipliers = [self.w_lr_multiplier]
        self.bias = None

    self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
    self.built = True
    self.binary = binarize(self.w, H=self.H)
def cross_validation(model, X_train, X_val, y_train, y_val): def get_model(dropout=0.1, learning=0.1, kernel='uniform'): return create_model(X_train, dropout=dropout, learning=learning, kernel=kernel) # create the sklearn model for the network model_init_batch_epoch_CV = KerasRegressor(build_fn=get_model, verbose=1) # we choose the initializers that came at the top in our previous cross-validation!! zero = initializers.Zeros() ones = initializers.Ones() constant = initializers.Constant(value=0) rand = initializers.RandomNormal( mean=0.0, stddev=0.05, seed=None ) # cannot use this option for the moment, need to find the correct syntax uniform = 'uniform' kernel = [zero, ones, uniform] batches = [1000, 5000, 10000] epochs = [10, 30] dropout = [0.1, 0.2, 0.5] learning = [0.01, 0.001, 0.0001] # grid search for initializer, batch size and number of epochs param_grid = dict(batch_size=batches, epochs=epochs, dropout=dropout, kernel=kernel, learning=learning) grid = GridSearchCV(estimator=model_init_batch_epoch_CV, param_grid=param_grid, cv=3, n_jobs=-1) grid_result = grid.fit(X_train, y_train) # printresults print( f'Best Accuracy for {grid_result.best_score_:.4} using {grid_result.best_params_}' ) means = grid_result.cv_results_['mean_test_score'] stds = grid_result.cv_results_['std_test_score'] params = grid_result.cv_results_['params'] for mean, stdev, param in zip(means, stds, params): print(f'mean={mean:.4}, std={stdev:.4} using {param}')
def build_model(self): # Define input layer states = layers.Input(shape=(self.state_size, ), name='states') # Add hidden layer net_states = layers.Dense(units=1, kernel_initializer=initializers.Ones(), bias_initializer=initializers.Zeros(), activation=None)(states) # Add final output #net_states = layers.BatchNormalization()(net_states) value = layers.Activation('relu')(net_states) # Create Keras model self.model = models.Model(inputs=states, outputs=value) # Define optimizer and compile model for training with built-in loss function optimizer = optimizers.Adam() self.model.compile(optimizer=optimizer, loss='mse')
def build(self, input_shape): self.theta_p = self.add_weight( name='theta_p', shape=(self.n_centroids, ), initializer=initializers.Constant(1 / self.n_centroids), trainable=True, ) self.u_p = self.add_weight( name='u_p', shape=(self.latent_dims, self.n_centroids), initializer=initializers.he_uniform(), trainable=True, ) self.lambda_p = self.add_weight( name='lambda_p', shape=(self.latent_dims, self.n_centroids), initializer=initializers.Ones(), trainable=True, constraint=NonZero(), ) super(GMMLayer_2, self).build(input_shape)
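# NonZero is used above as a constraint on lambda_p but is not defined in this
# snippet. One plausible reading, sketched below, clips the weights to a small
# positive epsilon so they can never collapse to zero (reasonable for the
# variance-like lambda parameters of a GMM layer); the real constraint may differ.
from keras import backend as K
from keras.constraints import Constraint

class NonZeroSketch(Constraint):
    def __init__(self, epsilon=1e-8):
        self.epsilon = epsilon

    def __call__(self, w):
        return K.maximum(w, self.epsilon)

    def get_config(self):
        return {'epsilon': self.epsilon}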
def _init_model(self): in_me = layers.Input(shape=(2, )) me = layers.Dense(4, activation='relu')(in_me) me_out = layers.Dense(1, activation='tanh')(me) in_opponent = layers.Input(shape=(2, )) opponent = layers.Dense(4, activation='relu')(in_opponent) opponent_out = layers.Dense(1, activation='tanh')(opponent) merged = layers.subtract([me_out, opponent_out]) out = layers.Dense(1, kernel_initializer=initializers.Ones(), activation='tanh')(merged) model = models.Model(inputs=[in_me, in_opponent], outputs=out) model.compile(loss='mse', optimizer=Adam(lr=self.rate)) model.summary() # initialize known states model.fit(self._reshape((0, 10, 150, 20)), [[-1]]) model.fit(self._reshape((10, 0, 20, 150)), [[1]]) return model
model = Sequential() model.add( Dense(count, input_dim=input_dim, kernel_initializer=wi.Zeros(), bias_initializer=wi.Zeros())) plot_weights(weights=model.get_weights(), x=np.arange(0, count, 1), title='Zeros') model = Sequential() model.add( Dense(count, input_dim=input_dim, kernel_initializer=wi.Ones(), bias_initializer=wi.Ones())) plot_weights(weights=model.get_weights(), x=np.arange(0, count, 1), title='Ones') model = Sequential() model.add( Dense(count, input_dim=input_dim, kernel_initializer=wi.Constant(value=3.0), bias_initializer=wi.Constant(value=3.0))) plot_weights(weights=model.get_weights(), x=np.arange(0, count, 1), title='Constant(value=3.0)')
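# plot_weights() is called above but not defined in this snippet. A minimal
# matplotlib sketch consistent with the call sites (a weights list from
# model.get_weights(), x positions per unit, and a title); the exact plot the
# original produces may differ.
import matplotlib.pyplot as plt

def plot_weights(weights, x, title):
    kernel, bias = weights[0], weights[1]
    plt.plot(x, kernel.mean(axis=0), label='kernel (mean per unit)')
    plt.plot(x, bias, label='bias')
    plt.title(title)
    plt.legend()
    plt.show()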
) def test_parameters_by_signature(instance, signature_filter, params): assert parameters_by_signature(instance, signature_filter) == params ################################################## # `keras_initializer_to_dict` Scenarios ################################################## @pytest.mark.parametrize( ["initializer", "initializer_dict"], [ #################### Normal Initializers #################### pytest.param(initializers.zeros(), dict(class_name="zeros"), id="zero_0"), pytest.param(initializers.Zeros(), dict(class_name="zeros"), id="zero_1"), pytest.param(initializers.ones(), dict(class_name="ones"), id="one_0"), pytest.param(initializers.Ones(), dict(class_name="ones"), id="one_1"), pytest.param(initializers.constant(), dict(class_name="constant", value=0), id="c_0"), pytest.param(initializers.Constant(5), dict(class_name="constant", value=5), id="c_1"), pytest.param( initializers.RandomNormal(0.1), dict(class_name="random_normal", mean=0.1, stddev=0.05, seed=None), id="rn_0", ), pytest.param( initializers.random_normal(mean=0.2, stddev=0.003, seed=42), dict(class_name="random_normal", mean=0.2, stddev=0.003, seed=42), id="rn_1", ), pytest.param( initializers.RandomUniform(maxval=0.1), dict(class_name="random_uniform", minval=-0.05, maxval=0.1, seed=None),
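# keras_initializer_to_dict() itself is not shown in this snippet. Judging only
# from the expected dictionaries in the parametrized cases above, one way to
# implement it is to flatten keras.initializers.serialize() output (snake-cased
# class name plus the config entries); the real implementation may differ.
import re
from keras import initializers

def keras_initializer_to_dict(initializer):
    serialized = initializers.serialize(initializer)
    class_name = re.sub(r'(?<!^)(?=[A-Z])', '_', serialized['class_name']).lower()
    return dict(class_name=class_name, **serialized.get('config', {}))

assert keras_initializer_to_dict(initializers.Ones()) == dict(class_name='ones')
assert keras_initializer_to_dict(initializers.Constant(5)) == dict(class_name='constant', value=5)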
def build(self, dafm_type="dafm-afm", optimizer="rmsprop", learning_rate=0.01, activation="linear", Q_jk_initialize=0, section="", section_count=0, model1="", stateful=False, theta_student="False", student_count=0, binary="False"): skills = np.shape(Q_jk_initialize)[1] steps = np.shape(Q_jk_initialize)[0] self.activation = activation if '-' in self.activation: activation = self.custom_activation if dafm_type.split("_")[-1] == "different": skills = int(float(dafm_type.split("_")[-2]) * skills) dafm_type = dafm_type.split('_')[0] if dafm_type.split("_")[0] == "round-fine-tuned": try: self.round_threshold = float(dafm_type.split("_")[-1]) dafm_type = dafm_type.split("_")[0] except: pass q_jk_size = skills if '^' in dafm_type: q_jk_size = skills skills = int(float(dafm_type.split('^')[-1]) * skills) dafm_type = dafm_type.split('^')[0] self.dafm_type = dafm_type if dafm_type == "random-uniform" or dafm_type == "random-normal": qtrainable, finetuning, randomize = True, False, True self.random_init = dafm_type.split('-')[-1] elif dafm_type == "dafm-afm": qtrainable, finetuning, randomize = False, False, False elif dafm_type == "fine-tuned": qtrainable, finetuning, randomize = True, True, False elif dafm_type == "kcinitialize": qtrainable, finetuning, randomize = True, False, False elif dafm_type == "round-fine-tuned": # if not self.round_threshold == -1: # rounded_Qjk = np.abs(Q_jk1 - Q_jk_initialize) # Q_jk1[rounded_Qjk <= self.round_threshold] = Q_jk_initialize[rounded_Qjk <= self.round_threshold] # Q_jk1[rounded_Qjk > self.round_threshold] = np.ones(np.shape(Q_jk_initialize[rounded_Qjk > self.round_threshold])) - Q_jk_initialize[rounded_Qjk > self.round_threshold] # else: Q_jk1 = model1.get_layer("Q_jk").get_weights()[0] Q_jk1 = np.minimum( np.ones(np.shape(Q_jk1)), np.maximum(np.round(Q_jk1), np.zeros(np.shape(Q_jk1)))) model1.get_layer("Q_jk").set_weights([Q_jk1]) return model1 elif dafm_type == "qjk-dense": qtrainable, finetuning, randomize = False, False, False activation_dense = activation elif dafm_type == "random-qjk-dense-normal" or dafm_type == "random-qjk-dense-uniform": qtrainable, finetuning, randomize = False, False, True self.random_init = dafm_type.split('-')[-1] activation_dense = activation else: print("No Valid Model Found") sys.exit() if section == "onehot": section_input = Input(batch_shape=(None, None, section_count), name='section_input') if not theta_student == "False": student_input = Input(batch_shape=(None, None, student_count), name='student_input') virtual_input1 = Input(batch_shape=(None, None, 1), name='virtual_input1') if finetuning: B_k = TimeDistributed(Dense( skills, activation='linear', kernel_initializer=self.f( model1.get_layer("B_k").get_weights()[0]), use_bias=False), name="B_k")(virtual_input1) T_k = TimeDistributed(Dense( skills, activation='linear', kernel_initializer=self.f( model1.get_layer("T_k").get_weights()[0]), use_bias=False), name="T_k")(virtual_input1) bias_layer = TimeDistributed(Dense( 1, activation='linear', use_bias=False, kernel_initializer=self.f( model1.get_layer("bias").get_weights()[0]), trainable=True), name="bias")(virtual_input1) else: B_k = TimeDistributed(Dense(skills, activation='linear', use_bias=False, trainable=True), name="B_k")(virtual_input1) T_k = TimeDistributed(Dense(skills, activation='linear', use_bias=False, trainable=True), name="T_k")(virtual_input1) bias_layer = TimeDistributed(Dense( 1, activation='linear', use_bias=False, kernel_initializer=initializers.Zeros(), trainable=True), name="bias")(virtual_input1) 
step_input = Input(batch_shape=(None, None, steps), name='step_input') if randomize: if binary == "False": Q_jk = TimeDistributed(Dense( q_jk_size, use_bias=False, activation=activation, kernel_initializer=self.custom_random), trainable=qtrainable, name="Q_jk")(step_input) else: Q_jk = TimeDistributed(BinaryDense( q_jk_size, use_bias=False, activation=activation, kernel_initializer=self.custom_random), trainable=qtrainable, name="Q_jk")(step_input) else: if binary == "False": Q_jk = TimeDistributed(Dense( skills, activation=activation, kernel_initializer=self.f(Q_jk_initialize), use_bias=False, trainable=qtrainable), trainable=qtrainable, name="Q_jk")(step_input) else: Q_jk = TimeDistributed(BinaryDense( skills, activation=activation, kernel_initializer=self.f(Q_jk_initialize), trainable=qtrainable, use_bias=False), name="Q_jk", trainable=qtrainable)(step_input) if dafm_type == "random-qjk-dense-normal" or dafm_type == "random-qjk-dense-uniform": if binary == "False": Q_jk = TimeDistributed(Dense( skills, activation=activation_dense, use_bias=False, kernel_initializer=self.custom_random, trainable=True), name="Q_jk_dense")(Q_jk) else: Q_jk = TimeDistributed(BinaryDense( skills, activation=activation_dense, use_bias=False, kernel_initializer=self.custom_random, trainable=True), name="Q_jk_dense")(Q_jk) elif dafm_type == "qjk-dense": if binary == 'False': Q_jk = TimeDistributed(Dense( skills, activation=activation_dense, use_bias=False, kernel_initializer=initializers.Identity(), trainable=True), name="Q_jk_dense")(Q_jk) else: Q_jk = TimeDistributed(BinaryDense( skills, activation=activation_dense, use_bias=False, kernel_initializer=initializers.Identity(), trainable=True), name="Q_jk_dense")(Q_jk) else: pass Qjk_mul_Bk = multiply([Q_jk, B_k]) sum_Qjk_Bk = TimeDistributed(Dense( 1, activation='linear', trainable=False, kernel_initializer=initializers.Ones(), use_bias=False), trainable=False, name="sum_Qjk_Bk")(Qjk_mul_Bk) P_k = SimpleRNN(skills, kernel_initializer=initializers.Identity(), recurrent_initializer=initializers.Identity(), use_bias=False, trainable=False, activation='linear', return_sequences=True, name="P_k")(Q_jk) Qjk_mul_Pk_mul_Tk = multiply([Q_jk, P_k, T_k]) sum_Qjk_Pk_Tk = TimeDistributed( Dense(1, activation='linear', trainable=False, kernel_initializer=initializers.Ones(), use_bias=False), trainable=False, name="sum_Qjk_Pk_Tk")(Qjk_mul_Pk_mul_Tk) Concatenate = concatenate([bias_layer, sum_Qjk_Bk, sum_Qjk_Pk_Tk]) if not (theta_student == "False"): if finetuning: theta = TimeDistributed(Dense( 1, activation="linear", use_bias=False, kernel_initializer=self.f( model1.get_layer("theta").get_weights()[0])), name='theta')(student_input) else: theta = TimeDistributed(Dense(1, activation="linear", use_bias=False), name='theta')(student_input) Concatenate = concatenate([Concatenate, theta]) if section == "onehot": if finetuning: S_k = TimeDistributed(Dense( 1, activation="linear", use_bias=False, kernel_initializer=self.f( model1.get_layer("S_k").get_weights()[0])), name='S_k')(section_input) else: S_k = TimeDistributed(Dense(1, activation="linear", use_bias=False), name='S_k')(section_input) Concatenate = concatenate([Concatenate, S_k]) output = TimeDistributed(Dense(1, activation="sigmoid", trainable=False, kernel_initializer=initializers.Ones(), use_bias=False), trainable=False, name="output")(Concatenate) if section == "onehot" and not (theta_student == "False"): model = Model(inputs=[ virtual_input1, step_input, section_input, student_input ], outputs=output) elif section == 
"onehot" and theta_student == "False": model = Model(inputs=[virtual_input1, step_input, section_input], outputs=output) elif not (section == "onehot") and not (theta_student == "False"): model = Model(inputs=[virtual_input1, step_input, student_input], outputs=output) else: model = Model(inputs=[virtual_input1, step_input], outputs=output) d_optimizer = { "rmsprop": optimizers.RMSprop(lr=learning_rate), "adam": optimizers.Adam(lr=learning_rate), "adagrad": optimizers.Adagrad(lr=learning_rate) } model.compile(optimizer=d_optimizer[optimizer], loss=self.custom_bce) return model
def AddResidualBlock(x, each_channel_size, stepchanger=False):
    if stepchanger:
        shortcut = x
        x = layers.Conv2D(each_channel_size, kernel_size=common_filter_size,
                          strides=(2, 2), padding='same', activation='relu')(x)
        x = bn(x)
        x = layers.Conv2D(each_channel_size, kernel_size=common_filter_size,
                          strides=(1, 1), padding='same', activation='relu')(x)
        x = bn(x)
        # Took some time to figure out how to zero-pad the shortcut when the
        # number of channels increases.
        layer = layers.Conv2D(x.shape[3], kernel_size=2, strides=(2, 2),
                              use_bias=False,
                              kernel_initializer=initializers.Ones())  # Not learned!
        layer.trainable = False
        shortcut = layer(shortcut)
        shortcut = bn(shortcut)
        x = layers.add([shortcut, x])
    else:
        shortcut = x
        x = layers.Conv2D(each_channel_size, kernel_size=common_filter_size,
                          strides=(1, 1), padding='same', activation='relu')(x)
        x = bn(x)
        x = layers.Conv2D(each_channel_size, kernel_size=common_filter_size,
                          strides=(1, 1), padding='same', activation='relu')(x)
        x = bn(x)
        x = layers.add([shortcut, x])
    return x
def build_classifier(self):
    inputs = Input(shape=(dim_less, ))
    model = Dense(units=1, activation='sigmoid',
                  kernel_initializer=keras_init.Ones())(inputs)
    # model = Dense(units=1, activation='sigmoid')(inputs)
    return Model(inputs, model)
from keras.callbacks import History history = History() print("[INFO] loading dataset...") X_train = np.load('X_train.npy') X_val = np.load('X_val.npy') #X_test = np.load('X_test.npy') y_train = np.load('y_train.npy') y_val = np.load('y_val.npy') #y_test = np.load('y_test.npy') print("[INFO] build and compiling model...") zero = initializers.Zeros() ones = initializers.Ones() #constant = initializers.Constant(value=0 #rand = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)) #model.add(layers.BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, # beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros', # moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None, # beta_constraint=None, gamma_constraint=None)) relu = 'relu' #ly = LeakyReLU(alpha=0.05) #activation=ly, model = tf.keras.Sequential() model.add(
def create_model(X_train1, y_train1, X_train2, y_train2, X_train3, y_train3, X_train4, y_train4, X_train5, y_train5, X_train6, y_train6, X_train7, y_train7, X_train8, y_train8): # L1 = [{'batch_size': 1000, 'dropout': 0.2, 'epochs': 30, 'kernel': <keras.initializers.Ones object at 0x7f2532713128>, 'learning': 0.01}] # 1JHC ones = initializers.Ones() model1 = tf.keras.Sequential() model1.add( layers.Dense(64, input_dim=X_train1.shape[1], kernel_initializer=ones, activation='relu')) model1.add(layers.Dropout(0.2)) model1.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model1.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model1.add(layers.Dropout(0.2)) model1.add(layers.Dense(1)) # compile model rms = RMSprop(lr=0.01, rho=0.9, epsilon=None, decay=0.0) model1.compile(loss='mse', optimizer=rms, metrics=['mae']) model1.output_shape model1.summary() model1.get_config() model1.get_weights() print("[INFO] training model 1...") history = model1.fit(X_train1, y_train1, epochs=30, verbose=1, batch_size=1000) # list all data in history print(history.history.keys()) # we choose the initializers that came at the top in our previous cross-validation!! # zero = initializers.Zeros() # ones = initializers.Ones() # constant = initializers.Constant(value=0) # rand = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None) # cannot use this option for the moment, need to find the correct syntax # uniform = 'uniform' model1.save('model1.h5') print("[INFO] Preparing model 2...") # L2 = [{'batch_size': 1000, 'dropout': 0.2, 'epochs': 30, 'kernel': 'uniform', 'learning': 0.001}] # 2JHH model2 = tf.keras.Sequential() model2.add( layers.Dense(64, input_dim=X_train2.shape[1], kernel_initializer='uniform', activation='relu')) model2.add(layers.Dropout(0.2)) model2.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model2.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model2.add(layers.Dropout(0.2)) model2.add(layers.Dense(1)) # compile model rms = RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0) model2.compile(loss='mse', optimizer=rms, metrics=['mae']) model2.output_shape model2.summary() model2.get_config() model2.get_weights() print("[INFO] training model 2...") history = model2.fit(X_train2, y_train2, epochs=30, verbose=1, batch_size=1000) # list all data in history print(history.history.keys()) model2.save('model2.h5') print("[INFO] Preparing model 3...") # L3 = [{'batch_size': 1000, 'dropout': 0.2, 'epochs': 30, 'kernel': 'uniform', 'learning': 0.001}] # 1JHN # L4 = [{'batch_size': 1000, 'dropout': 0.1, 'epochs': 30, 'kernel': 'uniform', 'learning': 0.001}] # 2JHN # L5 = [{'batch_size': 1000, 'dropout': 0.1, 'epochs': 30, 'kernel': 'uniform', 'learning': 0.001}] # 2JHC # L6 = [{'batch_size': 1000, 'dropout': 0.1, 'epochs': 30, 'kernel': 'uniform', 'learning': 0.001}] # 3JHH # L7 = [{'batch_size': 1000, 'dropout': 0.1, 'epochs': 30, 'kernel': 'uniform', 'learning': 0.001}] # 3JHC # L8 = [{'batch_size': 1000, 'dropout': 0.1, 'epochs': 30, 'kernel': 'uniform', 'learning': 0.001}] # 3JHN model3 = tf.keras.Sequential() model3.add( layers.Dense(64, input_dim=X_train3.shape[1], kernel_initializer='uniform', activation='relu')) model3.add(layers.Dropout(0.2)) model3.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model3.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model3.add(layers.Dropout(0.2)) model3.add(layers.Dense(1)) # compile model rms = RMSprop(lr=0.001, rho=0.9, 
epsilon=None, decay=0.0) model3.compile(loss='mse', optimizer=rms, metrics=['mae']) model3.output_shape model3.summary() model3.get_config() model3.get_weights() print("[INFO] training model 3...") history = model3.fit(X_train3, y_train3, epochs=30, verbose=1, batch_size=1000) # list all data in history print(history.history.keys()) model3.save('model3.h5') print("[INFO] Preparing model 4...") # L4 = [{'batch_size': 1000, 'dropout': 0.1, 'epochs': 30, 'kernel': 'uniform', 'learning': 0.001}] # 2JHN # L5 = [{'batch_size': 1000, 'dropout': 0.1, 'epochs': 30, 'kernel': 'uniform', 'learning': 0.001}] # 2JHC # L6 = [{'batch_size': 1000, 'dropout': 0.1, 'epochs': 30, 'kernel': 'uniform', 'learning': 0.001}] # 3JHH # L7 = [{'batch_size': 1000, 'dropout': 0.1, 'epochs': 30, 'kernel': 'uniform', 'learning': 0.001}] # 3JHC # L8 = [{'batch_size': 1000, 'dropout': 0.1, 'epochs': 30, 'kernel': 'uniform', 'learning': 0.001}] # 3JHN model4 = tf.keras.Sequential() model4.add( layers.Dense(64, input_dim=X_train4.shape[1], kernel_initializer='uniform', activation='relu')) model4.add(layers.Dropout(0.1)) model4.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model4.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model4.add(layers.Dropout(0.1)) model4.add(layers.Dense(1)) # compile model rms = RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0) model4.compile(loss='mse', optimizer=rms, metrics=['mae']) model4.output_shape model4.summary() model4.get_config() model4.get_weights() print("[INFO] training model 4...") history = model4.fit(X_train4, y_train4, epochs=30, verbose=1, batch_size=1000) # list all data in history print(history.history.keys()) print("[INFO] Preparing model 5...") model4.save('model4.h5') model5 = tf.keras.Sequential() model5.add( layers.Dense(64, input_dim=X_train5.shape[1], kernel_initializer='uniform', activation='relu')) model5.add(layers.Dropout(0.1)) model5.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model5.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model5.add(layers.Dropout(0.1)) model5.add(layers.Dense(1)) # compile model rms = RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0) model5.compile(loss='mse', optimizer=rms, metrics=['mae']) model5.output_shape model5.summary() model5.get_config() model5.get_weights() print("[INFO] training model 5...") history = model5.fit(X_train5, y_train5, epochs=30, verbose=1, batch_size=1000) # list all data in history print(history.history.keys()) model5.save('model5.h5') print("[INFO] Preparing model 6...") model6 = tf.keras.Sequential() model6.add( layers.Dense(64, input_dim=X_train6.shape[1], kernel_initializer='uniform', activation='relu')) model6.add(layers.Dropout(0.1)) model6.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model6.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model6.add(layers.Dropout(0.1)) model6.add(layers.Dense(1)) # compile model rms = RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0) model6.compile(loss='mse', optimizer=rms, metrics=['mae']) model6.output_shape model6.summary() model6.get_config() model6.get_weights() print("[INFO] training model 6...") history = model6.fit(X_train6, y_train6, epochs=30, verbose=1, batch_size=1000) # list all data in history print(history.history.keys()) model6.save('model6.h5') print("[INFO] Preparing model 7...") model7 = tf.keras.Sequential() model7.add( layers.Dense(64, input_dim=X_train7.shape[1], 
kernel_initializer='uniform', activation='relu')) model7.add(layers.Dropout(0.1)) model7.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model7.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model7.add(layers.Dropout(0.1)) model7.add(layers.Dense(1)) # compile model rms = RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0) model7.compile(loss='mse', optimizer=rms, metrics=['mae']) model7.output_shape model7.summary() model7.get_config() model7.get_weights() print("[INFO] training model 7...") history = model7.fit(X_train7, y_train7, epochs=30, verbose=1, batch_size=1000) # list all data in history print(history.history.keys()) model7.save('model7.h5') print("[INFO] Preparing model 8...") model8 = tf.keras.Sequential() model8.add( layers.Dense(64, input_dim=X_train8.shape[1], kernel_initializer='uniform', activation='relu')) model8.add(layers.Dropout(0.1)) model8.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model8.add( layers.Dense(64, kernel_initializer='uniform', activation='relu')) model8.add(layers.Dropout(0.1)) model8.add(layers.Dense(1)) # compile model rms = RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0) model8.compile(loss='mse', optimizer=rms, metrics=['mae']) model8.output_shape model8.summary() model8.get_config() model8.get_weights() print("[INFO] training model 8...") history = model8.fit(X_train8, y_train8, epochs=30, verbose=1, batch_size=1000) # list all data in history print(history.history.keys()) model8.save('model8.h5')
def testing_network( world_size: [int], target_num: int, drone_goal: str, drone_num: int, extra_drone_num: int, world_gain_peak_range: [float], world_gain_var_range: [float], world_evolution_speed: [float], drone_comm: float, drone_view: float, drone_memory: int, drone_battery: float, action_step: int, max_age: int, lookahead_step: int, malus: float, final_bonus: float, malus_sm: float, random_episode: bool, alpha: float, alpha_dec: float, epsilon: float, epsilon_dec: float, temperature: float, temperature_dec: float, state_MR: int, limit_MR: bool, three_MR: bool, prioritized: bool, perc_pos_MR: float, perc_neg_MR1: float, perc_neg_MR2: float, pretrain_episode: int, train_episode: int, test_episode: int, step_num: int, testing_update: int, model_update: int, batch_update: int, batch_size: int, learning_rate: float, neurons_fully: int, drop_rate: float, dueling: bool, epochs_num: int, gamma: float, steps_per_epoch: int, verbose: bool, version: int): random.seed(100) initializers.Ones() env: DronesDiscreteEnv = gym.make('DronesDiscrete-v0') # Generate a world-map, where a groups of target is moving world = WorldMap(world_size, target_num, world_gain_peak_range, world_gain_var_range, world_evolution_speed) # Define the log file output_log = "Log_files/" # Initialize relay memory memory_replay = [] mem2 = [] prob_MR = [] pos_MR = [] neg_MR1 = [] neg_MR2 = [] mem_replay_with_loss = create_mem_replay_with_loss() # Initialize the success data vector success_vec = [] success_vec1 = [] success_episodes = [] Ac0 = [] Ac1 = [] Ac2 = [] Ac3 = [] Ac4 = [] A_0 = 0 A_1 = 0 A_2 = 0 A_3 = 0 A_4 = 0 SMR = [0, 0, 0, 0, 0] GSMR = [] AM = [0, 0, 0] AAM = [] VARIATIONS = [] VAR = [] VARQ = 0 MINQ = [] MINQV = 0 MAXQ = [] MAXQV = 0 MEANQ = [] MEANQV = 0 Mappa = np.zeros((world_size[0], world_size[0])) LOSSES = np.zeros((4, 5000)) # Setting neural network step_model = create_step_model(world_size[0], 5, learning_rate, neurons_fully, drop_rate, dueling) #step_model.load_weights('target_model23_v0.h5') #print (step_model.summary()) target_model = keras.models.clone_model(step_model) target_model.set_weights(step_model.get_weights()) #print (target_model.summary()) plot_model(step_model, to_file='model_plot.pdf', show_shapes=True, show_layer_names=True) # Setting early stopping and history #early = EarlyStopping(monitor='val_loss', patience=patience, verbose=0, mode='auto') history = LossHistory() H = [] # ------------------------ PRE-PROCESSING PHASE ------------------------ # Pre-training phase: # Use a random policy to choose actions # Save the sample [state, action, reward, new_state] in the replay memory # No training is carried out if prioritized == True: update_MR = True else: update_MR = False if state_MR == 0: pretraining_done = True else: pretraining_done = False if pretraining_done == True: #print("INITIALIZATION MEMORY-REPLAY...") for episode_index in tqdm(range(pretrain_episode)): counter = 0 #print("Episode index: ", episode_index) # Generate a random episode if random_episode: # Generate new map world = WorldMap(world_size, target_num, world_gain_peak_range, world_gain_var_range, world_evolution_speed) #print ("0") log_name = output_log + "env_pretrain_" + str( episode_index) + ".txt" log_file = open(log_name, "w+") # Configure new environment train = True env.configure_environment(world, drone_goal, drone_num, extra_drone_num, drone_comm, drone_view, drone_memory, drone_battery, action_step, max_age, lookahead_step, malus, final_bonus, log_file, verbose, train, malus_sm) #print ("1") # Get the 
initial state of the system # If needed, normalize the state as you desire state = env.get_state() z = 0 #print ("2") for step in range(step_num): for j in range(drone_num): own_map = state[j] others_map = np.zeros((world_size[0], world_size[0])) for i in range(drone_num): if i != j: others_map += state[i] #print ("3") Mappa[int(np.argmax(state[0]) / world_size[0]), np.argmax(state[0]) % world_size[0]] += 1 Mappa[int(np.argmax(state[1]) / world_size[0]), np.argmax(state[1]) % world_size[0]] += 1 number = env.get_available_targets() if number == 0: AM[0] += 1 elif number == 1: AM[1] += 1 else: AM[2] += 1 model_input_state = state[[0, 1, drone_num + 1], :] model_input_state[0] = own_map model_input_state[1] = others_map action = env.get_random_direction() # Random action for i in range(drone_num): if i != j: action[i] = 0 #print (action) env.action_direction(action) # Carry out a new action new_state = env.get_state() # New system state #print ("4") model_input_newstate = new_state[[0, 1, drone_num + 1], :] model_input_newstate[0] = new_state[j] model_input_newstate[1] = others_map explore_reward, exploit_reward = env.get_reward( ) # Obtained reward (exploit + explore rewards) reward = exploit_reward[j] # Overall reward (only exploit) if reward == -1: SMR[0] += 1 if reward == -0.4: SMR[1] += 1 if reward == 1 and np.mean(exploit_reward) < 1: SMR[3] += 1 if np.mean(exploit_reward) == 1: SMR[4] += 1 if exploit_reward[0] > -0.4 and exploit_reward[ 0] < 1 and exploit_reward[ 1] > -0.4 and exploit_reward[1] < 1: SMR[2] += 1 sample = [ model_input_state, [int(action[j])], model_input_newstate, reward ] # Sample to be saved in the memory memory_replay.append(sample) prob_MR.append([1, 0, 5]) state = new_state #print (counter) log_file.close() np.save('memory_replay', memory_replay) np.save('prob_MR', prob_MR) else: #print ("LOADING MEMORY REPLAY...") memory_replay = np.load('memory_replay.npy', allow_pickle=True) memory_replay = memory_replay.tolist() prob_MR = np.load('prob_MR.npy', allow_pickle=True) prob_MR = prob_MR.tolist() # Training phase # Make actions according to a epsilon greedy or softmax policy # Periodically train the neural network with batch taken from the replay memory #print("TRAINING PHASE...") AVG = [] MAX = [] def epsilon_func(x): e = (1 - 1 / (1 + np.exp(-x / 100))) * 0.8 + 0.2 if x > 400: e = e * ((499 - x) / 100) return e e = [] for i in range(1000): e.append(epsilon_func(i - 500)) st = [] for ep in range(1000): st.append(5000 / (ep + 1)**(1 / 3)) batch_size = 50 #print ("TRAINING PHASE") # ------------------------ PROCESSING PHASE ------------------------ negative_r1 = [] pos_r = [] pos_r2 = [] negative_r2 = [] null_r = [] counter1 = 0 counter2 = 0 counter3 = 0 counter4 = 0 counter5 = 0 COUNTS = [0, 0, 0, 0] VAR_COUNTS = [0, 0, 0, 0] for episode_index in tqdm(range(train_episode)): CMR0 = 0 CMR1 = 0 CMR2 = 0 CMR3 = 0 if episode_index % 5 == 0: COUNTS = [0, 0, 0, 0] VAR_COUNTS = [0, 0, 0, 0] AAM.append(AM) AM = [0, 0, 0] mem_replay_with_loss = create_mem_replay_with_loss() fit_input_temp = [] fit_output_temp = [] fit_actions_temp = [] counter_MR = 0 epsilon = epsilon_func(episode_index - 500) avg_avg_loss = 0 avg_max_loss = 0 iter_avg = 0 worst_states = [] GSMR.append(SMR) SMR = [0, 0, 0, 0] #print("\n Epsilon: ",epsilon) # ------------------------ TRAINING PHASE ------------------------ #print("Training episode ", episode_index, " with epsilon ", epsilon) # Generate a random episode if random_episode: # Generate new map world = WorldMap(world_size, target_num, 
world_gain_peak_range, world_gain_var_range, world_evolution_speed) log_name = output_log + "env_train_" + str(episode_index) + ".txt" log_file = open(log_name, "w") # Configure new environment train = True env.configure_environment(world, drone_goal, drone_num, extra_drone_num, drone_comm, drone_view, drone_memory, drone_battery, action_step, max_age, lookahead_step, malus, final_bonus, log_file, verbose, train, malus_sm) # Get the initial state of the system # If needed, normalize the state as you desire state = env.get_state() for step in range(step_num): for j in range(drone_num): model_input = state # The input might be different than the environment state model_input = np.asarray(model_input) number = env.get_available_targets() if number == 0: AM[0] += 1 elif number == 1: AM[1] += 1 else: AM[2] += 1 others_map = np.zeros((world_size[0], world_size[0])) for i in range(drone_num): if i != j: others_map += state[i] model_input = model_input[[0, 1, drone_num + 1], :] model_input[0] = state[j] model_input[1] = others_map model_input_state = copy.deepcopy(model_input) #model_input=np.asarray(model_input) model_input = model_input.reshape(1, 3, world_size[0], world_size[0]) #print (model_input) action = np.ndarray((1, 5)) for i in range(5): action[0, i] = 1 greedy_action = np.zeros(drone_num) greedy_action[j] = np.argmax( target_model.predict([model_input, action])) # Greedy action random_action = env.get_random_direction() # Random action for i in range(drone_num): if i != j: random_action[i] = 0 action_type = 0 if np.random.uniform(0, 1) < epsilon: action1 = random_action action = [] for i in range(drone_num): action.append(int(action1[i])) else: action = greedy_action action_type = 1 env.action_direction(action) # Carry out a new action new_state = env.get_state() # New system state explore_reward, exploit_reward = env.get_reward() # Obtained reward (exploit + explore rewards) reward = exploit_reward[j] # Overall reward (only exploit) model_input_new_state = copy.deepcopy(model_input_state) model_input_new_state[0] = new_state[j] model_input_new_state[drone_num] = new_state[drone_num + 1] sample = [ model_input_state, [int(action[j])], model_input_new_state, reward ] # Sample to be saved in the memory memory_replay.append(sample) prob_MR.append([1, 0, 5]) state = new_state if ( step + 1 ) % batch_update == 0: # Each "batch_update" steps, train the "step_model" NN # Choose a set of samples from the memory and insert them in the "samples" list: probability = np.random.rand(1) type_training = 0 if probability < 1.1 or len(mem2) < batch_size: samples_indexes = random.sample( list(range(len(memory_replay))), batch_size) for i in range(len(samples_indexes)): mem2.append(memory_replay[samples_indexes[i]]) samples = [] for s_index in samples_indexes: samples.append(memory_replay[s_index]) else: choices = np.arange(len(memory_replay)) probabilities = [] somma = 0 for i in range(len(prob_MR)): probabilities.append(prob_MR[i][0]) somma += prob_MR[i][0] probabilities = probabilities / somma samples_indexes = np.random.choice(choices, batch_size, p=probabilities) type_training = 1 samples = [] for s_index in samples_indexes: samples.append(memory_replay[s_index]) # Deep Q-learning approach fit_input = [] # Input batch of the model fit_output = [] # Desired output batch for the input fit_actions = [] fit_actions_predictions = [] for sample in samples: sample_state = sample[0] # Previous state sample_action = sample[1] # Action made sample_new_state = sample[2] # Arrival state sample_reward = 
sample[3] # Obtained reward sample_new_state = sample_new_state.reshape( 1, 3, world_size[0], world_size[0]) action = np.ndarray((1, 5)) for i in range(5): action[0, i] = 1 sample_goal = sample_reward + gamma * np.max( target_model.predict([sample_new_state, action])) sample_state = np.asarray(sample_state) sample_state = sample_state.reshape( 1, 3, world_size[0], world_size[0]) act = np.ndarray((1, 5)) for i in range(5): if i == sample_action[0]: act[0, i] = 1 else: act[0, i] = 0 sample_output = step_model.predict( [np.asarray(sample_state), action])[0] #print (sample_action) for i in range(5): if i == sample_action[0]: sample_output[i, 0] = (1 - alpha) * sample_output[ sample_action] + alpha * sample_goal else: sample_output[i, 0] = 0 #print (sample_state[0]) #print (sample_output) #print (act[0]) fit_input.append(sample_state[0]) # Input of the model fit_input_temp.append(sample_state[0]) fit_output.append(sample_output) # Output of the model fit_output_temp.append(sample_output) fit_actions.append(act[0]) fit_actions_temp.append(act[0]) fit_actions_predictions.append(action[0]) # Fit the model with the given batch step_model.fit( [np.asarray(fit_input), np.asarray(fit_actions)], np.asarray(fit_output), batch_size=None, epochs=epochs_num, steps_per_epoch=steps_per_epoch, callbacks=[history], verbose=0) mean_loss = np.mean(history.losses) #LOSSES[MR_type,episode_index]+=mean_loss H.append(history.losses) output = step_model.predict( [np.asarray(fit_input), np.asarray(fit_actions)]) total_output = step_model.predict([ np.asarray(fit_input), np.asarray(fit_actions_predictions) ]) loss = [] for i in range(batch_size): loss.append( (output[i][np.argmax(np.asarray(fit_actions[i]))] - np.asarray(fit_output[i][np.argmax( np.asarray(fit_actions[i]))]))**2) for i in range(batch_size): prob_MR[samples_indexes[i]][0] = loss[i][0] if prob_MR[samples_indexes[i]][2] != 5: if memory_replay[samples_indexes[i]][3] == -1: COUNTS[0] += 1 if np.argmax(total_output[i]) != prob_MR[ samples_indexes[i]][2]: VAR_COUNTS[0] += 1 prob_MR[samples_indexes[i]][2] = np.argmax( total_output[i]) elif memory_replay[samples_indexes[i]][3] == -0.4: COUNTS[1] += 1 if np.argmax(total_output[i]) != prob_MR[ samples_indexes[i]][2]: VAR_COUNTS[1] += 1 prob_MR[samples_indexes[i]][2] = np.argmax( total_output[i]) elif memory_replay[samples_indexes[i]][3] == 1: COUNTS[3] += 1 if np.argmax(total_output[i]) != prob_MR[ samples_indexes[i]][2]: VAR_COUNTS[3] += 1 prob_MR[samples_indexes[i]][2] = np.argmax( total_output[i]) else: COUNTS[2] += 1 if np.argmax(total_output[i]) != prob_MR[ samples_indexes[i]][2]: VAR_COUNTS[2] += 1 prob_MR[samples_indexes[i]][2] = np.argmax( total_output[i]) else: prob_MR[samples_indexes[i]][2] = np.argmax( total_output[i]) counter_MR += batch_size if ( step + 1 ) % model_update == 0: # Each "model_update" steps, substitute the target_model with the step_model target_model.set_weights(step_model.get_weights()) log_file.close() # Testing phase # Make actions ONLY according to the NN model output (temperature is 0) # No training is carried out # The results are compared with the lookahead policy implemented in the environment # ------------------------ TESTING PHASE ------------------------ #if episode_index%5==0 and episode_index>0: #VARIATIONS.append([VAR_COUNTS[0]/COUNTS[0],VAR_COUNTS[1]/COUNTS[1],VAR_COUNTS[2]/COUNTS[2],VAR_COUNTS[3]/COUNTS[3]]) if episode_index % testing_update == 0: #print("TESTING PHASE...") if episode_index > 0: Ac0.append(A_0 / 100) Ac1.append(A_1 / 100) Ac2.append(A_2 / 100) 
Ac3.append(A_3 / 100) Ac4.append(A_4 / 100) A_0 = 0 A_1 = 0 A_2 = 0 A_3 = 0 A_4 = 0 success_vec.append([]) success_vec1.append([]) mean_reward = 0 PERCENTS = 0 for test_index in range(test_episode): REWARD = 0 # Generate a random episode if random_episode: # Generate new map world = WorldMap(world_size, target_num, world_gain_peak_range, world_gain_var_range, world_evolution_speed) log_name = output_log + "env_test_" + str( episode_index) + ".txt" log_file = open(log_name, "w") # Configure new environment train = True env.configure_environment(world, drone_goal, drone_num, extra_drone_num, drone_comm, drone_view, drone_memory, drone_battery, action_step, max_age, lookahead_step, malus, final_bonus, log_file, verbose, train, malus_sm) # Get the initial state of the system # If needed, normalize the state as you desire state = env.get_state() for step in range(40): # env.render() # Choose always the greedy action using the target_model for j in range(drone_num): REWARD1 = 0 actions = np.ndarray((1, 5)) for i in range(5): actions[0, i] = 1 model_input = state own_map = state[j] model_input = np.asarray(model_input) model_input = model_input[[0, 1, drone_num + 1], :] others_map = np.zeros((world_size[0], world_size[0])) for i in range(drone_num): if i != j: others_map += state[i] model_input[1] = others_map model_input[0] = own_map model_input = model_input.reshape( 1, 3, world_size[0], world_size[0]) action = np.zeros(drone_num) action[j] = np.argmax( target_model.predict([model_input, actions])) env.action_direction(action) # Carry out a new action if action[0] == 0: A_0 += 1 elif action[0] == 1: A_1 += 1 elif action[0] == 2: A_2 += 1 elif action[0] == 3: A_3 += 1 else: A_4 += 1 new_state = env.get_state() # New system state explore_reward, exploit_reward = env.get_reward() reward = np.mean(exploit_reward) if np.mean(exploit_reward) == 1: counter2 += drone_num for i in range(drone_num): if exploit_reward[i] == -malus_sm: counter1 += 1 for i in range(drone_num): if exploit_reward[i] == -1: counter3 += 1 for i in range(drone_num): if exploit_reward[i] == 1 and np.mean( exploit_reward) < 1: counter4 += 1 for i in range(drone_num): if exploit_reward[i] < 1 and exploit_reward[i] > 0: counter5 += 1 REWARD += reward REWARD1 += reward state = new_state # Substitute the previous state with the new state #print (REWARD/30) success_vec[-1].append(REWARD / 80) if REWARD1 == 1: success_vec1[-1].append(1) PERCENTS += 1 else: success_vec1[-1].append(0) mean_reward += REWARD / 80 log_file.close() #print ("\n Success rate: \n") print(mean_reward / 100) #print (PERCENTS) negative_r1.append(counter1) pos_r.append(counter2) negative_r2.append(counter3) pos_r2.append(counter4) null_r.append(counter5) success_episodes.append(PERCENTS) print(PERCENTS) counter1 = 0 counter2 = 0 counter3 = 0 counter4 = 0 counter5 = 0 #print("\n Mean success ratio: ", np.mean(success_vec[-1])) # Decrease system temperatures log_name = output_log + "example" + str(episode_index) + ".txt" log_file = open(log_name, "w") env.close() log_file.close() target_model.save('target_model' + str(drone_num) + str(target_num) + '_v' + str(version) + '.h5') del step_model del target_model return success_vec, success_vec1, H, negative_r1, pos_r, negative_r2, pos_r2, null_r, Ac0, Ac1, Ac2, Ac3, Ac4, SMR, GSMR, AM, AAM, Mappa, VAR, MINQ, MAXQ, MEANQ, LOSSES, success_episodes, VARIATIONS
]  # ,'seasonEndYear','seasonWeek', 'N1', 'E1', 'SC3', 'SC2', 'D1', 'B1', 'I2', 'G1', 'E3', 'T1', 'EC','D2', 'F1', 'I1', 'P1', 'SP2', 'SP1', 'E2', 'SC0', 'E0', 'F2', 'SC1']

train_x = combined_data[predictors][
    combined_data['seasonEndYear'] <= train_to_season].values
train_y = ((combined_data['FTR'] == 'H') * 1)[
    combined_data['seasonEndYear'] <= train_to_season].values
test_x = combined_data[predictors][
    combined_data['seasonEndYear'] == (train_to_season + 1)].values

input_dimension = len(predictors)

model = Sequential()
model.add(Dense(input_dimension * 2, input_dim=input_dimension,
                activation='relu', kernel_initializer=initializers.Ones()))
model.add(Dense(input_dimension, input_dim=input_dimension, activation='relu'))
model.add(Dense(input_dimension, input_dim=input_dimension, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

optim_rmsprop = optimizers.RMSprop(lr=0.001, rho=0.9)
optim_sgd = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.5, nesterov=True)
optim_adagrad = optimizers.Adagrad(lr=0.01)

model.compile(loss='binary_crossentropy', optimizer=optim_adagrad,
              metrics=['accuracy'])
model.fit(train_x, train_y, epochs=40, batch_size=10)

nn_combined_outcomes = combined_data[[
def build(self, input_shape): input_dim = input_shape[-1] self.kernel = self.add_weight(shape=(input_dim, self.memdim * 3), name='kernel', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint) self.recurrent_kernel = self.add_weight( shape=(self.memdim, self.memdim * 3), name='recurrent_kernel', initializer=self.recurrent_initializer, regularizer=self.recurrent_regularizer, constraint=self.recurrent_constraint) self.W_q = self.add_weight(shape=(self.memdim, self.memdim), name='W_q', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint) self.W_k = self.add_weight(shape=(self.memdim, self.memdim), name='W_k', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint) self.W_v = self.add_weight(shape=(self.memdim, self.memdim), name='W_v', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint) self.mlp_kernel_1 = self.add_weight(shape=(self.memdim, self.memdim), name='mlp_kernel_1', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint) self.mlp_kernel_2 = self.add_weight(shape=(self.memdim, self.memdim), name='mlp_kernel_2', initializer=self.kernel_initializer, regularizer=self.kernel_regularizer, constraint=self.kernel_constraint) self.mlp_gain_1 = self.add_weight(shape=(self.memdim,), name='mlp_gain_1', initializer=initializers.Ones(), regularizer=None, constraint=None) self.mlp_gain_2 = self.add_weight(shape=(self.memdim,), name='mlp_gain_2', initializer=initializers.Ones(), regularizer=None, constraint=None) self.mlp_bias_1 = self.add_weight(shape=(self.memdim,), name='mlp_bias_1', initializer=initializers.Zeros(), regularizer=None, constraint=None) self.mlp_bias_2 = self.add_weight(shape=(self.memdim,), name='mlp_bias_2', initializer=initializers.Zeros(), regularizer=None, constraint=None) if self.use_bias: if self.unit_forget_bias: def bias_initializer(_, *args, **kwargs): return K.concatenate([ self.bias_initializer((self.memdim,), *args, **kwargs), initializers.Ones()((self.memdim,), *args, **kwargs), self.bias_initializer((self.memdim,), *args, **kwargs), ]) else: bias_initializer = self.bias_initializer self.bias = self.add_weight(shape=(self.memdim * 3,), name='bias', initializer=bias_initializer, regularizer=self.bias_regularizer, constraint=self.bias_constraint) else: self.bias = None self.kernel_i = self.kernel[:, :self.memdim] self.kernel_f = self.kernel[:, self.memdim: self.memdim * 2] self.kernel_o = self.kernel[:, self.memdim * 2:] self.recurrent_kernel_i = self.recurrent_kernel[:, :self.memdim] self.recurrent_kernel_f = self.recurrent_kernel[:, self.memdim: self.memdim * 2] self.recurrent_kernel_o = self.recurrent_kernel[:, self.memdim * 2:] if self.use_bias: self.bias_i = self.bias[:self.memdim] self.bias_f = self.bias[self.memdim: self.memdim * 2] self.bias_o = self.bias[self.memdim * 2:] else: self.bias_i = None self.bias_f = None self.bias_o = None self.built = True