def sparse_autoencoders(X, Y):
    """Train a sparse autoencoder on X and classify the learned codes.

    The data is split 80/20, pixel values are scaled by 1/255, and a
    784 -> 64 -> 784 dense autoencoder with an L1 activity penalty on the
    code layer is fitted for 10 epochs. The 64-unit code layer is then used
    to encode both splits, and the encoded features are handed to the
    ``model`` / ``predict`` helpers defined elsewhere in this file. The
    resulting accuracy is printed; nothing is returned.

    :param X: feature matrix; must support ``astype`` and have 784 columns
        to match the ``Input(shape=(784,))`` layer.
    :param Y: labels aligned with the rows of ``X``.
    """
    # Stratify on the labels that were actually passed in. The previous
    # version read a module-level ``data[:, 784]`` array, which silently
    # coupled this function to global state.
    # NOTE(review): assumes Y carries the same labels as that column - confirm.
    train_X, test_X, train_Y, test_Y = train_test_split(
        X, Y, test_size=0.2, stratify=Y, random_state=42)

    # Scale to [0, 1] so the sigmoid output and binary cross-entropy apply.
    train_X = train_X.astype('float32') / 255.0
    test_X = test_X.astype('float32') / 255.0

    n_h = 64
    input_img = Input(shape=(784, ))
    code = Dense(n_h, activation='relu',
                 activity_regularizer=regularizers.l1(10e-6))(input_img)
    output_img = Dense(784, activation='sigmoid')(code)

    # The autoencoder is trained to reproduce its own input.
    autoencoder = Model(input_img, output_img)
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
    autoencoder.fit(train_X, train_X, epochs=10)

    # Shares the trained layers; only maps inputs to the hidden code.
    encoded = Model(input_img, code)
    train_X = encoded.predict(train_X)
    test_X = encoded.predict(test_X)

    param = model(train_X, train_Y, 10, 0, [20])
    pred = predict(test_X, param)
    print('The accuracy of neural networks is',
          metrics.accuracy_score(pred, test_Y))
def build_fixed_layers_models(model: Model) -> List[Model]:
    """Build progressively frozen copies of ``model``.

    For every layer of ``model`` that has trainable weights, a clone is
    created in which that layer and all layers before it are marked
    non-trainable. Each clone is compiled with categorical cross-entropy,
    a default SGD optimizer and the accuracy metric, and then receives the
    weights of the source model.

    :param model: the model to clone.
    :return: the compiled clones, ordered by increasing number of frozen
        layers. Layers without trainable weights produce no clone.
    """
    source_weights = model.get_weights()
    frozen_variants: List[Model] = []

    for depth, source_layer in enumerate(model.layers, start=1):
        if source_layer.trainable_weights:
            variant = tf.keras.models.clone_model(model)

            # Freeze the first `depth` layers of the clone.
            for layer in variant.layers[:depth]:
                layer.trainable = False

            variant.compile(
                loss=tf.keras.losses.categorical_crossentropy,
                optimizer=tf.keras.optimizers.SGD(),
                metrics=['accuracy'],
            )
            # clone_model re-initialises weights, so copy the originals over.
            variant.set_weights(source_weights)
            frozen_variants.append(variant)

    return frozen_variants
def save(
    self,
    model: Model,
    include_optimizer: bool = False,
    update: bool = False,
    meta: Optional[dict] = None,
):
    """
    Persist a Tensorflow model into a TileDB array.

    :param model: Tensorflow model. Must be a Functional or Sequential
        model; anything else is rejected.
    :param include_optimizer: Boolean. Whether the optimizer is saved too.
    :param update: Boolean. When falsy, the TileDB array is created first;
        when truthy, the existing array at the target location is written to.
    :param meta: Dict. Extra metadata to store in the TileDB array.
    :raises NotImplementedError: if ``model`` is neither Functional nor
        Sequential.
    """
    supported_types = (Functional, Sequential)
    if not isinstance(model, supported_types):
        raise NotImplementedError(
            "No support for Subclassed models at the moment. Your "
            "model should be either Sequential or Functional.")

    # Pickle the weight list (protocol 4).
    weights_blob = pickle.dumps(model.get_weights(), protocol=4)

    # Optimizer state is serialized by a helper on this object.
    optimizer_blob = self._serialize_optimizer_weights(
        model=model, include_optimizer=include_optimizer)

    # A fresh save needs the array to exist before writing.
    needs_new_array = not update
    if needs_new_array:
        self._create_array()

    self._write_array(
        model=model,
        include_optimizer=include_optimizer,
        serialized_weights=weights_blob,
        serialized_optimizer_weights=optimizer_blob,
        meta=meta,
    )
def _left_half_mask(shape):
    """Return a boolean array over ``shape[:2]`` whose first half of axis 1 is True."""
    mask = np.full(shape[:2], False)
    mask[:, :shape[1] // 2] = True
    return mask


def layer_test(layer_cls,
               kwargs=None,
               input_shape=None,
               input_dtype=None,
               input_data=None,
               expected_output=None,
               expected_output_dtype=None,
               fixed_batch_size=False,
               supports_masking=False):
    """Smoke-test a layer: shape inference, prediction, serialization, training.

    The layer is instantiated from ``layer_cls(**kwargs)``, wrapped in a
    functional ``Model`` and run on random (or supplied) data. With
    ``supports_masking=True`` the input goes through ``Masking()`` and a
    boolean mask is fed as a second model input.

    :param layer_cls: layer class to instantiate.
    :param kwargs: constructor keyword arguments (``None`` means none).
    :param input_shape: input shape including the batch dimension; ``None``
        entries are replaced by a random size in [1, 4). Required when
        ``input_data`` is not given.
    :param input_dtype: dtype of the inputs; defaults to ``K.floatx()`` for
        generated data or to ``input_data.dtype``.
    :param input_data: optional concrete input array.
    :param expected_output: optional array compared with ``rtol=1e-3``.
    :param expected_output_dtype: expected dtype of the symbolic output;
        defaults to ``input_dtype``.
    :param fixed_batch_size: build ``Input`` with ``batch_shape`` instead of
        ``shape``.
    :param supports_masking: also feed a mask whose first half of axis 1 is
        True.
    :return: the output predicted by the functional model.
    :raises AssertionError: on missing ``input_shape``, dtype mismatch,
        shape mismatch, or (via ``assert_allclose``) value mismatch.
    """
    # Avoid sharing one dict between calls (mutable default argument).
    kwargs = {} if kwargs is None else kwargs

    # Defined up front so every path below has a mask list available.
    input_mask = []

    # generate input data
    if input_data is None:
        if not input_shape:
            raise AssertionError(
                'input_shape is required when input_data is not given')
        if not input_dtype:
            input_dtype = K.floatx()
        input_data_shape = list(input_shape)
        for i, e in enumerate(input_data_shape):
            if e is None:
                input_data_shape[i] = np.random.randint(1, 4)
        if all(isinstance(e, tuple) for e in input_data_shape):
            # A list of shape tuples means a multi-input layer.
            input_data = []
            for e in input_data_shape:
                input_data.append(
                    (10 * np.random.random(e)).astype(input_dtype))
                if supports_masking:
                    input_mask.append(_left_half_mask(e))
        else:
            input_data = (10 * np.random.random(input_data_shape))
            input_data = input_data.astype(input_dtype)
            if supports_masking:
                input_mask.append(_left_half_mask(input_data_shape))
    else:
        if input_shape is None:
            input_shape = input_data.shape
        if input_dtype is None:
            input_dtype = input_data.dtype
        if supports_masking:
            # Previously no mask existed on this path and the later
            # `input_mask[0]` lookup failed with a NameError.
            input_mask.append(_left_half_mask(input_data.shape))
    if expected_output_dtype is None:
        expected_output_dtype = input_dtype

    # The masked model has two inputs, so every call that feeds it data
    # (predict, recovered predict, train_on_batch) must pass both.
    # NOTE(review): only the first mask is fed, as in the original; the
    # multi-input + masking combination looks unsupported - confirm.
    if supports_masking:
        model_inputs = [input_data, input_mask[0]]
    else:
        model_inputs = input_data

    # instantiation
    layer = layer_cls(**kwargs)

    # test get_weights , set_weights at layer level
    weights = layer.get_weights()
    layer.set_weights(weights)

    # Fall back to the private method when the public one is unavailable.
    try:
        expected_output_shape = layer.compute_output_shape(input_shape)
    except Exception:
        expected_output_shape = layer._compute_output_shape(input_shape)

    # test in functional API
    if isinstance(input_shape, list):
        if fixed_batch_size:
            x = [Input(batch_shape=e, dtype=input_dtype) for e in input_shape]
            if supports_masking:
                mask = [
                    Input(batch_shape=e[0:2], dtype=bool) for e in input_shape
                ]
        else:
            x = [Input(shape=e[1:], dtype=input_dtype) for e in input_shape]
            if supports_masking:
                mask = [Input(shape=(e[1], ), dtype=bool) for e in input_shape]
    else:
        if fixed_batch_size:
            x = Input(batch_shape=input_shape, dtype=input_dtype)
            if supports_masking:
                mask = Input(batch_shape=input_shape[0:2], dtype=bool)
        else:
            x = Input(shape=input_shape[1:], dtype=input_dtype)
            if supports_masking:
                mask = Input(shape=(input_shape[1], ), dtype=bool)
    if supports_masking:
        y = layer(Masking()(x), mask=mask)
    else:
        y = layer(x)
    if not (K.dtype(y) == expected_output_dtype):
        raise AssertionError('output dtype', K.dtype(y),
                             'expected dtype', expected_output_dtype)

    # check with the functional API
    if supports_masking:
        model = Model([x, mask], y)
    else:
        model = Model(x, y)
    actual_output = model.predict(model_inputs)
    actual_output_shape = actual_output.shape
    for expected_dim, actual_dim in zip(expected_output_shape,
                                        actual_output_shape):
        # None marks a dimension the layer could not infer statically.
        if expected_dim is not None:
            if not (expected_dim == actual_dim):
                raise AssertionError("expected_shape", expected_output_shape,
                                     "actual_shape", actual_output_shape)
    if expected_output is not None:
        assert_allclose(actual_output, expected_output, rtol=1e-3)

    # test serialization, weight setting at model level
    model_config = model.get_config()
    recovered_model = model.__class__.from_config(model_config)
    if model.weights:
        weights = model.get_weights()
        recovered_model.set_weights(weights)
        _output = recovered_model.predict(model_inputs)
        assert_allclose(_output, actual_output, rtol=1e-3)

    # test training mode (e.g. useful when the layer has a
    # different behavior at training and testing time).
    if has_arg(layer.call, 'training'):
        model.compile('rmsprop', 'mse')
        model.train_on_batch(model_inputs, actual_output)

    # test instantiation from layer config
    layer_config = layer.get_config()
    layer_config['batch_input_shape'] = input_shape
    layer = layer.__class__.from_config(layer_config)

    # for further checks in the caller function
    return actual_output
class JointEmbeddingModel:
    """Joint code/description embedding model built from transformer encoders.

    ``build`` creates four Keras models:

    * ``code_repr_model``: method name, API sequence and tokens are each
      encoded, max-pooled over axis 1, passed through tanh, concatenated and
      projected with a dense tanh layer.
    * ``desc_repr_model``: the description is encoded, max-pooled and passed
      through tanh.
    * ``sim_model``: normalized dot product (cosine similarity) of the two
      representations.
    * ``training_model``: outputs ``max(1e-6, margin - (good_sim - bad_sim))``
      for a matching and a non-matching description.
    """

    def __init__(self, config):
        """Read hyper-parameters from ``config`` and create the training inputs.

        Also creates the directory ``<data_dir>model/<model_name>`` if it
        does not exist yet.
        """
        self.data_dir = config.data_dir
        self.model_name = config.model_name
        self.meth_name_len = config.methname_len  # the max length of method name
        self.apiseq_len = config.apiseq_len
        self.tokens_len = config.tokens_len
        self.desc_len = config.desc_len

        self.vocab_size = config.n_words  # the size of vocab
        self.embed_dims = config.embed_dims
        self.lstm_dims = config.lstm_dims
        self.hidden_dims = config.hidden_dims

        # Margin of the ranking loss built in `build`.
        self.margin = 0.05

        self.init_embed_weights_meth_name = config.init_embed_weights_methodname
        self.init_embed_weights_tokens = config.init_embed_weights_tokens
        self.init_embed_weights_desc = config.init_embed_weights_desc

        # Inputs of the training model (good and bad description per sample).
        self.meth_name = Input(shape=(self.meth_name_len,), dtype='int32', name='meth_name')
        self.apiseq = Input(shape=(self.apiseq_len,), dtype='int32', name='apiseq')
        self.tokens = Input(shape=(self.tokens_len,), dtype='int32', name='tokens2')
        self.desc_good = Input(shape=(self.desc_len,), dtype='int32', name='desc_good')
        self.desc_bad = Input(shape=(self.desc_len,), dtype='int32', name='desc_bad')

        if not os.path.exists(self.data_dir + 'model/' + self.model_name):
            os.makedirs(self.data_dir + 'model/' + self.model_name)

    @staticmethod
    def _pool_and_squash(sequence_out, pool_name, activation_name):
        """Max-pool ``sequence_out`` over axis 1 and apply tanh, with named layers."""
        maxpool = Lambda(lambda x: k.max(x, axis=1, keepdims=False),
                         output_shape=lambda x: (x[0], x[2]),
                         name=pool_name)
        pooled = maxpool(sequence_out)
        activation = Activation('tanh', name=activation_name)
        return activation(pooled)

    def build(self):
        """Create the encoders and the code, description, similarity and training models."""
        self.transformer_meth = transformer.EncoderModel(
            vocab_size=self.vocab_size, model_dim=self.hidden_dims,
            embed_dim=self.embed_dims, ffn_dim=self.lstm_dims,
            droput_rate=0.2, n_heads=2, max_len=self.meth_name_len,
            name='methT')
        self.transformer_apiseq = transformer.EncoderModel(
            vocab_size=self.vocab_size, model_dim=self.hidden_dims,
            embed_dim=self.embed_dims, ffn_dim=self.lstm_dims,
            droput_rate=0.2, n_heads=4, max_len=self.apiseq_len,
            name='apiseqT')
        self.transformer_desc = transformer.EncoderModel(
            vocab_size=self.vocab_size, model_dim=self.hidden_dims,
            embed_dim=self.embed_dims, ffn_dim=self.lstm_dims,
            droput_rate=0.2, n_heads=4, max_len=self.desc_len,
            name='descT')
        self.transformer_tokens = transformer.EncoderModel(
            vocab_size=self.vocab_size, model_dim=self.hidden_dims,
            embed_dim=self.embed_dims, ffn_dim=self.lstm_dims,
            droput_rate=0.2, n_heads=8, max_len=self.tokens_len,
            name='tokensT')

        # 1 -- CodeNN
        meth_name = Input(shape=(self.meth_name_len,), dtype='int32', name='meth_name')
        apiseq = Input(shape=(self.apiseq_len,), dtype='int32', name='apiseq')
        tokens3 = Input(shape=(self.tokens_len,), dtype='int32', name='tokens3')

        # method name: encoder -> max pooling -> tanh
        meth_name_out = self.transformer_meth(meth_name)
        method_name_repr = self._pool_and_squash(
            meth_name_out, 'maxpooling_methodname', 'active_method_name')

        # apiseq: encoder -> max pooling -> tanh
        apiseq_out = self.transformer_apiseq(apiseq)
        apiseq_repr = self._pool_and_squash(
            apiseq_out, 'maxpooling_apiseq', 'active_apiseq')

        # tokens: encoder -> max pooling -> tanh
        tokens_out = self.transformer_tokens(tokens3)
        tokens_repr = self._pool_and_squash(
            tokens_out, 'maxpooling_tokens', 'active_tokens')

        # fusion method_name, apiseq, tokens
        merge_method_name_api = Concatenate(name='merge_methname_api')(
            [method_name_repr, apiseq_repr])
        merge_code_repr = Concatenate(name='merge_code_repr')(
            [merge_method_name_api, tokens_repr])

        code_repr = Dense(self.hidden_dims, activation='tanh',
                          name='dense_coderepr')(merge_code_repr)

        self.code_repr_model = Model(inputs=[meth_name, apiseq, tokens3],
                                     outputs=[code_repr],
                                     name='code_repr_model')
        self.code_repr_model.summary()

        # Debug model exposing the raw output of the 'tokensT' encoder.
        self.output = Model(
            inputs=self.code_repr_model.input,
            outputs=self.code_repr_model.get_layer('tokensT').output)
        self.output.summary()

        # 2 -- description
        desc = Input(shape=(self.desc_len,), dtype='int32', name='desc')

        # desc: encoder -> max pooling -> tanh
        desc_out = self.transformer_desc(desc)
        desc_repr = self._pool_and_squash(
            desc_out, 'maxpooling_desc', 'active_desc')

        self.desc_repr_model = Model(inputs=[desc], outputs=[desc_repr],
                                     name='desc_repr_model')
        self.desc_repr_model.summary()

        # 3 -- cosine similarity
        code_repr = self.code_repr_model([meth_name, apiseq, tokens3])
        desc_repr = self.desc_repr_model([desc])

        cos_sim = Dot(axes=1, normalize=True, name='cos_sim')(
            [code_repr, desc_repr])

        sim_model = Model(inputs=[meth_name, apiseq, tokens3, desc],
                          outputs=[cos_sim], name='sim_model')
        self.sim_model = sim_model
        self.sim_model.summary()

        # 4 -- build training model
        good_sim = sim_model(
            [self.meth_name, self.apiseq, self.tokens, self.desc_good])
        bad_sim = sim_model(
            [self.meth_name, self.apiseq, self.tokens, self.desc_bad])
        # Hinge-style ranking loss, floored at 1e-6.
        loss = Lambda(
            lambda x: k.maximum(1e-6, self.margin - (x[0] - x[1])),
            output_shape=lambda x: x[0], name='loss')([good_sim, bad_sim])

        self.training_model = Model(
            inputs=[self.meth_name, self.apiseq, self.tokens,
                    self.desc_good, self.desc_bad],
            outputs=[loss], name='training_model')
        self.training_model.summary()

    def compile(self, optimizer, **kwargs):
        """Compile all four models with a fixed Nesterov SGD optimizer.

        NOTE: the ``optimizer`` argument is accepted for interface
        compatibility but is replaced by ``SGD(lr=0.0001, momentum=0.9,
        nesterov=True)``; extra ``kwargs`` are forwarded to every compile.
        """
        optimizer = keras.optimizers.SGD(lr=0.0001, momentum=0.9, nesterov=True)
        self.code_repr_model.compile(loss='cosine_proximity', optimizer=optimizer, **kwargs)
        self.desc_repr_model.compile(loss='cosine_proximity', optimizer=optimizer, **kwargs)
        # The training model already outputs the loss; y_true cancels out and
        # is only present so the expression depends on both arguments.
        self.training_model.compile(
            loss=lambda y_true, y_pred: y_pred + y_true - y_true,
            optimizer=optimizer, **kwargs)
        self.sim_model.compile(loss='binary_crossentropy', optimizer=optimizer, **kwargs)

    def fit(self, x, **kwargs):
        """Fit the training model on ``x`` with all-zero float32 dummy targets.

        One target is created per row of ``x[0]``.
        """
        y = np.zeros(shape=x[0].shape[:1], dtype=np.float32)
        return self.training_model.fit(x, y, **kwargs)

    def getOutput(self, x):
        """Print the 'tokensT' encoder output for ``x`` (debug helper)."""
        print(self.output.predict(x))

    def repr_code(self, x, **kwargs):
        """Return the code representation predicted for ``x``."""
        return self.code_repr_model.predict(x, **kwargs)

    def repr_desc(self, x, **kwargs):
        """Return the description representation predicted for ``x``."""
        return self.desc_repr_model.predict(x, **kwargs)

    def predict(self, x, **kwargs):
        """Return the cosine similarity predicted by ``sim_model`` for ``x``."""
        return self.sim_model.predict(x, **kwargs)

    @staticmethod
    def _dump_weights(path, prefix, model):
        """Write each weight array of ``model`` to ``path`` as dataset ``<prefix><i>``."""
        # The context manager closes the file even if a write fails.
        with h5py.File(path, 'w') as file:
            for i, weight in enumerate(model.get_weights()):
                file.create_dataset(prefix + str(i), data=weight)

    @staticmethod
    def _restore_weights(path, prefix, model):
        """Read datasets ``<prefix>0..n-1`` from ``path`` and set them on ``model``."""
        with h5py.File(path, 'r') as file:
            # Index by number rather than iterating keys so the order matches
            # the order used when saving.
            weights = [file[prefix + str(i)][:]
                       for i in range(len(file.keys()))]
            model.set_weights(weights)

    def save(self, code_model_file, desc_model_file, **kwargs):
        """Save code and description model weights to two HDF5 files.

        ``kwargs`` is accepted for interface compatibility and is unused.
        """
        self._dump_weights(code_model_file, 'weight_code', self.code_repr_model)
        self._dump_weights(desc_model_file, 'weight_desc', self.desc_repr_model)

    def load(self, code_model_file, desc_model_file, **kwargs):
        """Load code and description model weights written by ``save``.

        ``kwargs`` is accepted for interface compatibility and is unused.
        """
        self._restore_weights(code_model_file, 'weight_code', self.code_repr_model)
        self._restore_weights(desc_model_file, 'weight_desc', self.desc_repr_model)
def layer_test(layer_cls,
               kwargs=None,
               input_shape=None,
               input_dtype=None,
               input_data=None,
               expected_output=None,
               expected_output_dtype=None,
               fixed_batch_size=False):
    """Smoke-test a layer: shape inference, prediction, serialization, training.

    The layer is instantiated from ``layer_cls(**kwargs)``, wrapped in a
    functional ``Model`` and run on random (or supplied) data.

    :param layer_cls: layer class to instantiate.
    :param kwargs: constructor keyword arguments (``None`` means none).
    :param input_shape: input shape including the batch dimension; ``None``
        entries are replaced by a random size in [1, 4). Required when
        ``input_data`` is not given.
    :param input_dtype: dtype of the inputs; defaults to ``K.floatx()`` for
        generated data or to ``input_data.dtype``.
    :param input_data: optional concrete input array.
    :param expected_output: optional array compared with ``rtol=1e-3``.
    :param expected_output_dtype: expected dtype of the symbolic output;
        defaults to ``input_dtype``.
    :param fixed_batch_size: build ``Input`` with ``batch_shape`` instead of
        ``shape``.
    :return: the output predicted by the functional model.
    :raises AssertionError: on missing ``input_shape``, dtype mismatch,
        shape mismatch, or (via ``assert_allclose``) value mismatch.
    """
    # Avoid sharing one dict between calls (mutable default argument).
    kwargs = {} if kwargs is None else kwargs

    # generate input data
    if input_data is None:
        if not input_shape:
            raise AssertionError(
                'input_shape is required when input_data is not given')
        if not input_dtype:
            input_dtype = K.floatx()
        input_data_shape = list(input_shape)
        for i, e in enumerate(input_data_shape):
            if e is None:
                input_data_shape[i] = np.random.randint(1, 4)
        if all(isinstance(e, tuple) for e in input_data_shape):
            # A list of shape tuples means a multi-input layer.
            input_data = [
                (10 * np.random.random(e)).astype(input_dtype)
                for e in input_data_shape
            ]
        else:
            input_data = (10 * np.random.random(input_data_shape))
            input_data = input_data.astype(input_dtype)
    else:
        if input_shape is None:
            input_shape = input_data.shape
        if input_dtype is None:
            input_dtype = input_data.dtype
    if expected_output_dtype is None:
        expected_output_dtype = input_dtype

    # instantiation
    layer = layer_cls(**kwargs)

    # test get_weights , set_weights at layer level
    weights = layer.get_weights()
    layer.set_weights(weights)

    # Fall back to the private method when the public one is unavailable.
    try:
        expected_output_shape = layer.compute_output_shape(input_shape)
    except Exception:
        expected_output_shape = layer._compute_output_shape(input_shape)

    # test in functional API
    if isinstance(input_shape, list):
        if fixed_batch_size:
            x = [Input(batch_shape=e, dtype=input_dtype) for e in input_shape]
        else:
            x = [Input(shape=e[1:], dtype=input_dtype) for e in input_shape]
    else:
        if fixed_batch_size:
            x = Input(batch_shape=input_shape, dtype=input_dtype)
        else:
            x = Input(shape=input_shape[1:], dtype=input_dtype)
    y = layer(x)
    if not (K.dtype(y) == expected_output_dtype):
        raise AssertionError('output dtype', K.dtype(y),
                             'expected dtype', expected_output_dtype)

    # check with the functional API
    model = Model(x, y)
    actual_output = model.predict(input_data)
    actual_output_shape = actual_output.shape
    for expected_dim, actual_dim in zip(expected_output_shape,
                                        actual_output_shape):
        # None marks a dimension the layer could not infer statically.
        if expected_dim is not None:
            if not (expected_dim == actual_dim):
                raise AssertionError('expected_shape', expected_output_shape,
                                     'actual_shape', actual_output_shape)
    if expected_output is not None:
        assert_allclose(actual_output, expected_output, rtol=1e-3)

    # test serialization, weight setting at model level
    model_config = model.get_config()
    recovered_model = model.__class__.from_config(model_config)
    if model.weights:
        weights = model.get_weights()
        recovered_model.set_weights(weights)
        _output = recovered_model.predict(input_data)
        assert_allclose(_output, actual_output, rtol=1e-3)

    # test training mode (e.g. useful when the layer has a
    # different behavior at training and testing time).
    if has_arg(layer.call, 'training'):
        model.compile('rmsprop', 'mse')
        model.train_on_batch(input_data, actual_output)

    # test instantiation from layer config
    layer_config = layer.get_config()
    layer_config['batch_input_shape'] = input_shape
    layer = layer.__class__.from_config(layer_config)

    # for further checks in the caller function
    return actual_output
def _serialize_model_weights(model: Model) -> bytes:
    """
    Pickle the weight list returned by ``model.get_weights()``.

    :param model: model whose weights are serialized.
    :return: the pickled weights (pickle protocol 4).
    """
    weights = model.get_weights()
    serialized = pickle.dumps(weights, protocol=4)
    return serialized
# Keras stores parameters as a flat [kernel, bias, kernel, bias, ...] list,
# so interleave the initial weights with the flattened initial biases.
model.set_weights([
    layer
    for weight, bias in zip(deepcopy(initial_weights), deepcopy(initial_biases))
    for layer in (weight, bias.reshape(-1))
])
history = model.fit(
    np.array(deepcopy(inputs)),
    np.array(deepcopy(targets)),
    epochs=epochs,
    # Keras documents `verbose` as "auto", 0, 1 or 2; the string "0" was
    # not one of the accepted values.
    verbose=0,
)
# De-interleave the trained parameters again: even slots are kernels,
# odd slots are biases.
keras_vals = {
    "weights": model.get_weights()[::2],
    "biases": model.get_weights()[1::2],
}


def test_learned_something() -> None:
    """Ensure that the weights and biases are changing."""
    # Each learned parameter group must be compared with its own initial
    # values; previously the biases were compared against initial_weights.
    initial_vals = {"weights": initial_weights, "biases": initial_biases}
    for key in ["weights", "biases"]:
        assert len(keras_vals[key]) == len(tf_vals[key])
        for idx in range(len(initial_weights)):
            # Flatten both sides: biases are stored 1-D by Keras (they are
            # reshaped with reshape(-1) above) while the initial values may
            # have another layout.
            initial = np.ravel(initial_vals[key][idx])
            assert not np.allclose(np.ravel(keras_vals[key][idx]), initial)
            assert not np.allclose(np.ravel(tf_vals[key][idx]), initial)


def test_weights_and_biases() -> None:
    """Ensure tf weights and biases match keras."""
# Stateful two-layer LSTM with three heads (linear, sigmoid, softmax) fed by
# a single scalar input; the batch size is fixed at 6 because the LSTMs are
# stateful.
# NOTE: `input` and `re` shadow a builtin and a stdlib module name; the names
# are kept because code outside this section may refer to them.
input = Input(shape=(1, ), batch_size=6)
re = Reshape(target_shape=(1, 1))(input)
rnn_1 = LSTM(128, stateful=True, return_sequences=True)(re)
rnn_2 = LSTM(128, stateful=True, return_sequences=False)(rnn_1)
output_1 = Dense(1, activation='linear')(rnn_2)
output_2 = Dense(1, activation='sigmoid')(rnn_2)
output_3 = Dense(3, activation='softmax')(rnn_2)
model = Model(inputs=input, outputs=[output_1, output_2, output_3])

adam = Adam(lr=0.001)
model.compile(loss="mse", optimizer=adam)

# Replace every weight array with uniform noise in [-5, 5) of the same shape.
raw_weights = model.get_weights()
new_weights = [np.random.uniform(-5, 5, raw.shape) for raw in raw_weights]
# Pass the plain list: the arrays have different shapes, so wrapping them in
# np.array() builds a ragged array, which current NumPy rejects.
model.set_weights(new_weights)

one, tow, three = model.predict([1, 2, 3, 4, 5, 6])
one = one.tolist()
tow = tow.tolist()
three = three.tolist()
print('predict: [1, 2, 3, 4, 5, 6]')
class CriticNetwork(object):
    """
    Input to the network is the state and action, output is Q(s,a).
    The action must be obtained from the output of the Actor network.

    Subclasses implement ``create_critic_network``; this base class wires
    the returned tensors into an online and a target Keras ``Model`` and
    builds the TensorFlow loss, optimizer and action-gradient ops.
    """

    def __init__(self, sess, root_net, inputs, state_dim, action_dim,
                 learning_rate, tau):
        """Build online/target critic models and the training ops.

        :param sess: TensorFlow session used by all ``sess.run`` calls.
        :param root_net: passed to ``create_critic_network`` for the online
            network (the target network is created with ``root_net=None``).
        :param inputs: state input tensor, shared by both networks.
        :param state_dim: state dimensions; must be a list.
        :param action_dim: action dimensions; must be a list.
        :param learning_rate: learning rate of the Adam optimizer.
        :param tau: stored on the instance; not used by the methods of this
            class.
        """
        self.sess = sess

        assert isinstance(state_dim, list), 'state_dim must be a list.'
        self.s_dim = state_dim
        assert isinstance(action_dim, list), 'action_dim must be a list.'
        self.a_dim = action_dim

        self.learning_rate = learning_rate
        self.tau = tau

        # Input: online and target network read the same state tensor.
        self.inputs = inputs
        self.target_inputs = inputs

        # Create the critic network
        self.action, self.out = self.create_critic_network(root_net=root_net)
        self.critic_model = Model(inputs=[inputs, self.action],
                                  outputs=self.out)

        # Target Network
        self.target_action, self.target_out = self.create_critic_network(
            root_net=None)
        self.target_critic_model = Model(
            inputs=[self.target_inputs, self.target_action],
            outputs=self.target_out)

        # Network target (y_i)
        self.predicted_q_value = tf.placeholder(tf.float32, [None, 1])

        # Define loss and optimization Op: mean squared error between the
        # fed target and the critic output.
        self.loss = tf.reduce_mean(
            tf.square(self.predicted_q_value - self.out))
        self.optimize = tf.train.AdamOptimizer(self.learning_rate).minimize(
            self.loss)

        # Get the gradient of the net w.r.t. the action.
        # For each action in the minibatch (i.e., for each x in xs),
        # this will sum up the gradients of each critic output in the
        # minibatch w.r.t. that action. Each output is independent of all
        # actions except for one.
        self.action_grads = tf.gradients(self.out, self.action)

    def create_critic_network(self, root_net):
        """Build the critic graph; must be overridden.

        Implementations return the pair ``(action, out)``, which is what
        ``__init__`` unpacks.
        """
        # The message previously listed three return values, which did not
        # match the two-value unpacking in __init__.
        raise NotImplementedError(
            'Create critic should return (action, out)')

    def train(self, inputs, action, predicted_q_value):
        """Run one optimizer step; returns ``[out, optimize]`` results."""
        return self.sess.run(
            [self.out, self.optimize],
            feed_dict={
                self.inputs: inputs,
                self.action: action,
                self.predicted_q_value: predicted_q_value
            })

    def predict(self, inputs, action):
        """Evaluate the online critic output for the given state/action."""
        return self.sess.run(self.out,
                             feed_dict={
                                 self.inputs: inputs,
                                 self.action: action
                             })

    def predict_target(self, inputs, action):
        """Evaluate the target critic output for the given state/action."""
        return self.sess.run(self.target_out,
                             feed_dict={
                                 self.target_inputs: inputs,
                                 self.target_action: action
                             })

    def action_gradients(self, inputs, actions):
        """Evaluate the gradient of the critic output w.r.t. the action."""
        return self.sess.run(self.action_grads,
                             feed_dict={
                                 self.inputs: inputs,
                                 self.action: actions
                             })

    def update_target_network(self):
        """Copy the online critic weights into the target critic.

        NOTE(review): this is a full copy; ``self.tau`` is not applied
        here - confirm whether a tau-weighted update was intended.
        """
        self.target_critic_model.set_weights(self.critic_model.get_weights())
class DDPG(object): def __init__(self, env, sess, actor_noise, obs_normalizer=None, action_processor=None, predictor_type="cnn", use_batch_norm=False, load_root_model=False, config=DEFAULT_CONFIG): self.config = config assert self.config['max step'] > self.config[ 'batch size'], 'Max step must be bigger than batch size' self.episode = self.config["episode"] self.actor_learning_rate = self.config["actor learning rate"] self.critic_learning_rate = self.config["critic learning rate"] self.tau = self.config["tau"] self.gamma = self.config["gamma"] self.batch_size = self.config['batch size'] self.action_processor = action_processor np.random.seed(self.config['seed']) if env: env.seed(self.config['seed']) self.sess = sess # if env is None, then DDPG just predicts self.env = env self.actor_noise = actor_noise # share state input has_complex_state = ( isinstance(self.env.observation_space, gym.spaces.Dict) or isinstance(self.env.observation_space, gym.spaces.Tuple)) if obs_normalizer and has_complex_state: state_input = Input( shape=self.env.observation_space.spaces[obs_normalizer].shape, name="state_input") else: state_input = Input(shape=self.env.observation_space.shape, name="state_input") target_state_input = Input( shape=self.env.observation_space.spaces[obs_normalizer].shape, name="target_state_input") self.obs_normalizer = obs_normalizer # shape action_dim = env.action_space.shape[0] nb_assets = state_input.shape[1] window_length = state_input.shape[2] nb_features = state_input.shape[3] # paths self.model_save_path = get_model_path(window_length=window_length, predictor_type=predictor_type, use_batch_norm=use_batch_norm) self.summary_path = get_result_path( window_length=window_length, predictor_type=predictor_type, use_batch_norm=use_batch_norm) + "/" + datetime.now().strftime( "%Y-%m-%d-%H%M%S") self.root_model_save_path = get_root_model_path( window_length, predictor_type, use_batch_norm) # feature extraction self.predictor_type = predictor_type 
self.use_batch_norm = use_batch_norm root_net = RootNetwork(inputs=state_input, predictor_type=self.predictor_type, use_batch_norm=self.use_batch_norm).net self.root_model = Model(state_input, root_net) if load_root_model == True: try: self.root_model.load_weights(self.root_model_save_path) for layer in self.root_model.layers: layer.trainable = False except: print("ERROR while loading root model ", self.root_model_save_path) else: pass variable_summaries(root_net, "Root_Output") #array_variable_summaries(self.root_model.layers[1].weights, "Root_Input_1") #array_variable_summaries(self.root_model.layers[2].weights, "Root_Input_2") #array_variable_summaries(self.root_model.layers[-1].weights, "Root_Output_2") target_root_net = RootNetwork(inputs=target_state_input, predictor_type=predictor_type, use_batch_norm=use_batch_norm).net self.target_root_model = Model(target_state_input, target_root_net) if load_root_model == True: try: self.target_root_model.load_weights(self.root_model_save_path) for layer in self.target_root_model.layers: layer.trainable = False except: print("ERROR while loading root model ", self.root_model_save_path) else: pass self.target_root_model.set_weights(self.root_model.get_weights()) # ===================================================================== # # Actor Model # # Chain rule: find the gradient of changing the actor network params in # # getting closest to the final value network predictions, i.e. 
de/dA # # Calculate de/dA as = de/dC * dC/dA, where e is error, C critic, A act # # ===================================================================== # self.actor_state_input, self.actor_model = Actor( state_input=state_input, root_net=root_net, action_dim=action_dim).references() _, self.target_actor_model = Actor(state_input=target_state_input, root_net=target_root_net, action_dim=action_dim).references() # summary #array_variable_summaries(self.actor_model.layers[-1].weights, "Actor_Output") #actor_model_weights = self.actor_model.trainable_weights #self.actor_grads = K.gradients(self.actor_model.output,actor_model_weights) # dC/dA (from actor) # grads = zip(self.actor_grads, actor_model_weights) action_grad = Input(shape=(action_dim, )) loss = K.mean(-action_grad * self.actor_model.outputs) for regularizer_loss in self.actor_model.losses: loss += regularizer_loss loss = loss optimizer = Adam(lr=self.actor_learning_rate) updates_op = optimizer.get_updates( params=self.actor_model.trainable_weights, # constraints=self.model.constraints, loss=loss) self.optimize = K.function( inputs=[self.actor_state_input, action_grad, K.learning_phase()], outputs=[loss], updates=updates_op) # calling function for the loop """ self.actor_grads = tf.gradients(self.actor_model.output, actor_model_weights, -self.actor_critic_grad) # dC/dA (from actor) tf.summary.histogram("Actor_Critic_Grad", self.actor_critic_grad) grads = zip(self.actor_grads, actor_model_weights) self.optimize = tf.train.AdamOptimizer(self.actor_learning_rate).apply_gradients(grads) """ # ===================================================================== # # Critic Model # # ===================================================================== # self.critic_state_input, self.critic_action_input, self.critic_model = Critic( state_input=state_input, root_net=root_net, action_dim=action_dim, lr=self.critic_learning_rate).references() array_variable_summaries(self.critic_model.layers[-1].weights, 
"Critic_Output") _, _, self.target_critic_model = Critic( state_input=target_state_input, root_net=target_root_net, action_dim=action_dim, lr=self.critic_learning_rate).references() """ self.critic_grads = tf.gradients(self.critic_model.output, self.critic_action_input) # where we calcaulte de/dC for feeding above """ #self.actor_critic_grad = tf.placeholder(tf.float32,[None, self.env.action_space.shape[0]]) # where we will feed de/dC (from critic) # summary self.critic_grads = K.gradients( self.critic_model.outputs, self.critic_action_input ) # where we calculate de/dC for feeding above self.compute_critic_gradient = K.function( inputs=[ self.critic_model.output, self.critic_action_input, self.critic_state_input ], outputs=self.critic_grads) # calling function for the loop tf.summary.histogram("Critic_Grad", self.critic_grads) # Update target networks self.update_target() # summary #self.summary_ops, self.summary_vars = build_summaries(action_dim=action_dim) with tf.variable_scope("Global"): self.episode_reward = tf.Variable(0., name="episode_reward") tf.summary.scalar("Reward", self.episode_reward) self.episode_min_reward = tf.Variable(0., name="episode_min_reward") tf.summary.scalar("Min_Reward", self.episode_min_reward) self.episode_ave_max_q = tf.Variable(0., name="episode_ave_max_q") tf.summary.scalar("Qmax_Value", self.episode_ave_max_q) self.loss_critic = tf.Variable(0., name="loss_critic") tf.summary.scalar("Loss_critic", self.loss_critic) self.loss_actor = tf.Variable(0., name="loss_actor") tf.summary.scalar("Loss_actor", self.loss_actor) self.ep_base_action = tf.Variable(initial_value=self.env.sim.w0, name="ep_base_action") tf.summary.histogram("Action_base", self.ep_base_action) self.ep_action = tf.Variable(initial_value=self.env.sim.w0, name="ep_action") tf.summary.histogram("Action", self.ep_action) self.merged = tf.summary.merge_all() # Initialize for later gradient calculations # self.sess.run(tf.global_variables_initializer()) # 
========================================================================= # # Target Model Updating # # ========================================================================= # def _update_actor_target(self): weights = self.actor_model.get_weights() target_weights = self.target_actor_model.get_weights() for i in range(len(target_weights)): target_weights[i] = weights[i] * self.tau + target_weights[i] * ( 1 - self.tau) self.target_actor_model.set_weights(target_weights) def _update_critic_target(self): weights = self.critic_model.get_weights() target_weights = self.target_critic_model.get_weights() for i in range(len(target_weights)): target_weights[i] = weights[i] * self.tau + target_weights[i] * ( 1 - self.tau) self.target_critic_model.set_weights(target_weights) def update_target(self): self._update_actor_target() self._update_critic_target() # ========================================================================= # # Model Predictions # # ========================================================================= # def act(self, cur_state): self.epsilon *= self.epsilon_decay if np.random.random() < self.epsilon: return self.env.action_space.sample() return self.actor_model.predict(cur_state).reshape( (self.env.action_space.shape[0], )) def initialize(self, load_weights=True, verbose=True): """ Load training history from path. To be add feature to just load weights, not training states """ if load_weights: try: variables = tf.global_variables() param_dict = {} saver = tf.train.Saver() saver.restore(self.sess, self.model_save_path) for var in variables: var_name = var.name[:-2] if verbose: print('Loading {} from checkpoint. 
Name: {}'.format( var.name, var_name)) param_dict[var_name] = var except: traceback.print_exc() print('Build model from scratch') self.sess.run(tf.global_variables_initializer()) else: print('Build model from scratch') self.sess.run(tf.global_variables_initializer()) def _train_actor(self, samples): for sample in samples: cur_state, action, reward, new_state, _ = sample predicted_action = self.actor_model.predict(cur_state) grads = self.sess.run(self.critic_grads, feed_dict={ self.critic_state_input: cur_state, self.critic_action_input: predicted_action })[0] self.sess.run(self.optimize, feed_dict={ self.actor_state_input: cur_state, self.actor_critic_grad: grads }) def _train_critic(self, samples): for sample in samples: cur_state, action, reward, new_state, done = sample if not done: target_action = self.target_actor_model.predict(new_state) future_reward = self.target_critic_model.predict( [new_state, target_action])[0][0] reward += self.gamma * future_reward history_critic = self.critic_model.fit([cur_state, action], reward, verbose=0, batch_size=self.batch_size) # print("reward = ", reward, "/", self.critic_model.predict([cur_state, action])) # for layer in self.critic_model.layers: # print(layer, " weights = ", layer.get_weights()) return history_critic def train(self, save_every_episode=1, verbose=True, debug=False): writer = tf.summary.FileWriter(self.summary_path, self.sess.graph) np.random.seed(self.config['seed']) num_episode = self.config['episode'] gamma = self.config['gamma'] self.buffer = ReplayBuffer(self.config['buffer size']) # @TODO : could monitor the Average Qmax and stop when no more change # for example change less than 1e-3 for 5 episodes delta_QMax = 1e-4 nb_episodes_stable = 5 stored_ep_ave_max_q = 0.0 stored_episodes_stable = 0 previous_i = 0 # main training loop for i in range(num_episode): if verbose and debug: print("Episode: {} Replay Buffer {}".format( i, self.buffer.count)) # receive initial state previous_observation = 
self.env.reset() if self.obs_normalizer: previous_observation = previous_observation[ self.obs_normalizer] ep_reward = 0.0 episode_min_reward = 0.0 ep_ave_max_q = 0.0 # keep track of loss for episode loss_critic = 0.0 loss_actor = 0.0 self.actor_noise.reset() # keeps sampling until done for j in range(self.config['max step']): # select action according to the current policy and exploration noise base_action = self.actor_model.predict( np.expand_dims(previous_observation, axis=0)).squeeze(axis=0) action = base_action + self.actor_noise() # normalize action action = np.clip(action, 0.0, 1.00) action /= action.sum() action_take = action if self.action_processor: action_take = self.action_processor(action) # execute action and observe reward and new state observation, reward, done, _ = self.env.step(action_take) if self.obs_normalizer: observation = observation[self.obs_normalizer] # make standard deviation close to one observation = observation * 20.0 # add to buffer self.buffer.add(previous_observation, action, reward, done, observation) if self.buffer.size() >= self.batch_size: # ========================================================================= # # sample a random mini-batch # # ========================================================================= # s_batch, a_batch, r_batch, t_batch, s2_batch = self.buffer.sample_batch( self.batch_size) # Calculate targets from the target critic and the target actor target_q = self.target_critic_model.predict( [s2_batch, self.target_actor_model.predict(s2_batch)]) y_i = [] for k in range(self.batch_size): if t_batch[k]: y_i.append(r_batch[k]) else: y_i.append(r_batch[k] + gamma * target_q[k][0]) # Update the critic given the targets by minimizing the loss (mse) # loss_critic += self.critic_model.train_on_batch([s_batch, a_batch], np.reshape(y_i, (self.batch_size, 1)))[0] stop_on_no_improvement = EarlyStopping(monitor='loss', min_delta=0.001, patience=3, verbose=0, mode='auto') history_critic = self.critic_model.fit( 
[s_batch, a_batch], np.reshape(y_i, (self.batch_size, 1)), #epochs=100, #callbacks=[stop_on_no_improvement], verbose=0) loss_critic += history_critic.history['loss'][-1] # keep track of the prediction for reporting predicted_q_value = self.critic_model.predict( [s_batch, a_batch]) ep_ave_max_q += np.amax(predicted_q_value) # Update the actor policy using the sampled gradient a_outs = self.actor_model.predict(s_batch) # gradient Q value for actions (critic) critic_grads = self.compute_critic_gradient( [predicted_q_value, a_batch, s_batch])[0] # use this gradient to update the policy (actor) loss_actor = self.optimize([s_batch, critic_grads, 1])[0] """ grads = self.sess.run(self.critic_grads, feed_dict={ self.critic_state_input: s_batch, self.critic_action_input: a_outs })[0] self.sess.run(self.optimize, feed_dict={ self.actor_state_input: s_batch, self.actor_critic_grad: grads }) """ # Update target networks self.update_target() ep_reward += reward episode_min_reward = min(reward, episode_min_reward) previous_observation = observation if done or j == self.config['max step'] - 1: loss_critic = loss_critic / (float(j) if j != 0 else 1.0) loss_actor = loss_actor / (float(j) if j != 0 else 1.0) ep_reward = ep_reward / (float(j) if j != 0 else 1.0) ep_ave_max_q = ep_ave_max_q / (float(j) if j != 0 else 1.0) # do summary preparation merged = self.sess.run( self.merged, feed_dict={ self.actor_state_input: s_batch, self.critic_state_input: s_batch, self.critic_action_input: a_batch, self.episode_reward: ep_reward, self.loss_critic: loss_critic, self.loss_actor: loss_actor, self.episode_min_reward: episode_min_reward, self.ep_action: action, self.ep_base_action: base_action, self.episode_ave_max_q: ep_ave_max_q }) writer.add_summary(merged, i) writer.flush() print( 'Episode: {:d}, Critic Loss:{} Actor Loss:{} Average Reward: {:.2f}, Average Qmax: {:.4f}' .format(i, loss_critic, loss_actor, ep_reward, ep_ave_max_q)) print('--- top indice {}, top 3 base actions {}'.format( 
np.where(base_action == base_action.max())[0][0], sorted(base_action)[-3:])) print('+++ top indice {}, top 3 actions {}'.format( np.where(action == action.max())[0][0], sorted(action)[-3:])) #print('Action: norm {}, values {}'.format(action.sum(), action)) #print('---Base Action: norm {}, values {}'.format(base_action.sum(), base_action)) break # check if we must add a termination based on no more evolution if abs(ep_ave_max_q - stored_ep_ave_max_q) < delta_QMax: # steps must be consecutive if i - previous_i == 1: stored_episodes_stable += 1 else: stored_episodes_stable = 0 previous_i = i stored_ep_ave_max_q = ep_ave_max_q if stored_episodes_stable > nb_episodes_stable: print("Early break in episode ", i) break self.save_model(verbose=True) print('Finish.') def predict(self, observation): """ predict the next action using actor model, only used in deploy. Can be used in multiple environments. Args: observation: (batch_size, num_stocks + 1, window_length) Returns: action array with shape (batch_size, num_stocks + 1) """ if self.obs_normalizer: observation = self.obs_normalizer(observation) action = self.actor.predict(observation) if self.action_processor: action = self.action_processor(action) return action def predict_single(self, observation): """ Predict the action of a single observation Args: observation: (num_stocks + 1, window_length) Returns: a single action array with shape (num_stocks + 1,) """ if self.obs_normalizer and isinstance(observation, dict): observation = observation[self.obs_normalizer] action = self.actor_model.predict(np.expand_dims( observation, axis=0)).squeeze(axis=0) if self.action_processor: action = self.action_processor(action) return action def save_model(self, verbose=False): if not os.path.exists(self.model_save_path): os.makedirs(self.model_save_path, exist_ok=True) # make sure we save all parameters for layer in self.root_model.layers: layer.trainable = True for layer in self.target_root_model.layers: layer.trainable = True saver = 
tf.train.Saver() model_path = saver.save(self.sess, self.model_save_path) print("Model saved in %s" % model_path)
class Autoencoder:
    """Dense autoencoder whose decoder weights are read back as endmembers.

    The encoder maps ``data.n_bands`` inputs to ``n_end`` units, followed by
    batch normalisation, a ``SparseReLU`` threshold and a ``SumToOne`` layer
    named ``'abundances'``. The decoder is a single non-negative linear
    ``Dense`` layer named ``'endmembers'``.
    """

    def __init__(self,
                 n_end,
                 data,
                 activation=LeakyReLU(0.1),
                 optimizer='adam',
                 lr=0.001,
                 l2=0.0,
                 l1=0.0,
                 is_GT=True,
                 plot_every_n=10):
        """Store the configuration; the Keras model is built by ``create_model``.

        Args:
            n_end: width of the code layer.
            data: dataset object; this class reads its ``n_bands``, ``size``,
                ``data``, ``orig_data`` and ``GT`` attributes.
            activation: activation used by the encoder ``Dense`` layers.
            optimizer: optimizer passed to ``Model.compile``.
            lr: stored on the instance; not used by the visible code.
            l2: stored on the instance; not used by the visible code.
            l1: stored on the instance; not used by the visible code.
            is_GT: forwarded to the ``PlotWhileTraining`` callback.
            plot_every_n: forwarded to the ``PlotWhileTraining`` callback.
        """
        self.n_end = n_end
        self.hsi = data
        self.activation = activation
        self.lr = lr
        self.l2 = l2
        self.l1 = l1
        self.optimizer = optimizer
        self.model = None
        self.use_bias = False
        self.abundance_layer = None
        self.initializer = initializers.glorot_normal()
        self.sum_to_one = True
        self.is_GT = is_GT
        self.plot_every_n = plot_every_n
        self.plotS = True
        self.weights = None
        self.is_deep = False

    def create_model(self, loss):
        """Build and compile the autoencoder into ``self.model``.

        Args:
            loss: loss passed to ``Model.compile``; ``utils.SAD`` is tracked
                as a metric.
        """
        use_bias = False

        # Input layer
        input_ = Input(shape=(self.hsi.n_bands, ))

        # Encoder
        if self.is_deep:
            encoded = Dense(self.n_end * 9,
                            use_bias=use_bias,
                            kernel_regularizer=None,
                            kernel_initializer=None,
                            activation=self.activation)(input_)
            encoded = Dense(self.n_end * 6,
                            use_bias=use_bias,
                            kernel_regularizer=None,
                            kernel_initializer=None,
                            activation=self.activation)(encoded)
            encoded = Dense(self.n_end * 3,
                            use_bias=use_bias,
                            kernel_regularizer=None,
                            kernel_initializer=None,
                            activation=self.activation)(encoded)
            encoded = Dense(self.n_end,
                            use_bias=use_bias,
                            kernel_regularizer=None,
                            activity_regularizer=None,
                            activation=self.activation)(encoded)
        else:
            encoded = Dense(self.n_end,
                            use_bias=use_bias,
                            activation=self.activation,
                            activity_regularizer=None,
                            kernel_regularizer=None)(input_)

        # Utility Layers
        # Batch Normalization
        encoded = BatchNormalization()(encoded)
        # Soft Thresholding
        encoded = utils.SparseReLU(alpha_initializer='zero',
                                   alpha_constraint=non_neg(),
                                   activity_regularizer=None)(encoded)
        # Sum To One (ASC)
        encoded = utils.SumToOne(axis=0,
                                 name='abundances',
                                 activity_regularizer=None)(encoded)

        # Gaussian dropout on the code. Its output must feed the decoder;
        # previously the decoder was wired to ``encoded`` directly, which
        # silently dropped this layer from the graph.
        noisy_code = GaussianDropout(0.0045)(encoded)

        # Decoder
        decoded = Dense(self.hsi.n_bands,
                        activation='linear',
                        name='endmembers',
                        use_bias=use_bias,
                        kernel_constraint=non_neg(),
                        kernel_regularizer=None,
                        kernel_initializer=self.initializer)(noisy_code)

        self.model = Model(inputs=input_, outputs=decoded, name='Autoencoder')

        # Compile Model
        self.model.compile(self.optimizer, loss, metrics=[utils.SAD])

    def fit(self, epochs, batch_size):
        """Train the model to reconstruct ``self.hsi.data``.

        Returns:
            The history object returned by ``Model.fit``.
        """
        progress = TQDMCallback(leave_outer=True, leave_inner=True)
        # Replace the per-batch hooks on the progress callback with no-ops.
        setattr(progress, 'on_train_batch_begin', lambda x, y: None)
        setattr(progress, 'on_train_batch_end', lambda x, y: None)

        plotWhileTraining = PlotWhileTraining(self.plot_every_n,
                                              self.hsi.size, self.n_end,
                                              self.hsi, self.hsi.GT,
                                              self.is_GT, True)
        hist = self.model.fit(self.hsi.data,
                              self.hsi.data,
                              epochs=epochs,
                              batch_size=batch_size,
                              verbose=0,
                              callbacks=[progress, plotWhileTraining],
                              shuffle=True)
        return hist

    def shuffle_weights(self, weights):
        """Shuffle or reset the model weights.

        Args:
            weights: when ``None``, the current weights are randomly permuted
                element-wise within each array; otherwise the given weights
                are set on the model unchanged.
        """
        if weights is None:
            weights = self.model.get_weights()
            weights = [
                np.random.permutation(w.flat).reshape(w.shape)
                for w in weights
            ]
        self.model.set_weights(weights)

    def get_endmembers(self):
        """Return the first weight array of the model's last layer."""
        return self.model.layers[-1].get_weights()[0]

    def get_abundances(self):
        """Return the ``'abundances'`` layer output for ``self.hsi.orig_data``."""
        intermediate_layer_model = Model(
            inputs=self.model.input,
            outputs=self.model.get_layer('abundances').output)
        return intermediate_layer_model.predict(self.hsi.orig_data)

    def save_results(self, out_dir, fname):
        """Write endmembers (``'M'``) and abundances (``'A'``) to a .mat file.

        Args:
            out_dir: directory joined with ``fname`` via ``/`` (so it must
                support that operator), or ``None`` to use ``fname`` as is.
            fname: output file name.
        """
        out_path = out_dir / fname if out_dir is not None else fname
        endmembers = self.get_endmembers()
        abundances = self.get_abundances()
        sio.savemat(out_path, {'M': endmembers, 'A': abundances})
class ActorNetwork(object):
    """ Input to the network is the state, output is the action under a
    deterministic policy.

    The output layer activation is a tanh to keep the action between
    -action_bound and action_bound

    Subclasses must implement ``create_actor_network``.
    """

    def __init__(self, sess, root_net, inputs, state_dim, action_dim,
                 action_bound, learning_rate, tau, batch_size):
        """
        Args:
            sess: a tensorflow session
            root_net: forwarded to ``create_actor_network`` for the online
                network (the target network is created with ``None``)
            inputs: input tensor(s) shared by the online and target models
            state_dim: a list specifies shape
            action_dim: a list specified action shape
            action_bound: whether to normalize action in the end
            learning_rate: learning rate
            tau: target network update parameter
            batch_size: use for normalization
        """
        self.sess = sess
        assert isinstance(state_dim, list), 'state_dim must be a list.'
        self.s_dim = state_dim
        assert isinstance(action_dim, list), 'action_dim must be a list.'
        self.a_dim = action_dim
        self.action_bound = action_bound
        self.learning_rate = learning_rate
        self.tau = tau
        self.batch_size = batch_size

        # Input (the target network reads from the same input tensor)
        self.inputs = inputs
        self.target_inputs = inputs

        # Actor Network
        self.out, self.scaled_out = self.create_actor_network(
            root_net=root_net)
        self.actor_model = Model(inputs=inputs, outputs=self.out)
        # Captured before the target network is created so that it does not
        # include the target network's variables.
        self.network_params = tf.trainable_variables()

        # Target Network
        self.target_out, self.target_scaled_out = self.create_actor_network(
            root_net=None)
        self.target_actor_model = Model(inputs=self.target_inputs,
                                        outputs=self.target_out)

        # This gradient will be provided by the critic network
        self.action_gradient = tf.placeholder(tf.float32, [None] + self.a_dim)

        # Combine the gradients here; the sign flip turns Adam's descent step
        # into ascent along the critic's action gradient.
        self.unnormalized_actor_gradients = tf.gradients(
            self.scaled_out, self.network_params, -self.action_gradient)
        self.actor_gradients = [
            tf.div(gradient, self.batch_size)
            for gradient in self.unnormalized_actor_gradients
        ]

        # Optimization Op
        self.optimize = tf.train.AdamOptimizer(self.learning_rate). \
            apply_gradients(zip(self.actor_gradients, self.network_params))

    def create_actor_network(self, root_net):
        """Build the actor graph and return ``(out, scaled_out)``.

        Must be overridden; ``__init__`` unpacks exactly two values.
        """
        raise NotImplementedError(
            'Create actor should return (out, scaled_out)')

    def train(self, inputs, a_gradient):
        """Apply one optimisation step for the given states and gradients."""
        self.sess.run(self.optimize,
                      feed_dict={
                          self.inputs: inputs,
                          self.action_gradient: a_gradient
                      })

    def predict(self, inputs):
        """Evaluate the online network's scaled output for ``inputs``."""
        return self.sess.run(self.scaled_out, feed_dict={self.inputs: inputs})

    def predict_target(self, inputs):
        """Evaluate the target network's scaled output for ``inputs``."""
        return self.sess.run(self.target_scaled_out,
                             feed_dict={self.target_inputs: inputs})

    def update_target_network(self):
        """Copy the online model's weights into the target model.

        NOTE(review): this is a full copy; ``self.tau`` is stored but not
        used by any code in this class - confirm a hard update is intended.
        """
        self.target_actor_model.set_weights(self.actor_model.get_weights())
def layer_test(layer_cls,
               kwargs=None,
               input_shape=None,
               input_dtype=None,
               input_data=None,
               expected_output=None,
               expected_output_dtype=None,
               fixed_batch_size=False):
    """Test routine for a layer with a single input tensor
    and single output tensor.

    Args:
        layer_cls: layer class to instantiate.
        kwargs: keyword arguments for ``layer_cls``; ``None`` means none.
        input_shape: input shape including the batch axis; ``None`` entries
            are replaced by a random size in [1, 3] when data is generated.
            Required when ``input_data`` is not given.
        input_dtype: dtype of the input; defaults to ``K.floatx()`` for
            generated data, or to ``input_data.dtype``.
        input_data: optional input array; generated randomly when omitted.
        expected_output: optional array compared with the layer output
            (``rtol=1e-3``).
        expected_output_dtype: expected dtype of the output tensor; defaults
            to ``input_dtype``.
        fixed_batch_size: when true, the ``Input`` is built with
            ``batch_shape=input_shape`` instead of ``shape=input_shape[1:]``.

    Returns:
        The output of the functional model for ``input_data``.
    """
    # Avoid a shared mutable default argument.
    if kwargs is None:
        kwargs = {}

    # generate input data
    if input_data is None:
        assert input_shape
        if not input_dtype:
            input_dtype = K.floatx()
        input_data_shape = list(input_shape)
        for i, e in enumerate(input_data_shape):
            if e is None:
                input_data_shape[i] = np.random.randint(1, 4)
        input_data = (10 * np.random.random(input_data_shape))
        input_data = input_data.astype(input_dtype)
    else:
        if input_shape is None:
            input_shape = input_data.shape
        if input_dtype is None:
            input_dtype = input_data.dtype
    if expected_output_dtype is None:
        expected_output_dtype = input_dtype

    # instantiation
    layer = layer_cls(**kwargs)

    # test get_weights , set_weights at layer level
    weights = layer.get_weights()
    layer.set_weights(weights)

    # Fall back to the private variant when the public method fails.
    # ``Exception`` rather than a bare except, so KeyboardInterrupt and
    # SystemExit are not swallowed.
    try:
        expected_output_shape = layer.compute_output_shape(input_shape)
    except Exception:
        expected_output_shape = layer._compute_output_shape(input_shape)

    # test in functional API
    if fixed_batch_size:
        x = Input(batch_shape=input_shape, dtype=input_dtype)
    else:
        x = Input(shape=input_shape[1:], dtype=input_dtype)
    y = layer(x)
    assert K.dtype(y) == expected_output_dtype

    # check with the functional API
    model = Model(x, y)
    actual_output = model.predict(input_data)
    actual_output_shape = actual_output.shape
    for expected_dim, actual_dim in zip(expected_output_shape,
                                        actual_output_shape):
        if expected_dim is not None:
            assert expected_dim == actual_dim
    if expected_output is not None:
        assert_allclose(actual_output, expected_output, rtol=1e-3)

    # test serialization, weight setting at model level
    model_config = model.get_config()
    recovered_model = model.__class__.from_config(model_config)
    if model.weights:
        weights = model.get_weights()
        recovered_model.set_weights(weights)
        _output = recovered_model.predict(input_data)
        assert_allclose(_output, actual_output, rtol=1e-3)

    # test training mode (e.g. useful when the layer has a
    # different behavior at training and testing time).
    if has_arg(layer.call, 'training'):
        model.compile('rmsprop', 'mse')
        model.train_on_batch(input_data, actual_output)

    # test instantiation from layer config
    layer_config = layer.get_config()
    layer_config['batch_input_shape'] = input_shape
    layer = layer.__class__.from_config(layer_config)

    # for further checks in the caller function
    return actual_output
class DeepQ(object):
    """Constructs the desired deep q learning network"""

    def __init__(self, action_size, observation_size, lr=LEARNING_RATE):
        """Store the sizes and build the online and target networks.

        Args:
            action_size: number of outputs (one Q-value per action).
            observation_size: size of one observation; the network input has
                ``observation_size * NUM_FRAMES`` features.
            lr: learning rate for the Adam optimizer.
        """
        self.action_size = action_size
        self.observation_size = observation_size
        self.lr = lr
        self.model = None
        self.target_model = None
        self.qvalue_evolution = []
        self.construct_q_network()

    def _build_network(self):
        """Create and compile one fresh copy of the Q-network architecture."""
        input_layer = Input(shape=(self.observation_size * NUM_FRAMES, ))
        hidden = Dense(self.observation_size * NUM_FRAMES)(input_layer)
        hidden = Activation('relu')(hidden)
        hidden = Dense(self.observation_size)(hidden)
        hidden = Activation('relu')(hidden)
        hidden = Dense(2 * self.action_size)(hidden)
        hidden = Activation('relu')(hidden)
        output = Dense(self.action_size)(hidden)

        network = Model(inputs=[input_layer], outputs=[output])
        network.compile(loss='mse', optimizer=Adam(lr=self.lr))
        return network

    def construct_q_network(self):
        """ Construct both the actual Q-network and the target network with
        three hidden layers and ReLu activation functions in between. The
        network uses an Adam optimizer with MSE loss."""
        # Each network gets its own layers. Building two Models from the same
        # input/output tensors (as before) makes them share every layer, so
        # the "target" network would always equal the online network.
        self.model = self._build_network()
        self.target_model = self._build_network()
        self.target_model.set_weights(self.model.get_weights())

    def predict_movement(self, data, epsilon):
        """ Predict the next action from the network. Epsilon is the
        probability of making a random move. Returns the optimal action and
        the predicted reward for that action. """
        rand_val = np.random.random()
        q_actions = self.model.predict(data.reshape(
            1, self.observation_size * NUM_FRAMES),
                                       batch_size=1)

        if rand_val < epsilon:
            opt_policy = np.random.randint(0, self.action_size)
        else:
            # NOTE(review): the greedy choice maximises |Q|, not Q - confirm
            # this is intended.
            opt_policy = np.argmax(np.abs(q_actions[0]))

        self.qvalue_evolution.append(q_actions[0][opt_policy])
        return opt_policy, q_actions[0][opt_policy]

    def predict_rewards(self, data):
        """ Like predict_movement, only without a probability of a random
        move and returns only the predicted q-values."""
        q_actions = self.model.predict(np.array(data).reshape(
            1, self.observation_size * NUM_FRAMES),
                                       batch_size=1)
        return q_actions[0]

    def train(self, s_batch, a_batch, r_batch, d_batch, s2_batch):
        """ Trains the network on a batch of input. The parameters are
        batches of states, actions, rewards, done booleans and next states.

        Returns:
            The loss reported by ``train_on_batch``.
        """
        batch_size = s_batch.shape[0]

        # Train according to the Bellman Equation
        targets = self.model.predict(s_batch, batch_size=batch_size)
        fut_action = self.target_model.predict(s2_batch,
                                               batch_size=batch_size)

        rows = np.arange(batch_size)
        actions = np.asarray(a_batch).reshape(-1)
        # Pair each sample's row with its own action column. The previous
        # ``targets[:, actions] = r_batch`` wrote every sample's reward into
        # every row.
        targets[rows, actions] = np.asarray(r_batch).reshape(-1)

        # NOTE(review): the bootstrap term is added where ``d_batch`` is
        # true. If ``d_batch`` marks terminal transitions, as the docstring
        # suggests, the mask is inverted - confirm against the caller.
        mask = np.asarray(d_batch).reshape(-1)
        targets[mask, actions[mask]] += DISCOUNT_RATE * np.max(
            fut_action[mask], axis=-1)

        targets_ts = tf.convert_to_tensor(targets, dtype=tf.float32)
        loss = self.model.train_on_batch(s_batch, targets_ts)
        return loss

    def train_imitation(self, s_batch, t_batch):
        """ Trains network on generated data: Imitation Learning. """
        loss = self.model.train_on_batch(s_batch, t_batch)
        return loss

    def save_network(self, path):
        """Save the online network to ``<path>/network.h5``."""
        # makedirs also creates missing parent directories and tolerates an
        # existing directory.
        os.makedirs(path, exist_ok=True)
        self.model.save(os.path.join(path, 'network.h5'))
        print("Successfully saved network.")

    def load_network(self, path):
        """Load the online network from ``<path>/network.h5``.

        Only ``self.model`` is replaced; ``self.target_model`` is left as is.
        """
        self.model = load_model(os.path.join(path, 'network.h5'))
        print("Successfully loaded network.")

    def target_train(self):
        """ The target network is updated each step by 'merging' a small
        part of the actual network into it. """
        model_weights = self.model.get_weights()
        target_model_weights = self.target_model.get_weights()
        merged_weights = [
            TAU * online + (1 - TAU) * target
            for online, target in zip(model_weights, target_model_weights)
        ]
        self.target_model.set_weights(merged_weights)