class Critic(tf.keras.Model):
    """WGAN critic whose depth scales with the configured image size.

    A first strided conv downsamples the RGB input; additional
    Conv2D + LayerNormalization + LeakyReLU blocks are stacked (one per
    power of two of the image size, minus a fixed offset of 3) until the
    feature map is small, then flattened into a single unbounded
    "realness" score via a 1-unit Dense layer.
    """

    def __init__(self, model_parameters=None):
        super().__init__(name="critic")
        if model_parameters is None:
            model_parameters = {
                'lr': 0.0001,
                'beta1': 0,
                'batch_size': 64,
                'latent_dim': 128,
                'image_size': 152
            }
        self.layers_blocks = list()
        self.model_parameters = model_parameters
        # BUGFIX: '/' yields a float, but Conv2D's `filters` argument must be
        # an int (newer TF versions reject floats outright). Use '//'.
        # NOTE(review): the channel width is derived from 'batch_size', which
        # looks like it is being (ab)used as a width multiplier — confirm intent.
        dim = model_parameters['batch_size'] // 2
        init = RandomNormal(stddev=0.02)
        # Layers
        self.conv_1 = Conv2D(dim, (5, 5), strides=(2, 2), padding='same',
                             kernel_initializer=init,
                             input_shape=[model_parameters['image_size'],
                                          model_parameters['image_size'], 3])
        self.leaky_1 = LeakyReLU(alpha=0.2)
        # One extra stride-2 block per doubling of the image size beyond 2^3.
        number_of_layers_needed = int(math.log(model_parameters['image_size'], 2)) - 3
        for i in range(number_of_layers_needed):
            dim *= 2  # double the channel count at every downsampling step
            self.layers_blocks.append([
                Conv2D(dim, (5, 5), strides=(2, 2), padding='same',
                       kernel_initializer=init),
                LayerNormalization(),
                LeakyReLU(alpha=0.2)
            ])
        self.flat = Flatten()
        self.logits = Dense(1)  # This neuron tells us how real or fake the input is
        self.optimizer = Adam(learning_rate=model_parameters['lr'],
                              beta_1=model_parameters['beta1'], beta_2=0.9)

    def call(self, input_tensor, training=True):
        ## Definition of Forward Pass
        x = self.leaky_1(self.conv_1(input_tensor))
        for i in range(len(self.layers_blocks)):
            layers_block = self.layers_blocks[i]
            for layer in layers_block:
                x = layer(x, training=training)
        x = self.flat(x)
        return self.logits(x)

    def compute_loss(self, y_true, y_pred):
        """ Wasserstein loss """
        return backend.mean(y_true * y_pred)

    def backPropagate(self, gradients, trainable_variables):
        """Apply pre-computed gradients with this critic's own Adam optimizer."""
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))

    def save_optimizer(self):
        # Persist the optimizer slot variables so training can be resumed.
        weights = self.optimizer.get_weights()
        data_access.store_weights_in_file('c_optimizer_weights', weights)
class Generator(tf.keras.Model):
    """DCGAN-style generator: latent vector -> 152x152x3 image in [-1, 1].

    Pipeline: Dense -> reshape to a 5x5 feature map, then five
    upsample + 5x5-conv stages (5 -> 10 -> 20, cropped to 19 -> 38 -> 76
    -> 152), each followed by BatchNormalization + ReLU, with a final
    tanh conv down to 3 channels.
    NOTE(review): channel widths are derived from `batch_size`, which
    looks like it is being used as a width multiplier rather than the
    actual batch size — confirm intent.
    """

    def __init__(self, model_parameters=None):
        super().__init__(name='generator')
        # layers
        if model_parameters is None:
            model_parameters = {
                'lr': 0.0001,
                'beta1': 0,
                'batch_size': 64,
                'latent_dim': 128,
                'image_size': 152
            }
        self.model_parameters = model_parameters
        self.batch_size = model_parameters['batch_size']
        self.noise_size = model_parameters['latent_dim']
        init = RandomNormal(stddev=0.02)
        self.dense_1 = Dense(8 * 5 * 5 * self.batch_size, use_bias=False,
                             input_shape=(self.noise_size,))
        self.batchNorm1 = BatchNormalization()
        self.leaky_1 = ReLU()
        self.reshape_1 = Reshape((5, 5, 8 * self.batch_size))
        self.up_2 = UpSampling2D((2, 2), interpolation='nearest')
        self.conv2 = Conv2D(4 * self.batch_size, (5, 5), strides=(1, 1), padding="same",
                            use_bias=False, kernel_initializer=init)
        self.batchNorm2 = BatchNormalization()
        self.leaky_2 = ReLU()
        self.up_3 = UpSampling2D((2, 2), interpolation='nearest')
        self.conv3 = Conv2D(2 * self.batch_size, (5, 5), strides=(1, 1), padding="same",
                            use_bias=False, kernel_initializer=init)
        # Crop 20x20 -> 19x19 so the remaining three doublings land on 152.
        self.crop_3 = Cropping2D(cropping=((1, 0), (1, 0)))
        self.batchNorm3 = BatchNormalization()
        self.leaky_3 = ReLU()
        self.up_4 = UpSampling2D((2, 2), interpolation='nearest')
        self.conv4 = Conv2D(self.batch_size, (5, 5), strides=(1, 1), padding="same",
                            use_bias=False, kernel_initializer=init)
        self.batchNorm4 = BatchNormalization()
        self.leaky_4 = ReLU()
        self.up_5 = UpSampling2D((2, 2), interpolation='nearest')
        # BUGFIX: '/' yields a float, but Conv2D's `filters` must be an int.
        self.conv5 = Conv2D(self.batch_size // 2, (5, 5), strides=(1, 1), padding="same",
                            use_bias=False, kernel_initializer=init)
        self.batchNorm5 = BatchNormalization()
        self.leaky_5 = ReLU()
        self.up_6 = UpSampling2D((2, 2), interpolation='nearest')
        self.conv6 = Conv2D(3, (5, 5), activation='tanh', strides=(1, 1), padding="same",
                            use_bias=False, kernel_initializer=init)
        self.optimizer = Adam(learning_rate=model_parameters['lr'],
                              beta_1=model_parameters['beta1'], beta_2=0.9)

    def call(self, input_tensor, training=True):
        ## Definition of Forward Pass
        x = self.leaky_1(self.batchNorm1(self.reshape_1(self.dense_1(input_tensor)), training=training))
        x = self.leaky_2(self.batchNorm2(self.conv2(self.up_2(x)), training=training))
        x = self.leaky_3(self.batchNorm3(self.crop_3(self.conv3(self.up_3(x))), training=training))
        x = self.leaky_4(self.batchNorm4(self.conv4(self.up_4(x)), training=training))
        x = self.leaky_5(self.batchNorm5(self.conv5(self.up_5(x)), training=training))
        return self.conv6(self.up_6(x))

    def generate_noise(self, batch_size, random_noise_size):
        """Sample a standard-normal latent batch of shape [batch_size, random_noise_size]."""
        return tf.random.normal([batch_size, random_noise_size])

    def compute_loss(self, y_true, y_pred, class_wanted, class_prediction):
        """ Wasserstein loss - prob of classifier get it right """
        k = 10  # hiper-parameter: weight of the classification term
        return backend.mean(y_true * y_pred) + (k * categorical_crossentropy(class_wanted, class_prediction))

    def backPropagate(self, gradients, trainable_variables):
        """Apply pre-computed gradients with this generator's own Adam optimizer."""
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))

    def save_optimizer(self):
        # Persist the optimizer slot variables so training can be resumed.
        weights = self.optimizer.get_weights()
        data_access.store_weights_in_file('g_optimizer_weights', weights)

    def set_seed(self):
        # Fix a reusable latent batch so sample grids are comparable across epochs.
        self.seed = tf.random.normal([self.batch_size, self.noise_size])
        data_access.store_seed_in_file('seed', self.seed)

    def load_seed(self):
        self.seed = data_access.load_seed_from_file('seed')
class Critic(tf.keras.Model):
    """Fixed-depth WGAN critic.

    Five stride-2 5x5 conv stages (32 -> 64 -> 128 -> 256 -> 512 filters);
    every stage after the first is layer-normalized, all use LeakyReLU.
    The flattened features feed a single linear unit producing an
    unbounded realness score.
    """

    def __init__(self, model_parameters=None):
        super().__init__(name="critic")
        if model_parameters is None:
            model_parameters = {
                'lr': 0.0001,
                'beta1': 0,
                'batch_size': 64,
                'latent_dim': 128,
                'image_size': 152
            }
        self.model_parameters = model_parameters
        weight_init = RandomNormal(stddev=0.02)
        side = model_parameters['image_size']
        # Layers
        self.conv_1 = Conv2D(32, (5, 5), strides=(2, 2), padding='same',
                             kernel_initializer=weight_init,
                             input_shape=[side, side, 3])
        self.leaky_1 = LeakyReLU(alpha=0.2)
        self.conv_2 = Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                             kernel_initializer=weight_init)
        self.layer_norm_2 = LayerNormalization()
        self.leaky_2 = LeakyReLU(alpha=0.2)
        self.conv_3 = Conv2D(128, (5, 5), strides=(2, 2), padding='same',
                             kernel_initializer=weight_init)
        self.layer_norm_3 = LayerNormalization()
        self.leaky_3 = LeakyReLU(alpha=0.2)
        self.conv_4 = Conv2D(256, (5, 5), strides=(2, 2), padding='same',
                             kernel_initializer=weight_init)
        self.layer_norm_4 = LayerNormalization()
        self.leaky_4 = LeakyReLU(alpha=0.2)
        self.conv_5 = Conv2D(512, (5, 5), strides=(2, 2), padding='same',
                             kernel_initializer=weight_init)
        self.layer_norm_5 = LayerNormalization()
        self.leaky_5 = LeakyReLU(alpha=0.2)
        self.flat = Flatten()
        self.logits = Dense(1)  # This neuron tells us how real or fake the input is
        self.optimizer = Adam(learning_rate=model_parameters['lr'],
                              beta_1=model_parameters['beta1'], beta_2=0.9)

    def call(self, input_tensor, training=True):
        ## Definition of Forward Pass
        # First stage has no normalization; stages 2-5 are conv -> norm -> leaky.
        x = self.leaky_1(self.conv_1(input_tensor))
        stages = (
            (self.conv_2, self.layer_norm_2, self.leaky_2),
            (self.conv_3, self.layer_norm_3, self.leaky_3),
            (self.conv_4, self.layer_norm_4, self.leaky_4),
            (self.conv_5, self.layer_norm_5, self.leaky_5),
        )
        for conv, norm, act in stages:
            x = act(norm(conv(x)))
        return self.logits(self.flat(x))

    def compute_loss(self, y_true, y_pred):
        """ Wasserstein loss """
        return backend.mean(y_true * y_pred)

    def backPropagate(self, gradients, trainable_variables):
        """Apply pre-computed gradients with this critic's own Adam optimizer."""
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))

    def save_optimizer(self):
        # Persist the optimizer slot variables so training can be resumed.
        data_access.store_weights_in_file('c_optimizer_weights',
                                          self.optimizer.get_weights())
class LFPTrainer():
    """Trains the LFP actor (optionally with a plan encoder/planner) under a
    tf.distribute strategy.

    With ``gcbc=True`` only the goal-conditioned actor is trained; otherwise
    actor, encoder and planner are trained jointly by one global optimizer,
    with the planner regularised towards the encoder via a reverse-KL term
    weighted by ``beta``.
    """

    def __init__(self, dataloader, actor, probabilistic, encoder=None, planner=None,
                 distribute_strategy=None, learning_rate=3e-4, plan_lr_multiplier=1,
                 clipnorm=5.0, gcbc=False):
        # BUGFIX: the default learning rate used to be the *string* '3e-4',
        # which Adam cannot consume — it is now the float 3e-4.
        # NOTE(review): `clipnorm` and `plan_lr_multiplier` are only used by
        # the commented-out per-model optimizers below — confirm still wanted.
        self.actor = actor
        self.encoder = encoder
        self.planner = planner
        self.distribute_strategy = distribute_strategy
        self.probabilistic = probabilistic
        self.gcbc = gcbc
        self.window_size = dataloader.window_size
        self.quaternion_act = dataloader.quaternion_act
        self.batch_size = dataloader.batch_size
        with self.distribute_strategy.scope():
            # self.actor_optimizer = Adam(learning_rate=learning_rate, global_clipnorm=clipnorm)
            # self.encoder_optimizer = Adam(learning_rate=learning_rate, global_clipnorm=clipnorm)
            # self.planner_optimizer = Adam(learning_rate=learning_rate*plan_lr_multiplier, global_clipnorm=clipnorm)
            self.global_optimizer = Adam(learning_rate=learning_rate)
            # Lengths used to split the concatenated gradient list back into
            # per-model slices in train_step.
            self.actor_grad_len = len(self.actor.trainable_variables)
            if not self.gcbc:
                self.encoder_grad_len = len(self.encoder.trainable_variables)
                self.planner_grad_len = len(self.planner.trainable_variables)

            # Metrics
            self.metrics = {}
            self.metrics['train_loss'] = tf.keras.metrics.Mean(name='train_loss')
            self.metrics['actor_grad_norm'] = tf.keras.metrics.Mean(name='actor_grad_norm')
            self.metrics['actor_grad_norm_clipped'] = tf.keras.metrics.Mean(name='actor_grad_clipped')
            self.metrics['valid_loss'] = tf.keras.metrics.Mean(name='valid_loss')
            self.metrics['valid_position_loss'] = tf.keras.metrics.Mean(name='valid_position_loss')
            self.metrics['valid_max_position_loss'] = lfp.metric.MaxMetric(name='valid_max_position_loss')
            self.metrics['valid_rotation_loss'] = tf.keras.metrics.Mean(name='valid_rotation_loss')
            self.metrics['valid_max_rotation_loss'] = lfp.metric.MaxMetric(name='valid_max_rotation_loss')
            self.metrics['valid_gripper_loss'] = tf.keras.metrics.Mean(name='valid_gripper_loss')
            self.metrics['global_grad_norm'] = tf.keras.metrics.Mean(name='global_grad_norm')

            def compute_loss(labels, predictions, mask, seq_lens):
                """Per-example NLL (probabilistic) or MAE action loss, masked,
                averaged over timesteps then over the global batch."""
                if self.probabilistic:
                    per_example_loss = self.nll_action_loss(labels, predictions) * mask
                else:
                    per_example_loss = self.mae_action_loss(labels, predictions) * mask
                per_example_loss = tf.reduce_sum(per_example_loss, axis=1) / seq_lens  # take mean along the timestep
                return tf.nn.compute_average_loss(per_example_loss, global_batch_size=self.batch_size)

            def compute_MAE(labels, predictions, mask, seq_lens, weightings=None):
                """Masked MAE, averaged over timesteps then the global batch.
                NOTE(review): `weightings` is accepted but unused — confirm."""
                per_example_loss = self.mae_action_loss(labels, predictions) * mask
                per_example_loss = tf.reduce_sum(per_example_loss, axis=1) / seq_lens  # take mean along the timestep
                return tf.nn.compute_average_loss(per_example_loss, global_batch_size=self.batch_size)

            def compute_regularisation_loss(plan, encoding):
                # Reverse KL(enc|plan): we want planner to map to encoder (weighted by encoder)
                reg_loss = tfp.distributions.kl_divergence(encoding, plan)
                return tf.nn.compute_average_loss(reg_loss, global_batch_size=self.batch_size)

            # Losses
            # done this way so that they are in strategy scope
            self.nll_action_loss = lambda y, p_y: tf.reduce_sum(-p_y.log_prob(y), axis=2)
            self.mae_action_loss = tf.keras.losses.MeanAbsoluteError(reduction=tf.keras.losses.Reduction.NONE)
            self.mse_action_loss = tf.keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)
            self.compute_loss = compute_loss
            self.compute_MAE = compute_MAE
            self.compute_regularisation_loss = compute_regularisation_loss

            if not self.gcbc:
                self.metrics['train_reg_loss'] = tf.keras.metrics.Mean(name='train_reg_loss')
                self.metrics['train_act_with_enc_loss'] = tf.keras.metrics.Mean(name='train_act_with_enc_loss')
                self.metrics['train_act_with_plan_loss'] = tf.keras.metrics.Mean(name='train_act_with_plan_loss')
                self.metrics['encoder_grad_norm'] = tf.keras.metrics.Mean(name='encoder_grad_norm')
                self.metrics['planner_grad_norm'] = tf.keras.metrics.Mean(name='planner_grad_norm')
                self.metrics['encoder_grad_norm_clipped'] = tf.keras.metrics.Mean(name='encoder_grad_norm_clipped')
                self.metrics['planner_grad_norm_clipped'] = tf.keras.metrics.Mean(name='planner_grad_norm_clipped')
                self.metrics['valid_reg_loss'] = tf.keras.metrics.Mean(name='valid_reg_loss')
                self.metrics['valid_act_with_enc_loss'] = tf.keras.metrics.Mean(name='valid_act_with_enc_loss')
                self.metrics['valid_act_with_plan_loss'] = tf.keras.metrics.Mean(name='valid_act_with_plan_loss')

    # Now outside strategy .scope
    def train_step(self, inputs, beta, prev_global_grad_norm):
        """One per-replica training step; returns the (replica) loss."""
        # Todo: figure out mask and seq_lens for new dataset
        states, actions, goals, seq_lens, mask = inputs['obs'], inputs['acts'], inputs['goals'], inputs['seq_lens'], \
                                                 inputs['masks']
        if self.gcbc:
            with tf.GradientTape() as actor_tape:
                distrib = self.actor([states, goals])
                loss = self.compute_loss(actions, distrib, mask, seq_lens)
            gradients = actor_tape.gradient(loss, self.actor.trainable_variables)
            # BUGFIX: this path referenced self.actor_optimizer, which is never
            # created (it is commented out in __init__) and raised
            # AttributeError — use the global optimizer instead.
            self.global_optimizer.apply_gradients(zip(gradients, self.actor.trainable_variables))
        else:
            with tf.GradientTape() as tape:  # , tf.GradientTape() as encoder_tape, tf.GradientTape() as planner_tape:
                encoding = self.encoder([states, actions])
                plan = self.planner([states[:, 0, :], goals[:, 0, :]])  # the final goals are tiled out over the entire non masked sequence, so the first timestep is the final goal.
                z_enc = encoding.sample()
                z_plan = plan.sample()
                z_enc_tiled = tf.tile(tf.expand_dims(z_enc, 1), (1, self.window_size, 1))
                z_plan_tiled = tf.tile(tf.expand_dims(z_plan, 1), (1, self.window_size, 1))
                enc_policy = self.actor([states, z_enc_tiled, goals])
                plan_policy = self.actor([states, z_plan_tiled, goals])
                act_enc_loss = self.compute_loss(actions, enc_policy, mask, seq_lens)
                act_plan_loss = self.compute_loss(actions, plan_policy, mask, seq_lens)
                act_loss = act_enc_loss  # train on the encoder's latent; plan loss is tracked as a metric only
                reg_loss = self.compute_regularisation_loss(plan, encoding)
                loss = act_loss + reg_loss * beta

            # # Gradients
            # actor_gradients = actor_tape.gradient(loss, self.actor.trainable_variables)
            # encoder_gradients = encoder_tape.gradient(loss, self.encoder.trainable_variables)
            # planner_gradients = planner_tape.gradient(loss, self.planner.trainable_variables)
            # all_gradients = actor_gradients + encoder_gradients + planner_gradients # concat lists
            gradients = tape.gradient(loss, self.actor.trainable_variables + self.encoder.trainable_variables + self.planner.trainable_variables)
            actor_gradients = gradients[:self.actor_grad_len]
            encoder_gradients = gradients[self.actor_grad_len:self.actor_grad_len + self.encoder_grad_len]
            planner_gradients = gradients[self.actor_grad_len + self.encoder_grad_len:self.actor_grad_len + self.encoder_grad_len + self.planner_grad_len]

            self.metrics['actor_grad_norm'].update_state(tf.linalg.global_norm(actor_gradients))
            self.metrics['encoder_grad_norm'].update_state(tf.linalg.global_norm(encoder_gradients))
            self.metrics['planner_grad_norm'].update_state(tf.linalg.global_norm(planner_gradients))

            # if the gradient norm is more than 3x the previous one, clip it to the previous norm for stability
            gradients = tf.cond(tf.linalg.global_norm(gradients) > 3 * prev_global_grad_norm,
                                lambda: tf.clip_by_global_norm(gradients, prev_global_grad_norm)[0],
                                lambda: gradients)  # must get [0] as it returns new norm as [1]
            # NOTE(review): this rescaled list is only used by the
            # 'planner_grad_norm_clipped' metric below — the optimizer applies
            # the unscaled `gradients`. Confirm whether the 10x boost was
            # meant to be applied (the commented-out per-model optimizer path
            # suggests it once was).
            planner_gradients = [g * 10 for g in planner_gradients]
            self.global_optimizer.apply_gradients(zip(gradients, self.actor.trainable_variables + self.encoder.trainable_variables + self.planner.trainable_variables))
            # # Optimizer step
            # self.actor_optimizer.apply_gradients(zip(actor_gradients, self.actor.trainable_variables))
            # self.encoder_optimizer.apply_gradients(zip(encoder_gradients, self.encoder.trainable_variables))
            # self.planner_optimizer.apply_gradients(zip(planner_gradients, self.planner.trainable_variables))

            # Train Metrics
            self.metrics['global_grad_norm'].update_state(tf.linalg.global_norm(gradients))
            self.metrics['train_reg_loss'].update_state(reg_loss)
            self.metrics['train_act_with_enc_loss'].update_state(act_enc_loss)
            self.metrics['train_act_with_plan_loss'].update_state(act_plan_loss)
            self.metrics['actor_grad_norm_clipped'].update_state(tf.linalg.global_norm(actor_gradients))
            self.metrics['encoder_grad_norm_clipped'].update_state(tf.linalg.global_norm(encoder_gradients))
            self.metrics['planner_grad_norm_clipped'].update_state(tf.linalg.global_norm(planner_gradients))

        self.metrics['train_loss'].update_state(loss)
        return loss

    def test_step(self, inputs, beta):
        """One per-replica validation step; updates valid_* metrics.
        Returns loss (gcbc) or (loss, z_enc, z_plan)."""
        states, actions, goals, seq_lens, mask = inputs['obs'], inputs['acts'], inputs['goals'], inputs['seq_lens'], \
                                                 inputs['masks']
        if self.quaternion_act:
            # xyz, q1-4, grip
            action_breakdown = [3, 4, 1]
        else:
            action_breakdown = [3, 3, 1]
        if self.gcbc:
            policy = self.actor([states, goals], training=False)
            loss = self.compute_loss(actions, policy, mask, seq_lens)
            if self.probabilistic:
                pos_acts, rot_acts, grip_act = tf.split(policy.sample(), action_breakdown, -1)
            else:
                pos_acts, rot_acts, grip_act = tf.split(policy, action_breakdown, -1)
            # NOTE(review): per-component validation metrics are only updated
            # on the non-gcbc path below — confirm whether that is intended.
        else:
            encoding = self.encoder([states, actions])
            plan = self.planner([states[:, 0, :], goals[:, 0, :]])  # the final goals are tiled out over the entire non masked sequence, so the first timestep is the final goal.
            z_enc = encoding.sample()
            z_plan = plan.sample()
            z_enc_tiled = tf.tile(tf.expand_dims(z_enc, 1), (1, self.window_size, 1))
            z_plan_tiled = tf.tile(tf.expand_dims(z_plan, 1), (1, self.window_size, 1))
            enc_policy = self.actor([states, z_enc_tiled, goals])
            plan_policy = self.actor([states, z_plan_tiled, goals])
            act_enc_loss = self.compute_loss(actions, enc_policy, mask, seq_lens)
            act_plan_loss = self.compute_loss(actions, plan_policy, mask, seq_lens)
            act_loss = act_plan_loss  # validation scores the *planner's* latent
            reg_loss = self.compute_regularisation_loss(plan, encoding)
            # pos, rot, gripper individual losses
            if self.probabilistic:
                pos_acts, rot_acts, grip_act = tf.split(plan_policy.sample(), action_breakdown, -1)
            else:
                pos_acts, rot_acts, grip_act = tf.split(plan_policy, action_breakdown, -1)
            loss = act_loss + reg_loss * beta
            true_pos_acts, true_rot_acts, true_grip_act = tf.split(actions, action_breakdown, -1)

            # Validation Metrics
            self.metrics['valid_reg_loss'].update_state(reg_loss)
            self.metrics['valid_act_with_enc_loss'].update_state(act_enc_loss)
            self.metrics['valid_act_with_plan_loss'].update_state(act_plan_loss)
            self.metrics['valid_position_loss'].update_state(self.compute_MAE(true_pos_acts, pos_acts, mask, seq_lens))
            self.metrics['valid_max_position_loss'](true_pos_acts, pos_acts, mask)
            self.metrics['valid_rotation_loss'].update_state(self.compute_MAE(true_rot_acts, rot_acts, mask, seq_lens))
            self.metrics['valid_max_rotation_loss'](true_rot_acts, rot_acts, mask)
            self.metrics['valid_gripper_loss'].update_state(self.compute_MAE(true_grip_act, grip_act, mask, seq_lens))

        self.metrics['valid_loss'].update_state(loss)
        if self.gcbc:
            return loss
        else:
            return loss, z_enc, z_plan

    @tf.function
    def distributed_train_step(self, dataset_inputs, beta, prev_global_grad_norm):
        """Run train_step on every replica and reduce the losses to a mean."""
        per_replica_losses = self.distribute_strategy.run(self.train_step,
                                                          args=(dataset_inputs, beta, prev_global_grad_norm))
        losses = self.distribute_strategy.reduce(ReduceOp.MEAN, per_replica_losses, axis=None)
        return losses

    @tf.function
    def distributed_test_step(self, dataset_inputs, beta):
        """Run test_step on every replica; reduce losses, return replica-0 latents (non-gcbc)."""
        if self.gcbc:
            per_replica_losses = self.distribute_strategy.run(self.test_step, args=(dataset_inputs, beta))
            losses = self.distribute_strategy.reduce(ReduceOp.MEAN, per_replica_losses, axis=None)
            return losses
        else:
            per_replica_losses, ze, zp = self.distribute_strategy.run(self.test_step, args=(dataset_inputs, beta))
            losses = self.distribute_strategy.reduce(ReduceOp.MEAN, per_replica_losses, axis=None)
            return losses, ze.values[0], zp.values[0]

    def save_weights(self, path, config=None, run_id=None, step=""):
        """Save model weights (+ optional config json and optimizer state) under `path`."""
        os.makedirs(path, exist_ok=True)
        # Save the config as json
        if config is not None:
            print('Saving training config...')
            with open(f'{path}/config.json', 'w') as f:
                d = vars(config)
                d['run_id'] = run_id
                json.dump(d, f)
        # save timestepped version might be better to save timestepped versions within subfolders?
        # print('Saving model weights...')
        # if step != "":
        #     self.actor.save_weights(f'{path}/actor_{str(step)}.h5')
        #     if not self.gcbc:
        #         self.encoder.save_weights(f'{path}/encoder_{str(step)}.h5')
        #         self.planner.save_weights(f'{path}/planner_{str(step)}.h5')
        # save the latest version
        self.actor.save_weights(f'{path}/actor.h5')
        if not self.gcbc:
            self.encoder.save_weights(f'{path}/encoder.h5')
            self.planner.save_weights(f'{path}/planner.h5')
        os.makedirs(path + '/optimizers', exist_ok=True)
        np.save(f'{path}/optimizers/optimizer.npy', self.global_optimizer.get_weights())  # save the optimizer state
        # np.save(f'{path}/optimizers/actor_optimizer.npy', self.actor_optimizer.get_weights())
        # if not self.gcbc:
        #     np.save(f'{path}/optimizers/encoder_optimizer.npy', self.encoder_optimizer.get_weights())
        #     np.save(f'{path}/optimizers/planner_optimizer.npy', self.planner_optimizer.get_weights())

    def load_weights(self, path, with_optimizer=False, step=""):
        """Load model weights from `path`; optionally restore the global optimizer state."""
        # IMO better to load timestepped version from subfolders - Todo
        self.actor.load_weights(f'{path}/actor.h5')
        if not self.gcbc:
            self.encoder.load_weights(f'{path}/encoder.h5')
            self.planner.load_weights(f'{path}/planner.h5')
        if with_optimizer:
            # self.load_optimizer_state(self.actor_optimizer, f'{path}/optimizers/actor_optimizer.npy', self.actor.trainable_variables)
            self.load_optimizer_state(self.global_optimizer, f'{path}/optimizers/optimizer.npy',
                                      self.actor.trainable_variables + self.encoder.trainable_variables + self.planner.trainable_variables)
            # if not self.gcbc:
            #     self.load_optimizer_state(self.encoder_optimizer, f'{path}/optimizers/encoder_optimizer.npy', self.encoder.trainable_variables)
            #     self.load_optimizer_state(self.planner_optimizer, f'{path}/optimizers/planner_optimizer.npy', self.planner.trainable_variables)

    def load_optimizer_state(self, optimizer, load_path, trainable_variables):
        """Restore optimizer slot variables from an .npy file.

        The optimizer's slots are lazily created, so a dummy zero-gradient
        step is run first (inside the distribute strategy) to materialise
        them before set_weights can be called.
        """
        def optimizer_step():
            # need to do this to initialize the optimiser
            # dummy zero gradients
            zero_grads = [tf.zeros_like(w) for w in trainable_variables]
            # save current state of variables
            saved_vars = [tf.identity(w) for w in trainable_variables]
            # Apply gradients which don't do anything
            optimizer.apply_gradients(zip(zero_grads, trainable_variables))
            # Reload variables
            [x.assign(y) for x, y in zip(trainable_variables, saved_vars)]
            return 0.0

        @tf.function
        def distributed_opt_step():
            '''
            Only used for optimizer checkpointing - we need to run a pass to
            initialise all the optimizer weights. Can't use restore as colab
            TPUs don't have a local filesystem.
            '''
            per_replica_losses = self.distribute_strategy.run(optimizer_step, args=())
            return self.distribute_strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica_losses, axis=None)

        # Load optimizer weights
        opt_weights = np.load(load_path, allow_pickle=True)
        # init the optimiser
        distributed_opt_step()
        # Set the weights of the optimizer
        optimizer.set_weights(opt_weights)


# class LFPTrainer_v2():
#     nll_action_loss = lambda y, p_y: tf.reduce_sum(-p_y.log_prob(y), axis=2)
#     mae_action_loss = tf.keras.losses.MeanAbsoluteError(reduction=tf.keras.losses.Reduction.NONE)
#     mse_action_loss = tf.keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)
#     def __init__(self, optimizer, global_batch_size):
#         self.optimizer = optimizer
#         self.global_batch_size = global_batch_size
#         self.train_loss, self.valid_loss, self.actor_grad_norm, self.encoder_grad_norm, self.planner_grad_norm, \
#         self.actor_grad_norm_clipped, self.encoder_grad_norm_clipped, self.planner_grad_norm_clipped, self.global_grad_norm, \
#         self.test, self.test2, self.train_act_with_enc_loss, self.train_act_with_plan_loss, self.valid_act_with_enc_loss, self.valid_act_with_plan_loss,\
#         self.train_reg_loss, self.valid_reg_loss, self.valid_position_loss, self.valid_max_position_loss, self.valid_rotation_loss, self.valid_max_rotation_loss, \
#         self.valid_gripper_loss = lfp.metrics.create_metrics()
#     def compute_loss(self, labels, predictions, mask, seq_lens, weightings=None):
#         if config['num_distribs'] is not None:
#             per_example_loss = self.nll_action_loss(labels, predictions) * mask
#         else:
#             per_example_loss = self.mae_action_loss(labels, predictions) * mask
#         per_example_loss = tf.reduce_sum(per_example_loss, axis=1) / seq_lens # take mean along the timestep
#         return tf.nn.compute_average_loss(per_example_loss, global_batch_size=self.global_batch_size)
#     def compute_MAE(self, labels, predictions, mask, seq_lens, weightings=None):
#         per_example_loss = self.mae_action_loss(labels, predictions) * mask
#         per_example_loss = tf.reduce_sum(per_example_loss, axis=1) / seq_lens # take mean along the timestep
#         return tf.nn.compute_average_loss(per_example_loss, global_batch_size=self.global_batch_size)
#     def compute_regularisation_loss(self, plan, encoding):
#         # Reverse KL(enc|plan): we want planner to map to encoder (weighted by encoder)
#         reg_loss = self.tfd.kl_divergence(encoding, plan) # + KL(plan, encoding)
#         return tf.nn.compute_average_loss(reg_loss, global_batch_size=self.global_batch_size)
class Generator(tf.keras.Model):
    """Generator whose depth scales with the configured image size.

    Dense -> 4x4 feature map, then (log2(image_size) - 3) upsample + conv +
    BatchNormalization + ReLU blocks that halve the channel count each step,
    finishing with a final upsample + tanh conv to 3 channels in [-1, 1].
    NOTE(review): output resolution is 4 * 2^(blocks + 1); for the default
    image_size=152 that yields 128, not 152 — confirm the size handling.
    NOTE(review): channel widths are derived from `batch_size`, which looks
    like it is being used as a width multiplier — confirm intent.
    """

    def __init__(self, model_parameters=None):
        super().__init__(name='generator')
        # layers
        if model_parameters is None:
            model_parameters = {
                'lr': 0.0001,
                'beta1': 0,
                'batch_size': 64,
                'latent_dim': 128,
                'image_size': 152
            }
        self.model_parameters = model_parameters
        self.batch_size = model_parameters['batch_size']
        self.noise_size = model_parameters['latent_dim']
        dim = 8 * self.batch_size
        init = RandomNormal(stddev=0.02)
        self.dense_1 = Dense(dim * 4 * 4, use_bias=False, input_shape=(self.noise_size,))
        self.batchNorm1 = BatchNormalization()
        self.leaky_1 = ReLU()
        self.reshape_1 = Reshape((4, 4, dim))
        self.layers_blocks = list()
        number_of_layers_needed = int(math.log(model_parameters['image_size'], 2)) - 3
        for i in range(number_of_layers_needed):
            # BUGFIX: '/=' yields a float, but Conv2D's `filters` must be an
            # int (newer TF versions reject floats). Use '//=' instead.
            dim //= 2
            self.layers_blocks.append([
                UpSampling2D((2, 2), interpolation='nearest'),
                Conv2D(dim, (5, 5), strides=(1, 1), padding="same",
                       use_bias=False, kernel_initializer=init),
                BatchNormalization(),
                ReLU(),
            ])
        self.up_toRGB = UpSampling2D((2, 2), interpolation='nearest')
        self.conv_toRGB = Conv2D(3, (5, 5), activation='tanh', strides=(1, 1), padding="same",
                                 use_bias=False, kernel_initializer=init)
        self.optimizer = Adam(learning_rate=model_parameters['lr'],
                              beta_1=model_parameters['beta1'], beta_2=0.9)

    def call(self, input_tensor, training=True):
        ## Definition of Forward Pass
        x = self.leaky_1(self.batchNorm1(self.reshape_1(self.dense_1(input_tensor)), training=training))
        for i in range(len(self.layers_blocks)):
            layers_block = self.layers_blocks[i]
            for layer in layers_block:
                x = layer(x, training=training)
        x = self.conv_toRGB(self.up_toRGB(x))
        return x

    def generate_noise(self, batch_size, random_noise_size):
        """Sample a standard-normal latent batch of shape [batch_size, random_noise_size]."""
        return tf.random.normal([batch_size, random_noise_size])

    def compute_loss(self, y_true, y_pred):
        """ Wasserstein loss """
        return backend.mean(y_true * y_pred)

    def compute_loss_class(self, y_true, y_pred, class_wanted, class_prediction):
        """ Wasserstein loss - prob of classifier get it right """
        k = 10  # hiper-parameter: weight of the classification term
        return backend.mean(y_true * y_pred) + (k * categorical_crossentropy(class_wanted, class_prediction))

    def compute_loss_divergence(self, y_true, y_pred, class_wanted, class_prediction):
        """Wasserstein loss plus a KL-divergence term on the classifier output."""
        k = 10  # hiper-parameter: weight of the divergence term
        kl = KLDivergence()
        return backend.mean(y_true * y_pred) + (k * kl(class_wanted, class_prediction))

    def backPropagate(self, gradients, trainable_variables):
        """Apply pre-computed gradients with this generator's own Adam optimizer."""
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))

    def save_optimizer(self):
        # Persist the optimizer slot variables so training can be resumed.
        weights = self.optimizer.get_weights()
        data_access.store_weights_in_file('g_optimizer_weights', weights)

    def set_seed(self):
        # Fix a reusable latent batch so sample grids are comparable across epochs.
        self.seed = tf.random.normal([self.batch_size, self.noise_size])
        data_access.store_seed_in_file('seed', self.seed)

    def load_seed(self):
        self.seed = data_access.load_seed_from_file('seed')