def with_global_norm_clipping(
    optimizer: optimizers.Optimizer = gin.REQUIRED,
    clip_norm: Optional[float] = gin.REQUIRED) -> optimizers.Optimizer:
  """Patch `optimizer` so its gradients are clipped by global norm.

  Monkey-patches the private `_compute_gradients` method of the given
  optimizer instance so every computed gradient list is passed through
  `tf.clip_by_global_norm` before being returned. `get_config` is also
  patched to raise, since the patched instance is not serializable.

  Args:
    optimizer: The optimizer instance to wrap (mutated in place).
    clip_norm: Global norm to clip to, or None to disable clipping.

  Returns:
    The same optimizer instance, patched.
  """
  original_compute_gradients = optimizer._compute_gradients

  def _compute_gradients(
      self: optimizers.Optimizer,
      loss: Callable[[], tf.Tensor],
      var_list: Iterable[tf.Variable],
      grad_loss: Optional[tf.Tensor] = None
  ) -> List[Tuple[Optional[tf.Tensor], tf.Variable]]:
    grads_and_vars = cast(
        List[Tuple[Optional[tf.Tensor], tf.Variable]],
        original_compute_gradients(loss, var_list, grad_loss))
    if clip_norm is None:
      return grads_and_vars
    grads, var_list = zip(*grads_and_vars)
    # BUG FIX: tf.clip_by_global_norm returns a (clipped_list, global_norm)
    # tuple; the previous code zipped that 2-tuple with the variables,
    # producing meaningless (list, var) pairs. Unpack and discard the norm.
    grads, _ = tf.clip_by_global_norm(list(grads), clip_norm)
    return list(zip(grads, var_list))

  def get_config(self: optimizers.Optimizer) -> NoReturn:
    # The patched optimizer cannot be round-tripped through a config.
    raise NotImplementedError

  optimizer._compute_gradients = MethodType(_compute_gradients, optimizer)
  optimizer.get_config = MethodType(get_config, optimizer)
  return optimizer
def train_step(model_with_loss: Loss, optimizer: Optimizer, x_batch,
               y_batch) -> Dict[str, tf.Tensor]:
    """Run one optimization step on a single batch.

    Runs the model forward under a gradient tape, reads the loss from the
    model's metric dictionary, computes a top-1 error indicator per example,
    then applies gradients to the model's parameters.

    Args:
        model_with_loss: Callable model that populates `metric_values`
            (must contain "_loss" and "outputs") when invoked.
        optimizer: Optimizer used to apply the gradients.
        x_batch: Input batch.
        y_batch: One-hot labels (argmax over axis 1 is the class id).

    Returns:
        The model's metric dictionary, augmented with a per-example
        "error" tensor (1.0 where the prediction is wrong).
    """
    # A non-persistent tape suffices: gradient() is called exactly once,
    # and a persistent tape would hold resources until garbage-collected.
    with tf.GradientTape() as tape:  # type: ignore
        _ = model_with_loss(x_batch, y_batch)
        loss_value = model_with_loss.metric_values["_loss"]
        loss_mean = tf.reduce_mean(loss_value)
    metric_values = model_with_loss.metric_values
    # 1.0 where the predicted class differs from the label, else 0.0.
    error = tf.cast(
        tf.argmax(metric_values["outputs"], axis=1) != tf.argmax(y_batch,
                                                                 axis=1),
        tf.float32)
    metric_values["error"] = error
    # NOTE(review): reset() is called before the returned dict is consumed —
    # assumes reset() does not clear `metric_values` in place; confirm.
    model_with_loss.reset()
    grads = tape.gradient(loss_mean, model_with_loss.parameters())
    optimizer.apply_gradients(zip(grads, model_with_loss.parameters()))
    return metric_values
def train_one_step(model: Model, optimizer: optimizers.Optimizer, x,
                   y) -> Tuple[tf.Tensor, tf.Tensor]:
    """Run a single supervised training step.

    Args:
        model: Model mapping inputs to logits.
        optimizer: Optimizer used to apply the gradients.
        x: Input batch.
        y: Target labels.

    Returns:
        A (loss, accuracy) tuple for this batch.
    """
    # FIX: the return annotation was the tuple literal (tf.Tensor, tf.Tensor),
    # which is not a valid type hint; use Tuple[...] instead.
    with tf.GradientTape() as tape:
        logits = model(x)
        loss = compute_loss(logits, y)
    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    accuracy = compute_accuracy(logits, y)
    return loss, accuracy
def train_model(
    model: Union[Model, NaiveModel],
    optimizer: Optimizer,
    loss_fn: Loss,
    batch_streamer_train: BatchStreamer,
    batch_streamer_test: BatchStreamer,
    epochs: int = 10,
) -> Union[Model, NaiveModel]:
    """
    Train the previously built model as described in the paper.

    For each epoch, iterates the training stream (applying gradients unless
    the model is a NaiveModel, which has no trainable step), then evaluates
    the test stream without training. Per-epoch mean losses are printed.

    Returns the (now trained) model.
    """
    loss_per_epoch_train = []
    loss_per_epoch_test = []
    for epoch in range(epochs):
        print(f'Current Epoch: {epoch}')
        # loss for each mini-batch
        loss_array_train = []
        loss_array_test = []
        # TRAIN
        batch_streamer_train.reset()  # ensure that all iterators are reset
        for H, F, C, P in tqdm(batch_streamer_train):
            with GradientTape() as tape:
                # generate and evaluate predictions
                sigmoid_proba = model([H, F, C], training=True)
                loss_value = loss_fn(P, sigmoid_proba)
                loss_array_train.append(loss_value)
            # NaiveModel has nothing to optimize; skip the gradient step.
            if not isinstance(model, NaiveModel):
                grads = tape.gradient(loss_value, model.trainable_weights)
                optimizer.apply_gradients(zip(grads, model.trainable_weights))
        mean_loss_current_epoch_train = np.array(loss_array_train).mean()
        loss_per_epoch_train.append(mean_loss_current_epoch_train)
        print(f'Avg loss train: {mean_loss_current_epoch_train}')
        # TEST
        batch_streamer_test.reset()
        for H, F, C, P in tqdm(batch_streamer_test):
            # generate and evaluate predictions
            sigmoid_proba = model([H, F, C], training=False)
            loss_value = loss_fn(P, sigmoid_proba)
            loss_array_test.append(loss_value)
        mean_loss_current_epoch_test = np.array(loss_array_test).mean()
        loss_per_epoch_test.append(mean_loss_current_epoch_test)
        # BUG FIX: this previously printed "Avg loss train" for the test loss.
        print(f'Avg loss test: {mean_loss_current_epoch_test}')
    return model
def improve_policy(policy: Model, env: gym.Env,
                   optimizer: optimizers.Optimizer, episodes=100):
    """Improve the policy utilizing the policy gradient algorithm."""
    for _ in range(episodes):
        # Record one full episode under the tape so the log-probabilities
        # stay differentiable.
        with tf.GradientTape() as tape:
            rewards, log_probs = play_episode(policy, env)
            returns = normalize(discount(rewards, gamma=0.99))
            policy_loss = sum(-tf.squeeze(log_probs) * returns)
        trainable = policy.trainable_variables
        gradients = tape.gradient(policy_loss, trainable)
        optimizer.apply_gradients(zip(gradients, trainable))
def train_step(model: keras.Model, optimizer: optimizers.Optimizer,
               images: np.ndarray, labels: np.ndarray,
               loss_function: keras.losses.Loss,
               accuracy_metric: keras.metrics.Metric,
               loss_metric: tf.keras.metrics.Metric):
    """Run one supervised step, update running metrics, return the loss."""
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        batch_loss = loss_function(labels, predictions)
        # Fold this batch into the running accuracy/loss metrics.
        accuracy_metric(labels, predictions)
        loss_metric(batch_loss)
    gradients = tape.gradient(batch_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return batch_loss
def _optimize(
    self,
    train_vars: List[Variable],
    loss: Tensor,
    optim: Optimizer,
    tape: tf.GradientTape,
) -> None:
    """Optimize the variables.

    When mixed precision is enabled, the loss is scaled before taking
    gradients and the gradients are unscaled again before being applied,
    so small gradients survive the float16 dynamic range.
    """
    scaled_loss = optim.get_scaled_loss(loss) if self.mixed_precision else loss
    gradients = tape.gradient(scaled_loss, train_vars)
    if self.mixed_precision:
        gradients = optim.get_unscaled_gradients(gradients)
    optim.apply_gradients(zip(gradients, train_vars))
def get_optimizer(optimizer_name):
    """Look up a PyTorch optimizer class by its class name.

    Args:
        optimizer_name: Name of a direct subclass of
            `torch.optim.Optimizer`, e.g. "Adam" or "SGD".

    Returns:
        The optimizer class (not an instance).

    Raises:
        ValueError: If no optimizer with that name exists. (Previously an
            `assert`, which is stripped under `python -O`.)
    """
    optimizers_dict = {
        subcl.__name__: subcl
        for subcl in Optimizer.__subclasses__()
    }
    if optimizer_name not in optimizers_dict:
        raise ValueError("Optimizer name is not in PyTorch optimizers")
    return optimizers_dict[optimizer_name]
def train(model: models.Model,
          optimizer: optimizers.Optimizer,
          train_instances: List[Dict[str, np.ndarray]],
          validation_sentences: List[List[str]],
          validation_trees: List[DependencyTree],
          parsing_system: ParsingSystem,
          vocabulary: Vocabulary,
          num_epochs: int,
          batch_size: int) -> Dict[str, Union[models.Model, str]]:
    """
    Trains a model on the given training instances as configured and returns
    the trained model.
    """
    print("\nGenerating Training batches:")
    batches = generate_batches(train_instances, batch_size)
    batches = [(batch["inputs"], batch["labels"]) for batch in batches]

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch}")
        # Training Epoch
        running_loss = 0
        progress = tqdm(batches)
        for step, (batch_inputs, batch_labels) in enumerate(progress):
            with tf.GradientTape() as tape:
                outputs = model(inputs=batch_inputs, labels=batch_labels)
                batch_loss = outputs["loss"]
            gradients = tape.gradient(batch_loss, model.trainable_variables)
            # Clip each gradient tensor individually to norm 5.
            clipped = [tf.clip_by_norm(grad, 5) for grad in gradients]
            optimizer.apply_gradients(zip(clipped, model.trainable_variables))
            running_loss += batch_loss
            progress.set_description(
                "Average training loss: %.2f " % (running_loss / (step + 1)),
                refresh=False)

    # Validation evaluation
    print("Evaluating validation performance:")
    predicted_trees = predict(model, validation_sentences, parsing_system,
                              vocabulary)
    evaluation_report = evaluate(validation_sentences, parsing_system,
                                 predicted_trees, validation_trees)
    print("\n" + evaluation_report)

    return {"model": model, "evaluation_report": evaluation_report}
def _pre_compile(self, optimizer: Optimizer) -> Tuple[Optimizer, Union[bool, None]]:
    """
    Method to call before calling 'compile' to setup the run and inputs for
    using horovod.

    :param optimizer: The optimizer to compile. It will be wrapped in
                      horovod's distributed optimizer:
                      'hvd.DistributedOptimizer'.

    :return: The updated parameters:
             [0] = Wrapped optimizer.
             [1] = The 'experimental_run_tf_function' parameter for 'compile'
                   kwargs or 'None' if horovod should not be used.

    :raise MLRunInvalidArgumentError: In case the optimizer was passed as a
                                      string.
    """
    # Horovod not initialized — nothing to do.
    if self._hvd is None:
        return optimizer, None

    # Validate the optimizer input:
    if isinstance(optimizer, str):
        raise mlrun.errors.MLRunInvalidArgumentError(
            "When using horovod, the compile method is expecting an initialized optimizer instance and not a "
            "string."
        )

    # Setup the device to run on GPU if available:
    use_gpu = (
        tf.config.experimental.list_physical_devices("GPU")
        and os.environ.get("CUDA_VISIBLE_DEVICES", "None") != "-1"
    )
    if use_gpu:
        # Pin each GPU to a single process:
        gpu_devices = tf.config.experimental.list_physical_devices("GPU")
        if gpu_devices:
            tf.config.experimental.set_visible_devices(
                gpu_devices[self._hvd.local_rank()], "GPU"
            )
            print(
                f"Horovod worker #{self._hvd.rank()} is using GPU #{self._hvd.local_rank()}"
            )
    else:
        # No GPUs were found, or 'use_cuda' was false:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
        print(f"Horovod worker #{self._hvd.rank()} is using CPU")

    # Adjust learning rate based on the number of GPUs:
    optimizer.lr = optimizer.lr * self._hvd.size()

    # Wrap the optimizer in horovod's distributed optimizer.
    optimizer = self._hvd.DistributedOptimizer(optimizer)

    # Compile with `experimental_run_tf_function=False` so Tensorflow uses
    # the distributed optimizer to compute the gradients:
    experimental_run_tf_function = False

    return optimizer, experimental_run_tf_function
def train_step(
        inputs: tuple[Tensor, Tensor | float],
        dynamics: Dynamics,
        optimizer: Optimizer,
        loss_fn: Callable = LatticeLoss,
) -> tuple[Tensor, dict]:
    """Run one HMC training step and return (projected state, metrics)."""
    x_init, beta = inputs
    with tf.GradientTape() as tape:
        x_out, tmetrics = dynamics((to_u1(x_init), tf.constant(beta)))
        # The proposed state is only needed for the loss; drop it from
        # the metrics dict.
        x_prop = tmetrics.pop('mc_states').proposed.x
        loss = loss_fn(x_init=x_init, x_prop=x_prop, acc=tmetrics['acc'])
    gradients = tape.gradient(loss, dynamics.trainable_variables)
    optimizer.apply_gradients(zip(gradients, dynamics.trainable_variables))
    # Loss first, then the remaining dynamics metrics.
    metrics = {'loss': loss, **tmetrics}
    return to_u1(x_out), metrics
def train(model: models.Model,
          optimizer: optimizers.Optimizer,
          train_instances: List[Dict[str, np.ndarray]],
          validation_instances: List[Dict[str, np.ndarray]],
          num_epochs: int,
          batch_size: int,
          serialization_dir: str = None) -> Dict:
    """
    Trains a model on the given training instances as configured and stores
    the relevant files in serialization_dir. Returns the model and some
    important metrics.

    BUG FIXES vs. previous version:
      * best-model checkpointing called `classifier.save_weights(...)` on an
        undefined name (`NameError` on the first improvement); it now saves
        `model`.
      * the return annotation claimed `tf.keras.Model` but a dict of
        {"model", "metrics"} is returned.
    """
    print("\nGenerating Training batches:")
    train_batches = generate_batches(train_instances, batch_size)
    print("Generating Validation batches:")
    validation_batches = generate_batches(validation_instances, batch_size)
    # Labels are popped out so the remaining dicts can be splatted into the
    # model call as keyword arguments.
    train_batch_labels = [
        batch_inputs.pop("labels") for batch_inputs in train_batches
    ]
    validation_batch_labels = [
        batch_inputs.pop("labels") for batch_inputs in validation_batches
    ]

    tensorboard_logs_path = os.path.join(serialization_dir, 'tensorboard_logs')
    tensorboard_writer = tf.summary.create_file_writer(tensorboard_logs_path)

    best_epoch_validation_accuracy = float("-inf")
    best_epoch_validation_loss = float("inf")
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch}")

        # --- Training epoch ---
        total_training_loss = 0
        total_correct_predictions, total_predictions = 0, 0
        generator_tqdm = tqdm(list(zip(train_batches, train_batch_labels)))
        for index, (batch_inputs, batch_labels) in enumerate(generator_tqdm):
            with tf.GradientTape() as tape:
                logits = model(**batch_inputs, training=True)["logits"]
                loss_value = cross_entropy_loss(logits, batch_labels)
            grads = tape.gradient(loss_value, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            total_training_loss += loss_value
            batch_predictions = np.argmax(
                tf.nn.softmax(logits, axis=-1).numpy(), axis=-1)
            total_correct_predictions += (
                batch_predictions == batch_labels).sum()
            total_predictions += batch_labels.shape[0]
            description = ("Average training loss: %.2f Accuracy: %.2f "
                           % (total_training_loss / (index + 1),
                              total_correct_predictions / total_predictions))
            generator_tqdm.set_description(description, refresh=False)
        average_training_loss = total_training_loss / len(train_batches)
        training_accuracy = total_correct_predictions / total_predictions

        # --- Validation epoch (no gradient updates) ---
        total_validation_loss = 0
        total_correct_predictions, total_predictions = 0, 0
        generator_tqdm = tqdm(
            list(zip(validation_batches, validation_batch_labels)))
        for index, (batch_inputs, batch_labels) in enumerate(generator_tqdm):
            logits = model(**batch_inputs, training=False)["logits"]
            loss_value = cross_entropy_loss(logits, batch_labels)
            total_validation_loss += loss_value
            batch_predictions = np.argmax(
                tf.nn.softmax(logits, axis=-1).numpy(), axis=-1)
            total_correct_predictions += (
                batch_predictions == batch_labels).sum()
            total_predictions += batch_labels.shape[0]
            description = ("Average validation loss: %.2f Accuracy: %.2f "
                           % (total_validation_loss / (index + 1),
                              total_correct_predictions / total_predictions))
            generator_tqdm.set_description(description, refresh=False)
        average_validation_loss = total_validation_loss / len(
            validation_batches)
        validation_accuracy = total_correct_predictions / total_predictions

        # Checkpoint when validation accuracy improves.
        if validation_accuracy > best_epoch_validation_accuracy:
            print("Model with best validation accuracy so far: %.2f. "
                  "Saving the model." % (validation_accuracy))
            model.save_weights(os.path.join(serialization_dir, 'model.ckpt'))
            best_epoch_validation_loss = average_validation_loss
            best_epoch_validation_accuracy = validation_accuracy

        with tensorboard_writer.as_default():
            tf.summary.scalar("loss/training", average_training_loss,
                              step=epoch)
            tf.summary.scalar("loss/validation", average_validation_loss,
                              step=epoch)
            tf.summary.scalar("accuracy/training", training_accuracy,
                              step=epoch)
            tf.summary.scalar("accuracy/validation", validation_accuracy,
                              step=epoch)
            tensorboard_writer.flush()

    metrics = {
        "training_loss": float(average_training_loss),
        "validation_loss": float(average_validation_loss),
        "training_accuracy": float(training_accuracy),
        "best_epoch_validation_accuracy":
            float(best_epoch_validation_accuracy),
        "best_epoch_validation_loss": float(best_epoch_validation_loss)
    }
    print("Best epoch validation accuracy: %.4f, validation loss: %.4f"
          % (best_epoch_validation_accuracy, best_epoch_validation_loss))
    return {"model": model, "metrics": metrics}
def __init__(self,
             encoder: Encoder,
             decoder: Decoder,
             optimizer: Optimizer = None,
             loss_function: Loss = None,
             num_words: int = 10000,
             batch_size: int = 512,
             test_ratio: float = 0.3,
             max_words_in_sentence: int = 20,
             sentence_predictor: SentencePredictor = None,
             checkpoint_dir: str = "../../../data/german-english/seq2seq-checkpoints/",
             checkpoint_steps: int = 1,
             restore: bool = True):
    """Build the seq2seq trainer, wiring defaults and (optionally) restoring
    the latest checkpoint plus its pickled tokenizers."""
    # Default optimizer: Adam with every hyperparameter held in a
    # tf.Variable so that checkpoint restore can overwrite them.
    if optimizer is None:
        optimizer = Adam(learning_rate=tf.Variable(0.01),
                         beta_1=tf.Variable(0.9),
                         beta_2=tf.Variable(0.999),
                         epsilon=tf.Variable(1e-7))
        # Touching `iterations` forces the step counter variable to exist
        # before the checkpoint is restored — presumably intentional; verify.
        optimizer.iterations
        optimizer.decay = tf.Variable(0.0)
    if loss_function is None:
        loss_function = SparseCategoricalCrossentropy(reduction="none")
    if sentence_predictor is None:
        sentence_predictor = GreedyPredictor(
            max_words_in_sentence=max_words_in_sentence)

    self.optimizer = optimizer
    self.loss_function = loss_function
    self.encoder = encoder
    self.decoder = decoder
    self.num_words = num_words
    self.batch_size = batch_size
    self.test_ratio = test_ratio
    self.loss_per_epoch = []
    self.input_tokenizer, self.output_tokenizer = None, None
    self.input_word_index, self.output_word_index = None, None
    self.max_words_in_sentence = max_words_in_sentence
    self.sentence_predictor = sentence_predictor
    self.checkpoint_dir = checkpoint_dir
    self.checkpoint_steps = checkpoint_steps

    # Checkpoint covers both sub-models and the optimizer state.
    self.checkpoint = tf.train.Checkpoint(encoder=self.encoder,
                                          decoder=self.decoder,
                                          optimizer=self.optimizer)
    self.checkpoint_manager = tf.train.CheckpointManager(
        self.checkpoint, self.checkpoint_dir, max_to_keep=3)

    # Make sure the checkpoint/tokenizer directories exist.
    if not os.path.exists(self.checkpoint_dir):
        os.mkdir(self.checkpoint_dir)
    if not os.path.exists(self.checkpoint_dir + "/tokenizer"):
        os.mkdir(self.checkpoint_dir + "/tokenizer")

    if restore and len(os.listdir(self.checkpoint_dir)) > 1:
        # NOTE(review): restores the second-to-last checkpoint
        # (checkpoints[-2]) rather than the newest — confirm this is
        # deliberate.
        self.status = self.checkpoint.restore(
            self.checkpoint_manager.checkpoints[-2]).expect_partial()
        with open(self.checkpoint_dir + "/tokenizer/input.pickle",
                  "rb") as f:
            self.input_tokenizer = pickle.load(f)
        with open(self.checkpoint_dir + "/tokenizer/output.pickle",
                  "rb") as f:
            self.output_tokenizer = pickle.load(f)
        self.input_word_index = self.input_tokenizer.word_index
        self.output_word_index = self.output_tokenizer.word_index
def train(model: models.Model,
          optimizer: optimizers.Optimizer,
          train_batches: List,
          validation_batches: List,
          num_epochs: int,
          serialization_dir: str = None,
          config: Dict = None,
          vocab: Dict = None) -> tf.keras.Model:
    """Train `model`, tracking per-epoch losses/accuracies for plotting,
    logging to TensorBoard, and saving the best model by validation
    accuracy (when a serialization directory is given)."""
    tensorboard_logs_path = os.path.join(serialization_dir, 'tensorboard_logs')
    tensorboard_writer = tf.summary.create_file_writer(tensorboard_logs_path)

    best_epoch_validation_accuracy = float("-inf")
    best_epoch_validation_loss = float("inf")
    # DFN performs better with a higher regularization lambda
    regularization_lambda = 1e-5 if model.__class__.__name__ != 'DFN' else 1e-3

    training_loss_plot = []
    validation_loss_plot = []
    training_accuracy_plot = []
    validation_accuracy_plot = []

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch}")

        # ---- training pass ----
        total_training_loss = 0
        total_correct_predictions, total_predictions = 0, 0
        progress = tqdm(train_batches)
        for index, (batch_inputs, batch_labels) in enumerate(progress):
            with tf.GradientTape() as tape:
                logits = model(batch_inputs, training=True)
                loss_value = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        batch_labels, logits))
                # L2 penalty over all model weights.
                loss_value += regularization_lambda * tf.reduce_sum(
                    [tf.nn.l2_loss(x) for x in model.weights])
            gradients = tape.gradient(loss_value, model.trainable_variables)
            optimizer.apply_gradients(
                zip(gradients, model.trainable_variables))
            total_training_loss += loss_value
            batch_predictions = tf.math.argmax(
                tf.nn.softmax(logits, axis=-1), axis=-1)
            total_correct_predictions += tf.math.reduce_sum(
                tf.cast(batch_predictions == batch_labels, dtype=tf.int64))
            total_predictions += batch_labels.get_shape()[0]
            progress.set_description(
                "Average training loss: %.2f Accuracy: %.2f "
                % (total_training_loss / (index + 1),
                   total_correct_predictions / total_predictions),
                refresh=False)
        average_training_loss = total_training_loss / len(train_batches)
        training_accuracy = total_correct_predictions / total_predictions

        # ---- validation pass (no regularization, no updates) ----
        total_validation_loss = 0
        total_correct_predictions, total_predictions = 0, 0
        progress = tqdm(validation_batches)
        for index, (batch_inputs, batch_labels) in enumerate(progress):
            logits = model(batch_inputs, training=False)
            loss_value = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    batch_labels, logits))
            total_validation_loss += loss_value
            batch_predictions = tf.math.argmax(
                tf.nn.softmax(logits, axis=-1), axis=-1)
            total_correct_predictions += tf.math.reduce_sum(
                tf.cast(batch_predictions == batch_labels, dtype=tf.int64))
            total_predictions += batch_labels.get_shape()[0]
            progress.set_description(
                "Average validation loss: %.2f Accuracy: %.2f "
                % (total_validation_loss / (index + 1),
                   total_correct_predictions / total_predictions),
                refresh=False)
        average_validation_loss = total_validation_loss / len(
            validation_batches)
        validation_accuracy = total_correct_predictions / total_predictions

        # Track (and optionally persist) the best model so far.
        if validation_accuracy > best_epoch_validation_accuracy:
            if serialization_dir is not None:
                print("Model with best validation accuracy so far: %.2f. "
                      "Saving the model." % (validation_accuracy))
                save_model(model, config, vocab, serialization_dir)
            best_epoch_validation_loss = average_validation_loss
            best_epoch_validation_accuracy = validation_accuracy

        with tensorboard_writer.as_default():
            tf.summary.scalar(
                "{} loss/training".format(model.__class__.__name__),
                average_training_loss, step=epoch)
            tf.summary.scalar(
                "{} loss/validation".format(model.__class__.__name__),
                average_validation_loss, step=epoch)
            tf.summary.scalar(
                "{} accuracy/training".format(model.__class__.__name__),
                training_accuracy, step=epoch)
            tf.summary.scalar(
                "{} accuracy/validation".format(model.__class__.__name__),
                validation_accuracy, step=epoch)
            tensorboard_writer.flush()

        training_loss_plot.append(float(average_training_loss))
        training_accuracy_plot.append(float(training_accuracy))
        validation_loss_plot.append(float(average_validation_loss))
        validation_accuracy_plot.append(float(validation_accuracy))

    metrics = {
        "training_loss": float(average_training_loss),
        "validation_loss": float(average_validation_loss),
        "training_accuracy": float(training_accuracy),
        "best_epoch_validation_accuracy":
            float(best_epoch_validation_accuracy),
        "best_epoch_validation_loss": float(best_epoch_validation_loss)
    }
    plot_data = {
        'training_loss': training_loss_plot,
        'training_accuracy': training_accuracy_plot,
        'validation_loss': validation_loss_plot,
        'validation_accuracy': validation_accuracy_plot
    }
    print("Best epoch validation accuracy: %.4f, validation loss: %.4f"
          % (best_epoch_validation_accuracy, best_epoch_validation_loss))
    return {"model": model, "metrics": metrics, 'plot_data': plot_data}
def update_weights(self, optimizer: Optimizer, network: Network, batch,
                   weight_decay: float):
    """
    First predict the values for the given observations and actions.
    Then calculate the loss between those predictions and the results of the
    MCTS stored in the replay buffer.
    Before optimizing the weights, add an L2-regularization to the loss
    (adds a penalty if the weights get too big).
    Finally update all weights in the network (all 3 models) with the given
    optimizer.
    """

    def scale_gradient(tensor, scale):
        # Keeps the forward value, but scales the gradient flowing back.
        return tensor * scale + tf.stop_gradient(tensor) * (1. - scale)

    def get_loss_callback():
        total_loss = tf.convert_to_tensor([0], dtype=float)
        for image, actions, targets in batch:
            # Prevents the computation of the image from being taken into
            # account when computing gradients.
            image = tf.stop_gradient(image, 'get_image')
            # Transform the real observation to a hidden state, then predict
            # value, reward and policy distribution for it.
            value, reward, policy_logits, hidden_state = \
                network.initial_inference(image)
            predictions = [(1.0, value, reward, policy_logits)]
            # Recurrent steps, from action and previous hidden state.
            for action in actions:
                # Build the recurrent-inference input and stop gradients
                # at the input.
                action_tensor = tf.convert_to_tensor(action.action_id,
                                                     dtype=float)
                action_plane = tf.ones(hidden_state.shape) * action_tensor
                state_with_action = tf.concat([hidden_state, action_plane],
                                              axis=3)
                state_with_action = tf.stop_gradient(
                    state_with_action, 'get_hidden_state_with_action')
                value, reward, policy_logits, hidden_state = \
                    network.recurrent_inference(state_with_action)
                hidden_state = scale_gradient(hidden_state, 0.5)
                predictions.append(
                    (1.0 / len(actions), value, reward, policy_logits))
            # Calculate the loss between the predictions and the targets.
            for prediction, target in zip(predictions, targets):
                gradient_scale, value, reward, policy_logits = prediction
                target_value, target_reward, target_policy = target
                assert value.shape == target_value.shape
                assert reward.shape == target_reward.shape
                assert policy_logits.shape == target_policy.shape
                step_loss = (
                    self.scalar_loss(value, target_value)
                    + self.scalar_loss(reward, target_reward)
                    + self.scalar_loss(policy_logits, target_policy)
                )
                total_loss += scale_gradient(step_loss, gradient_scale)
        print('Loss before l2: ', total_loss)
        # FIXME: L2 loss is way too high (sometimes even inf) — the
        # regularization below is disabled until that is resolved:
        #   for weights in network.get_weights_callback()():
        #       total_loss += weight_decay * tf.nn.l2_loss(weights)
        print('Loss after l2: ', total_loss)
        return total_loss

    # Optimize the current weights.
    optimizer.minimize(get_loss_callback, network.get_weights_callback())
    network.train_step += 1
def train_step(step: int, optimizer: Optimizer, network: Network, batch,
               weight_decay: float):
    """Run one MuZero-style training step over an unrolled trajectory batch.

    Batch is a tuple of:
        Observation (N,80,80,1) for atari or (N,4) for cartpole
        Actions (N,K+1) - k=0 is a dummy action -1
        Value Targets (N,K+1)
        Reward Targets (N,K+1)
        Policy Targets (N,K+1,A)
        Masks (N,K+1)
        Policy Masks (N,K+1)
    (The original docstring said "3-tuple" but seven elements are unpacked.)

    Returns a large tuple of metric results, gradients and per-step
    prediction/target distributions for TensorBoard logging.

    CLEANUP vs. previous version: removed the `pdb.set_trace()` debugger
    hook on NaN loss (the "Loss is NaN" print is kept) and a leftover
    `if (step + 1) % 100 == 0: print("break")` breakpoint helper.
    """
    # Running means for each loss component and for predictions/targets.
    value_loss_metric = tf.keras.metrics.Mean()
    reward_loss_metric = tf.keras.metrics.Mean()
    policy_loss_metric = tf.keras.metrics.Mean()
    value_pred_mean = tf.keras.metrics.Mean()
    reward_pred_mean = tf.keras.metrics.Mean()
    policy_pred_dist = []
    value_pred_k0_dist, value_pred_k1_dist, value_pred_kf_dist = [], [], []
    reward_pred_k0_dist, reward_pred_k1_dist, reward_pred_kf_dist = [], [], []
    value_target_mean = tf.keras.metrics.Mean()
    reward_target_mean = tf.keras.metrics.Mean()
    policy_target_dist = []
    value_target_k0_dist, value_target_k1_dist, value_target_kf_dist = [], [], []
    reward_target_k0_dist, reward_target_k1_dist, reward_target_kf_dist = [], [], []

    observations, actions, target_values, target_rewards, target_policies, \
        masks, policy_masks = batch

    # One tape per sub-network (f: prediction, g: dynamics, h: representation).
    with tf.GradientTape() as f_tape, tf.GradientTape() as g_tape, \
            tf.GradientTape() as h_tape:
        loss = 0
        K = actions.shape[1]  # unroll length (seqlen)
        for k in range(K):
            # Targets for unroll step k.
            z, u, pi, mask_, policy_mask_ = (target_values[:, k],
                                             target_rewards[:, k],
                                             target_policies[:, k],
                                             masks[:, k],
                                             policy_masks[:, k])
            # (N,) rather than (N,1)
            mask, policy_mask = tf.squeeze(mask_), tf.squeeze(policy_mask_)
            if k == 0:
                # Initial step, from the real observation.
                value, reward, policy_logits, hidden_state = \
                    network.initial_inference(observations,
                                              convert_to_scalar=False)
                gradient_scale = 1.0
            else:
                # All following steps: unroll from the previous hidden state.
                value, reward, policy_logits, hidden_state = \
                    network.recurrent_inference(hidden_state, actions[:, k],
                                                convert_to_scalar=False)
                gradient_scale = 1.0 / (K - 1)
                # Halve the gradient flowing into the dynamics function.
                # TODO: is this compatible with masking?
                hidden_state = scale_gradient(hidden_state, 0.5)

            # Value/reward masking is currently disabled (ce_loss receives
            # the mask directly); only the policy tensors are boolean-masked.
            # Be careful: if all entries are masked, boolean_mask returns a
            # length-0 tensor.
            z_masked = z
            u_masked = u
            # policy mask is the value mask rolled left by 1
            pi_masked = tf.boolean_mask(pi, policy_mask, axis=0)
            value_masked = value
            reward_masked = reward
            policy_logits_masked = tf.boolean_mask(policy_logits, policy_mask,
                                                   axis=0)

            value_loss = ce_loss(
                value_masked,
                scalar_to_support(z_masked, network.value_support_size), mask)
            reward_loss = ce_loss(
                reward_masked,
                scalar_to_support(u_masked, network.reward_support_size), mask)
            policy_loss = ce_loss(policy_logits_masked, pi_masked, mask)
            combined_loss = (1.0 * value_loss + 1.0 * reward_loss
                             + 1.0 * policy_loss)
            loss += scale_gradient(combined_loss, gradient_scale)

            if tf.math.is_nan(loss):
                print("Loss is NaN")

            # Metric logging for tensorboard.
            value_loss_metric(value_loss)
            reward_loss_metric(reward_loss)
            policy_loss_metric(policy_loss)
            scalar_value = support_to_scalar(value, network.value_support_size)
            scalar_reward = support_to_scalar(reward,
                                              network.reward_support_size)
            policy_probs = tf.nn.softmax(policy_logits)
            value_pred_mean(scalar_value)
            reward_pred_mean(scalar_reward)
            policy_pred_dist.append(policy_probs * policy_mask_)
            if k == 0:
                value_pred_k0_dist.append(scalar_value)
                reward_pred_k0_dist.append(scalar_reward)
            if k == 1:
                value_pred_k1_dist.append(scalar_value)
                reward_pred_k1_dist.append(scalar_reward)
            if k == K - 1:
                value_pred_kf_dist.append(scalar_value)
                reward_pred_kf_dist.append(scalar_reward)
            value_target_mean(z_masked)
            reward_target_mean(u_masked)
            policy_target_dist.append(pi * policy_mask_)
            if k == 0:
                value_target_k0_dist.append(z)
                reward_target_k0_dist.append(u)
            if k == 1:
                value_target_k1_dist.append(z)
                reward_target_k1_dist.append(u)
            if k == K - 1:
                value_target_kf_dist.append(z)
                reward_target_kf_dist.append(u)

    # Todo: Eventually we want to use keras layer regularization or AdamW.
    # Reported for logging only — NOT added to `loss`, so it does not affect
    # the gradients below.
    weight_reg_loss = 0
    for weights in network.get_weights():
        # sum of l2 norms of the weight matrices - also consider tf.norm
        weight_reg_loss += weight_decay * tf.add_n(
            [tf.nn.l2_loss(w) for w in weights])

    f_grad = f_tape.gradient(loss, network.f.trainable_variables)
    g_grad = g_tape.gradient(loss, network.g.trainable_variables)
    h_grad = h_tape.gradient(loss, network.h.trainable_variables)
    optimizer.apply_gradients(zip(f_grad, network.f.trainable_variables))
    optimizer.apply_gradients(zip(g_grad, network.g.trainable_variables))
    optimizer.apply_gradients(zip(h_grad, network.h.trainable_variables))

    return value_loss_metric.result(), reward_loss_metric.result(), policy_loss_metric.result(), weight_reg_loss, loss, f_grad, g_grad, h_grad, \
        value_pred_mean.result(), reward_pred_mean.result(), policy_pred_dist, value_target_mean.result(), reward_target_mean.result(), policy_target_dist, \
        actions[:, 1:], \
        value_pred_k0_dist, value_pred_k1_dist, value_pred_kf_dist, value_target_k0_dist, value_target_k1_dist, value_target_kf_dist, \
        reward_pred_k0_dist, reward_pred_k1_dist, reward_pred_kf_dist, reward_target_k0_dist, reward_target_k1_dist, reward_target_kf_dist
def train(model: models.Model,
          optimizer: optimizers.Optimizer,
          train_instances: List[Dict[str, np.ndarray]],
          validation_instances: List[Dict[str, np.ndarray]],
          num_epochs: int,
          max_length_train: int,
          max_length_validation: int,
          embedding_dim: int,
          batch_size: int,
          number_of_clusters: int,
          hidden_units_in_autoencoder_layers: List[int],
          serialization_dir: Optional[str] = None) -> Dict:
    """Train `model` on the given training instances and checkpoint the best epoch.

    Each epoch regenerates batches, augments every batch with fake samples and
    Gaussian noise (GAN-style real/fake discrimination inputs), runs a forward
    pass under a GradientTape, adds an L2 weight penalty, and applies one
    optimizer step per batch. The model weights with the lowest average
    training loss seen so far are saved to `serialization_dir`, and the
    per-epoch training loss is logged to TensorBoard.

    Args:
        model: Model to train; called as `model(batch_inputs, epoch, training=True)`
            and expected to return `(loss, f_new)`.
        optimizer: Optimizer used for `apply_gradients`.
        train_instances: Training instances fed to `generate_batches`.
        validation_instances: Validation instances fed to `generate_batches`.
        num_epochs: Number of training epochs.
        max_length_train: Max sequence length for training batches.
        max_length_validation: Max sequence length for validation batches.
        embedding_dim: Embedding dimensionality passed to `generate_batches`.
        batch_size: Batch size (noise is sliced per batch as 2*batch_size rows,
            matching the real+fake concatenation).
        number_of_clusters: Cluster count (only used by currently-disabled
            clustering/silhouette code paths).
        hidden_units_in_autoencoder_layers: Autoencoder layer sizes (only used
            by currently-disabled code paths).
        serialization_dir: Directory for TensorBoard logs and the
            `model.ckpt` checkpoint.

    Returns:
        Dict with keys "model" (the trained model) and "metrics"
        (currently only "training_loss" of the final epoch; empty if
        `num_epochs == 0`).
    """
    tensorboard_logs_path = os.path.join(serialization_dir, f'tensorboard_logs')
    tensorboard_writer = tf.summary.create_file_writer(tensorboard_logs_path)
    #best_epoch_validation_accuracy = float("-inf")
    best_epoch_loss = float("inf")
    best_epoch_validation_silhouette_score = float("-inf")
    # NOTE(review): KMeans_1 is never read in the live code; kept for the
    # disabled clustering paths below.
    KMeans_1 = Clustering.KMeansClustering
    # Initialized up front so the return statement is safe when num_epochs == 0.
    metrics: Dict[str, float] = {}
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch}")
        total_training_loss = 0
        #total_abstract_train_part_hidden_value = []
        total_abstract_train_part_hidden_value = 0
        total_abstract_validation_part_hidden_value = []
        k = 0
        list_of_trainable_variables = []
        print("\nGenerating Training batches:")
        train_batches, embed_dim_train = generate_batches(
            train_instances, batch_size, max_length_train, embedding_dim)
        print("Generating Validation batches:")
        validation_batches, embed_dim_validation = generate_batches(
            validation_instances, batch_size, max_length_validation,
            embedding_dim)
        # Noise for the doubled (real + fake) batches; sliced below by k.
        noise_data_train = np.random.normal(
            loc=0,
            scale=0.1,
            size=[len(train_instances) * 2, max_length_train, embed_dim_train])
        noise_data_validation = np.random.normal(
            loc=0,
            scale=0.1,
            size=[
                len(validation_instances) * 2, max_length_validation,
                embed_dim_validation
            ])
        # Pop metadata keys out of the batch dicts so only model inputs remain;
        # the popped lists are re-attached per batch where needed.
        train_batch_tickers = [
            batch_inputs.pop("Ticker") for batch_inputs in train_batches
        ]
        validation_batch_tickers = [
            batch_inputs.pop("Ticker") for batch_inputs in validation_batches
        ]
        train_batch_dates = [
            batch_inputs.pop("Date") for batch_inputs in train_batches
        ]
        validation_batch_dates = [
            batch_inputs.pop("Date") for batch_inputs in validation_batches
        ]
        train_batch_asset_returns = [
            batch_inputs.pop('Asset_Returns') for batch_inputs in train_batches
        ]
        validation_batch_asset_returns = [
            batch_inputs.pop('Asset_Returns')
            for batch_inputs in validation_batches
        ]
        generator_tqdm = tqdm(list(zip(train_batches, train_batch_tickers)))
        for index, (batch_inputs, batch_tickers) in enumerate(generator_tqdm):
            with tf.GradientTape() as tape:
                # if epoch == 0:
                noise = noise_data_train[k * batch_size * 2:(k + 1) * batch_size * 2, :]
                fake_input, train_real_fake_labels = get_fake_sample(
                    batch_inputs['inputs'])
                real_batch_inputs = batch_inputs['inputs']
                # Double the batch: real inputs followed by fakes, with labels.
                batch_inputs['inputs'] = np.concatenate(
                    (batch_inputs['inputs'], fake_input), axis=0)
                batch_inputs['real_fake_label'] = train_real_fake_labels
                batch_inputs['noise'] = noise
                batch_inputs['Validation_Inputs'] = validation_batch_asset_returns[index]
                batch_inputs['Training_Inputs'] = train_batch_asset_returns[index]
                batch_inputs['batch_tickers'] = batch_tickers
                loss, f_new = model(batch_inputs, epoch, training=True)
                if epoch % 2 == 0:
                    # batch_inputs['F_Updated_Value'] = f_new
                    """
                    if epoch == 0:
                    part_hidden_val = np.array(hidden_state).reshape(-1,2*sum(hidden_units_in_autoencoder_layers) )#np.sum(config.hidden_size) * 2
                    W = part_hidden_val.T
                    U, sigma, VT = np.linalg.svd(W)
                    sorted_indices = np.argsort(sigma)
                    topk_evecs = VT[sorted_indices[:-number_of_clusters - 1:-1], :]
                    F_new = topk_evecs.T
                    batch_inputs['F_Updated_Value'] = F_new
                    #total_abstract_train_part_hidden_value.append(part_hidden_val)
                    total_abstract_train_part_hidden_value = part_hidden_val
                    km = KMeans(n_clusters=number_of_clusters)
                    cluster_labels_training = km.fit_predict(X=total_abstract_train_part_hidden_value)
                    estimated_return_vector, estimated_covariance_matrix, cluster_weights, cluster_variance, trainable_variables_loss, list_of_trainable_variables = Calculate_Cluster_Weights.calculate_train_data_return_vector_for_all_stocks(
                        cluster_labels_training, real_batch_inputs)
                    asset_weights = [value for key,value in cluster_weights.items()]
                    #asset_weights = np.perm(asset_weights,cluster_labels_training)
                    estimated_return_vector = tf.concat(estimated_return_vector, axis=0)
                    validation_period_out_of_sample_returns = validation_batches[index]['inputs']
                    mean_of_validation_period_out_of_sample_returns = tf.math.reduce_mean(
                        validation_period_out_of_sample_returns, axis=1)
                    #difference_vector = estimated_return_vector - mean_of_validation_period_out_of_sample_returns
                    difference_vector = tf.math.pow(0,2)
                    #difference_vector = tf.tensordot(difference_vector,asset_weights,axis=0)
                    #mean_squared_loss_of_in_sample_and_out_of_sample_returns = tf.keras.losses.mean_squared_error(
                    #mean_of_validation_period_out_of_sample_returns, estimated_return_vector)
                    #loss += difference_vector
                    silhouette_score_training = silhouette_score(total_abstract_train_part_hidden_value, cluster_labels_training)
                    loss += 1e-4 * trainable_variables_loss
                    # calculate_train_data_return_vector_for_all_stocks(cluster_labels_training,)
                    else:
                        batch_inputs.pop('F_Updated_Value',None)
                    """
                # L2 weight regularization over all trainable variables.
                # NOTE(review): list_of_trainable_variables is only reset once
                # per epoch, so it re-appends the full variable list on every
                # batch; tape.gradient/apply_gradients then see duplicated
                # variables after the first batch. Looks unintended — confirm
                # before changing, as fixing it alters training dynamics.
                regularization_loss = 0
                #list_of_trainable_variables =
                for var in model.trainable_variables:
                    #print(var)
                    list_of_trainable_variables.append(var)
                    #if (var.name.find("Returns Vector") == -1) and (var.name.find("Estimated_Covariance") == -1):
                    regularization_loss += tf.math.add_n([tf.nn.l2_loss(var)])
                loss += 1e-4 * (regularization_loss)
                k += 1
            grads = tape.gradient(loss, list_of_trainable_variables)
            optimizer.apply_gradients(zip(grads, list_of_trainable_variables))
            total_training_loss += loss
            # NOTE(review): divides by the full instance count even mid-epoch,
            # so the progress-bar "average" is an underestimate until the
            # final batch.
            description = ("Average training loss: %.2f " %
                           (total_training_loss / len(train_instances)))
            generator_tqdm.set_description(description, refresh=False)
        average_training_loss = total_training_loss / len(train_instances)
        # Checkpoint whenever this epoch beats the best training loss so far.
        if average_training_loss < best_epoch_loss:
            print(
                "Model with best training loss so far: %.2f. Saving the model."
                % (average_training_loss))
            best_epoch_loss = average_training_loss
            model.save_weights(os.path.join(serialization_dir, f'model.ckpt'))
        # --- Disabled code below: periodic hidden-state clustering, the whole
        # validation loop, and validation-silhouette checkpointing are kept as
        # inert triple-quoted strings, exactly as the author left them. ---
        """
        if epoch % 10 == 0 and epoch != 0:
            print(total_abstract_train_part_hidden_value[0],total_abstract_train_hidden_state[0])#
            concatenated_total_abstract_train_part_hidden_value = tf.concat(total_abstract_train_part_hidden_value,axis=1)
            concatenated_total_abstract_train_part_hidden_value = concatenated_total_abstract_train_part_hidden_value.numpy()
            silhouette_score_for_k_means_clustering,cluster_labels, kmeans = KMeans.cluster_hidden_states(concatenated_total_abstract_train_part_hidden_value)
        """
        """
        total_validation_loss = 0
        generator_tqdm = tqdm(list(zip(validation_batches, validation_batch_tickers)))
        k=0
        for index, (batch_inputs,batch_tickers) in enumerate(generator_tqdm):
            #if epoch == 0:
            noise = noise_data_validation[k * batch_size * 2: (k + 1) * batch_size * 2, :]
            fake_input, train_real_fake_labels = get_fake_sample(batch_inputs['inputs'])
            batch_inputs['inputs'] = np.concatenate((batch_inputs['inputs'], fake_input), axis=0)
            batch_inputs['real_fake_label'] = train_real_fake_labels
            batch_inputs['noise'] = noise
            loss, hidden_state = model(batch_inputs, training=False)
            #if epoch % 10 == 0 and epoch != 0:
            #if epoch == 0:
            part_validation_hidden_val = np.array(hidden_state).reshape(-1, 2*sum(hidden_units_in_autoencoder_layers))
            total_abstract_validation_part_hidden_value.append(part_validation_hidden_val)
            #print(total_abstract_validation_part_hidden_value)
            #total
            #kmeans.predict()#
            #print("")
            k += 1
            #grads = tape.gradient(loss, model.trainable_variables)
            #optimizer.apply_gradients(zip(grads, model.trainable_variables))
            total_validation_loss += loss
            description = ("Average validation loss: %.2f " % (total_validation_loss / (index + 1)))
            generator_tqdm.set_description(description, refresh=False)
        average_validation_loss = total_validation_loss / len(validation_batches)
        """
        #if epoch % 10 == 0 and epoch != 0:
        #print(total_abstract_validation_part_hidden_value[0])
        #
        """
        concatenated_total_abstract_validation_part_hidden_value = tf.concat(total_abstract_validation_part_hidden_value, axis=0)
        concatenated_total_abstract_validation_part_hidden_value = concatenated_total_abstract_validation_part_hidden_value.numpy()
        km = KMeans(n_clusters=number_of_clusters)
        cluster_labels_validation = km.fit_predict(X=concatenated_total_abstract_validation_part_hidden_value)
        silhouette_score_validation = silhouette_score(concatenated_total_abstract_validation_part_hidden_value,cluster_labels_validation)
        print(silhouette_score_validation,average_validation_loss)
        if silhouette_score_validation > best_epoch_validation_silhouette_score and average_validation_loss < best_epoch_validation_loss:
            print("Model with best validation silhouette score so far: %.2f. Saving the model." % (silhouette_score_validation))
            print("Model with best validation loss so far: %.2f. Saving the model." % (average_validation_loss))
            model.save_weights(os.path.join(serialization_dir, f'model.ckpt'))
            best_epoch_validation_silhouette_score = silhouette_score_validation
            best_epoch_validation_loss = average_validation_loss
        #best_epoch_validation_accuracy = validation_accuracy
        """
        with tensorboard_writer.as_default():
            tf.summary.scalar("loss/training", average_training_loss, step=epoch)
            #tf.summary.scalar("loss/validation", average_validation_loss, step=epoch)
            #tf.summary.scalar("accuracy/training", training_accuracy, step=epoch)
            #tf.summary.scalar("accuracy/validation", best_epoch_validation_silhouette_score, step=epoch)
        tensorboard_writer.flush()
        metrics = {
            "training_loss": float(average_training_loss),
            #"validation_loss": float(average_validation_loss),
            #"training_accuracy": float(training_accuracy),
            #"best_epoch_validation_accuracy": float(best_epoch_validation_silhouette_score),
            #"best_epoch_validation_loss": float(best_epoch_validation_loss)
        }
    #print("Best epoch validation loss: %.4f" % best_epoch_validation_loss)
    # BUG FIX: the dict was previously on the line after a bare `return`,
    # making it unreachable dead code and returning None to every caller.
    return {"model": model, "metrics": metrics}