Example #1
0
def with_global_norm_clipping(
        optimizer: optimizers.Optimizer = gin.REQUIRED,
        clip_norm: Optional[float] = gin.REQUIRED) -> optimizers.Optimizer:
    """Patch ``optimizer`` in place so its gradients are clipped by global norm.

    The optimizer's ``_compute_gradients`` is replaced by a wrapper that calls
    the original implementation and then rescales all gradients together with
    ``tf.clip_by_global_norm``. ``get_config`` is replaced by a method that
    raises ``NotImplementedError``.

    Args:
        optimizer: The optimizer instance to patch. It is mutated and returned.
        clip_norm: Maximum global norm. ``None`` disables clipping, in which
            case the wrapper returns the original gradients untouched.

    Returns:
        The same ``optimizer`` object, now patched.
    """
    # Capture the bound method before it is overwritten below.
    original_compute_gradients = optimizer._compute_gradients

    def _compute_gradients(
        self: optimizers.Optimizer,
        loss: Callable[[], tf.Tensor],
        var_list: Iterable[tf.Variable],
        grad_loss: Optional[tf.Tensor] = None
    ) -> List[Tuple[Optional[tf.Tensor], tf.Variable]]:
        grads_and_vars = cast(
            List[Tuple[Optional[tf.Tensor], tf.Variable]],
            original_compute_gradients(loss, var_list, grad_loss))
        # Nothing to clip: clipping disabled, or no gradients at all (an empty
        # list cannot be unzipped into two sequences).
        if clip_norm is None or not grads_and_vars:
            return grads_and_vars
        grads, variables = zip(*grads_and_vars)
        # clip_by_global_norm returns (clipped_list, global_norm); only the
        # clipped list must be paired back with the variables.
        clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm)
        return list(zip(clipped_grads, variables))

    def get_config(self: optimizers.Optimizer) -> NoReturn:
        raise NotImplementedError

    optimizer._compute_gradients = MethodType(_compute_gradients, optimizer)
    optimizer.get_config = MethodType(get_config, optimizer)
    return optimizer
Example #2
0
def train_step(model_with_loss: Loss, optimizer: Optimizer, x_batch, y_batch) -> Dict[str, tf.Tensor]:
    """Run one optimization step and return the model's metric values.

    The forward pass is executed under a gradient tape, the mean of the
    ``"_loss"`` metric is differentiated with respect to
    ``model_with_loss.parameters()``, and the optimizer applies the update.

    Args:
        model_with_loss: Callable taking ``(x_batch, y_batch)`` that fills
            ``metric_values`` (at least the ``"_loss"`` and ``"outputs"`` keys
            are read here) and exposes ``reset()`` and ``parameters()``.
        optimizer: Optimizer used to apply the gradients.
        x_batch: Input batch.
        y_batch: Target batch; ``argmax`` over axis 1 is compared with the
            ``argmax`` of the outputs to build the ``"error"`` metric.

    Returns:
        The ``metric_values`` mapping captured before ``reset()``, extended
        with an ``"error"`` entry (1.0 where prediction and target differ).
    """
    # gradient() is called exactly once, so a non-persistent tape suffices and
    # releases its resources as soon as the gradient has been computed.
    with tf.GradientTape() as tape:  # type: ignore
        _ = model_with_loss(x_batch, y_batch)
        loss_value = model_with_loss.metric_values["_loss"]
        loss_mean = tf.reduce_mean(loss_value)

    metric_values = model_with_loss.metric_values
    error = tf.cast(tf.argmax(metric_values["outputs"], axis=1) != tf.argmax(y_batch, axis=1), tf.float32)
    metric_values["error"] = error
    model_with_loss.reset()

    # Fetch the parameters once so the gradients and the update are guaranteed
    # to refer to the same variables in the same order.
    parameters = model_with_loss.parameters()
    grads = tape.gradient(loss_mean, parameters)
    optimizer.apply_gradients(zip(grads, parameters))
    return metric_values
Example #3
0
def train_one_step(model: Model, optimizer: optimizers.Optimizer, x,
                   y) -> (tf.Tensor, tf.Tensor):
    """Perform a single gradient update and report loss and accuracy.

    Args:
        model: Model called as ``model(x)`` to obtain logits.
        optimizer: Optimizer that applies the gradients to
            ``model.trainable_weights``.
        x: Input batch.
        y: Target batch passed to ``compute_loss`` and ``compute_accuracy``.

    Returns:
        A ``(loss, accuracy)`` pair, both computed from the logits of the
        forward pass made before the weight update.
    """
    # Only the forward pass and the loss need to be recorded; the gradient
    # computation, the update and the metric are done after leaving the tape.
    with tf.GradientTape() as tape:
        logits = model(x)
        loss = compute_loss(logits, y)

    grads = tape.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    accuracy = compute_accuracy(logits, y)
    return loss, accuracy
Example #4
0
def train_model(
        model: Union[Model, NaiveModel],
        optimizer: Optimizer,
        loss_fn: Loss,
        batch_streamer_train: BatchStreamer,
        batch_streamer_test: BatchStreamer,
        epochs: int = 10,
        ) -> Union[Model, NaiveModel]:
    """
    Train the previously built model as described in the paper.

    For every epoch the training streamer is iterated once with weight
    updates, then the test streamer is iterated once without updates. The
    mean loss of each pass is printed.

    Args:
        model: Model called as ``model([H, F, C], training=...)``. A
            ``NaiveModel`` is evaluated on the training data but its weights
            are never updated.
        optimizer: Optimizer applying gradients to ``model.trainable_weights``.
        loss_fn: Loss called as ``loss_fn(P, prediction)``.
        batch_streamer_train: Iterable of ``(H, F, C, P)`` training batches;
            ``reset()`` is called at the start of every epoch.
        batch_streamer_test: Iterable of ``(H, F, C, P)`` test batches;
            ``reset()`` is called before every evaluation pass.
        epochs: Number of passes over the data.

    Returns:
        The same ``model`` object that was passed in.
    """
    # The model type does not change during training, so decide once whether
    # weight updates apply.
    updates_weights = not isinstance(model, NaiveModel)

    for epoch in range(epochs):
        print(f'Current Epoch: {epoch}')

        # loss for each mini-batch
        loss_array_train = []
        loss_array_test = []

        # TRAIN
        batch_streamer_train.reset()  # ensure that all iterators are reset
        for H, F, C, P in tqdm(batch_streamer_train):

            with GradientTape() as tape:
                # generate and evaluate predictions
                sigmoid_proba = model([H, F, C], training=True)
                loss_value = loss_fn(P, sigmoid_proba)
            loss_array_train.append(loss_value)

            if updates_weights:
                # update weights with computed gradients
                grads = tape.gradient(loss_value, model.trainable_weights)
                optimizer.apply_gradients(zip(grads, model.trainable_weights))

        mean_loss_current_epoch_train = np.array(loss_array_train).mean()
        print(f'Avg loss train: {mean_loss_current_epoch_train}')

        # TEST
        batch_streamer_test.reset()
        for H, F, C, P in tqdm(batch_streamer_test):

            # generate and evaluate predictions
            sigmoid_proba = model([H, F, C], training=False)
            loss_value = loss_fn(P, sigmoid_proba)
            loss_array_test.append(loss_value)

        mean_loss_current_epoch_test = np.array(loss_array_test).mean()
        # This is the evaluation loss; the label previously said "train".
        print(f'Avg loss test: {mean_loss_current_epoch_test}')

    return model
Example #5
0
def improve_policy(policy: Model, env: gym.Env,
                   optimizer: optimizers.Optimizer, episodes=100):
    """Run policy-gradient updates, one per played episode.

    For each of ``episodes`` iterations an episode is played with
    ``play_episode``, the rewards are discounted (gamma 0.99) and normalized,
    and the policy is updated to minimize the sum of negative log-probability
    times return.
    """
    for _episode in range(episodes):
        with tf.GradientTape() as tape:
            rewards, log_probs = play_episode(policy, env)
            discounted = discount(rewards, gamma=0.99)
            returns = normalize(discounted)
            weighted_terms = -tf.squeeze(log_probs) * returns
            policy_loss = sum(weighted_terms)
        trainable = policy.trainable_variables
        gradients = tape.gradient(policy_loss, trainable)
        optimizer.apply_gradients(zip(gradients, trainable))
def train_step(model: keras.Model, optimizer: optimizers.Optimizer,
               images: np.ndarray, labels: np.ndarray,
               loss_function: keras.losses.Loss,
               accuracy_metric: keras.metrics.Metric,
               loss_metric: tf.keras.metrics.Metric):
    """Apply one gradient update for a batch and update the running metrics.

    Args:
        model: Model called with ``training=True`` to produce the predictions.
        optimizer: Optimizer applying gradients to the trainable variables.
        images: Input batch.
        labels: Target batch.
        loss_function: Called as ``loss_function(labels, predictions)``.
        accuracy_metric: Metric updated with ``(labels, predictions)``.
        loss_metric: Metric updated with the batch loss.

    Returns:
        The loss of this batch.
    """
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        batch_loss = loss_function(labels, predictions)
        # Metric objects accumulate state when called.
        accuracy_metric(labels, predictions)
        loss_metric(batch_loss)

    gradients = tape.gradient(batch_loss, model.trainable_variables)
    updates = zip(gradients, model.trainable_variables)
    optimizer.apply_gradients(updates)
    return batch_loss
Example #7
0
 def _optimize(
     self,
     train_vars: List[Variable],
     loss: Tensor,
     optim: Optimizer,
     tape: tf.GradientTape,
 ) -> None:
     """Compute gradients of ``loss`` from ``tape`` and apply them.

     When ``self.mixed_precision`` is set, the loss is scaled through the
     optimizer before differentiation and the resulting gradients are
     unscaled before being applied.
     """
     if self.mixed_precision:
         scaled_loss = optim.get_scaled_loss(loss)
         scaled_grads = tape.gradient(scaled_loss, train_vars)
         grads = optim.get_unscaled_gradients(scaled_grads)
     else:
         grads = tape.gradient(loss, train_vars)
     optim.apply_gradients(zip(grads, train_vars))
Example #8
0
def get_optimizer(optimizer_name):
    """Return the optimizer class whose class name is ``optimizer_name``.

    Only the direct subclasses of ``Optimizer`` are considered. An
    ``AssertionError`` is raised when no such subclass exists.
    """
    registry = {}
    for candidate in Optimizer.__subclasses__():
        registry[candidate.__name__] = candidate
    assert optimizer_name in registry, "Optimizer name is not in PyTorch optimizers"
    return registry[optimizer_name]
Example #9
0
def train(model: models.Model, optimizer: optimizers.Optimizer,
          train_instances: List[Dict[str, np.ndarray]],
          validation_sentences: List[List[str]],
          validation_trees: List[DependencyTree],
          parsing_system: ParsingSystem, vocabulary: Vocabulary,
          num_epochs: int,
          batch_size: int) -> Dict[str, Union[models.Model, str]]:
    """
    Trains a model on the given training instances as
    configured and returns the trained model.

    Each epoch iterates over all training batches, clips every gradient to a
    norm of 5 and applies it, then predicts trees for the validation
    sentences and prints an evaluation report.

    Args:
        model: Model called as ``model(inputs=..., labels=...)``; its output
            must contain a ``"loss"`` entry.
        optimizer: Optimizer applying the clipped gradients.
        train_instances: Instances turned into batches by
            ``generate_batches``; each batch provides ``"inputs"`` and
            ``"labels"``.
        validation_sentences: Sentences passed to ``predict`` and ``evaluate``.
        validation_trees: Gold trees passed to ``evaluate``.
        parsing_system: Passed through to ``predict`` and ``evaluate``.
        vocabulary: Passed through to ``predict``.
        num_epochs: Number of passes over the training batches.
        batch_size: Batch size given to ``generate_batches``.

    Returns:
        A dict with the trained ``"model"`` and the ``"evaluation_report"`` of
        the last epoch (an empty string when ``num_epochs`` is 0).
    """
    print("\nGenerating Training batches:")
    train_batches = generate_batches(train_instances, batch_size)

    train_batches = [(batch["inputs"], batch["labels"])
                     for batch in train_batches]

    # Defined up front so the return below is valid even when no epoch runs.
    evaluation_report = ""

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch}")
        # Training Epoch
        total_training_loss = 0
        generator_tqdm = tqdm(train_batches)
        for index, (batch_inputs, batch_labels) in enumerate(generator_tqdm):
            # Only the forward pass is recorded; the gradient is taken after
            # leaving the tape context.
            with tf.GradientTape() as tape:
                model_outputs = model(inputs=batch_inputs, labels=batch_labels)
                loss_value = model_outputs["loss"]
            grads = tape.gradient(loss_value, model.trainable_variables)

            # Variables that do not influence the loss get a None gradient,
            # which tf.clip_by_norm cannot handle; pass those through as-is.
            clipped_grads = [
                None if grad is None else tf.clip_by_norm(grad, 5)
                for grad in grads
            ]
            optimizer.apply_gradients(
                zip(clipped_grads, model.trainable_variables))
            total_training_loss += loss_value
            description = ("Average training loss: %.2f " %
                           (total_training_loss / (index + 1)))
            generator_tqdm.set_description(description, refresh=False)

        # Validation evaluation
        print("Evaluating validation performance:")
        predicted_trees = predict(model, validation_sentences, parsing_system,
                                  vocabulary)
        evaluation_report = evaluate(validation_sentences, parsing_system,
                                     predicted_trees, validation_trees)
        print("\n" + evaluation_report)

    training_outputs = {"model": model, "evaluation_report": evaluation_report}
    return training_outputs
Example #10
0
    def _pre_compile(self, optimizer: Optimizer) -> Tuple[Optimizer, Union[bool, None]]:
        """
        Prepare the run and the optimizer for horovod; to be called before 'compile'.

        :param optimizer: The optimizer to compile. It will be wrapped in horovod's distributed optimizer:
                          'hvd.DistributedOptimizer'.

        :return: The updated parameters:
                 [0] = Wrapped optimizer (or the given optimizer unchanged when horovod is not used).
                 [1] = The 'experimental_run_tf_function' parameter for 'compile' kwargs or 'None' if horovod should not
                       be used.

        :raise MLRunInvalidArgumentError: In case the optimizer was passed as a string.
        """
        # Without horovod there is nothing to set up:
        if self._hvd is None:
            return optimizer, None

        # Horovod needs an optimizer object to wrap, a name is not enough:
        if isinstance(optimizer, str):
            raise mlrun.errors.MLRunInvalidArgumentError(
                "When using horovod, the compile method is expecting an initialized optimizer instance and not a "
                "string."
            )

        # Decide between GPU and CPU execution:
        gpus = tf.config.experimental.list_physical_devices("GPU")
        cuda_allowed = os.environ.get("CUDA_VISIBLE_DEVICES", "None") != "-1"
        if not (gpus and cuda_allowed):
            # No GPUs were found, or CUDA was explicitly disabled:
            os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
            print(f"Horovod worker #{self._hvd.rank()} is using CPU")
        else:
            # Pin each process to the GPU matching its local rank:
            tf.config.experimental.set_visible_devices(
                gpus[self._hvd.local_rank()], "GPU"
            )
            print(
                f"Horovod worker #{self._hvd.rank()} is using GPU #{self._hvd.local_rank()}"
            )

        # Scale the learning rate by the number of horovod workers:
        scaled_lr = optimizer.lr * self._hvd.size()
        optimizer.lr = scaled_lr

        # Wrap the optimizer in 'hvd.DistributedOptimizer'. 'experimental_run_tf_function' is returned as False so
        # that Tensorflow uses the distributed optimizer to compute the gradients:
        distributed_optimizer = self._hvd.DistributedOptimizer(optimizer)
        return distributed_optimizer, False
Example #11
0
def train_step(
    inputs: tuple[Tensor, Tensor | float],
    dynamics: Dynamics,
    optimizer: Optimizer,
    loss_fn: Callable = LatticeLoss,
) -> tuple[Tensor, dict]:
    """Run one training step of ``dynamics`` and collect its metrics.

    Args:
        inputs: Pair ``(x_init, beta)``.
        dynamics: Callable taking ``(to_u1(x_init), tf.constant(beta))`` and
            returning ``(x_out, metrics)``; its metrics must contain
            ``'mc_states'`` and ``'acc'``.
        optimizer: Optimizer applying gradients to
            ``dynamics.trainable_variables``.
        loss_fn: Called with keyword arguments ``x_init``, ``x_prop`` and
            ``acc``.

    Returns:
        ``(to_u1(x_out), metrics)`` where ``metrics`` holds ``'loss'``
        followed by every remaining entry reported by ``dynamics``
        (``'mc_states'`` is removed).
    """
    x_init, beta = inputs
    with tf.GradientTape() as tape:
        x_out, tmetrics = dynamics((to_u1(x_init), tf.constant(beta)))
        # 'mc_states' is consumed here and therefore not reported back.
        mc_states = tmetrics.pop('mc_states')
        x_prop = mc_states.proposed.x
        loss = loss_fn(x_init=x_init, x_prop=x_prop, acc=tmetrics['acc'])

    trainable = dynamics.trainable_variables
    grads = tape.gradient(loss, trainable)
    optimizer.apply_gradients(zip(grads, trainable))

    metrics = {'loss': loss}
    metrics.update(tmetrics.items())

    return to_u1(x_out), metrics
def train(model: models.Model,
          optimizer: optimizers.Optimizer,
          train_instances: List[Dict[str, np.ndarray]],
          validation_instances: List[Dict[str, np.ndarray]],
          num_epochs: int,
          batch_size: int,
          serialization_dir: str = None) -> Dict[str, object]:
    """
    Trains a model on the given training instances as configured and stores
    the relevant files in serialization_dir. Returns model and some important metrics.

    Args:
        model: Model called as ``model(**batch_inputs, training=...)``; its
            output must contain a ``"logits"`` entry.
        optimizer: Optimizer applying gradients to the trainable variables.
        train_instances: Training instances, batched by ``generate_batches``.
            Each batch must contain a ``"labels"`` entry.
        validation_instances: Validation instances, batched the same way.
        num_epochs: Number of epochs. The final metrics are taken from the
            last epoch, so at least one epoch must run.
        batch_size: Batch size given to ``generate_batches``.
        serialization_dir: Directory receiving the TensorBoard logs and the
            best model weights. Despite the ``None`` default a directory is
            required, because paths are built from it unconditionally.

    Returns:
        A dict with the trained ``"model"`` and a ``"metrics"`` dict holding
        the last epoch's losses and training accuracy plus the validation
        loss and accuracy of the best epoch.
    """

    print("\nGenerating Training batches:")
    train_batches = generate_batches(train_instances, batch_size)
    print("Generating Validation batches:")
    validation_batches = generate_batches(validation_instances, batch_size)

    # Labels are removed from each batch so the remaining entries can be
    # passed to the model as keyword arguments.
    train_batch_labels = [
        batch_inputs.pop("labels") for batch_inputs in train_batches
    ]
    validation_batch_labels = [
        batch_inputs.pop("labels") for batch_inputs in validation_batches
    ]

    tensorboard_logs_path = os.path.join(serialization_dir,
                                         'tensorboard_logs')
    tensorboard_writer = tf.summary.create_file_writer(tensorboard_logs_path)
    best_epoch_validation_accuracy = float("-inf")
    best_epoch_validation_loss = float("inf")
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch}")

        total_training_loss = 0
        total_correct_predictions, total_predictions = 0, 0
        generator_tqdm = tqdm(list(zip(train_batches, train_batch_labels)))
        for index, (batch_inputs, batch_labels) in enumerate(generator_tqdm):
            # Only the forward pass is recorded; the gradient is taken after
            # leaving the tape context.
            with tf.GradientTape() as tape:
                logits = model(**batch_inputs, training=True)["logits"]
                loss_value = cross_entropy_loss(logits, batch_labels)
            grads = tape.gradient(loss_value, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            total_training_loss += loss_value
            batch_predictions = np.argmax(tf.nn.softmax(logits,
                                                        axis=-1).numpy(),
                                          axis=-1)
            total_correct_predictions += (
                batch_predictions == batch_labels).sum()
            total_predictions += batch_labels.shape[0]
            description = (
                "Average training loss: %.2f Accuracy: %.2f " %
                (total_training_loss /
                 (index + 1), total_correct_predictions / total_predictions))
            generator_tqdm.set_description(description, refresh=False)
        average_training_loss = total_training_loss / len(train_batches)
        training_accuracy = total_correct_predictions / total_predictions

        total_validation_loss = 0
        total_correct_predictions, total_predictions = 0, 0
        generator_tqdm = tqdm(
            list(zip(validation_batches, validation_batch_labels)))
        for index, (batch_inputs, batch_labels) in enumerate(generator_tqdm):
            logits = model(**batch_inputs, training=False)["logits"]
            loss_value = cross_entropy_loss(logits, batch_labels)
            total_validation_loss += loss_value
            batch_predictions = np.argmax(tf.nn.softmax(logits,
                                                        axis=-1).numpy(),
                                          axis=-1)
            total_correct_predictions += (
                batch_predictions == batch_labels).sum()
            total_predictions += batch_labels.shape[0]
            description = (
                "Average validation loss: %.2f Accuracy: %.2f " %
                (total_validation_loss /
                 (index + 1), total_correct_predictions / total_predictions))
            generator_tqdm.set_description(description, refresh=False)
        average_validation_loss = total_validation_loss / len(
            validation_batches)
        validation_accuracy = total_correct_predictions / total_predictions

        if validation_accuracy > best_epoch_validation_accuracy:
            print(
                "Model with best validation accuracy so far: %.2f. Saving the model."
                % (validation_accuracy))
            # Save the model being trained here; the previous code referred to
            # a name ("classifier") that is not defined in this function.
            model.save_weights(
                os.path.join(serialization_dir, 'model.ckpt'))
            best_epoch_validation_loss = average_validation_loss
            best_epoch_validation_accuracy = validation_accuracy

        with tensorboard_writer.as_default():
            tf.summary.scalar("loss/training",
                              average_training_loss,
                              step=epoch)
            tf.summary.scalar("loss/validation",
                              average_validation_loss,
                              step=epoch)
            tf.summary.scalar("accuracy/training",
                              training_accuracy,
                              step=epoch)
            tf.summary.scalar("accuracy/validation",
                              validation_accuracy,
                              step=epoch)
        tensorboard_writer.flush()

    metrics = {
        "training_loss": float(average_training_loss),
        "validation_loss": float(average_validation_loss),
        "training_accuracy": float(training_accuracy),
        "best_epoch_validation_accuracy":
        float(best_epoch_validation_accuracy),
        "best_epoch_validation_loss": float(best_epoch_validation_loss)
    }

    print("Best epoch validation accuracy: %.4f, validation loss: %.4f" %
          (best_epoch_validation_accuracy, best_epoch_validation_loss))

    return {"model": model, "metrics": metrics}
Example #13
0
    def __init__(self,
                 encoder: Encoder,
                 decoder: Decoder,
                 optimizer: Optimizer = None,
                 loss_function: Loss = None,
                 num_words: int = 10000,
                 batch_size: int = 512,
                 test_ratio: float = 0.3,
                 max_words_in_sentence: int = 20,
                 sentence_predictor: SentencePredictor = None,
                 checkpoint_dir:
                 str = "../../../data/german-english/seq2seq-checkpoints/",
                 checkpoint_steps: int = 1,
                 restore: bool = True):
        """Set up the training state, the checkpointing and optionally restore it.

        Args:
            encoder: Encoder tracked by the checkpoint.
            decoder: Decoder tracked by the checkpoint.
            optimizer: Optimizer to use. Defaults to ``Adam`` whose
                hyper-parameters are held in ``tf.Variable`` objects.
            loss_function: Loss to use. Defaults to
                ``SparseCategoricalCrossentropy(reduction="none")``.
            num_words: Stored as ``self.num_words``.
            batch_size: Stored as ``self.batch_size``.
            test_ratio: Stored as ``self.test_ratio``.
            max_words_in_sentence: Stored, and passed to the default
                ``GreedyPredictor``.
            sentence_predictor: Predictor to use. Defaults to a
                ``GreedyPredictor``.
            checkpoint_dir: Directory for checkpoints; a ``tokenizer``
                sub-directory holds the pickled tokenizers. Created (including
                missing parents) when absent.
            checkpoint_steps: Stored as ``self.checkpoint_steps``.
            restore: When true and ``checkpoint_dir`` contains more than one
                entry, the checkpoint and the tokenizers are restored.
        """
        if optimizer is None:
            optimizer = Adam(learning_rate=tf.Variable(0.01),
                             beta_1=tf.Variable(0.9),
                             beta_2=tf.Variable(0.999),
                             epsilon=tf.Variable(1e-7))
            # NOTE(review): bare attribute access kept from the original;
            # presumably it forces the step counter to exist before the
            # checkpoint is built -- confirm.
            optimizer.iterations
            optimizer.decay = tf.Variable(0.0)
        if loss_function is None:
            loss_function = SparseCategoricalCrossentropy(reduction="none")
        if sentence_predictor is None:
            sentence_predictor = GreedyPredictor(
                max_words_in_sentence=max_words_in_sentence)

        self.optimizer = optimizer
        self.loss_function = loss_function
        self.encoder = encoder
        self.decoder = decoder
        self.num_words = num_words
        self.batch_size = batch_size
        self.test_ratio = test_ratio
        self.loss_per_epoch = []
        self.input_tokenizer, self.output_tokenizer = None, None
        self.input_word_index, self.output_word_index = None, None
        self.max_words_in_sentence = max_words_in_sentence
        self.sentence_predictor = sentence_predictor
        self.checkpoint_dir = checkpoint_dir
        self.checkpoint_steps = checkpoint_steps
        self.checkpoint = tf.train.Checkpoint(encoder=self.encoder,
                                              decoder=self.decoder,
                                              optimizer=self.optimizer)
        self.checkpoint_manager = tf.train.CheckpointManager(
            self.checkpoint, self.checkpoint_dir, max_to_keep=3)

        # makedirs creates the checkpoint directory and its tokenizer
        # sub-directory in one go, including any missing parent directories,
        # and is a no-op when they already exist.
        tokenizer_dir = os.path.join(self.checkpoint_dir, "tokenizer")
        os.makedirs(tokenizer_dir, exist_ok=True)

        # The tokenizer directory always exists at this point, so "more than
        # one entry" means something besides it is present.
        if restore and len(os.listdir(self.checkpoint_dir)) > 1:
            # NOTE(review): the second-to-last checkpoint is restored, which
            # raises IndexError when fewer than two checkpoints are known to
            # the manager -- confirm this is intended.
            self.status = self.checkpoint.restore(
                self.checkpoint_manager.checkpoints[-2]).expect_partial()
            # SECURITY: pickle.load can execute arbitrary code; only point
            # checkpoint_dir at trusted data.
            with open(os.path.join(tokenizer_dir, "input.pickle"),
                      "rb") as f:
                self.input_tokenizer = pickle.load(f)
            with open(os.path.join(tokenizer_dir, "output.pickle"),
                      "rb") as f:
                self.output_tokenizer = pickle.load(f)
            self.input_word_index = self.input_tokenizer.word_index
            self.output_word_index = self.output_tokenizer.word_index
Example #14
0
def train(model: models.Model,
          optimizer: optimizers.Optimizer,
          train_batches: List,
          validation_batches: List,
          num_epochs: int,
          serialization_dir: str = None,
          config: Dict = None,
          vocab: Dict = None) -> Dict[str, object]:
    """Train ``model`` and track loss and accuracy per epoch.

    Every epoch runs one pass over ``train_batches`` with weight updates
    (cross-entropy plus an L2 penalty over ``model.weights``) and one pass
    over ``validation_batches`` without updates. When the validation accuracy
    improves and ``serialization_dir`` is given, the model is saved with
    ``save_model``.

    Args:
        model: Model called as ``model(batch_inputs, training=...)`` and
            returning logits.
        optimizer: Optimizer applying gradients to the trainable variables.
        train_batches: Sequence of ``(batch_inputs, batch_labels)`` pairs.
        validation_batches: Sequence of ``(batch_inputs, batch_labels)`` pairs.
        num_epochs: Number of epochs. The final metrics are taken from the
            last epoch, so at least one epoch must run.
        serialization_dir: Directory for TensorBoard logs and the saved model.
            When ``None``, nothing is logged to TensorBoard or saved.
        config: Passed through to ``save_model``.
        vocab: Passed through to ``save_model``.

    Returns:
        A dict with the trained ``"model"``, a ``"metrics"`` dict (last
        epoch's losses and training accuracy plus the best epoch's validation
        loss and accuracy) and ``'plot_data'`` with the per-epoch curves.
    """
    model_name = model.__class__.__name__

    # The directory is optional (see the save guard below), so the TensorBoard
    # writer is only created when there is somewhere to write to.
    tensorboard_writer = None
    if serialization_dir is not None:
        tensorboard_logs_path = os.path.join(serialization_dir,
                                             'tensorboard_logs')
        tensorboard_writer = tf.summary.create_file_writer(
            tensorboard_logs_path)

    best_epoch_validation_accuracy = float("-inf")
    best_epoch_validation_loss = float("inf")
    # DFN performs better with a higher regularization lambda
    regularization_lambda = 1e-5 if model_name != 'DFN' else 1e-3
    training_loss_plot = []
    validation_loss_plot = []
    training_accuracy_plot = []
    validation_accuracy_plot = []
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch}")
        total_training_loss = 0
        total_correct_predictions, total_predictions = 0, 0
        generator_tqdm = tqdm(train_batches)
        for index, (batch_inputs, batch_labels) in enumerate(generator_tqdm):
            # Only the loss computation is recorded; the gradient is taken
            # after leaving the tape context.
            with tf.GradientTape() as tape:
                logits = model(batch_inputs, training=True)
                loss_value = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        batch_labels, logits))
                loss_value += regularization_lambda * tf.reduce_sum(
                    [tf.nn.l2_loss(x) for x in model.weights])
            grads = tape.gradient(loss_value, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            total_training_loss += loss_value
            batch_predictions = tf.math.argmax(tf.nn.softmax(logits, axis=-1),
                                               axis=-1)
            total_correct_predictions += tf.math.reduce_sum(
                tf.cast(batch_predictions == batch_labels, dtype=tf.int64))
            total_predictions += batch_labels.get_shape()[0]
            description = (
                "Average training loss: %.2f Accuracy: %.2f " %
                (total_training_loss /
                 (index + 1), total_correct_predictions / total_predictions))
            generator_tqdm.set_description(description, refresh=False)
        average_training_loss = total_training_loss / len(train_batches)
        training_accuracy = total_correct_predictions / total_predictions

        total_validation_loss = 0
        total_correct_predictions, total_predictions = 0, 0
        generator_tqdm = tqdm(validation_batches)
        for index, (batch_inputs, batch_labels) in enumerate(generator_tqdm):
            logits = model(batch_inputs, training=False)
            # The validation loss deliberately excludes the L2 penalty.
            loss_value = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    batch_labels, logits))
            total_validation_loss += loss_value
            batch_predictions = tf.math.argmax(tf.nn.softmax(logits, axis=-1),
                                               axis=-1)
            total_correct_predictions += tf.math.reduce_sum(
                tf.cast(batch_predictions == batch_labels, dtype=tf.int64))
            total_predictions += batch_labels.get_shape()[0]
            description = (
                "Average validation loss: %.2f Accuracy: %.2f " %
                (total_validation_loss /
                 (index + 1), total_correct_predictions / total_predictions))
            generator_tqdm.set_description(description, refresh=False)
        average_validation_loss = total_validation_loss / len(
            validation_batches)
        validation_accuracy = total_correct_predictions / total_predictions

        if validation_accuracy > best_epoch_validation_accuracy:
            if serialization_dir is not None:
                print(
                    "Model with best validation accuracy so far: %.2f. Saving the model."
                    % (validation_accuracy))
                save_model(model, config, vocab, serialization_dir)
            best_epoch_validation_loss = average_validation_loss
            best_epoch_validation_accuracy = validation_accuracy

        if tensorboard_writer is not None:
            with tensorboard_writer.as_default():
                tf.summary.scalar("{} loss/training".format(model_name),
                                  average_training_loss,
                                  step=epoch)
                tf.summary.scalar("{} loss/validation".format(model_name),
                                  average_validation_loss,
                                  step=epoch)
                tf.summary.scalar("{} accuracy/training".format(model_name),
                                  training_accuracy,
                                  step=epoch)
                tf.summary.scalar("{} accuracy/validation".format(model_name),
                                  validation_accuracy,
                                  step=epoch)
            tensorboard_writer.flush()
        training_loss_plot.append(float(average_training_loss))
        training_accuracy_plot.append(float(training_accuracy))
        validation_loss_plot.append(float(average_validation_loss))
        validation_accuracy_plot.append(float(validation_accuracy))

    metrics = {
        "training_loss": float(average_training_loss),
        "validation_loss": float(average_validation_loss),
        "training_accuracy": float(training_accuracy),
        "best_epoch_validation_accuracy":
        float(best_epoch_validation_accuracy),
        "best_epoch_validation_loss": float(best_epoch_validation_loss)
    }

    plot_data = {
        'training_loss': training_loss_plot,
        'training_accuracy': training_accuracy_plot,
        'validation_loss': validation_loss_plot,
        'validation_accuracy': validation_accuracy_plot
    }

    print("Best epoch validation accuracy: %.4f, validation loss: %.4f" %
          (best_epoch_validation_accuracy, best_epoch_validation_loss))

    return {"model": model, "metrics": metrics, 'plot_data': plot_data}
Example #15
0
    def update_weights(self, optimizer: Optimizer, network: Network, batch, weight_decay: float):
        """
        Run one optimizer step on the network for the given batch.

        For every (image, actions, targets) entry of the batch the network is unrolled: one initial inference
        on the image followed by one recurrent inference per action. The predicted value, reward and policy
        logits of every step are compared with the targets through 'self.scalar_loss', and the summed loss is
        minimized over the weights returned by 'network.get_weights_callback()'.
        The L2-Regularization is currently disabled (see the FIXME below), so 'weight_decay' is not used.
        Finally 'network.train_step' is incremented.
        """

        def scale_gradient(tensor, scale):
            # Split the tensor into a part that carries gradients and a part that does not.
            with_gradient = tensor * scale
            without_gradient = tf.stop_gradient(tensor) * (1. - scale)
            return with_gradient + without_gradient

        def get_loss_callback():
            total_loss = tf.convert_to_tensor([0], dtype=float)

            for image, actions, targets in batch:
                # The image itself must not take part in the gradient computation
                image = tf.stop_gradient(image, 'get_image')

                # Initial step: run the initial inference on the real observation
                value, reward, policy_logits, hidden_state = network.initial_inference(image)
                predictions = [(1.0, value, reward, policy_logits)]

                # Recurrent steps, each fed with the previous hidden state and the taken action
                for action in actions:
                    action_tensor = tf.convert_to_tensor(action.action_id, dtype=float)
                    action_layer = tf.ones(hidden_state.shape) * action_tensor
                    # The gradients are stopped at the input of the recurrent inference
                    recurrent_input = tf.stop_gradient(
                        tf.concat([hidden_state, action_layer], axis=3),
                        'get_hidden_state_with_action')

                    value, reward, policy_logits, next_hidden_state = network.recurrent_inference(recurrent_input)

                    hidden_state = scale_gradient(next_hidden_state, 0.5)
                    predictions.append((1.0 / len(actions), value, reward, policy_logits))

                # Accumulate the loss of every unrolled step against its target
                for prediction, target in zip(predictions, targets):
                    gradient_scale, pred_value, pred_reward, pred_policy = prediction
                    target_value, target_reward, target_policy = target

                    assert pred_value.shape == target_value.shape
                    assert pred_reward.shape == target_reward.shape
                    assert pred_policy.shape == target_policy.shape

                    value_loss = self.scalar_loss(pred_value, target_value)
                    reward_loss = self.scalar_loss(pred_reward, target_reward)
                    policy_loss = self.scalar_loss(pred_policy, target_policy)
                    step_loss = value_loss + reward_loss + policy_loss

                    total_loss += scale_gradient(step_loss, gradient_scale)

            print('Loss before l2: ', total_loss)

            # FIXME: L2 loss is way too high (sometimes even inf)
            # # Add L2-Regularization to the overall loss
            # for weights in network.get_weights_callback()():
            #     loss += weight_decay * tf.nn.l2_loss(weights)

            # The regularization above is disabled, so this prints the same value again
            print('Loss after l2: ', total_loss)

            return total_loss

        optimizer.minimize(get_loss_callback, network.get_weights_callback())
        network.train_step += 1
Example #16
0
def train_step(step: int, optimizer: Optimizer, network: Network, batch,
               weight_decay: float):
    """
    Unroll the network over one batch and apply a single optimizer update.

    Batch is a 7-tuple of:
    Observation (N,80,80,1) for atari or (N,4) for cartpole
    Actions (N,K+1) - k=0 is a dummy action -1
    Value Targets (N,K+1)
    Reward Targets (N,K+1)
    Policy Targets (N,K+1,A)
    Masks (N,K+1)
    Policy Masks - mask rolled left by 1 (shape not documented; TODO confirm)

    `step` is only used for the periodic debug print. `weight_decay` scales
    the L2 term, which is returned for logging but is NOT added to the loss.

    Returns a flat tuple: the mean value/reward/policy losses, the L2 term,
    the total loss, the gradients for `network.f`, `network.g` and
    `network.h`, prediction/target means and per-step distributions,
    `actions[:, 1:]`, and the value/reward predictions and targets recorded
    at unroll steps k=0, k=1 and k=K-1.
    """
    value_loss_metric = tf.keras.metrics.Mean()
    reward_loss_metric = tf.keras.metrics.Mean()
    policy_loss_metric = tf.keras.metrics.Mean()

    value_pred_mean = tf.keras.metrics.Mean()
    reward_pred_mean = tf.keras.metrics.Mean()
    policy_pred_dist = []
    value_pred_k0_dist, value_pred_k1_dist, value_pred_kf_dist = [], [], []
    reward_pred_k0_dist, reward_pred_k1_dist, reward_pred_kf_dist = [], [], []

    value_target_mean = tf.keras.metrics.Mean()
    reward_target_mean = tf.keras.metrics.Mean()
    policy_target_dist = []
    value_target_k0_dist, value_target_k1_dist, value_target_kf_dist = [], [], []
    reward_target_k0_dist, reward_target_k1_dist, reward_target_kf_dist = [], [], []

    observations, actions, target_values, target_rewards, target_policies, masks, policy_masks = batch

    # One tape is enough: gradients for all three sub-networks are taken in a
    # single `gradient` call below, so the forward pass is recorded only once.
    with tf.GradientTape() as tape:
        loss = 0
        K = actions.shape[1]  # seqlen
        for k in range(K):
            # Targets for unroll step k
            z = target_values[:, k]
            u = target_rewards[:, k]
            pi = target_policies[:, k]
            mask_ = masks[:, k]
            policy_mask_ = policy_masks[:, k]
            # (N,) rather than (N,1)
            mask = tf.squeeze(mask_)
            policy_mask = tf.squeeze(policy_mask_)

            if k == 0:
                # Initial step, from the real observation.
                value, reward, policy_logits, hidden_state = network.initial_inference(
                    observations, convert_to_scalar=False)
                gradient_scale = 1.0
            else:
                # All following steps
                value, reward, policy_logits, hidden_state = network.recurrent_inference(
                    hidden_state, actions[:, k], convert_to_scalar=False)
                gradient_scale = 1.0 / (K - 1)
                hidden_state = scale_gradient(
                    hidden_state,
                    0.5)  # Todo: is this compatible with masking??

            # Masking
            # Be careful with masking, if all values are masked it returns len 0 tensor.
            # Only the policy tensors are filtered here; value and reward are
            # passed unfiltered and `mask` is handed to ce_loss instead.
            pi_masked = tf.boolean_mask(
                pi, policy_mask,
                axis=0)  # policy mask is mask but rolled left by 1
            policy_logits_masked = tf.boolean_mask(policy_logits,
                                                   policy_mask,
                                                   axis=0)

            value_loss = ce_loss(
                value,
                scalar_to_support(z, network.value_support_size), mask)
            reward_loss = ce_loss(
                reward,
                scalar_to_support(u, network.reward_support_size), mask)
            policy_loss = ce_loss(policy_logits_masked, pi_masked, mask)
            combined_loss = 1.0 * value_loss + 1.0 * reward_loss + 1.0 * policy_loss

            loss += scale_gradient(combined_loss, gradient_scale)

            # Debugging aid: drops into the debugger as soon as the loss diverges.
            if tf.math.is_nan(loss):
                print("Loss is NaN")
                pdb.set_trace()

            # Metric logging for tensorboard
            value_loss_metric(value_loss)
            reward_loss_metric(reward_loss)
            policy_loss_metric(policy_loss)

            scalar_value = support_to_scalar(value, network.value_support_size)
            scalar_reward = support_to_scalar(reward,
                                              network.reward_support_size)
            policy_probs = tf.nn.softmax(policy_logits)

            # Debug hook: a convenient line to set a breakpoint on every 100 steps.
            if (step + 1) % 100 == 0:
                print("break")

            value_pred_mean(scalar_value)
            reward_pred_mean(scalar_reward)
            policy_pred_dist.append(policy_probs * policy_mask_)

            value_target_mean(z)
            reward_target_mean(u)
            policy_target_dist.append(pi * policy_mask_)

            # Separate `if`s (not elif): K - 1 may coincide with 0 or 1.
            if k == 0:
                value_pred_k0_dist.append(scalar_value)
                reward_pred_k0_dist.append(scalar_reward)
                value_target_k0_dist.append(z)
                reward_target_k0_dist.append(u)
            if k == 1:
                value_pred_k1_dist.append(scalar_value)
                reward_pred_k1_dist.append(scalar_reward)
                value_target_k1_dist.append(z)
                reward_target_k1_dist.append(u)
            if k == K - 1:
                value_pred_kf_dist.append(scalar_value)
                reward_pred_kf_dist.append(scalar_reward)
                value_target_kf_dist.append(z)
                reward_target_kf_dist.append(u)

    # Todo: Eventually we want to use keras layer regularization or AdamW
    # The L2 term is reported to the caller but is not part of `loss`, so it
    # is computed outside the tape to avoid recording unused operations.
    weight_reg_loss = 0
    for weights in network.get_weights():
        weight_reg_loss += weight_decay * tf.add_n([
            tf.nn.l2_loss(w) for w in weights
        ])  # sum of l2 norm of weight matrices - also consider tf.norm

    f_vars = list(network.f.trainable_variables)
    g_vars = list(network.g.trainable_variables)
    h_vars = list(network.h.trainable_variables)

    # `gradient` mirrors the nesting of its sources, so this yields one list
    # of gradients per sub-network.
    f_grad, g_grad, h_grad = tape.gradient(loss, [f_vars, g_vars, h_vars])

    # Apply everything in ONE call: each apply_gradients call advances
    # optimizer.iterations, so three calls per training step would make
    # learning-rate schedules and Adam bias correction run three times too fast.
    optimizer.apply_gradients(
        zip(list(f_grad) + list(g_grad) + list(h_grad),
            f_vars + g_vars + h_vars))

    return value_loss_metric.result(), reward_loss_metric.result(), policy_loss_metric.result(), weight_reg_loss, loss, f_grad, g_grad, h_grad, \
           value_pred_mean.result(), reward_pred_mean.result(), policy_pred_dist, value_target_mean.result(), reward_target_mean.result(), policy_target_dist, \
           actions[:,1:], \
           value_pred_k0_dist, value_pred_k1_dist, value_pred_kf_dist, value_target_k0_dist, value_target_k1_dist, value_target_kf_dist, \
           reward_pred_k0_dist, reward_pred_k1_dist, reward_pred_kf_dist, reward_target_k0_dist, reward_target_k1_dist, reward_target_kf_dist
Example #17
0
def train(model: models.Model,
          optimizer: optimizers.Optimizer,
          train_instances: List[Dict[str, np.ndarray]],
          validation_instances: List[Dict[str, np.ndarray]],
          num_epochs: int,
          max_length_train: int,
          max_length_validation: int,
          embedding_dim: int,
          batch_size: int,
          number_of_clusters: int,
          hidden_units_in_autoencoder_layers: List[int],
          serialization_dir: str = None) -> Dict[str, object]:
    """
    Train `model` on the given training instances for `num_epochs` epochs.

    Each epoch regenerates the train/validation batches, augments every
    training batch with fake samples and noise, and minimises the model loss
    plus a 1e-4-weighted L2 penalty over all trainable variables. Whenever
    the epoch's average training loss improves, the weights are saved to
    `serialization_dir`/model.ckpt, and the loss is logged to TensorBoard
    under `serialization_dir`/tensorboard_logs. When `serialization_dir` is
    None, no checkpoints or logs are written.

    The validation pass and clustering-based model selection are currently
    disabled; validation batches are only used to supply 'Validation_Inputs'
    to the model. `number_of_clusters` and
    `hidden_units_in_autoencoder_layers` are accepted for interface
    compatibility but are unused.

    Returns a dict with the trained model under "model" and a "metrics" dict
    holding the last epoch's "training_loss" (NaN if `num_epochs` is 0).
    """
    # Checkpoints and TensorBoard logs are written only when a directory is given.
    tensorboard_writer = None
    if serialization_dir is not None:
        tensorboard_logs_path = os.path.join(serialization_dir,
                                             'tensorboard_logs')
        tensorboard_writer = tf.summary.create_file_writer(
            tensorboard_logs_path)

    best_epoch_loss = float("inf")
    # Defined up front so the metrics below exist even when num_epochs == 0.
    average_training_loss = float("nan")

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch}")
        total_training_loss = 0

        print("\nGenerating Training batches:")
        train_batches, embed_dim_train = generate_batches(
            train_instances, batch_size, max_length_train, embedding_dim)
        print("Generating Validation batches:")
        validation_batches, embed_dim_validation = generate_batches(
            validation_instances, batch_size, max_length_validation,
            embedding_dim)
        noise_data_train = np.random.normal(
            loc=0,
            scale=0.1,
            size=[len(train_instances) * 2, max_length_train, embed_dim_train])
        # NOTE: the validation noise is unused while the validation pass is
        # disabled; it is still drawn so the NumPy RNG stream (and hence any
        # later random sampling) is unchanged.
        np.random.normal(
            loc=0,
            scale=0.1,
            size=[
                len(validation_instances) * 2, max_length_validation,
                embed_dim_validation
            ])

        # Strip the non-tensor entries from every training batch before it is
        # fed to the model; tickers and asset returns are re-attached below.
        train_batch_tickers = [
            batch_inputs.pop("Ticker") for batch_inputs in train_batches
        ]
        for batch_inputs in train_batches:
            batch_inputs.pop("Date")
        train_batch_asset_returns = [
            batch_inputs.pop('Asset_Returns') for batch_inputs in train_batches
        ]
        validation_batch_asset_returns = [
            batch_inputs.pop('Asset_Returns')
            for batch_inputs in validation_batches
        ]

        generator_tqdm = tqdm(list(zip(train_batches, train_batch_tickers)))
        for index, (batch_inputs, batch_tickers) in enumerate(generator_tqdm):
            # Two noise rows per instance: one for the real and one for the
            # fake half of the augmented batch.
            noise = noise_data_train[index * batch_size * 2:(index + 1) *
                                     batch_size * 2, :]

            fake_input, train_real_fake_labels = get_fake_sample(
                batch_inputs['inputs'])
            batch_inputs['inputs'] = np.concatenate(
                (batch_inputs['inputs'], fake_input), axis=0)
            batch_inputs['real_fake_label'] = train_real_fake_labels
            batch_inputs['noise'] = noise
            # Train and validation batches are paired by position.
            batch_inputs['Validation_Inputs'] = validation_batch_asset_returns[
                index]
            batch_inputs['Training_Inputs'] = train_batch_asset_returns[index]
            batch_inputs['batch_tickers'] = batch_tickers

            with tf.GradientTape() as tape:
                loss, f_new = model(batch_inputs, epoch, training=True)
                # Read the variables after the forward pass so lazily built
                # layers are included. Using the model's own list (instead of
                # a list accumulated across batches) guarantees that every
                # variable appears exactly once and is updated once per batch.
                trainable_variables = model.trainable_variables
                regularization_loss = sum(
                    tf.nn.l2_loss(var) for var in trainable_variables)
                loss += 1e-4 * regularization_loss

            if epoch % 2 == 0:
                batch_inputs['F_Updated_Value'] = f_new

            # Gradients are taken outside the tape context so the backward
            # pass itself is not recorded.
            grads = tape.gradient(loss, trainable_variables)
            optimizer.apply_gradients(zip(grads, trainable_variables))

            total_training_loss += loss
            description = ("Average training loss: %.2f " %
                           (total_training_loss / len(train_instances)))
            generator_tqdm.set_description(description, refresh=False)

        average_training_loss = total_training_loss / len(train_instances)
        if average_training_loss < best_epoch_loss:
            print(
                "Model with best training loss  so far: %.2f. Saving the model."
                % (average_training_loss))
            best_epoch_loss = average_training_loss
            if serialization_dir is not None:
                model.save_weights(
                    os.path.join(serialization_dir, 'model.ckpt'))

        if tensorboard_writer is not None:
            with tensorboard_writer.as_default():
                tf.summary.scalar("loss/training",
                                  average_training_loss,
                                  step=epoch)
            tensorboard_writer.flush()

    metrics = {
        "training_loss": float(average_training_loss),
    }

    return {"model": model, "metrics": metrics}