Example #1
def log_validation(sess: tf.Session, step: int,
                   summary_writer: tf.summary.FileWriter,
                   test_model: BindedModel):
    logits, predict, lb, val_loss, losses, test_summary = sess.run(
        [
            test_model.logits,
            test_model.predict,
            test_model.dq.batch_labels,
            test_model.ctc_loss,
            test_model.ctc_loss_unaggregated,
            test_model.summary,
        ],
        feed_dict={
            test_model.learning_phase: 0,
        },
        options=tf.RunOptions(
            timeout_in_ms=20 * 1000,  # Single op should complete in 20s
        ),
    )
    logger.info(
        f"Logits[{logits.shape}]: describe:{pformat(stats.describe(logits, axis=None))}"
    )
    if summary_writer is not None:
        summary_writer.add_summary(test_summary, step)
    return val_loss
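
The timeout_in_ms option above is worth isolating: it turns a session.run call that blocks (for example, on an empty input queue) into a catchable error instead of a hang. A minimal standalone sketch, assuming TensorFlow 1.x:

import tensorflow as tf

queue = tf.FIFOQueue(capacity=1, dtypes=tf.float32)
dequeue_op = queue.dequeue()

with tf.Session() as sess:
    try:
        # The empty queue would block forever; the deadline turns that into an error.
        sess.run(dequeue_op, options=tf.RunOptions(timeout_in_ms=1000))
    except tf.errors.DeadlineExceededError:
        print("dequeue timed out after 1s")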
Example #2
def _log_continuous_evaluation(tb_writer: tf.summary.FileWriter,
                               main_metric: str,
                               eval_result: Evaluation,
                               seen_instances: int,
                               epoch: int,
                               max_epochs: int,
                               execution_results: List[ExecutionResult],
                               train: bool = False,
                               dataset_name: str = None) -> None:
    """Log the evaluation results and the TensorBoard summaries."""

    color, prefix = ("yellow", "train") if train else ("blue", "val")

    if dataset_name is not None:
        prefix += "_" + dataset_name

    eval_string = _format_evaluation_line(eval_result, main_metric)
    eval_string = "Epoch {}/{}  Instances {}  {}".format(
        epoch, max_epochs, seen_instances, eval_string)
    log(eval_string, color=color)

    if tb_writer:
        for result in execution_results:
            for summaries in [
                    result.scalar_summaries, result.histogram_summaries,
                    result.image_summaries
            ]:
                if summaries is not None:
                    tb_writer.add_summary(summaries, seen_instances)

        external_str = \
            tf.Summary(value=[tf.Summary.Value(tag=prefix + "_" + name,
                                               simple_value=value)
                              for name, value in eval_result.items()])
        tb_writer.add_summary(external_str, seen_instances)
Example #3
def evaluate_players(p1: Player,
                     p2: Player,
                     games_per_battle=100,
                     num_battles=100,
                     writer: tf.summary.FileWriter = None,
                     silent: bool = False):
    p1_wins = []
    p2_wins = []
    draws = []
    game_number = []
    game_counter = 0

    for i in range(num_battles):
        p1win, p2win, draw = battle(p1, p2, games_per_battle, silent)
        p1_wins.append(p1win)
        p2_wins.append(p2win)
        draws.append(draw)
        game_counter = game_counter + 1
        game_number.append(game_counter)
        if writer is not None:
            summary = tf.Summary(value=[
                tf.Summary.Value(tag='Player 1 Win', simple_value=p1win),
                tf.Summary.Value(tag='Player 2 Win', simple_value=p2win),
                tf.Summary.Value(tag='Draw', simple_value=draw)
            ])
            writer.add_summary(summary, game_counter)

    return game_number, p1_wins, p2_wins, draws
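
A hypothetical usage sketch follows; RandomPlayer is an assumption standing in for this module's actual Player implementations (the battle helper is assumed to come from the same module):

import tensorflow as tf

writer = tf.summary.FileWriter("logs/eval")
game_number, p1_wins, p2_wins, draws = evaluate_players(
    RandomPlayer(),  # hypothetical Player subclass, not defined here
    RandomPlayer(),
    games_per_battle=10,
    num_battles=5,
    writer=writer)
writer.flush()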
Example #4
def train_step(session: tf.Session,
               model: CharCnnLstm,
               train_info: TrainInfo,
               summary_writer: tf.summary.FileWriter,
               logger: logging.Logger,
               report_step: int = 20):
    if train_info.batch % report_step == 0:
        loss_value, _, gradient_norm, step, loss_acc_summary = session.run(
            [
                model.loss, model.train_op, model.global_norm,
                model.global_step, model.loss_acc_summary
            ], {model.lstm_dropout: 0.5})
        summary_writer.add_summary(loss_acc_summary, step)
        log_level = logging.INFO
    else:
        loss_value, _, gradient_norm, step = session.run(
            [model.loss, model.train_op, model.global_norm, model.global_step],
            {model.lstm_dropout: 0.5})
        log_level = logging.DEBUG
    elapsed = time.time() - train_info.start_time
    logger.log(
        log_level,
        f'{step:6}: {train_info.epoch} [{train_info.batch:5}/{train_info.nb_of_batches:5}], '
        f'train_loss = {loss_value:6.8f} elapsed = {elapsed:.4f}s, grad.norm={gradient_norm:6.8f}'
    )

    return step
Example #5
def summarize_epoch(epoch: int, sess: tf.Session, learning_rate: tf.Variable,
                    bn_decay: tf.Variable, loss_sum: float,
                    batches_per_epoch: float, acc_sum: float,
                    train_iou_val: float, train_writer: tf.summary.FileWriter,
                    train_iou_reset: tf.Operation):
    """
    summarizes train metrics of one epoch

    :param epoch: index of epoch
    :param sess: tf session
    :param learning_rate: learning rate variable
    :param bn_decay: batch norm decay variable
    :param loss_sum: accumulated loss
    :param batches_per_epoch: number of batches used in an epoch
    :param acc_sum: accumulated accuracy
    :param train_iou_val: accumulated train iou
    :param train_writer: train summary writer
    :param train_iou_reset: operation to reset train iou
    :return: None
    """
    lr, bn_d = sess.run([learning_rate, bn_decay])
    epoch_loss = loss_sum / batches_per_epoch
    epoch_acc = acc_sum / batches_per_epoch
    epoch_iou = train_iou_val
    print(
        f"mean loss: {epoch_loss:.4f}\tmean acc: {epoch_acc:.4f}\tmean iou: {epoch_iou:.4f}"
    )
    summary = get_tf_summary(epoch_loss, epoch_acc, epoch_iou)
    summary.value.add(tag="learning_rate", simple_value=lr)
    summary.value.add(tag="bn_decay", simple_value=bn_d)
    train_writer.add_summary(summary, epoch)
    # reset accumulator
    sess.run(train_iou_reset)
Example #6
def save_summaries(metrics: Dict[str, float], writer: tf.summary.FileWriter,
                   global_step: int) -> None:
    """Log metrics with a tf.summary.FileWriter."""
    values = [
        tf.Summary.Value(tag=k, simple_value=v) for k, v in metrics.items()
    ]
    summary = tf.Summary(value=values)
    writer.add_summary(summary, global_step)
    writer.flush()
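
Because save_summaries needs only a metrics dict and a writer, a usage sketch is short (assuming TensorFlow 1.x; the log directory and metric names are illustrative):

import tensorflow as tf

writer = tf.summary.FileWriter("logs/")
save_summaries({"val_loss": 0.42, "val_accuracy": 0.91}, writer, global_step=100)
writer.close()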
Example #7
def np_scalar_to_summary(tag: str, scalar: np.array, np_step: np.array,
                         summary_file_writer: tf.summary.FileWriter):
    """
    Adds a numpy scalar to the logfile.
    :param tag: The tensorboard plot title.
    :param scalar: The scalar value to be recorded in that plot.
    :param np_step: The x-axis step.
    :param summary_file_writer: The summary writer used to do the recording.
    """
    mse_net_summary = tf.Summary.Value(tag=tag, simple_value=scalar)
    mse_net_summary = tf.Summary(value=[mse_net_summary])
    summary_file_writer.add_summary(mse_net_summary, global_step=np_step)
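
A minimal usage sketch, assuming TensorFlow 1.x (the tag and values are illustrative); add_summary accepts numpy scalars for both the value and the step:

import numpy as np
import tensorflow as tf

writer = tf.summary.FileWriter("logs/")
np_scalar_to_summary("mse", np.float32(0.013), np.int64(10), writer)
writer.flush()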
Example #8
    def _train_epoch(self, sess, global_step_train, decay_step, thresholds,
                     saver, filewriter: tf.summary.FileWriter, optimizer_vars):

        loss_arr_train = []

        def _any_is_nan(ths_to_check):
            return any(np.isnan(th) for th in ths_to_check)

        time.sleep(1)  # prevent overlapping between the output of tqdm and the standard stream output

        for batch, _ in tqdm(
                self.__data_generator_train.generate_batches(self.batch_size)):

            learning_rate_val = self.learning_rate * \
                          np.exp(-global_step_train * self.learning_rate_decay) * \
                          np.abs(np.cos(np.pi * decay_step / 4 / self.reinit_adam_after_n_batches)) + 10.0 ** -7

            loss_value, summary_node_val, ths_vals = self._train_step(
                sess, batch, learning_rate_val, thresholds)
            loss_arr_train.append(loss_value)

            filewriter.add_summary(summary_node_val, global_step_train)

            global_step_train += 1
            decay_step += 1

            if _any_is_nan(ths_vals):
                print(
                    "Some thresholds are NaN, restoring the previous trainable values"
                )
                saver.restore(sess, tf.train.latest_checkpoint(self.save_dir))
                init_opt_vars_op = tf.variables_initializer(optimizer_vars)
                sess.run(init_opt_vars_op)
            else:
                if global_step_train % self.save_each == 0:
                    saver.save(
                        sess,
                        os.path.join(self.save_dir,
                                     "ckpt{}".format(global_step_train)))

            if global_step_train % self.reinit_adam_after_n_batches == 0:
                init_opt_vars_op = tf.variables_initializer(optimizer_vars)
                sess.run(init_opt_vars_op)
                decay_step = 0

        time.sleep(1)  # prevent overlapping between the output of tqdm and the standard stream output

        return global_step_train, decay_step, float(np.mean(loss_arr_train))
Example #9
    def perform_validation(self,
                           sess,
                           iteration: int,
                           writer: tf.summary.FileWriter,
                           results_dir: str = None):
        """
        Performs validation over the test data and registers the results in the form of summaries that can be
        interpreted by TensorBoard. The prepare method must have been called at least once before using this method;
        otherwise, an Exception may occur.

        :param sess: the current session
        :param iteration: the current iteration number over the training data
        :param writer: a FileWriter properly configured
        :param results_dir: (Optional) the directory where the predicted labels (i.e. the results of the model) should
        be saved. This can be useful for analysing the detailed results after the training is complete.
        If the parameter is not provided then the labels are not stored. If provided, they will be stored in:
        results_dir/predictions/{iteration}.npy as a numpy array
        :return: None
        """
        sess.run(self.test_iterator.initializer)
        sess.run(
            tf.variables_initializer(tf.get_default_graph().get_collection(
                tf.GraphKeys.METRIC_VARIABLES)))
        op_list = [self.model.get_output()]
        op_list.extend(self.update_tensors)
        true_labels = []
        predicted = []
        while True:
            try:
                test_images, test_target = sess.run([self.test_x, self.test_y])
                results = sess.run(op_list,
                                   feed_dict={
                                       self.tensor_x: test_images,
                                       self.tensor_y: test_target,
                                       self.model.use_dropout: 0.0
                                   })
                true_labels.extend(test_target)
                predicted.extend(results[0])
            except OutOfRangeError:
                print(
                    "Finished validation of iteration {}...".format(iteration))
                break

        for tensor in self.scalar_tensors:
            summary = sess.run(tensor)
            writer.add_summary(summary, iteration)
        self._calculate_external_metrics(sess, iteration, writer, true_labels,
                                         predicted)

        Tester.save_predictions(iteration, results_dir, predicted)
Example #10
  def _train_step(self, obs, states, rewards, masks, actions, values, update,
                  writer: tf.summary.FileWriter = None, features=None, rewards_bonuses=None):
    """
    applies a training step to the model

    :param obs: ([float]) The input observations
    :param states: ([float]) The states (used for recurrent policies)
    :param rewards: ([float]) The rewards from the environment
    :param masks: ([bool]) Whether or not the episode is over (used for recurrent policies)
    :param actions: ([float]) The actions taken
    :param values: ([float]) The value estimates used to compute the advantages
    :param update: (int) the current step iteration
    :param writer: (TensorFlow Summary.writer) the writer for tensorboard
    :param features: ([float]) successor features fed to successor_feature_ph
    :param rewards_bonuses: ([float]) reward bonuses added to the rewards when use_sf is set
    :return: (float, float, float, float) policy loss, value loss, policy entropy, successor feature loss
    """
    advs = rewards - values
    cur_lr = None
    for _ in range(len(obs)):
      cur_lr = self.learning_rate_schedule.value()
    assert cur_lr is not None, "Error: the observation input array cannot be empty"

    rewards_bonuses = rewards_bonuses if self.use_sf else np.zeros_like(rewards_bonuses)
    td_map = {self.train_model.obs_ph: obs, self.actions_ph: actions, self.advs_ph: advs,
              self.rewards_ph: rewards + rewards_bonuses, self.learning_rate_ph: cur_lr,
              self.successor_feature_ph: features}
    if states is not None:
      td_map[self.train_model.states_ph] = states
      td_map[self.train_model.masks_ph] = masks

    if writer is not None:
      # run loss backprop with summary, but once every 10 runs save the metadata (memory, compute time, ...)
      if (1 + update) % 10 == 0:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        summary, policy_loss, value_loss, policy_entropy, _, sf_loss = self.sess.run(
          [self.summary, self.pg_loss, self.vf_loss, self.entropy, self.apply_backprop,
           self.sf_loss], td_map, options=run_options, run_metadata=run_metadata)
        writer.add_run_metadata(run_metadata, 'step%d' % (update * (self.n_batch + 1)))
      else:
        summary, policy_loss, value_loss, policy_entropy, _, sf_loss = self.sess.run(
          [self.summary, self.pg_loss, self.vf_loss, self.entropy, self.apply_backprop,
           self.sf_loss], td_map)
      writer.add_summary(summary, update * (self.n_batch + 1))

    else:
      policy_loss, value_loss, policy_entropy, _, sf_loss = self.sess.run(
        [self.pg_loss, self.vf_loss, self.entropy, self.apply_backprop, self.sf_loss], td_map)

    return policy_loss, value_loss, policy_entropy, sf_loss
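
The FULL_TRACE branch above is a general profiling pattern, independent of this model. A standalone sketch under TensorFlow 1.x (the graph here is a toy stand-in):

import tensorflow as tf

x = tf.constant(3.0)
y = x * x

with tf.Session() as sess:
    writer = tf.summary.FileWriter("logs/", sess.graph)
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    sess.run(y, options=run_options, run_metadata=run_metadata)
    # Attaches per-op timing and memory stats, viewable in TensorBoard's graph tab.
    writer.add_run_metadata(run_metadata, 'step0')
    writer.close()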
Example #11
def train_network(network: DeepRegretNetwork, advantage_memory: buffer.Reservoir,
                  action_indexer: neural_game.ActionIndexer,
                  info_set_vectoriser: neural_game.InfoSetVectoriser,
                  current_time: int,
                  writer: tf.summary.FileWriter,
                  batch_size=1024, num_sgd_updates=4000):
    """Trains the given network from scratch

    Args:
        network: DeepRegretNetwork. The network to train.
        advantage_memory: Reservoir. Each entry should be an AdvantageMemoryElement.
        action_indexer: ActionIndexer. Turns actions into indices.
        info_set_vectoriser: InfoSetVectoriser. Turns information set ids into vectors.
        current_time: int. The current time.
        writer: tf.summary.FileWriter.
        batch_size: int. The size to use for each batch.
        num_sgd_updates: int. The number of sgd updates to do.

    Returns:
        min_loss: float. The minimum loss observed over the sgd updates.
    """
    # First reset the network.
    network.initialise()

    losses = []

    print("Training.")
    indices = list(range(len(advantage_memory)))
    for i in tqdm(range(num_sgd_updates)):
        # Shuffle the advantage memory.
        batch_indices = np.random.choice(indices, batch_size, replace=True)

        batch = advantage_memory.get_elements(batch_indices)

        loss, summary = network.train(batch, action_indexer, info_set_vectoriser, current_time=current_time)
        writer.add_summary(summary, network.global_step)
        losses.append(loss)

        # Early stopping.
        if early_stopping_water_mark(losses, num_attempts=20):
            print("Losses: {}".format(losses))
            print("Early stopping.")
            break

    print("Losses % through the data: {}".format(
        [losses[int(frac / 100 * len(losses))] for frac in [0.0, 25.0, 50.0, 75.0, 99.99]]
    ))

    return np.min(losses)
Example #12
    def save_loss(sess, loss, iteration: int, writer: tf.summary.FileWriter):
        """
        Saves the loss value into the summary file for TensorBoard

        :param sess: the current session
        :param loss: the loss value for the current iteration
        :param iteration: the current iteration number over the training data
        :param writer: a FileWriter properly configured
        :return: None
        """
        loss_tensor = tf.get_default_graph().get_tensor_by_name(
            'loss_tensor:0')
        loss_scalar = tf.get_default_graph().get_tensor_by_name('loss:0')
        summary = sess.run(loss_scalar, feed_dict={loss_tensor: loss})
        writer.add_summary(summary, iteration)
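
save_loss assumes the default graph already contains a placeholder named 'loss_tensor' and a scalar summary named 'loss'. A sketch of a graph that satisfies those lookups (TensorFlow 1.x; Tester as the owning class is an assumption based on Example #9):

import tensorflow as tf

loss_value = tf.placeholder(tf.float32, shape=(), name='loss_tensor')
tf.summary.scalar('loss', loss_value)  # yields the 'loss:0' summary tensor

with tf.Session() as sess:
    writer = tf.summary.FileWriter("logs/", sess.graph)
    Tester.save_loss(sess, 0.37, iteration=1, writer=writer)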
Example #13
    def _calculate_external_metrics(self, sess, iteration: int,
                                    writer: tf.summary.FileWriter, true_labels,
                                    predictions):
        """
        Calculates the metrics that make use of external libraries. It also saves the results
        on TensorBoard

        :param sess: the current session
        :param iteration: the current iteration number over the training data
        :param writer: a FileWriter properly configured
        :param true_labels: array-like structure with the true labels
        :param predictions: array-like structure with the predicted labels. Must be between 0 and 1
        :return: None
        """
        auc = metrics.roc_auc_score(true_labels, predictions, average='macro')
        auc_summary = self.external_tensors["AUC"]
        summary = sess.run(auc_summary,
                           feed_dict={self.aux_tensors["AUC_AUX"]: auc})
        writer.add_summary(summary, iteration)
Example #14
def save_episode_to_summary(summary_writer: tensorflow.summary.FileWriter,
                            episode: int, step: int, time: float, reward: float, epsilon: float) -> None:
    """
    Adds summary of episode to summary file
    NOTE: to view summary execute "tensorboard --logdir output/[algorithm]/tensorboard_summary"
    :param summary_writer: summary writer
    :param episode: number of episode
    :param step: total steps of episode
    :param time: time needed to complete episode
    :param reward: total reward received in episode
    :param epsilon: value of epsilon at the end of episode
    """
    # create summary of episode
    summary = tensorflow.Summary()
    summary.value.add(tag='Reward', simple_value=float(reward))
    summary.value.add(tag='Step', simple_value=int(step))
    summary.value.add(tag='Time', simple_value=float(time))
    summary.value.add(tag='Epsilon', simple_value=float(epsilon))
    # add summary to file writer
    summary_writer.add_summary(summary, episode)
    summary_writer.flush()
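
A usage sketch; the output directory follows the NOTE in the docstring, with a hypothetical algorithm name filled in:

import tensorflow

writer = tensorflow.summary.FileWriter("output/dqn/tensorboard_summary")
save_episode_to_summary(writer, episode=1, step=250, time=12.3,
                        reward=95.0, epsilon=0.1)
writer.close()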
Example #15
def run_validations(session: tf.Session, model: CharCnnLstm, vocab: Vocab,
                    target_tensor: tf.Tensor,
                    summary_writer: tf.summary.FileWriter, step: int,
                    logger: logging.Logger):
    targets = []
    predictions = []
    loss = 0
    nb_batches = 0
    while True:
        try:
            batch_loss, batch_target, batch_predictions, variable_summaries = session.run(
                [
                    model.loss, target_tensor, model.predictions,
                    model.variable_summaries
                ], {model.lstm_dropout: 0.})
            loss += batch_loss
            targets.append(batch_target.ravel())
            predictions.append(batch_predictions.ravel())
            nb_batches += 1
        except tf.errors.OutOfRangeError:
            break
    targets = np.concatenate(targets)
    predictions = np.concatenate(predictions)
    loss /= nb_batches
    accuracy = np.sum((predictions == targets)
                      & (targets != 0)) / np.sum(targets != 0)
    logger.info(
        f'Validation loss = {loss:6.8f}, validation accuracy = {accuracy:6.8f}'
    )
    logger.info('\n' +
                classification_report_with_labels(targets, predictions, vocab))

    summary = tf.Summary(value=[
        tf.Summary.Value(tag='loss', simple_value=loss),
        tf.Summary.Value(tag='accuracy', simple_value=accuracy),
    ])
    summary_writer.add_summary(summary, step)
    summary_writer.add_summary(variable_summaries, step)
Example #16
    def perform_validation(self, sess, iteration: int,
                           writer: tf.summary.FileWriter):
        """
        Performs validation over the test data and registers the results in the form of summaries that can be
        interpreted by TensorBoard. The prepare method must have been called at least once before using this method;
        otherwise, an Exception may occur.

        :param sess: the current session
        :param iteration: the current iteration number over the training data
        :param writer: a FileWriter properly configured
        :return: None
        """
        streaming_accuracy_update = tf.get_default_graph().get_tensor_by_name(
            'accuracy_metric/update_op:0')
        streaming_accuracy_scalar = tf.get_default_graph().get_tensor_by_name(
            'accuracy:0')

        sess.run(self.test_iterator.initializer)
        sess.run(
            tf.variables_initializer(tf.get_default_graph().get_collection(
                tf.GraphKeys.METRIC_VARIABLES)))
        while True:
            try:
                test_images, test_target = sess.run([self.test_x, self.test_y])
                sess.run(
                    [streaming_accuracy_update],
                    feed_dict={
                        self.tensor_x: test_images,
                        self.tensor_y: test_target,
                        self.model.use_dropout: 0.0
                    })
            except OutOfRangeError:
                print(
                    "Finished validation of iteration {}...".format(iteration))
                break

        summary = sess.run(streaming_accuracy_scalar)
        writer.add_summary(summary, iteration)
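
The tensors looked up by name above come from the standard tf.metrics pattern: each metric keeps accumulator variables (registered under METRIC_VARIABLES) plus an update op. A self-contained sketch, assuming TensorFlow 1.x:

import tensorflow as tf

labels = tf.placeholder(tf.int64, [None])
preds = tf.placeholder(tf.int64, [None])
acc_value, acc_update = tf.metrics.accuracy(labels, preds, name='accuracy_metric')

with tf.Session() as sess:
    # Reset the accumulators, as perform_validation does before each pass.
    sess.run(tf.variables_initializer(
        tf.get_default_graph().get_collection(tf.GraphKeys.METRIC_VARIABLES)))
    sess.run(acc_update, feed_dict={labels: [1, 0, 1], preds: [1, 1, 1]})
    sess.run(acc_update, feed_dict={labels: [0, 0], preds: [0, 0]})
    print(sess.run(acc_value))  # 4 correct out of 5 -> 0.8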
Example #17
def eval_model(is_training: tf.Variable, sess: tf.Session, best_iou: float,
               val_loss: tf.Tensor, val_acc: tf.Tensor,
               val_iou_update: tf.Operation, val_iou: tf.Tensor,
               val_iou_reset: tf.Operation, val_writer: tf.summary.FileWriter,
               epoch: int, saver: tf.train.Saver) -> float:
    """
    evaluates model with one pass over validation set

    :param is_training: tf var which indicates if model is training
    :param sess: tf sess
    :param best_iou: best validation iou until now
    :param val_loss: val loss tensor
    :param val_acc: val accuracy tensor
    :param val_iou_update: val iou update operation
    :param val_iou: val iou tensor
    :param val_iou_reset: val iou reset operation
    :param val_writer: val summary writer
    :param epoch: index of current epoch
    :param saver: tf model saver
    :return: new best iou
    """
    acc_sum, loss_sum = 0, 0

    # toggle training off
    assign_op = is_training.assign(False)
    sess.run(assign_op)

    val_batches = N_VAL_SAMPLES // BATCH_SIZE
    print(f"starting evaluation {val_batches} batches")

    for j in range(val_batches):
        loss_val, acc_val, _, val_iou_val = sess.run(
            [val_loss, val_acc, val_iou_update, val_iou])
        print(
            f"\tevaluation epoch: {epoch:03d}\tbatch {j:03d} eval:"
            f"\tloss: {loss_val:.4f}\taccuracy: {acc_val:.4f}\taccumulated iou {val_iou_val:.4f}"
        )
        acc_sum += acc_val
        loss_sum += loss_val

    # validation summary
    loss = loss_sum / val_batches
    acc = acc_sum / val_batches
    iou = val_iou_val
    summary = get_tf_summary(loss, acc, iou)
    val_writer.add_summary(summary, epoch)
    print(
        f"evaluation:\tmean loss: {loss:.4f}\tmean acc: {acc:.4f}\tmean iou {iou:.4f}\n"
    )

    # save model if it is better
    if iou > best_iou:
        best_iou = iou
        save_path = saver.save(
            sess,
            os.path.join(LOG_DIR + "_train",
                         f"best_model_epoch_{epoch:03d}.ckpt"))
        print(f"Model saved in file: {save_path}\n")

    # reset accumulator
    sess.run(val_iou_reset)

    # toggle training on
    assign_op = is_training.assign(True)
    sess.run(assign_op)

    return best_iou
Example #18
    def train(self,
              sess: tf.Session,
              train_X,
              train_y,
              valid_X,
              valid_y,
              train_summary_writer: tf.summary.FileWriter = None,
              valid_summary_writer: tf.summary.FileWriter = None,
              saver: tf.train.Saver = None,
              ckpt_dir=None,
              no_embedding=False):
        self.logger.info("begin to train...")
        train_data = BatchDataSet(train_X, train_y,
                                  self.train_options.batch_size,
                                  self.train_options.over_sample)
        valid_data = BatchDataSet(valid_X, valid_y,
                                  self.train_options.batch_size,
                                  self.train_options.over_sample)
        run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)

        sess.run(tf.variables_initializer(self.every_train_reset_ops))
        early_stop_cnt = 0
        valid_acc = [1e-18]
        valid_losses = [1e18]
        i_epoch = 0
        while i_epoch < self.train_options.max_epoch:
            if check_early_stop(early_stop_cnt, self.train_options.patient):
                self.logger.info("early stop!!")
                break
            try:
                x_idx, y_idx = train_data.get_next()
                feed_dict = {
                    self.x_idx: x_idx,
                    self.y_idx: y_idx,
                    self.dropout_keep_prob:
                    self.train_options.dropout_keep_prob
                }
                if no_embedding:
                    _, global_step, loss, predictions = sess.run(
                        [
                            self.train_op_no_embedding, self.global_step,
                            self.loss, self.predictions
                        ],
                        feed_dict,
                        options=run_options)
                else:
                    _, global_step, loss, predictions = sess.run(
                        [
                            self.train_op, self.global_step, self.loss,
                            self.predictions
                        ],
                        feed_dict,
                        options=run_options)
                accuracy, sc_pre_cnt, sc_pre_right, sc_cnt = cal_accuracy(
                    predictions, y_idx)

                feed_dict[self.temp_loss] = loss
                feed_dict[self.temp_acc] = accuracy

                summaries = sess.run(self.train_summary_op, feed_dict)
                self.logger.info(
                    "[Train] epoch {}, step {}, nb_batch {}, loss {:g}, acc {:g}. sc_pre_cnt {}, sc_pre_rate {:g}, sc_recall {:g}"
                    .format(
                        i_epoch,
                        global_step,
                        len(y_idx),
                        loss,
                        accuracy,
                        sc_pre_cnt,
                        0 if sc_pre_cnt == 0 else sc_pre_right / sc_pre_cnt,
                        0 if sc_cnt == 0 else sc_pre_right / sc_cnt,
                    ))
                if train_summary_writer:
                    train_summary_writer.add_summary(summaries, global_step)

                if global_step % self.train_options.check_steps == 0:
                    val_accuracy, val_loss = self.evaluate(
                        sess,
                        valid_data,
                        epoch=i_epoch,
                        global_step=global_step,
                        valid_summary_writer=valid_summary_writer)
                    early_stop_cnt += 1
                    if val_loss < valid_losses[-1]:  # save best performance
                        valid_acc.append(val_accuracy)
                        valid_losses.append(val_loss)
                        early_stop_cnt = 0
                        if saver and ckpt_dir:
                            self.save(sess, saver, ckpt_dir, global_step)
                            self.logger.info("model improving and saved !")

            except IndexError:
                self.logger.info('done reading train data.')
                train_data.init_iterator()
                i_epoch += 1
Example #19
def train_iterations(sess: tf.Session,
                     model: models.basics.BasicSiamese,
                     batch_data: data.BatchData,
                     pairs: pd.DataFrame,
                     summary_writer: tf.summary.FileWriter,
                     batch_size: int,
                     epochs: int):
    """
    Execute the train iterations with all the epochs

    :param sess: Tensorflow session
    :param model: Model with a :func:`models.BasicModel.feed_dict` method to get the ``feed_dict`` for
                  ``sess.run(...)``
    :param batch_data: Class containing the information for the batch data, it's necessary because it contains
                       information regarding the mean and std of the radiomic features.
    :param pairs: List of pairs that can be trained. Usually these pairs can be obtained by calling
                  :func:`data.SplitPairs.folds` or :func:`data.SplitPairs.train_test_split`
    :param summary_writer: Summary writer provided by Tensorflow to show the training progress
    :param batch_size: Batch size for training. Since images are usually used, the whole dataset does not fit in
                       memory, so setting the batch size avoids memory overflows.

                       The pairs will be generated by having a number of different ids among all pairs equal to
                       the batch size.
    :param epochs: Number of epochs (complete passes through the dataset) to train for
    """

    # Train iterations
    final_iterations = 0
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        total_pairs = len(pairs)*(settings.TOTAL_ROTATIONS if model.uses_images() else 1)
        for i, batch in enumerate(batch_data.batches(pairs,
                                                     batch_size=batch_size,
                                                     load_images=model.uses_images(),
                                                     train=True)):

            total_pairs -= len(batch.pairs)

            # Execute graph operations but only write summaries once every 5 iterations
            if final_iterations % 5 == 0:
                _, c_index_result, loss, summary = sess.run(
                    [
                        model.minimizer,
                        model.c_index,
                        model.total_loss,
                        model.summary
                    ],
                    feed_dict=model.feed_dict(batch)
                )
                logger.info(f"Epoch: {epoch:>3}, Batch: {i:>4}, size: {len(batch.pairs):>5}, remaining: "
                            f"{total_pairs:>6}, "
                            f"c-index: {c_index_result:>#5.3}, loss: {loss:>#5.3}")
                summary_writer.add_summary(summary, final_iterations)
            else:
                _, c_index_result, loss = sess.run(
                    [
                        model.minimizer,
                        model.c_index,
                        model.total_loss
                    ],
                    feed_dict=model.feed_dict(batch)
                )

            final_iterations += 1