def _build_arg_parser():
    """Build the command-line parser used by ``main``.

    Numeric options are declared with ``type=int`` so that values given on
    the command line reach the rest of the program as integers, exactly like
    their defaults.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("run",
                        choices=["train", "translate"],
                        help="Run type.")
    parser.add_argument("--src",
                        required=True,
                        help="Path to the source file.")
    parser.add_argument("--tgt", help="Path to the target file.")
    parser.add_argument("--valsrc", help="Path to the validation source file.")
    parser.add_argument("--valtgt", help="Path to the validation target file.")
    parser.add_argument("--bpe",
                        help="Enables Byte-Pair Encoding",
                        action="store_true")
    parser.add_argument("--vocab_size",
                        help="Vocabulary Size",
                        type=int,
                        default=16000)
    parser.add_argument("--bpe_vocab_size",
                        help="BPE Vocabulary Size",
                        type=int,
                        default=4000)
    parser.add_argument("--seed",
                        help="Random seed for the experiment",
                        type=int,
                        default=1234)
    parser.add_argument(
        "--monosrc",
        help="Monolingual data source (Target language).",
        type=str,
        default="",
    )
    parser.add_argument("--btsrc", help="Back-translation source file")
    parser.add_argument("--bttgt", help="Back-translation target file")
    parser.add_argument("--monolen",
                        help="Number of monolingual samples to consider.",
                        type=int,
                        default=20000)
    parser.add_argument(
        "--bpe_combined",
        help="Use combined BPE vocabulary for both languages",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "--validate_now",
        help="Skips training and validate at current checkpoint",
        action="store_true",
    )
    parser.add_argument("--output",
                        help="Filename for translated output.",
                        default="output.txt")
    parser.add_argument(
        "--model_dir",
        default="checkpoint",
        help="Directory where checkpoint are written.",
    )
    return parser


def main():
    """Train the OpenNMT Transformer or translate with it.

    The behaviour is driven entirely by the command line (see
    ``_build_arg_parser``):

    * optional BPE preparation of the training / validation / input files,
    * vocabulary construction (training only),
    * optional mixing-in of back-translated and monolingual data,
    * then either ``train(...)`` or ``translate(...)``.

    Raises:
        SystemExit: with status 1 when ``--btsrc`` is given without
            ``--bttgt``, or when ``--monosrc`` is used without ``--bpe``.
    """
    model, checkpoint, optimizer, learning_rate = init_model()
    args = _build_arg_parser().parse_args()
    logger = tf.get_logger()

    # Tensorflow random seed.
    tf.random.set_seed(args.seed)

    combined = args.bpe_combined
    if args.monosrc != "":
        combined = True  # Combined vocabulary must be used for monolingual data!
        logger.info(
            "Using combined BPE vocabulary since monolingual data is used!")
    src = args.src
    tgt = args.tgt
    valsrc = args.valsrc
    valtgt = args.valtgt
    src_vocab, tgt_vocab = get_vocab_file_names(args.model_dir)
    vocab_size = args.vocab_size

    if args.bpe:
        # Prepare the Byte-Pair Encoding model + Byte-Pair Encoded files.
        vocab_size = args.bpe_vocab_size
        if args.run == "train":
            prepare_bpe_models(src,
                               tgt,
                               combined=combined,
                               vocab_size=vocab_size)
            # Only the encoded validation source is kept; valtgt stays as
            # given on the command line.
            valsrc, _ = prepare_bpe_files(valsrc, valtgt, combined=combined)
        src, tgt = prepare_bpe_files(src, tgt, combined=combined)

    # Rebuilds the vocabulary from scratch using only the input data.
    if args.run == "train":
        if not combined:
            build_vocabulary(src, src_vocab, vocab_size)
            build_vocabulary(tgt, tgt_vocab, vocab_size)
        else:
            # Combined vocabulary: both sides share the same token inventory.
            concat_files(src, tgt, "all.tmp")
            build_vocabulary("all.tmp", src_vocab, vocab_size)
            build_vocabulary("all.tmp", tgt_vocab, vocab_size)

    # Add back-translated data if requested.
    if args.btsrc is not None:
        btsrc = args.btsrc
        bttgt = args.bttgt
        if bttgt is None:
            logger.error("Back-translation target must be supplied")
            raise SystemExit(1)
        if args.bpe:
            btsrc, bttgt = prepare_bpe_files(btsrc, bttgt, combined=combined)
        else:
            logger.warning(
                "Warning: Back-translation was not tested without BPE. There could be bugs!"
            )
        tmp_btsrc = "btsrc.tmp"
        tmp_bttgt = "bttgt.tmp"
        concat_files(btsrc, src, tmp_btsrc)
        concat_files(bttgt, tgt, tmp_bttgt)
        # The same seed is used for both files so that source and target
        # lines stay aligned after shuffling.
        # NOTE(review): the monolingual branch below passes inplace=True but
        # this one does not — confirm shuffle_file's default.
        shuffle_file(tmp_btsrc, seed=args.seed)
        shuffle_file(tmp_bttgt, seed=args.seed)
        src = tmp_btsrc
        tgt = tmp_bttgt

    # Add additional monolingual data if requested.
    if args.monosrc != "":
        tmp_monosrc = "monosrc.tmp"
        tmp_monotgt = "monotgt.tmp"
        if not args.bpe:
            logger.error("Monolingual data can only be used with BPE!")
            raise SystemExit(1)
        prepare_bpe_files(args.monosrc, None, combined=combined)
        # The encoded monolingual file is appended to BOTH sides.
        concat_files(src,
                     args.monosrc + ".bpe",
                     tmp_monosrc,
                     lines1=None,
                     lines2=args.monolen)
        concat_files(tgt,
                     args.monosrc + ".bpe",
                     tmp_monotgt,
                     lines1=None,
                     lines2=args.monolen)
        shuffle_file(tmp_monosrc, seed=args.seed, inplace=True)
        shuffle_file(tmp_monotgt, seed=args.seed, inplace=True)
        src = tmp_monosrc
        tgt = tmp_monotgt

    init_data_config(model, src_vocab, tgt_vocab)

    checkpoint_manager = init_checkpoint_manager_and_load_latest_checkpoint(
        checkpoint, args.model_dir)

    if args.run == "train":
        logger.info(
            f"Training on {src}, {tgt}\nValidating on {valsrc}, {valtgt}.\n" +
            f"Vocab = {src_vocab}, {tgt_vocab}\n BPE={args.bpe}")
        train(
            model,
            optimizer,
            learning_rate,
            src,
            tgt,
            checkpoint_manager,
            validation_source_file=valsrc,
            validation_target_file=valtgt,
            validate_now=args.validate_now,
            bpe=args.bpe,
            bpe_combined=combined,
        )
    elif args.run == "translate":
        import os
        import shutil

        # Reserve a unique path for the raw translation. delete=False keeps
        # the (empty) file on disk after the handle is closed, so the name
        # cannot be claimed by something else before translate() writes it.
        with tempfile.NamedTemporaryFile(delete=False) as handle:
            temp = handle.name
        logger.info(f"Translating {src} file to {temp}")
        translate(model, src, output_file=temp)
        if args.bpe:
            # NOTE(review): the output location is chosen by decode_bpe_file;
            # --output is not consulted on this path — confirm intent. The
            # temporary file is left in place because the decoded file's
            # relation to it is not visible from here.
            output_file_name = decode_bpe_file(temp)
            logger.info(f"BPE decoded {temp} file to {output_file_name}")
        else:
            output_file_name = args.output
            shutil.copyfile(temp, output_file_name)
            os.remove(temp)
            logger.info(f"Copied {temp} file to {output_file_name}")
Beispiel #2
0
#https://github.com/marload/DeepRL-TensorFlow2
import tensorflow as tf
import tensorflow.keras.layers as kl
import logging

tf.get_logger().setLevel(logging.ERROR)

import datetime
import gym
import argparse
import numpy as np
from collections import deque
import random
from gym import wrappers
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

from visdom import Visdom


def argument_parse():
    """Declare the script's command-line options on a fresh ArgumentParser.

    Options registered here: ``--env``, ``--gamma``, ``--learning_rate``,
    ``--batch_size``, ``--epsilon_init``, ``--epsilon_min``,
    ``--replay_memory_capacity``, ``--epsilon_decay_end_step`` and
    ``--max_steps``, each with a typed default.

    NOTE(review): the visible code ends after ``--max_steps``; no
    ``parse_args()`` call or ``return`` statement is shown, so as it stands
    the function returns ``None`` — confirm against the complete file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, default="CartPole-v0")
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--learning_rate', type=float, default=0.005)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--epsilon_init', type=float, default=1.0)
    parser.add_argument('--epsilon_min', type=float, default=0.01)
    parser.add_argument('--replay_memory_capacity', type=int, default=10000)
    parser.add_argument('--epsilon_decay_end_step', type=int, default=15000)
    parser.add_argument('--max_steps', type=int, default=30000)
Beispiel #3
0
    def evaluate(self, input_data, labels, inputs_err=None, labels_err=None):
        """
        Evaluate the neural network on the provided input data and labels and get back a metrics score

        If no input normalizer exists yet on this instance, new input/labels normalizers are created and
        fitted on the data passed here (and stored on ``self``); otherwise the existing ones are applied
        without recomputing their statistics.

        :param input_data: Data to evaluate the neural network on
        :type input_data: ndarray
        :param labels: Labels to evaluate the neural network against
        :type labels: ndarray
        :param inputs_err: Error for input_data (if any), same shape with input_data. Zeros are used if None.
        :type inputs_err: Union([NoneType, ndarray])
        :param labels_err: Labels error (if any). Zeros are used if None.
        :type labels_err: Union([NoneType, ndarray])
        :return: metrics score dictionary
        :rtype: dict
        :History: 2018-May-20 - Written - Henry Leung (University of Toronto)
        """
        self.has_model_check()

        if inputs_err is None:
            inputs_err = np.zeros_like(input_data)

        if labels_err is None:
            labels_err = np.zeros_like(labels)

        input_data = {"input": input_data}
        labels = {"output": labels}

        # check if exists (existing means the model has already been trained (e.g. fine-tuning), so we do not need calculate mean/std again)
        if self.input_normalizer is None:
            self.input_normalizer = Normalizer(mode=self.input_norm_mode)
            self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

            norm_data = self.input_normalizer.normalize(input_data)
            self.input_mean, self.input_std = self.input_normalizer.mean_labels, self.input_normalizer.std_labels
            norm_labels = self.labels_normalizer.normalize(labels)
            self.labels_mean, self.labels_std = self.labels_normalizer.mean_labels, self.labels_normalizer.std_labels
        else:
            norm_data = self.input_normalizer.normalize(input_data, calc=False)
            norm_labels = self.labels_normalizer.normalize(labels, calc=False)

        # No need to care about Magic number as loss function looks for magic num in y_true only
        norm_input_err = inputs_err / self.input_std['input']
        norm_labels_err = labels_err / self.labels_std['output']

        norm_data.update({
            "input_err": norm_input_err,
            "labels_err": norm_labels_err
        })
        norm_labels.update({"variance_output": norm_labels["output"]})

        # Use a single batch when the data is no larger than batch_size;
        # otherwise only whole batches are evaluated (floor division).
        total_num = input_data['input'].shape[0]
        eval_batchsize = self.batch_size if total_num > self.batch_size else total_num
        steps = total_num // self.batch_size if total_num > self.batch_size else 1

        start_time = time.time()
        print("Starting Evaluation")

        # suppress pfor warning from TF; the previous level is restored even
        # if evaluation raises
        old_level = tf.get_logger().level
        tf.get_logger().setLevel('ERROR')
        try:
            evaluate_generator = BayesianCNNDataGenerator(
                batch_size=eval_batchsize,
                shuffle=False,
                steps_per_epoch=steps,
                data=[norm_data, norm_labels])

            scores = self.keras_model.evaluate(evaluate_generator)
        finally:
            tf.get_logger().setLevel(old_level)

        if isinstance(scores, float):
            # A single scalar loss: wrap it so it pairs with metrics_names.
            # (list(str(x)) would have split the number into characters.)
            scores = [scores]
        funcname = self.keras_model.metrics_names

        print(
            f'Completed Evaluation, {(time.time() - start_time):.2f}s elapsed'
        )

        return list_to_dict(funcname, scores)
Beispiel #4
0
import tensorflow as tf
import tensorflow.keras as keras

from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Build a TF1-compat session configuration with GPU memory growth enabled,
# then open an InteractiveSession using it.
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

# Let INFO-level (and higher) TensorFlow log records through.
tf.get_logger().setLevel('INFO')


class MLP:
    # Wrapper holding a Keras Sequential model plus the loss, metrics and
    # optimizer settings configured in __init__.
    def __init__(self):
        """Set the default training settings; no model is created yet."""
        self.model = None  # populated by build()
        self.lr = 0.01
        self.loss = 'categorical_crossentropy'
        self.metrics = ['accuracy']
        # SGD with momentum; the decay argument is set to lr / 100.
        self.optimizer = tf.keras.optimizers.SGD(lr=self.lr, decay=self.lr/100,
                                                 momentum=0.9)

    def build(self):
        """Create the Sequential model and add its layers.

        NOTE(review): only the first two Dense/Dropout pairs are visible in
        this excerpt; the rest of the method is not shown.
        """
        self.model = tf.keras.models.Sequential()
        # First hidden layer; input_shape fixes the input to 784 features.
        self.model.add(tf.keras.layers.Dense(units=256, activation='relu',
                                             input_shape=(784, )))
        self.model.add(tf.keras.layers.Dropout(0.2))

        self.model.add(tf.keras.layers.Dense(units=128, activation='relu'))
        self.model.add(tf.keras.layers.Dropout(0.2))
Beispiel #5
0
import tensorflow as tf

# Import TensorFlow Datasets
import tensorflow_datasets as tfds
tfds.disable_progress_bar()

# Helper libraries
import math
import numpy as np
import matplotlib.pyplot as plt

import logging
logger = tf.get_logger()
logger.setLevel(logging.ERROR)

# Load Fashion-MNIST as (image, label) pairs together with its metadata, and
# pick out the train and test splits.
dataset, metadata = tfds.load('fashion_mnist', as_supervised=True, with_info=True)
train_dataset, test_dataset = dataset['train'], dataset['test']

# Human-readable class names (presumably indexed by integer label — verify
# against the dataset metadata).
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal',      'Shirt',   'Sneaker',  'Bag',   'Ankle boot']
               
# Report the split sizes recorded in the dataset metadata.
num_train_examples = metadata.splits['train'].num_examples
num_test_examples = metadata.splits['test'].num_examples
print("Number of training examples: {}".format(num_train_examples))
print("Number of test examples:     {}".format(num_test_examples))

def normalize(images, labels):
  """Cast ``images`` to float32 and divide by 255; ``labels`` pass through.

  Returns:
    A ``(scaled_images, labels)`` tuple.
  """
  scaled = tf.cast(images, tf.float32) / 255
  return scaled, labels
Beispiel #6
0
def main(_):
    """Train and/or evaluate a model through ``TPUEstimator``.

    Everything is configured through ``FLAGS``: the model config file, the
    comma-separated input file patterns, the output directory, TPU settings
    and the ``do_train`` / ``do_eval`` switches. When evaluation runs, the
    resulting metrics are logged and written to ``eval_results.txt`` inside
    ``FLAGS.output_dir``.

    Args:
        _: Unused positional argument.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    # Keep TF log records from propagating to ancestor loggers.
    tf.get_logger().propagate = False

    news_config = GroverConfig.from_json_file(FLAGS.config_file)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    # Expand every comma-separated glob pattern into concrete file paths.
    input_files = [
        matched
        for pattern in FLAGS.input_file.split(",")
        for matched in tf.gfile.Glob(pattern)
    ]

    # tf.logging.info("*** Input Files ***")
    # for input_file in input_files:
    #     tf.logging.info("  %s" % input_file)

    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    else:
        tpu_cluster_resolver = None

    tpu_config = tf.contrib.tpu.TPUConfig(
        iterations_per_loop=FLAGS.iterations_per_loop,
        num_shards=FLAGS.num_tpu_cores,
        per_host_input_for_training=(
            tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2))
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_max=None,
        tpu_config=tpu_config)

    model_fn = model_fn_builder(
        news_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=FLAGS.num_train_steps,
        num_warmup_steps=FLAGS.num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        num_tpu_cores=FLAGS.num_tpu_cores,
        eval_batch_size=FLAGS.eval_batch_size,
    )

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        params={'model_dir': FLAGS.output_dir},
    )

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        estimator.train(
            input_fn=input_fn_builder(input_files=input_files,
                                      seq_length=FLAGS.max_seq_length,
                                      is_training=True),
            max_steps=FLAGS.num_train_steps)

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        result = estimator.evaluate(
            input_fn=input_fn_builder(input_files=input_files,
                                      seq_length=FLAGS.max_seq_length,
                                      is_training=False),
            steps=FLAGS.max_eval_steps)

        # Log each metric and mirror it into eval_results.txt, sorted by name.
        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for metric_name in sorted(result):
                metric_text = str(result[metric_name])
                tf.logging.info("  %s = %s", metric_name, metric_text)
                writer.write("%s = %s\n" % (metric_name, metric_text))
Beispiel #7
0
def main():
    """Set up logging and the model template, then save the CDF-transform samples.

    Reads the module-level ``args`` (``in_data``, ``ood_data``,
    ``ckpt_file``). The only computation still active is the "UNIF" step:
    ``get_cdf_transform`` is run on the in-distribution test split and the
    result is saved under ``intermediate/``. The LR / TT / WN experiments are
    kept below as commented-out code.

    NOTE(review): the visible code ends right after the first ``np.save``;
    the function may continue beyond this excerpt.
    """
    # # write results to file
    # out_dir = os.path.join(args.exp + '_save_dir', 'results')
    # from pathlib import Path
    # Path(out_dir).mkdir(exist_ok=True)
    # out_f = os.path.join(out_dir, 'run%s.txt' % args.suffix)
    # load model
    tf.logging.log(tf.logging.INFO, 'starting the run')
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
    tf.get_logger().setLevel('INFO')
    tf.logging.set_verbosity(tf.logging.INFO)
    # Run identifier used as the prefix of every saved intermediate file.
    name = "_".join(
        [args.in_data, args.ood_data,
         os.path.dirname(args.ckpt_file)])
    model = tf.make_template('model', model_spec)
    tf.logging.log(tf.logging.INFO, 'initializing')
    # NOTE(review): `initializer` is not used in the visible code.
    initializer = tf.global_variables_initializer()
    tf.logging.log(tf.logging.INFO, 'initialized model')

    # log_probs_in, ar_in, cdfs_in = get_preds(model, args, args.in_data, 'test', log_prob_func)
    # log_probs_ood, ar_ood, cdfs_ood = get_preds(model, args, args.ood_data, 'test', log_prob_func)
    # log_probs_train, ar_train, cdfs_train = get_preds(model, args, args.in_data, 'train', log_prob_func)

    # # # LR
    # log_probs_pixel_in = get_log_probs(model, args, args.in_data, 'test')  # (N,32,32,3)
    # log_probs_in = np.mean(log_probs_pixel_in, axis=(1, 2))
    # np.save('intermediate/' + name + '_log_probs_in.npy', log_probs_in)
    # log_probs_pixel_ood = get_log_probs(model, args, args.ood_data, 'test')
    # log_probs_ood = np.mean(log_probs_pixel_ood, axis=(1, 2))
    # np.save('intermediate/' + name + '_log_probs_ood.npy', log_probs_ood)

    # complexity_in = get_complexity(args, args.in_data, 'test')
    # complexity_ood = get_complexity(args, args.ood_data, 'test')
    # np.save('intermediate/' + name + '_complexity_ood.npy', complexity_ood)
    # np.save('intermediate/' + name + '_complexity_ood.npy', complexity_ood)
    # print(log_probs_in, log_probs_ood)
    # print(len(log_probs_in),len(log_probs_ood),len(complexity_in), len(complexity_ood))
    # auc, auc_llr = compute_auc_llr(log_probs_in, log_probs_ood, complexity_in, complexity_ood)
    # tf.logging.log(tf.logging.INFO, f'LL: {auc}')
    # tf.logging.log(tf.logging.INFO, f'LR: {auc_llr}')
    # with open(f'results/{name}.txt', 'a') as f:
    #     f.write(f'LL: {auc}\n')
    #     f.write(f'LR: {auc_llr}\n')
    # # # # TT
    # log_probs_pixel_train = get_log_probs(model, args, args.in_data, 'train')
    # log_probs_train = np.mean(log_probs_pixel_train, axis=(1, 2))
    # np.save('intermediate/' + name + '_log_probs_train.npy', log_probs_train)

    # train_entropy = get_entropy(log_probs_train)
    # typical_ts_in = list(map(abs, log_probs_in - train_entropy))
    # typical_ts_ood = list(map(abs, log_probs_ood - train_entropy))
    # print('before', typical_ts_in, typical_ts_ood)
    # # want higher to be better
    # print('-1', np.array(typical_ts_in) * -1, np.array(typical_ts_ood) * -1)
    # auc_tt = compute_auc(np.array(typical_ts_in) * -1, np.array(typical_ts_ood) * -1)
    # tf.logging.log(tf.logging.INFO, f'TT: {auc_tt}')
    # with open(f'results/{name}.txt', 'a') as f:
    #     f.write(f'TT: {auc_tt}')
    # # # WN
    # ar_in = get_ar(model, args, args.in_data, 'test')
    # ar_ood = get_ar(model, args, args.ood_data, 'test')
    # ar_train = get_ar(model, args, args.in_data, 'train')

    # wn_in, wn_ood = time_series_test(np.array(ar_train), np.array(ar_in), np.array(ar_ood), 'bp')
    # print(len(wn_in), len(wn_ood))
    # auc_wn = compute_auc(wn_in * -1, wn_ood * -1)
    # print(f'WN: {auc_wn}')
    # with open(f'results/{name}.txt', 'a') as f:
    #     f.write(f'UNIF: {auc_wn}')
    # UNIF
    unifs_in = get_cdf_transform(model, args, args.in_data,
                                 'test')  # (B,32,32,3)
    np.save('intermediate/' + name + '_unifs_samples.npy', unifs_in)
Beispiel #8
0
    eval_spec = tf.estimator.EvalSpec(
        input_fn.eval_input_fn(feature_config),
        steps=None if FLAGS.eval_steps < 0 else FLAGS.eval_steps,
        throttle_secs=FLAGS.throttle_secs)

    run_config = tf.estimator.RunConfig(
        model_dir=FLAGS.model_path,
        save_checkpoints_steps=FLAGS.checkpoint_steps)

    estimator = tf.estimator.Estimator(model_fn=model_fn.model_fn,
                                       config=run_config,
                                       params={
                                           'feature_config': feature_config,
                                       })

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    _, shard_id = shard_info.get_shard_info()
    if 'TF_CONFIG' not in os.environ or shard_id == 0:
        logging.info("begin the final evaluation:")
        metrics = estimator.evaluate(input_fn.eval_input_fn(feature_config))
        print(metrics)
        estimator.export_saved_model(FLAGS.model_path,
                                     input_fn.build_serving_fn(feature_config))


if __name__ == '__main__':
    # Script entry point: enable INFO-level TensorFlow logging, then hand
    # `main` to app.run.
    tf.get_logger().setLevel("INFO")
    app.run(main)
Beispiel #9
0
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

# REF [site] >>
#	http://opennmt.net/
#	https://github.com/OpenNMT/OpenNMT-tf

import logging
import tensorflow as tf
import tensorflow_addons as tfa
import opennmt as onmt

tf.get_logger().setLevel(logging.INFO)


# REF [file] >> ${OpenNMT-tf_HOME}/examples/library/minimal_transformer_training.py
def minimal_transformer_training_example():
    run_type = 'train'  # Run type: 'train' or 'translate'.
    train_features_filepath = './toy-ende/src-train.txt'  # Path to the source file.
    train_labels_filepath = './toy-ende/tgt-train.txt'  # Path to the target file.
    eval_features_filepath = './toy-ende/src-val.txt'  # Path to the source file.
    eval_labels_filepath = './toy-ende/tgt-val.txt'  # Path to the target file.
    source_vocabulary_filepath = './toy-ende/src-vocab.txt'  # Path to the source vocabulary.
    target_vocabulary_filepath = './toy-ende/tgt-vocab.txt'  # Path to the target vocabulary.
    model_dir_path = './checkpoint'  # Directory where checkpoint are written.

    # See http://opennmt.net/OpenNMT-tf/configuration.html for a complete specification of the configuration.
    config = {
        'model_dir': model_dir_path,
        'data': {
            'source_vocabulary': source_vocabulary_filepath,
Beispiel #10
0
def block_print(log_level):
    """Silence ``print`` when TensorFlow's logger is stricter than ``log_level``.

    If the effective level of the TensorFlow logger is greater than
    ``log_level``, ``sys.stdout`` is replaced with a writable handle on the
    null device; otherwise nothing is changed.
    """
    current_level = tf.get_logger().getEffectiveLevel()
    if current_level <= log_level:
        return
    sys.stdout = open(os.devnull, 'w')
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from pprint import pprint
from typing import List, Union, Dict, Optional, Any

import numpy as np
import tensorflow as tf
from tqdm import tqdm

# See https://github.com/tensorflow/tensorflow/issues/32180
# Once this issue is fixed, tf.lite.TFLiteConverter.from_keras_model can be used instead.
from keras_detection.utils.tflite_converter_shitfix import from_keras_model

# Module-wide handle on TensorFlow's logger and a short alias for tf.keras.
LOGGER = tf.get_logger()
keras = tf.keras

# File-name suffixes for exported TFLite models, one per quantization mode.
TFLITE_SUFFIX = ".tflite"
# No quantization, float32
TFLITE_F32_SUFFIX = ".f32.tflite"
# Dynamic range quantized
TFLITE_DR_SUFFIX = ".dr.quantized.tflite"
# Fixed range quantized
TFLITE_FR_SUFFIX = ".fr.quantized.tflite"


class TFLiteModel:
    # Wraps a TFLite model together with the interpreter created for it.
    def __init__(self, tflite_model: Union[bytes, str, Path]):
        """Store the model and create its interpreter via create_tflite_predict_fn.

        NOTE(review): the remainder of this constructor is not visible in
        this excerpt (e.g. what happens to ``predict_fn``).
        """
        self.tflite_model = tflite_model
        interpreter, predict_fn = create_tflite_predict_fn(tflite_model)
        self.interpreter = interpreter
Beispiel #12
0
    def get_suggestions(self, client_id: Text):
        """Requests suggested Trials from the Optimizer service.

        Args:
            client_id: An ID that identifies the `Tuner` requesting a `Trial`.
                `Tuners` that should run the same trial (for instance, when
                running a multi-worker model) should have the same ID. If
                multiple suggestTrialsRequests have the same tuner_id, the
                service will return the identical suggested trial if the trial
                is PENDING, and provide a new trial if the last suggest trial
                was completed.

        Returns:
            The "response" entry of the completed suggestion operation; the
            suggested Trials, when there are any, are under its "trials" key.
            An empty dict is returned when the service replies with HTTP
            status 429.

        Raises:
            SuggestionInactiveError: The response carries no "trials" and its
                "studyState" is "INACTIVE".
            errors.HttpError: The suggest request failed with a status other
                than 429.
        """
        # Requests a trial.
        try:
            trials_api = (
                self.service_client.projects().locations().studies().trials()
            )
            suggest_request = trials_api.suggest(
                parent=self._make_study_name(),
                body={
                    "client_id": client_id,
                    "suggestion_count":
                        constants.SUGGESTION_COUNT_PER_REQUEST,
                },
            )
            resp = suggest_request.execute()
        except errors.HttpError as e:
            if e.resp.status != 429:
                tf.get_logger().info("SuggestTrial failed.")
                raise e
            # Status 429 'RESOURCE_EXHAUSTED' is raised when more trials than
            # the maximum limit (1000) of the Optimizer service for a study
            # are requested, or the finite search space is used up.
            # For distributed tuning, one tuner worker may request the 1001st
            # trial while another worker has not finished training the
            # 1000th trial, which triggers this error.
            tf.get_logger().info("Reached max number of trials.")
            return {}

        # Polls the suggestion of long-running operations.
        tf.get_logger().info("CreateTrial: polls the suggestions.")
        suggestions = self._obtain_long_running_operation(resp)["response"]

        study_inactive = (
            "trials" not in suggestions
            and suggestions["studyState"] == "INACTIVE"
        )
        if study_inactive:
            raise SuggestionInactiveError(
                "The study is stopped due to an internal error."
            )
        return suggestions
Beispiel #13
0
def create_or_load_study(
    project_id: Text,
    region: Text,
    study_id: Text,
    study_config: Optional[Dict[Text, Any]] = None,
) -> _OptimizerClient:
    """Creates a CAIP Optimizer study, or loads it if it already exists.

    A create request is always sent first. If the service answers with HTTP
    409 (the study exists), the study is fetched instead, retrying once per
    second up to `constants.NUM_TRIES_FOR_STUDIES` attempts. An existing
    study is never modified by this function.

    Because creation falls back to loading, many jobs may call this function
    at nearly the same time with the same `study_config` and all end up
    pointing at the same study.

    Args:
        project_id: A GCP project id.
        region: A GCP region. e.g. 'us-central1'.
        study_id: An identifier of the study. If not supplied, system-determined
            unique ID is given. The full study name will be
            projects/{project_id}/locations/{region}/studies/{study_id}.
            And the full trial name will be {study name}/trials/{trial_id}.
        study_config: Study configuration for CAIP Optimizer service.

    Returns:
        An _OptimizerClient object with the specified study created or loaded.

    Raises:
        errors.HttpError: The create request failed with a status other
            than 409.
        RuntimeError: The existing study could not be fetched within the
            allowed number of attempts.
    """
    # The Optimizer service is exposed as a regional endpoint, so its API
    # client is built from the bundled discovery document rather than from
    # the default one.
    with open(constants.OPTIMIZER_API_DOCUMENT_FILE) as f:
        api_document = json.load(f)
    service_client = discovery.build_from_document(
        service=api_document,
        requestBuilder=google_api_client.TFCloudHttpRequest,
    )

    study_parent = "projects/{}/locations/{}".format(project_id, region)
    create_request = service_client.projects().locations().studies().create(
        body={"study_config": study_config},
        parent=study_parent,
        studyId=study_id,
    )

    try:
        tf.get_logger().info(create_request.execute())
    except errors.HttpError as e:
        # 409 means the study already exists; anything else is a failure.
        if e.resp.status != 409:
            raise e

        tf.get_logger().info("Study already existed. Load existing study...")
        study_name = "{}/studies/{}".format(study_parent, study_id)
        attempt = 1
        while True:
            try:
                service_client.projects().locations().studies().get(
                    name=study_name
                ).execute()
                break
            except errors.HttpError as err:
                if attempt >= constants.NUM_TRIES_FOR_STUDIES:
                    raise RuntimeError(
                        "GetStudy wasn't successful after {0} tries: "
                        "{1!s}".format(constants.NUM_TRIES_FOR_STUDIES, err)
                    )
                attempt += 1
                # Pause for one second before asking for the study again.
                time.sleep(1)

    return _OptimizerClient(service_client, project_id, region, study_id)
Beispiel #14
0
        # Convert the inputs to a Dataset.
        dataset = tf.data.Dataset.from_tensor_slices(inputs)

        # Batch the examples
        dataset = dataset.batch(self.batch_size)

        # Return the dataset.
        return dataset


if __name__ == '__main__':
    # Demo: build a DNNClassifier for the iris data set.
    from tensorflow_estimator import estimator
    from sklearn.datasets import load_iris

    tf.get_logger().setLevel(2)

    CSV_COLUMN_NAMES = [
        'SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth'
    ]  # 'Species'
    # return_X_y must be passed by keyword: recent scikit-learn releases
    # reject it as a positional argument.
    X, y = load_iris(return_X_y=True)
    X = pd.DataFrame(X, columns=CSV_COLUMN_NAMES)

    # Feature columns describe how to use the input.
    my_feature_columns = list(
        map(tf.feature_column.numeric_column, CSV_COLUMN_NAMES))

    clf = estimator.DNNClassifier(hidden_units=[10, 10],
                                  feature_columns=my_feature_columns,
                                  n_classes=3,
                                  batch_norm=True)
Beispiel #15
0
def train_and_evaluate_dist(
    table_id: str,
    job_dir: str,
    bucket_name: str,
    prefix: str,
    params: dict,
    job_name=None,
    task_index=-1,
    num_workers=1,
):
    """
    Train and evaluate the estimator under a ParameterServerStrategy.

    The arguments are first published through module-level globals
    (presumably read by the input/model functions of this module -- verify
    against their definitions), then a ``tf.estimator.Estimator`` wrapping
    ``model_fn`` is built and handed to ``tf.estimator.train_and_evaluate``.

    :param table_id: table identifier passed to ``data.get_sample_count`` to
        size the training and validation partitions
    :param job_dir: base output directory, forwarded to ``make_job_output``
    :param bucket_name: stored in the ``BUCKET_NAME`` global
    :param prefix: stored in the ``PREFIX`` global
    :param params: run parameters; the keys read here are ``batch_size``,
        ``epochs``, ``log_step_count_steps``, ``summary_write_steps``,
        ``no_generated_job_path`` and ``data_source``
    :param job_name: stored in the ``JOB_NAME`` global
    :param task_index: stored in the ``TASK_INDEX`` global
    :param num_workers: stored in the ``NUM_WORKERS`` global
    :raises ValueError: if ``params['data_source']`` is neither
        ``'bigquery'`` nor ``'avro'``
    :return: None
    """

    global global_table_id
    global global_params
    global TASK_INDEX
    global NUM_WORKERS
    global JOB_NAME
    global BUCKET_NAME
    global PREFIX
    global_table_id = table_id
    # params['batch_size'] = params['batch_size'] * NUM_WORKERS
    global_params = params
    JOB_NAME = job_name
    TASK_INDEX = task_index
    NUM_WORKERS = num_workers
    BUCKET_NAME = bucket_name
    PREFIX = prefix

    # Resolve the input functions before any expensive setup so that an
    # unsupported data source fails immediately with a clear message instead
    # of an UnboundLocalError once training is about to start.
    data_source = global_params['data_source']
    if data_source == 'bigquery':
        input_fn_train = input_fn_train_bq
        input_fn_eval = input_fn_eval_bq
    elif data_source == 'avro':
        input_fn_train = input_fn_train_avro
        input_fn_eval = input_fn_eval_avro
    else:
        raise ValueError(
            "Unsupported data_source {!r}; expected 'bigquery' or "
            "'avro'.".format(data_source))

    # strategy = tf.distribute.MirroredStrategy()

    # strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
    strategy = tf.distribute.experimental.ParameterServerStrategy()

    tf.get_logger().info("NTC_DEBUG: Number of devices in strategy: {}".format(
        strategy.num_replicas_in_sync))

    tf.summary.trace_on(graph=False, profiler=False)

    train_steps_per_epoch = math.ceil(
        data.get_sample_count(table_id, partition='train') /
        params['batch_size'])

    config = tf.estimator.RunConfig(
        log_step_count_steps=global_params['log_step_count_steps'],
        save_summary_steps=global_params['summary_write_steps'],
        # Evaluate every quarter through the epoch
        save_checkpoints_steps=math.floor(train_steps_per_epoch * .25),
        # session_config=get_session_config(job_name, task_index),
        train_distribute=strategy,
        eval_distribute=strategy)

    classifier = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=make_job_output(job_dir,
                                  global_params['no_generated_job_path']),
        config=config)

    tf.estimator.train_and_evaluate(
        classifier,
        train_spec=tf.estimator.TrainSpec(input_fn=input_fn_train,
                                          max_steps=train_steps_per_epoch *
                                          params['epochs']),
        eval_spec=tf.estimator.EvalSpec(
            input_fn=input_fn_eval,
            # One full pass over the validation partition.
            steps=math.ceil(
                data.get_sample_count(table_id, partition='validation') /
                params['batch_size']),
            # throttle_secs=60,
        ))
Beispiel #16
0
def custom_transformer_training_example():
    """Assemble a Transformer from OpenNMT-tf components and run it.

    Builds a ``SequenceToSequence`` model with self-attention encoder and
    decoder, a Noam learning-rate schedule and a LazyAdam optimizer, restores
    the latest checkpoint found in the checkpoint directory (if any), then
    either trains or translates depending on the hard-coded ``run_type``.
    """
    # Run type: 'train' or 'translate'.
    run_type = 'train'

    # Corpus files (source / target) for training and evaluation.
    train_features_filepath = './toy-ende/src-train.txt'
    train_labels_filepath = './toy-ende/tgt-train.txt'
    eval_features_filepath = './toy-ende/src-val.txt'
    eval_labels_filepath = './toy-ende/tgt-val.txt'
    # Vocabulary files for each side.
    source_vocabulary_filepath = './toy-ende/src-vocab.txt'
    target_vocabulary_filepath = './toy-ende/tgt-vocab.txt'
    # Directory where checkpoints are written.
    model_dir_path = './checkpoint'

    # See http://opennmt.net/OpenNMT-tf/configuration.html for a complete
    # specification of the configuration.
    data_config = dict(source_vocabulary=source_vocabulary_filepath,
                       target_vocabulary=target_vocabulary_filepath)

    # --------------------
    # Define the model. For the purpose of this example, the model components
    # (encoder, decoder, etc.) will be called separately.
    # The encoder and the decoder share the same layer hyper-parameters.
    layer_options = dict(num_layers=6,
                         num_units=512,
                         num_heads=8,
                         ffn_inner_dim=2048,
                         dropout=0.1,
                         attention_dropout=0.1,
                         ffn_dropout=0.1)
    model = onmt.models.SequenceToSequence(
        source_inputter=onmt.inputters.WordEmbedder(embedding_size=512),
        target_inputter=onmt.inputters.WordEmbedder(embedding_size=512),
        encoder=onmt.encoders.SelfAttentionEncoder(**layer_options),
        decoder=onmt.decoders.SelfAttentionDecoder(**layer_options))

    # Learning rate schedule and optimizer.
    learning_rate = onmt.schedules.NoamDecay(scale=2.0,
                                             model_dim=512,
                                             warmup_steps=8000)
    optimizer = tfa.optimizers.LazyAdam(learning_rate)

    # Track the model and optimizer weights.
    checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)

    # --------------------
    model.initialize(data_config)

    checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                    model_dir_path,
                                                    max_to_keep=5)
    latest = checkpoint_manager.latest_checkpoint
    if latest is not None:
        tf.get_logger().info('Restoring parameters from %s', latest)
        checkpoint.restore(latest)

    if run_type == 'train':
        train(model, optimizer, learning_rate, train_features_filepath,
              train_labels_filepath, checkpoint_manager)
        return
    if run_type == 'translate':
        translate(model, train_features_filepath)
Beispiel #17
0
import cv2
import numpy as np
from bamot.config import CONFIG as config
from bamot.core.base_types import (CameraParameters, Feature, FeatureMatcher,
                                   Landmark, Match, StereoCamera)
from g2o import AngleAxis
from PIL import Image, ImageDraw

# Import TensorFlow for static type checkers only; at runtime it is imported
# below, and only when a non-ORB feature matcher is configured.
if TYPE_CHECKING:
    import tensorflow as tf

# Any matcher other than "orb" pulls in the SuperPoint demo module and
# TensorFlow, and configures TensorFlow at import time.
if config.FEATURE_MATCHER != "orb":
    import bamot.thirdparty.SuperPoint.superpoint.match_features_demo as sp
    import tensorflow as tf

    tf.get_logger().setLevel(logging.ERROR)  # suppress TF1 -> TF2 warnings
    tf.config.threading.set_inter_op_parallelism_threads(
        2
    )  # so that extraction can run in parallel
    # The saved model is loaded only when a GPU is visible; without one,
    # LOADED and MODEL are never defined by this block.
    if tf.config.list_physical_devices("GPU"):
        LOADED = tf.saved_model.load(config.SUPERPOINT_WEIGHTS_PATH)
        MODEL = LOADED.signatures["serving_default"]

# Module-level logger for this utility module.
LOGGER = logging.getLogger("UTIL:CV")


class TriangulationError(Exception):
    """Custom exception type; by its name, signals a triangulation failure."""


def get_oobbox_vec(pos: np.ndarray, yaw: np.ndarray, dims: np.ndarray) -> np.ndarray:
Beispiel #18
0
def train(
        model,
        optimizer,
        learning_rate,
        source_file,
        target_file,
        checkpoint_manager,
        maximum_length=100,
        shuffle_buffer_size=-1,  # Uniform shuffle.
        train_steps=100000,
        save_every=1000,
        report_every=100):
    """Run a hand-written training loop over a parallel corpus.

    Args:
        model: The sequence-to-sequence model; its inputters, encoder and
            decoder are called separately inside the training step.
        optimizer: The optimizer used to compute and apply the gradients;
            its ``iterations`` counter is the step number.
        learning_rate: Callable mapping a step to the learning rate; only
            used for the progress log.
        source_file: The source training file.
        target_file: The target training file.
        checkpoint_manager: The checkpoint manager used to save checkpoints.
        maximum_length: Filter sequences longer than this.
        shuffle_buffer_size: How many examples to load for shuffling.
        train_steps: Stop once the step counter equals this value.
        save_every: Save a checkpoint every this many iterations.
        report_every: Report training progress every this many iterations.
    """

    # Build the training dataset: token-based batches, with sequences
    # bucketized by the same length for efficiency.
    training_data = model.examples_inputter.make_training_dataset(
        source_file,
        target_file,
        batch_size=3072,
        batch_type='tokens',
        shuffle_buffer_size=shuffle_buffer_size,
        length_bucket_width=1,
        maximum_features_length=maximum_length,
        maximum_labels_length=maximum_length)

    @tf.function(input_signature=training_data.element_spec)
    def training_step(source, target):
        # Encoder pass.
        embedded_source = model.features_inputter(source, training=True)
        memory, _, _ = model.encoder(embedded_source,
                                     source['length'],
                                     training=True)

        # Decoder pass, attending over the encoder outputs.
        embedded_target = model.labels_inputter(target, training=True)
        initial_state = model.decoder.initial_state(
            memory=memory, memory_sequence_length=source['length'])
        logits, _, _ = model.decoder(embedded_target,
                                     target['length'],
                                     state=initial_state,
                                     training=True)

        # Cross entropy loss, returned as a numerator / denominator pair.
        numerator, denominator, _ = onmt.utils.cross_entropy_sequence_loss(
            logits,
            target['ids_out'],
            target['length'],
            label_smoothing=0.1,
            average_in_time=True,
            training=True)
        loss = numerator / denominator

        # Gradient computation and update.
        trainable = model.trainable_variables
        grads = optimizer.get_gradients(loss, trainable)
        optimizer.apply_gradients(list(zip(grads, trainable)))
        return loss

    # Main loop: one optimizer update per batch.
    for source_batch, target_batch in training_data:
        batch_loss = training_step(source_batch, target_batch)
        step = optimizer.iterations.numpy()
        if step % report_every == 0:
            tf.get_logger().info('Step = %d ; Learning rate = %f ; Loss = %f',
                                 step, learning_rate(step), batch_loss)
        if step % save_every == 0:
            tf.get_logger().info('Saving checkpoint for step %d', step)
            checkpoint_manager.save(checkpoint_number=step)
        if step == train_steps:
            break
def main(_):
    """Train the ``ModelMLossHead`` model, resuming from a checkpoint if any.

    Reads the YAML config named by ``FLAGS.cfg_path``, builds the model and
    the ``Cifar`` datasets, restores the latest checkpoint under
    ``./checkpoints/<sub_name>`` when one exists, then trains either with a
    custom eager loop (``FLAGS.mode == 'eager_tf'``) or through Keras
    ``fit_generator``.

    Args:
        _: Unused positional argument.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    # Silence the TensorFlow Python logger.
    tf_logger = tf.get_logger()
    tf_logger.disabled = True
    tf_logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)
    # NOTE(review): the original inline remark on `training` says "here equal
    # false ... to load the model trained by arcface", yet True is passed --
    # confirm which is intended.
    model = ModelMLossHead(size=cfg['input_size'],
                           embd_shape=cfg['embd_shape'],
                           backbone_type=cfg['backbone_type'],
                           training=True,
                           cfg=cfg)

    cifar = Cifar(cfg['batch_size'])
    train_dataset = cifar.build_training_data()
    val_dataset = cifar.build_validation_data()
    dataset_len = cfg['num_samples']
    steps_per_epoch = dataset_len // cfg['batch_size']

    learning_rate = tf.constant(cfg['base_lr'])
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    # optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
    # optimiser = tf.train.MomentumOptimizer(learning_rate,momentum=0.9, )
    for weight in model.trainable_weights:
        print("trainable:", weight.name)
    print('\n')
    model.summary(line_length=80)

    # Resume from the newest checkpoint, or start counting from 1.
    ckpt_path = tf.train.latest_checkpoint('./checkpoints/' + cfg['sub_name'])
    if ckpt_path is None:
        print("[*] training from scratch.")
        epochs, steps = 1, 1
    else:
        print("[*] load ckpt from {}".format(ckpt_path))
        model.load_weights(ckpt_path)
        epochs, steps = get_ckpt_inf(ckpt_path, steps_per_epoch)

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        summary_writer = tf.summary.create_file_writer(
            './logs/' + cfg['sub_name'])

        batches = iter(train_dataset)

        while epochs <= cfg['epochs']:
            # Every 5th step is timed and reported.
            is_report_step = steps % 5 == 0
            if is_report_step:
                start = time.time()

            inputs, labels = next(batches)

            with tf.GradientTape() as tape:
                logist = model((inputs, labels), training=True)
                reg_loss = tf.cast(tf.reduce_sum(model.losses), tf.double)
                pred_loss = 0.0
                # logist = tf.cast(logist,tf.double)

                # Only the model's own losses are optimized here.
                total_loss = reg_loss

            grads = tape.gradient(total_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            if is_report_step:
                end = time.time()
                elapsed = end - start
                step_in_epoch = steps % steps_per_epoch
                remaining_min = (steps_per_epoch -
                                 step_in_epoch) * elapsed / 60.0
                progress_fmt = "Epoch {}/{}: {}/{}, loss={:.2f}, lr={:.4f}, time per step={:.2f}s, remaining time 4 this epoch={:.2f}min"
                print(progress_fmt.format(epochs, cfg['epochs'],
                                          step_in_epoch,
                                          steps_per_epoch,
                                          total_loss.numpy(),
                                          learning_rate.numpy(),
                                          elapsed,
                                          remaining_min))

                with summary_writer.as_default():
                    tf.summary.scalar(
                        'loss/total loss', total_loss, step=steps)
                    tf.summary.scalar(
                        'loss/pred loss', pred_loss, step=steps)
                    tf.summary.scalar(
                        'loss/reg loss', reg_loss, step=steps)
                    tf.summary.scalar(
                        'learning rate', optimizer.lr, step=steps)

            if steps % cfg['save_steps'] == 0:
                print('[*] save ckpt file!')
                model.save_weights('checkpoints/{}/e_{}_b_{}.ckpt'.format(
                    cfg['sub_name'], epochs, steps % steps_per_epoch))

            steps += 1
            epochs = steps // steps_per_epoch + 1
    else:
        print("[*] only support eager_tf!")
        model.compile(optimizer=optimizer, loss=None)
        checkpoint_cb = ModelCheckpoint(
            'checkpoints/' + cfg['sub_name'] + '/e_{epoch}_b_{batch}.ckpt',
            save_freq=cfg['save_steps'] * cfg['batch_size'], verbose=1,
            save_weights_only=True)
        tensorboard_cb = TensorBoard(log_dir='logs/' + cfg['sub_name'],
                                     update_freq=cfg['batch_size'] * 5,
                                     profile_batch=0)
        # Seed the TensorBoard counters with the restored step.
        tensorboard_cb._total_batches_seen = steps
        tensorboard_cb._samples_seen = steps * cfg['batch_size']
        callbacks = [checkpoint_cb, tensorboard_cb]

        def batch_generator(dataset):
            # Endlessly yield [inputs, labels] pairs from the dataset.
            batch_iter = iter(dataset)
            while True:
                inputs, labels = next(batch_iter)
                yield [inputs, labels]

        model.fit_generator(batch_generator(train_dataset),
                            epochs=cfg['epochs'],
                            steps_per_epoch=steps_per_epoch,
                            callbacks=callbacks,
                            initial_epoch=epochs - 1)

    print("[*] training done!")
Beispiel #20
0
    def __call__(
        self,
        dataset,
        max_step=None,
        accum_steps=1,
        report_steps=100,
        save_steps=5000,
        evaluator=None,
        eval_steps=5000,
        moving_average_decay=None,
    ):
        """Drives the training loop until a stop condition is reached.

        Args:
          dataset: A ``tf.data.Dataset`` or a function taking a
            ``tf.distribute.InputContext`` instance and returning a
            ``tf.data.Dataset``.
          max_step: The final training step.
          accum_steps: The number of gradient accumulation steps.
          report_steps: Report status every this many steps.
          save_steps: Save a checkpoint every this many steps.
          evaluator: A :class:`opennmt.evaluation.Evaluator` instance to call
            for evaluation.
          eval_steps: Evaluate every this many steps.
          moving_average_decay: If set, maintain an exponential moving average
            of the model variables using this decay value (usually close to 1,
            e.g. 0.9999). See
            https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage.

        Returns:
          A dictionary with various training statistics.

        Raises:
          RuntimeError: If ``max_step`` or the early stopping conditions are
            already reached before training starts, if the loss becomes NaN,
            or if the dataset yields no training step.
        """
        reached_max_step = (
            max_step is not None
            and self._optimizer.iterations.numpy() >= max_step)
        if reached_max_step:
            raise RuntimeError(
                "The training already reached max_step (%d). If you "
                "want to continue the training, you should increase the "
                "max_step value in the training parameters." % max_step)
        if evaluator is not None and evaluator.should_stop():
            raise RuntimeError(
                "The early stopping conditions are already met. If you "
                "want to continue the training, you should update your "
                "early stopping parameters.")

        self._gradient_accumulator.reset()

        with self._summary_writer.as_default():
            self._training_stats = TrainingStats(
                self._model, self._optimizer, reduce_fn=self._all_reduce_sum)
            iterations = self._optimizer.iterations
            tf.summary.experimental.set_step(iterations)

            # `step` stays None when the dataset yields nothing at all.
            step = None
            moving_average = None
            losses = self._steps(dataset,
                                 accum_steps=accum_steps,
                                 report_steps=report_steps)
            for loss in losses:
                if tf.math.is_nan(loss):
                    raise RuntimeError("Model diverged with loss = NaN.")

                if moving_average_decay is not None and self._is_master:
                    # The moving average is created lazily on the first step.
                    if moving_average is None:
                        moving_average = MovingAverage(
                            self._model.trainable_variables,
                            iterations,
                            decay=moving_average_decay,
                        )
                    self._update_moving_average(moving_average)

                step = iterations.numpy()
                self._training_stats.update_on_step(step, loss)

                # Reporting, saving and evaluating each request a throughput
                # reset, applied at the end of the iteration.
                needs_throughput_reset = False
                if step % report_steps == 0:
                    self._training_stats.log(self._is_master)
                    needs_throughput_reset = True
                if step == 1 or (save_steps is not None
                                 and step % save_steps == 0):
                    self._save_checkpoint(step, moving_average=moving_average)
                    needs_throughput_reset = True
                if eval_steps is not None and step % eval_steps == 0:
                    needs_throughput_reset = True
                    if self._evaluate(evaluator,
                                      step,
                                      moving_average=moving_average):
                        tf.get_logger().warning(
                            "Early stopping conditions are met. Exiting.")
                        break
                if step == max_step:
                    break
                if needs_throughput_reset:
                    self._training_stats.reset_throughput()

            if step is None:
                raise RuntimeError(
                    "No training steps were executed. This usually means the "
                    "training file is empty or all examples were filtered out. "
                    "For the latter, verify that the maximum_*_length values are "
                    "consistent with your data.")

            # Final bookkeeping: log, checkpoint and evaluate one last time.
            self._training_stats.log_final(self._is_master)
            summary = self._training_stats.get_global_summary()
            self._save_checkpoint(step, moving_average=moving_average)
            self._evaluate(evaluator, step, moving_average=moving_average)
            return summary
Beispiel #21
0
    def __init__(self, config_builder):
        """Constructor for WitWidgetBase.

        Splits the built configuration into the non-serializable pieces
        (estimators, user callbacks, examples), which are stored as
        attributes, and the remaining settings, which are stored in
        ``self.config``.

        Args:
          config_builder: WitConfigBuilder object containing settings for WIT.
        """
        tf.get_logger().setLevel(logging.WARNING)
        config = config_builder.build()
        settings = dict(config)

        # Estimators are copied onto the instance and dropped from settings.
        self.estimator_and_spec = {}
        if 'estimator_and_spec' in config:
            self.estimator_and_spec = dict(config.get('estimator_and_spec'))
        self.compare_estimator_and_spec = {}
        if 'compare_estimator_and_spec' in config:
            self.compare_estimator_and_spec = dict(
                config.get('compare_estimator_and_spec'))
        for key in ('estimator_and_spec', 'compare_estimator_and_spec'):
            settings.pop(key, None)

        # User-supplied callbacks (None when not configured).
        self.custom_predict_fn = config.get('custom_predict_fn')
        self.compare_custom_predict_fn = config.get(
            'compare_custom_predict_fn')
        self.custom_distance_fn = config.get('custom_distance_fn')
        self.adjust_prediction_fn = config.get('adjust_prediction')
        self.compare_adjust_prediction_fn = config.get(
            'compare_adjust_prediction')
        self.adjust_example_fn = config.get('adjust_example')
        self.compare_adjust_example_fn = config.get('compare_adjust_example')
        self.adjust_attribution_fn = config.get('adjust_attribution')
        self.compare_adjust_attribution_fn = config.get(
            'compare_adjust_attribution')

        # A custom distance function is replaced by a boolean marker.
        uses_custom_distance = 'custom_distance_fn' in settings
        callback_keys = (
            'custom_predict_fn',
            'compare_custom_predict_fn',
            'custom_distance_fn',
            'adjust_prediction',
            'compare_adjust_prediction',
            'adjust_example',
            'compare_adjust_example',
            'adjust_attribution',
            'compare_adjust_attribution',
        )
        for key in callback_keys:
            settings.pop(key, None)
        if uses_custom_distance:
            settings['uses_custom_distance_fn'] = True

        self.set_examples(config['examples'])
        del settings['examples']

        self.config = settings

        # If using AI Platform for prediction, set the correct custom prediction
        # functions.
        if self.config.get('use_aip'):
            self.custom_predict_fn = self._predict_aip_model
        if self.config.get('compare_use_aip'):
            self.compare_custom_predict_fn = self._predict_aip_compare_model

        # If using JSON input (not Example protos) and a custom predict
        # function, then convert examples to JSON before sending to the
        # custom predict function.
        if not self.config.get('uses_json_input'):
            return

        if self.custom_predict_fn is not None:
            user_predict = self.custom_predict_fn

            def wrapped_custom_predict_fn(examples):
                json_examples = self._json_from_tf_examples(examples)
                return user_predict(json_examples)

            self.custom_predict_fn = wrapped_custom_predict_fn
        if self.compare_custom_predict_fn is not None:
            compare_user_predict = self.compare_custom_predict_fn

            def wrapped_compare_custom_predict_fn(examples):
                json_examples = self._json_from_tf_examples(examples)
                return compare_user_predict(json_examples)

            self.compare_custom_predict_fn = wrapped_compare_custom_predict_fn
Beispiel #22
0
    def __call__(self, step):
        """Runs the evaluator.

        Iterates over the evaluation dataset, accumulates the loss, updates
        the model metrics and, when prediction saving is enabled, writes the
        predictions to ``predictions.txt.<step>`` in the evaluation directory
        and runs the external scorers on that file.

        Args:
          step: The current training step.

        Returns:
          A dictionary of evaluation metrics.

        Raises:
          RuntimeError: If the evaluation dataset yielded no example.
        """
        tf.get_logger().info("Running evaluation for step %d", step)
        output_file = None
        output_path = None
        ordered_writer = None
        if self._save_predictions:
            output_path = os.path.join(self._eval_dir,
                                       "predictions.txt.%d" % step)
            output_file = tf.io.gfile.GFile(output_path, "w")

            def write_fn(prediction):
                return self._model.print_prediction(prediction,
                                                    stream=output_file)

            def index_fn(prediction):
                return prediction.get("index")

            # Restores the original example order before writing.
            ordered_writer = misc.OrderRestorer(index_fn, write_fn)

        loss_num = 0
        loss_den = 0
        # The predictions file is closed in `finally` so that it is not
        # leaked when evaluation fails midway.
        try:
            metrics = self._model.get_metrics()
            for source, target in self._dataset:
                loss, predictions = self._eval_fn(source, target)
                if isinstance(loss, tuple):
                    # (numerator, denominator) pair.
                    loss_num += loss[0]
                    loss_den += loss[1]
                else:
                    loss_num += loss
                    loss_den += 1
                if metrics:
                    self._model.update_metrics(metrics, predictions, target)
                if output_file is not None:
                    predictions = {
                        k: v.numpy() for k, v in predictions.items()
                    }
                    for prediction in misc.extract_batches(predictions):
                        ordered_writer.push(prediction)
        finally:
            if output_file is not None:
                output_file.close()
        if loss_den == 0:
            raise RuntimeError("No examples were evaluated")
        loss = loss_num / loss_den

        results = dict(loss=loss, perplexity=tf.math.exp(loss))
        if metrics:
            for name, metric in metrics.items():
                results[name] = metric.result()
        if self._save_predictions:
            tf.get_logger().info("Evaluation predictions saved to %s",
                                 output_path)
            # The file is already closed here, so scorers read complete data.
            for scorer in self._scorers:
                score = scorer(self._labels_file, output_path)
                if isinstance(score, dict):
                    results.update(score)
                else:
                    results[scorer.name] = score

        # Convert tensor values to plain NumPy values (keys are unchanged, so
        # assigning while iterating is safe).
        for name, value in results.items():
            if isinstance(value, tf.Tensor):
                results[name] = value.numpy()

        self._record_results(step, results)
        self._maybe_export(step, results)
        self._maybe_garbage_collect_exports()
        return results
Beispiel #23
0
#!/usr/bin/env python3
# Copyright (c) 2020 Graphcore Ltd. All rights reserved.

import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf

tf.get_logger().setLevel('ERROR')

import argparse
import numpy as np
import random
import subprocess
import sys


def print_beam(output_sequence, probability):
    """Print a sequence, its probability and the log of that probability.

    Args:
        output_sequence: Iterable of items; each is converted with ``str``
            and the results are joined with single spaces.
        probability: Value printed with four decimals, both as-is and after
            ``np.log``.
    """
    joined = " ".join(map(str, output_sequence))
    log_probability = np.log(probability)
    print(joined)
    print(f"P = {probability:.4f}")
    print(f"Log(P) = {log_probability:.4f}")


def beam_search_tf(softmax_input, beam_width=4, top_paths=1):
    """Print a ``tensorflow:`` header and open a TF1-style ``tf.Session``.

    NOTE(review): only the first statements of this function are present in
    this excerpt. ``softmax_input``, ``beam_width`` and ``top_paths`` are not
    used by the visible code, and the session is not closed here -- confirm
    against the full source.
    """
    print("tensorflow:")
    sess = tf.Session()

    # Shape : [max_time, batch_size, num_classes]
def main(_argv):
    """Run the detector over the test set and write per-image label files.

    Restores the latest checkpoint from ``./checkpoints/<sub_name>``, runs the
    model built by ``RetinaFaceModel`` on every image listed in ``label.txt``
    of the configured test folder, writes one ``.txt`` result file per image
    under ``FLAGS.save_folder`` and, when ``FLAGS.save_image`` is set, saves
    the annotated image under ``./results/<sub_name>``.

    Args:
        _argv: Unused positional argument.

    Raises:
        FileNotFoundError: If an image listed in the test set cannot be read.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg,
                            training=False,
                            iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    # Look the checkpoint up once so that the path restored and the path
    # reported are guaranteed to be the same.
    ckpt_path = tf.train.latest_checkpoint(checkpoint_dir)
    if ckpt_path:
        checkpoint.restore(ckpt_path)
        print("[*] load ckpt from {}.".format(ckpt_path))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()

    # evaluation on testing dataset
    testset_folder = cfg['testing_dataset_path']
    testset_list = os.path.join(testset_folder, 'label.txt')

    img_paths, _ = load_info(testset_list)
    for img_index, img_path in enumerate(img_paths):
        print(" [{} / {}] det {}".format(img_index + 1, len(img_paths),
                                         img_path))
        img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img_raw is None:
            # cv2.imread signals failure by returning None rather than
            # raising; fail with the offending path instead of an opaque
            # AttributeError on `.shape`.
            raise FileNotFoundError(
                "Cannot read image: {}".format(img_path))
        img_height_raw, img_width_raw, _ = img_raw.shape
        img = np.float32(img_raw.copy())

        # Images that are 720 pixels high are upscaled by 2 before inference.
        resize = 2.0 if img_height_raw == 720 else 1.0

        img = cv2.resize(img,
                         None,
                         None,
                         fx=resize,
                         fy=resize,
                         interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model
        outputs = model(img[np.newaxis, ...]).numpy()

        # recover padding effect
        outputs = recover_pad_output(outputs, pad_params)

        # write results
        img_name = os.path.basename(img_path)
        sub_dir = os.path.basename(os.path.dirname(img_path))
        # Swap the real extension for '.txt'; a plain '.jpg' replacement
        # leaves other extensions untouched and would write the label text
        # under an image file name.
        label_name = os.path.splitext(img_name)[0] + '.txt'
        save_name = os.path.join(FLAGS.save_folder, sub_dir, label_name)

        pathlib.Path(os.path.join(FLAGS.save_folder,
                                  sub_dir)).mkdir(parents=True, exist_ok=True)

        with open(save_name, "w") as label_file:
            bboxs = outputs[:, :4]
            clsids = outputs[:, 14]
            confs = outputs[:, -1]

            label_file.write(img_name + "\n")
            label_file.write(str(len(bboxs)) + "\n")
            for box, conf, clsid in zip(bboxs, confs, clsids):
                # Box corners are scaled by the raw image size and stored as
                # x, y, width, height.
                x = int(box[0] * img_width_raw)
                y = int(box[1] * img_height_raw)
                w = int(box[2] * img_width_raw) - x
                h = int(box[3] * img_height_raw) - y
                fields = (x, y, w, h, conf, clsid)
                label_file.write(" ".join(map(str, fields)) + " \n")

        # save images
        pathlib.Path(os.path.join('./results', cfg['sub_name'],
                                  sub_dir)).mkdir(parents=True, exist_ok=True)
        if FLAGS.save_image:
            # Every detection is drawn: column 14 equal to 1 uses the plain
            # drawer, any other value the mask drawer.
            for detection in outputs:
                if detection[14] == 1:
                    draw_bbox_landm(img_raw, detection,
                                    img_height_raw, img_width_raw)
                else:
                    draw_bbox_landm_mask(img_raw, detection,
                                         img_height_raw, img_width_raw)
            cv2.imwrite(
                os.path.join('./results', cfg['sub_name'], sub_dir, img_name),
                img_raw)
Beispiel #25
0
#!/usr/bin/env python
# coding: utf-8
"""
Object Detection (On Image) From TF2 Saved Model
=====================================
"""

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'    # Suppress TensorFlow logging (1)
import pathlib
import tensorflow as tf
import cv2
import argparse

tf.get_logger().setLevel('ERROR')           # Suppress TensorFlow logging (2)

# Command-line options for single-image detection with an exported SavedModel.
parser = argparse.ArgumentParser()
parser.add_argument('--model', help='Folder that the Saved Model is Located In',
                    default='exported-models/my_model')
parser.add_argument('--labels', help='Where the Labelmap is Located',
                    default='annotations/label_map.pbtxt')
parser.add_argument('--image', help='Name of the single image to perform detection on',
                    default='image-test/cam06_655.jpg')
# type=float: without it only the default is numeric, while a value supplied
# on the command line would stay a string and break numeric comparisons.
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
                    type=float, default=0.5)
                    
# Parse the command line once at import time; the rest of the script reads
# its settings from ``args``.
args = parser.parse_args()
# Enable GPU dynamic memory allocation
# (memory growth is switched on for every GPU TensorFlow reports; with no
# GPU present the list is empty and the loop does nothing).
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
Beispiel #26
0
def main(_):
    """Train a RetinaFace model as described by the YAML file at FLAGS.cfg_path.

    Builds the model, prior boxes, dataset, learning-rate schedule and SGD
    optimizer from the config, resumes from the newest checkpoint under
    ``./checkpoints/<sub_name>`` when one exists, then runs the remaining
    training steps. Every 100 steps the progress bar and the TensorBoard
    scalars under ``./logs/<sub_name>`` are updated; a checkpoint is written
    every ``cfg['save_steps']`` steps and once more when training ends.

    :param _: unused positional argument (ignored by this function).
    """
    # Environment and logging setup.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    tf_logger = tf.get_logger()
    tf_logger.disabled = True
    tf_logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # Network.
    model = RetinaFaceModel(cfg, training=True)
    model.summary(line_length=80)

    # Prior boxes for a square input of side cfg['input_size'].
    input_hw = (cfg['input_size'], cfg['input_size'])
    priors = prior_box(input_hw, cfg['min_sizes'], cfg['steps'], cfg['clip'])

    # Training data.
    train_dataset = load_dataset(cfg, priors, shuffle=True)

    # Optimizer: the schedule is configured in epochs and converted to steps.
    steps_per_epoch = cfg['dataset_len'] // cfg['batch_size']
    lr_schedule = MultiStepWarmUpLR(
        initial_learning_rate=cfg['init_lr'],
        lr_steps=[epoch * steps_per_epoch
                  for epoch in cfg['lr_decay_epoch']],
        lr_rate=cfg['lr_rate'],
        warmup_steps=cfg['warmup_epoch'] * steps_per_epoch,
        min_lr=cfg['min_lr'])
    optimizer = tf.keras.optimizers.SGD(
        learning_rate=lr_schedule, momentum=0.9, nesterov=True)

    # Loss.
    multi_box_loss = MultiBoxLoss()

    # Checkpointing: the step counter is stored alongside model and optimizer
    # so that an interrupted run can resume where it stopped.
    ckpt_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(
        step=tf.Variable(0, name='step'), optimizer=optimizer, model=model)
    ckpt_manager = tf.train.CheckpointManager(
        checkpoint=checkpoint, directory=ckpt_dir, max_to_keep=3)
    latest = ckpt_manager.latest_checkpoint
    if not latest:
        print("[*] training from scratch.")
    else:
        checkpoint.restore(latest)
        print('[*] load ckpt from {} at step {}.'.format(
            latest, checkpoint.step.numpy()))

    @tf.function
    def train_step(inputs, labels):
        """Run one optimization step; return (total loss, per-term losses)."""
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)

            reg_loss = tf.reduce_sum(model.losses)
            loc_loss, landm_loss, class_loss = multi_box_loss(
                labels, predictions)
            # Insertion order (reg, loc, landm, class) fixes both the
            # summation order and the order the scalars are logged in.
            losses = {
                'reg': reg_loss,
                'loc': loc_loss,
                'landm': landm_loss,
                'class': class_loss,
            }
            total_loss = tf.add_n(list(losses.values()))

        variables = model.trainable_variables
        grads = tape.gradient(total_loss, variables)
        optimizer.apply_gradients(zip(grads, variables))

        return total_loss, losses

    # Training loop: only the steps not yet covered by the checkpoint are run.
    summary_writer = tf.summary.create_file_writer('./logs/' + cfg['sub_name'])
    planned_steps = steps_per_epoch * cfg['epoch']
    remain_steps = max(planned_steps - checkpoint.step.numpy(), 0)
    progress = ProgressBar(steps_per_epoch,
                           checkpoint.step.numpy() % steps_per_epoch)

    for inputs, labels in train_dataset.take(remain_steps):
        checkpoint.step.assign_add(1)
        step = checkpoint.step.numpy()

        total_loss, losses = train_step(inputs, labels)

        if step % 100 == 0:
            current_epoch = ((step - 1) // steps_per_epoch) + 1
            progress.update("epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
                current_epoch, cfg['epoch'],
                total_loss.numpy(), optimizer.lr(step).numpy()))

            with summary_writer.as_default():
                tf.summary.scalar('loss/total_loss', total_loss, step=step)
                for loss_name, loss_value in losses.items():
                    tf.summary.scalar(
                        'loss/{}'.format(loss_name), loss_value, step=step)
                tf.summary.scalar(
                    'learning_rate', optimizer.lr(step), step=step)

        if step % cfg['save_steps'] == 0:
            ckpt_manager.save()
            print("\n[*] save ckpt file at {}".format(
                ckpt_manager.latest_checkpoint))

    ckpt_manager.save()
    print("\n[*] training done! save ckpt file at {}".format(
        ckpt_manager.latest_checkpoint))
Beispiel #27
0
    'uniform', 'uniform-at', 'user', 'user-at', 'item', 'item-at', 'both',
    'both-at', 'nb', 'nb-at', 'nb_true', 'nb_true-at'
]

# Command-line interface: dataset name, model variant (restricted to
# ``possible_model_names``) and an optional tuning switch.
parser = argparse.ArgumentParser()
parser.add_argument('--data', '-d', type=str, required=True)
parser.add_argument('--model_name',
                    '-m',
                    type=str,
                    choices=possible_model_names,
                    required=True)
parser.add_argument('--tuning', '-t', action='store_true')

if __name__ == "__main__":
    warnings.filterwarnings("ignore")
    tf.get_logger().setLevel("ERROR")
    args = parser.parse_args()

    # hyper-parameters
    # Read through a context manager so the file handle is closed right
    # after parsing instead of being left to the garbage collector.
    with open('../config.yaml', 'r') as config_file:
        config = yaml.safe_load(config_file)
    eta = config['eta']
    batch_size = config['batch_size']
    max_iters = config['max_iters']
    pre_iters = config['pre_iters']
    post_steps = config['post_steps']
    post_iters = config['post_iters']
    num_sims = config['num_sims']
    n_trials = config['n_trials']
    # Settings taken from the command line.
    model_name = args.model_name
    tuning = args.tuning
    data = args.data
Beispiel #28
0
# Package-wide logger, named after the top-level package (the first
# component of this module's dotted name).
logger = logging.getLogger(__name__.split(".")[0])
logger.setLevel(_log_level)

# Create and format the log handler
# (a stream handler at the same level as the logger, with a
# "[LEVEL] (logger name) message" layout).
_console_handler = logging.StreamHandler()
_console_handler.setLevel(_log_level)
_console_format = logging.Formatter("[%(levelname)s] (%(name)s) %(message)s")
_console_handler.setFormatter(_console_format)
logger.addHandler(_console_handler)

# pdfflow options set, now import tensorflow to prepare convenience wrappers
# and set any options that we need
# ``setdefault`` keeps a TF_CPP_MIN_LOG_LEVEL the user has already exported.
# NOTE(review): presumably this has to run before the import below so that
# TensorFlow picks the value up at load time -- keep the order.
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "1")
import tensorflow as tf

# Level for TensorFlow's own Python logger.
tf.get_logger().setLevel(_tf_log_level)


def run_eager(flag=True):
    """Wrapper around `run_functions_eagerly`
    When used no function is compiled

    :param flag: passed straight through to TensorFlow; ``True`` asks it to
        run ``tf.function``-decorated code eagerly, ``False`` reverts that.
    """
    # Pick the entry point by feature detection. Comparing version strings
    # (``tf.__version__ < "2.3.0"``) is lexicographic, so e.g. "2.10.0" would
    # wrongly be treated as older than "2.3.0".
    if hasattr(tf.config, "run_functions_eagerly"):
        tf.config.run_functions_eagerly(flag)
    else:
        # Older releases only provide the experimental name.
        tf.config.experimental_run_functions_eagerly(flag)


# set the precision type
if _float_env == "64":
    DTYPE = tf.float64
Beispiel #29
0
    def predict(self, input_data, inputs_err=None):
        """
        Test model, High performance version designed for fast variational inference on GPU

        :param input_data: Data to be inferred with neural network
        :type input_data: ndarray
        :param inputs_err: Error for input_data, same shape with input_data.
            The array passed in is not modified.
        :type inputs_err: Union([NoneType, ndarray])
        :return: prediction and prediction uncertainty; the uncertainty is a
            dict with the keys ``'total'``, ``'model'`` and ``'predictive'``
        :rtype: tuple
        :raises AttributeError: if ``mc_num`` is smaller than 2 or the task is unknown
        :History:
            | 2018-Jan-06 - Written - Henry Leung (University of Toronto)
            | 2018-Apr-12 - Updated - Henry Leung (University of Toronto)
        """
        self.has_model_check()

        if gpu_availability() is False and self.mc_num > 25:
            warnings.warn(
                f'You are using CPU version Tensorflow, doing {self.mc_num} times Monte Carlo Inference can '
                f'potentially be very slow! \n '
                f'A possible fix is to decrease the mc_num parameter of the model to do less MC Inference \n'
                f'This is just a warning, and will not shown if mc_num < 25 on CPU'
            )
        # This check used to sit inside the ``mc_num > 25`` branch above,
        # where it could never trigger.
        if self.mc_num < 2:
            raise AttributeError("mc_num cannot be smaller than 2")

        # if no error array then just zeros
        if inputs_err is None:
            inputs_err = np.zeros_like(input_data)
        else:
            # Divide out of place: ``np.atleast_2d`` can hand back the
            # caller's own array, so ``/=`` would rescale the caller's data
            # on every call (and fails outright on integer arrays).
            inputs_err = np.atleast_2d(inputs_err) / self.input_std['input']

        input_data = {"input": input_data, "input_err": inputs_err}
        input_data = self.pre_testing_checklist_master(input_data)

        if self.input_normalizer is not None:
            input_array = self.input_normalizer.normalize(input_data,
                                                          calc=False)
        else:
            # Prevent shallow copy issue
            # NOTE(review): ``input_data`` is a dict at this point while the
            # code further down calls ``input_array.keys()`` -- this fallback
            # branch looks inconsistent with the rest; confirm it is
            # reachable before relying on it.
            input_array = np.array(input_data)
            input_array -= self.input_mean['input']
            input_array /= self.input_std['input']

        total_test_num = input_data['input'].shape[0]  # Number of testing data

        # for number of training data smaller than batch_size
        if total_test_num < self.batch_size:
            batch_size = total_test_num
        else:
            batch_size = self.batch_size

        # Due to the nature of how generator works, no overlapped prediction
        data_gen_shape = (total_test_num // batch_size) * batch_size
        remainder_shape = total_test_num - data_gen_shape  # Remainder from generator

        # Split every input into the part that fills whole batches and the
        # leftover samples, which are predicted separately below.
        norm_data_main = {}
        norm_data_remainder = {}
        for name in input_array.keys():
            norm_data_main.update({name: input_array[name][:data_gen_shape]})
            norm_data_remainder.update(
                {name: input_array[name][data_gen_shape:]})

        # Data Generator for prediction
        with tqdm(total=total_test_num, unit="sample") as pbar:
            pbar.set_postfix({'Monte-Carlo': self.mc_num})
            # suppress pfor warning from TF
            old_level = tf.get_logger().level
            tf.get_logger().setLevel('ERROR')
            try:
                prediction_generator = BayesianCNNPredDataGenerator(
                    batch_size=batch_size,
                    shuffle=False,
                    steps_per_epoch=data_gen_shape // batch_size,
                    data=[norm_data_main],
                    pbar=pbar)

                new = FastMCInference(self.mc_num)(self.keras_model_predict)

                result = np.asarray(new.predict(prediction_generator))

                if remainder_shape != 0:  # deal with remainder
                    remainder_generator = BayesianCNNPredDataGenerator(
                        batch_size=remainder_shape,
                        shuffle=False,
                        steps_per_epoch=1,
                        data=[norm_data_remainder],
                        pbar=pbar)
                    remainder_result = np.asarray(
                        new.predict(remainder_generator))
                    if remainder_shape == 1:
                        remainder_result = np.expand_dims(remainder_result,
                                                          axis=0)
                    result = np.concatenate((result, remainder_result))
            finally:
                # Restore the logger level even when prediction fails, so a
                # failed call does not leave TF logging silenced.
                tf.get_logger().setLevel(old_level)

        # in case only 1 test data point, in such case we need to add a dimension
        if result.ndim < 3 and batch_size == 1:
            result = np.expand_dims(result, axis=0)

        # result.shape[1] is guarantee an even number, otherwise sth is wrong
        half_first_dim = result.shape[1] // 2

        predictions = result[:, :half_first_dim, 0]  # mean prediction
        mc_dropout_uncertainty = result[:, :half_first_dim, 1] * (
            self.labels_std['output']**2)  # model uncertainty
        predictions_var = np.exp(result[:, half_first_dim:, 0]) * (
            self.labels_std['output']**2)  # predictive uncertainty

        if self.labels_normalizer is not None:
            predictions = self.labels_normalizer.denormalize(
                list_to_dict([self.keras_model.output_names[0]], predictions))
            predictions = predictions['output']
        else:
            predictions *= self.labels_std['output']
            predictions += self.labels_mean['output']

        if self.task == 'regression':
            # Predictive variance
            pred_var = predictions_var + mc_dropout_uncertainty  # epistemic plus aleatoric uncertainty
            pred_uncertainty = np.sqrt(pred_var)  # Convert back to std error

            # final correction from variance to standard derivation
            mc_dropout_uncertainty = np.sqrt(mc_dropout_uncertainty)
            predictive_uncertainty = np.sqrt(predictions_var)

        elif self.task == 'classification':
            # we want entropy for classification uncertainty
            predicted_class = np.argmax(predictions, axis=1)
            mc_dropout_uncertainty = np.ones_like(predicted_class, dtype=float)
            predictive_uncertainty = np.ones_like(predicted_class, dtype=float)

            # center variance
            predictions_var -= 1.
            for i in range(predicted_class.shape[0]):
                all_prediction = np.array(predictions[i, :])
                mc_dropout_uncertainty[i] = -np.sum(
                    all_prediction * np.log(all_prediction))
                predictive_uncertainty[i] = predictions_var[i,
                                                            predicted_class[i]]

            pred_uncertainty = mc_dropout_uncertainty + predictive_uncertainty
            # We only want the predicted class back
            predictions = predicted_class

        elif self.task == 'binary_classification':
            # we want entropy for classification uncertainty, so need prediction in logits space
            mc_dropout_uncertainty = -np.sum(predictions * np.log(predictions),
                                             axis=0)
            # need to activate before round to int so that the prediction is always 0 or 1
            predictions = np.rint(sigmoid(predictions))
            predictive_uncertainty = predictions_var
            pred_uncertainty = mc_dropout_uncertainty + predictions_var

        else:
            raise AttributeError('Unknown Task')

        return predictions, {
            'total': pred_uncertainty,
            'model': mc_dropout_uncertainty,
            'predictive': predictive_uncertainty
        }
Beispiel #30
0
def train(FLAGS):
    """Train yolov3 with different backbone

    Reads its whole configuration from ``FLAGS`` (a mapping). Keys used:
    ``prune``, ``opt``, ``backbone``, ``log_directory``, ``batch_size``,
    ``train_dataset``, ``val_dataset``, ``test_dataset``, ``freeze``,
    ``epochs`` (``[freeze_epochs, train_epochs]``), ``classes_path``,
    ``anchors_path``, ``input_size``, ``model``, ``learning_rate``
    (``[stage_1_lr, stage_2_lr]``), ``tpu_address``, ``gpus``, and, depending
    on the backbone, ``alpha`` or ``model_name``.

    Exactly one of three runs is performed:

    * ``prune`` truthy: train with magnitude pruning, save the stripped
      weights plus a zipped copy, and return.
    * ``freeze is True``: train for the freeze epochs and save stage-1 weights.
    * otherwise: mark every layer trainable, train the remaining epochs and
      save the final weights.

    Weights, checkpoints and TensorBoard logs go to ``log_directory`` or, when
    that is empty, to ``logs/<backbone><today>``.

    :param FLAGS: mapping with the keys listed above.
    :raises ValueError: if ``backbone`` is not one of the supported backbones.
    """
    prune = FLAGS['prune']
    opt = FLAGS['opt']
    backbone = FLAGS['backbone']
    # Lower-cased member name, used for the log directory and weight files.
    # NOTE(review): assumes str(backbone) has the form "<Enum>.<MEMBER>".
    backbone_name = str(backbone).split('.')[1].lower()
    log_dir = FLAGS['log_directory'] or os.path.join(
        'logs', backbone_name + str(datetime.date.today()))
    if not tf.io.gfile.exists(log_dir):
        # makedirs, not mkdir: the default path lives under 'logs/', which
        # may not exist yet, and mkdir requires an existing parent.
        tf.io.gfile.makedirs(log_dir)
    batch_size = FLAGS['batch_size']
    train_dataset_glob = FLAGS['train_dataset']
    val_dataset_glob = FLAGS['val_dataset']
    test_dataset_glob = FLAGS['test_dataset']
    freeze = FLAGS['freeze']
    freeze_step = FLAGS['epochs'][0]
    train_step = FLAGS['epochs'][1]

    if opt == OPT.DEBUG:
        tf.config.experimental_run_functions_eagerly(True)
        tf.debugging.set_log_device_placement(True)
        tf.get_logger().setLevel(tf.logging.DEBUG)
    elif opt == OPT.XLA:
        config = tf.ConfigProto()
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
        sess = tf.Session(config=config)
        tf.keras.backend.set_session(sess)

    class_names = get_classes(FLAGS['classes_path'])
    num_classes = len(class_names)
    anchors = get_anchors(FLAGS['anchors_path'])
    input_shape = FLAGS['input_size']  # multiple of 32, hw
    model_path = FLAGS['model']
    # Anything that is not an .h5 file is treated as a checkpoint directory.
    if model_path and not model_path.endswith('.h5'):
        model_path = tf.train.latest_checkpoint(model_path)
    lr = FLAGS['learning_rate']
    tpu_address = FLAGS['tpu_address']
    if tpu_address is not None:
        cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            tpu=tpu_address)
        tf.config.experimental_connect_to_host(cluster_resolver.master())
        tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
        strategy = tf.distribute.experimental.TPUStrategy(cluster_resolver)
    else:
        strategy = tf.distribute.MirroredStrategy(devices=FLAGS['gpus'])
    # The configured batch size is per replica; scale it to the global size.
    batch_size = batch_size * strategy.num_replicas_in_sync

    train_dataset_builder = Dataset(train_dataset_glob, batch_size, anchors,
                                    num_classes, input_shape)
    train_dataset, train_num = train_dataset_builder.build()
    val_dataset_builder = Dataset(val_dataset_glob,
                                  batch_size,
                                  anchors,
                                  num_classes,
                                  input_shape,
                                  mode=DATASET_MODE.VALIDATE)
    val_dataset, val_num = val_dataset_builder.build()
    map_callback = MAPCallback(test_dataset_glob, input_shape, anchors,
                               class_names)
    # Named ``tensorboard`` rather than ``logging`` so the stdlib module name
    # is not shadowed inside this function.
    tensorboard = tf.keras.callbacks.TensorBoard(write_graph=False,
                                                 log_dir=log_dir,
                                                 write_images=True)
    checkpoint = tf.keras.callbacks.ModelCheckpoint(os.path.join(
        log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                                    monitor='val_loss',
                                                    save_weights_only=True,
                                                    save_best_only=True,
                                                    period=3)
    # Cosine decay over the fine-tuning epochs, counted from the end of the
    # freeze stage.
    cos_lr = tf.keras.callbacks.LearningRateScheduler(
        lambda epoch, _: tf.keras.experimental.CosineDecay(lr[1], train_step)
        (epoch - freeze_step).numpy(), 1)
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0,
        patience=(freeze_step + train_step) // 10,
        verbose=1)
    if tf.version.VERSION.startswith('1.'):
        loss = [
            lambda y_true, yolo_output: YoloLoss(
                y_true, yolo_output, 0, anchors, print_loss=True),
            lambda y_true, yolo_output: YoloLoss(
                y_true, yolo_output, 1, anchors, print_loss=True),
            lambda y_true, yolo_output: YoloLoss(
                y_true, yolo_output, 2, anchors, print_loss=True)
        ]
    else:
        loss = [
            YoloLoss(idx, anchors, print_loss=False)
            for idx in range(len(anchors) // 3)
        ]

    with strategy.scope():
        factory = ModelFactory(tf.keras.layers.Input(shape=(*input_shape, 3)),
                               weights_path=model_path)
        if backbone == BACKBONE.MOBILENETV2:
            model = factory.build(mobilenetv2_yolo_body,
                                  155,
                                  len(anchors) // 3,
                                  num_classes,
                                  alpha=FLAGS['alpha'])
        elif backbone == BACKBONE.DARKNET53:
            model = factory.build(darknet_yolo_body, 185,
                                  len(anchors) // 3, num_classes)
        elif backbone == BACKBONE.EFFICIENTNET:
            model = factory.build(efficientnet_yolo_body,
                                  499,
                                  FLAGS['model_name'],
                                  len(anchors) // 3,
                                  batch_norm_momentum=0.9,
                                  batch_norm_epsilon=1e-3,
                                  num_classes=num_classes,
                                  drop_connect_rate=0.2,
                                  data_format="channels_first")
        else:
            # Fail with a clear message instead of an unbound ``model`` later.
            raise ValueError('Unsupported backbone: {}'.format(backbone))

    if prune:
        from tensorflow_model_optimization.python.core.api.sparsity import keras as sparsity
        end_step = np.ceil(1.0 * train_num / batch_size).astype(
            np.int32) * train_step
        new_pruning_params = {
            'pruning_schedule':
            sparsity.PolynomialDecay(initial_sparsity=0.5,
                                     final_sparsity=0.9,
                                     begin_step=0,
                                     end_step=end_step,
                                     frequency=1000)
        }
        pruned_model = sparsity.prune_low_magnitude(model,
                                                    **new_pruning_params)
        pruned_model.compile(optimizer=tf.keras.optimizers.Adam(lr[0],
                                                                epsilon=1e-8),
                             loss=loss)
        pruned_model.fit(train_dataset,
                         epochs=train_step,
                         initial_epoch=0,
                         steps_per_epoch=max(1, train_num // batch_size),
                         callbacks=[
                             checkpoint, cos_lr, tensorboard, map_callback,
                             early_stopping
                         ],
                         validation_data=val_dataset,
                         validation_steps=max(1, val_num // batch_size))
        model = sparsity.strip_pruning(pruned_model)
        pruned_weights_path = os.path.join(
            log_dir, backbone_name + '_trained_weights_pruned.h5')
        model.save_weights(pruned_weights_path)
        # Also store a deflated copy of the stripped weights next to them.
        with zipfile.ZipFile(pruned_weights_path + '.zip',
                             'w',
                             compression=zipfile.ZIP_DEFLATED) as f:
            f.write(pruned_weights_path)
        return

    # Train with frozen layers first, to get a stable loss.
    # Adjust num epochs to your dataset. This step is enough to obtain a not bad model.
    if freeze is True:
        with strategy.scope():
            model.compile(optimizer=tf.keras.optimizers.Adam(lr[0],
                                                             epsilon=1e-8),
                          loss=loss)
        model.fit(train_dataset,
                  epochs=freeze_step,
                  initial_epoch=0,
                  steps_per_epoch=max(1, train_num // batch_size),
                  callbacks=[tensorboard, checkpoint],
                  validation_data=val_dataset,
                  validation_steps=max(1, val_num // batch_size))
        model.save_weights(
            os.path.join(log_dir,
                         backbone_name + '_trained_weights_stage_1.h5'))
    # Unfreeze and continue training, to fine-tune.
    # Train longer if the result is not good.
    else:
        for layer in model.layers:
            layer.trainable = True
        with strategy.scope():
            model.compile(optimizer=tf.keras.optimizers.Adam(lr[1],
                                                             epsilon=1e-8),
                          loss=loss)  # recompile to apply the change
        print('Unfreeze all of the layers.')
        model.fit(train_dataset,
                  epochs=train_step + freeze_step,
                  initial_epoch=freeze_step,
                  steps_per_epoch=max(1, train_num // batch_size),
                  callbacks=[
                      checkpoint, cos_lr, tensorboard, map_callback,
                      early_stopping
                  ],
                  validation_data=val_dataset,
                  validation_steps=max(1, val_num // batch_size))
        model.save_weights(
            os.path.join(log_dir,
                         backbone_name + '_trained_weights_final.h5'))