def main():
    """Train the OpenNMT Transformer or translate with it, per the command line.

    The positional ``run`` argument selects the mode:

    * ``train``: optionally trains the BPE models and encodes the data,
      rebuilds the vocabularies from the input data, optionally mixes
      back-translated and monolingual data into the training set, then
      calls ``train``.
    * ``translate``: translates ``--src`` into a temporary file, then either
      BPE-decodes it (``--bpe``; the output name is whatever
      ``decode_bpe_file`` returns) or copies it to ``--output``.

    Raises:
        SystemExit: with status 1 when the arguments are inconsistent
            (``--btsrc`` without ``--bttgt``, or ``--monosrc`` without
            ``--bpe``).
    """
    model, checkpoint, optimizer, learning_rate = init_model()

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("run", choices=["train", "translate"],
                        help="Run type.")
    parser.add_argument("--src", required=True,
                        help="Path to the source file.")
    parser.add_argument("--tgt", help="Path to the target file.")
    parser.add_argument("--valsrc",
                        help="Path to the validation source file.")
    parser.add_argument("--valtgt",
                        help="Path to the validation target file.")
    parser.add_argument("--bpe", help="Enables Byte-Pair Encoding",
                        action="store_true")
    # Numeric options are parsed as int so that a value given on the command
    # line has the same type as the default (argparse would otherwise hand
    # over a string for the seed and the sample count).
    parser.add_argument("--vocab_size", help="Vocabulary Size", type=int,
                        default=16000)
    parser.add_argument("--bpe_vocab_size", help="BPE Vocabulary Size",
                        type=int, default=4000)
    parser.add_argument("--seed", help="Random seed for the experiment",
                        type=int, default=1234)
    parser.add_argument(
        "--monosrc",
        help="Monolingual data source (Target language).",
        type=str,
        default="",
    )
    parser.add_argument("--btsrc", help="Back-translation source file")
    parser.add_argument("--bttgt", help="Back-translation target file")
    parser.add_argument("--monolen",
                        help="Number of monolingual samples to consider.",
                        type=int, default=20000)
    parser.add_argument(
        "--bpe_combined",
        help="Use combined BPE vocabulary for both languages",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "--validate_now",
        help="Skips training and validate at current checkpoint",
        action="store_true",
    )
    parser.add_argument("--output", help="Filename for translated output.",
                        default="output.txt")
    parser.add_argument(
        "--model_dir",
        default="checkpoint",
        help="Directory where checkpoint are written.",
    )
    args = parser.parse_args()

    logger = tf.get_logger()

    # Tensorflow random seed.
    tf.random.set_seed(args.seed)

    combined = args.bpe_combined
    if args.monosrc != "":
        # Combined vocabulary must be used for monolingual data!
        combined = True
        logger.info(
            "Using combined BPE vocabulary since monolingual data is used!")

    src = args.src
    tgt = args.tgt
    valsrc = args.valsrc
    valtgt = args.valtgt
    src_vocab, tgt_vocab = get_vocab_file_names(args.model_dir)
    vocab_size = args.vocab_size

    if args.bpe:
        # Prepare the Byte-Pair Encoding models + Byte-Pair Encoded files.
        vocab_size = args.bpe_vocab_size
        if args.run == "train":
            prepare_bpe_models(src, tgt, combined=combined,
                               vocab_size=vocab_size)
        if valsrc is not None:
            # Only the validation source is replaced by its encoded version;
            # the encoded validation target path is discarded.
            valsrc, _ = prepare_bpe_files(valsrc, valtgt, combined=combined)
        src, tgt = prepare_bpe_files(src, tgt, combined=combined)

    if args.run == "train":
        # Rebuilds the vocabulary from scratch using only the input data.
        if not combined:
            build_vocabulary(src, src_vocab, vocab_size)
            build_vocabulary(tgt, tgt_vocab, vocab_size)
        else:
            # Combined vocabulary: both sides share the same token set.
            concat_files(src, tgt, "all.tmp")
            build_vocabulary("all.tmp", src_vocab, vocab_size)
            build_vocabulary("all.tmp", tgt_vocab, vocab_size)

        # NOTE(review): the augmentation steps below are applied to training
        # runs only; confirm this matches the intended nesting.

        # Add back-translated data if requested.
        if args.btsrc is not None:
            btsrc = args.btsrc
            bttgt = args.bttgt
            if bttgt is None:
                logger.error("Back-translation target must be supplied")
                raise SystemExit(1)
            if args.bpe:
                btsrc, bttgt = prepare_bpe_files(btsrc, bttgt,
                                                 combined=combined)
            else:
                logger.info(
                    "Warning: Back-translation was not tested without BPE. There could be bugs!"
                )
            tmp_btsrc = "btsrc.tmp"
            tmp_bttgt = "bttgt.tmp"
            concat_files(btsrc, src, tmp_btsrc)
            concat_files(bttgt, tgt, tmp_bttgt)
            # Both files are shuffled with the same seed.
            shuffle_file(tmp_btsrc, seed=args.seed)
            shuffle_file(tmp_bttgt, seed=args.seed)
            src = tmp_btsrc
            tgt = tmp_bttgt

        # Add additional monolingual data if requested.
        if args.monosrc != "":
            tmp_monosrc = "monosrc.tmp"
            tmp_monotgt = "monotgt.tmp"
            if not args.bpe:
                logger.error("Monolingual data can only be used with BPE!")
                raise SystemExit(1)
            prepare_bpe_files(args.monosrc, None, combined=combined)
            # The same encoded monolingual file is appended to both the
            # source and the target side.
            concat_files(src, args.monosrc + ".bpe", tmp_monosrc,
                         lines1=None, lines2=args.monolen)
            concat_files(tgt, args.monosrc + ".bpe", tmp_monotgt,
                         lines1=None, lines2=args.monolen)
            shuffle_file(tmp_monosrc, seed=args.seed, inplace=True)
            shuffle_file(tmp_monotgt, seed=args.seed, inplace=True)
            src = tmp_monosrc
            tgt = tmp_monotgt

    init_data_config(model, src_vocab, tgt_vocab)
    checkpoint_manager = init_checkpoint_manager_and_load_latest_checkpoint(
        checkpoint, args.model_dir)

    if args.run == "train":
        logger.info(
            f"Training on {src}, {tgt}\nValidating on {valsrc}, {valtgt}.\n" +
            f"Vocab = {src_vocab}, {tgt_vocab}\n BPE={args.bpe}")
        train(
            model,
            optimizer,
            learning_rate,
            src,
            tgt,
            checkpoint_manager,
            validation_source_file=valsrc,
            validation_target_file=valtgt,
            validate_now=args.validate_now,
            bpe=args.bpe,
            bpe_combined=combined,
        )
    elif args.run == "translate":
        # One temporary file receives the raw model output; it is removed
        # when the ``with`` block exits, so post-processing happens inside.
        with tempfile.NamedTemporaryFile() as f:
            temp = f.name
            logger.info(f"Translating {src} file to {temp}")
            translate(model, src, output_file=temp)
            if args.bpe:
                output_file_name = decode_bpe_file(temp)
                logger.info(
                    f"BPE decoded {temp} file to {output_file_name}")
            else:
                import shutil

                # Without BPE there is nothing to decode: the raw output is
                # the final translation and goes to --output.
                output_file_name = args.output
                shutil.copyfile(temp, output_file_name)
                logger.info(f"Copied {temp} file to {output_file_name}")
#https://github.com/marload/DeepRL-TensorFlow2 import tensorflow as tf import tensorflow.keras.layers as kl import logging tf.get_logger().setLevel(logging.ERROR) import datetime import gym import argparse import numpy as np from collections import deque import random from gym import wrappers import os os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' from visdom import Visdom def argument_parse(): parser = argparse.ArgumentParser() parser.add_argument('--env', type=str, default="CartPole-v0") parser.add_argument('--gamma', type=float, default=0.99) parser.add_argument('--learning_rate', type=float, default=0.005) parser.add_argument('--batch_size', type=int, default=32) parser.add_argument('--epsilon_init', type=float, default=1.0) parser.add_argument('--epsilon_min', type=float, default=0.01) parser.add_argument('--replay_memory_capacity', type=int, default=10000) parser.add_argument('--epsilon_decay_end_step', type=int, default=15000) parser.add_argument('--max_steps', type=int, default=30000)
def evaluate(self, input_data, labels, inputs_err=None, labels_err=None):
    """
    Evaluate neural network by provided input data and labels and get back a metrics score

    :param input_data: Data to be evaluated with neural network
    :type input_data: ndarray
    :param labels: Labels to be evaluated with neural network
    :type labels: ndarray
    :param inputs_err: Error for input_data (if any), same shape with input_data. Zeros are used when None.
    :type inputs_err: Union([NoneType, ndarray])
    :param labels_err: Labels error (if any). Zeros are used when None.
    :type labels_err: Union([NoneType, ndarray])
    :return: metrics score dictionary
    :rtype: dict
    :History: 2018-May-20 - Written - Henry Leung (University of Toronto)
    """
    self.has_model_check()

    if inputs_err is None:
        inputs_err = np.zeros_like(input_data)

    if labels_err is None:
        labels_err = np.zeros_like(labels)

    input_data = {"input": input_data}
    labels = {"output": labels}

    # check if exists (existing means the model has already been trained
    # (e.g. fine-tuning), so we do not need calculate mean/std again)
    if self.input_normalizer is None:
        self.input_normalizer = Normalizer(mode=self.input_norm_mode)
        self.labels_normalizer = Normalizer(mode=self.labels_norm_mode)

        norm_data = self.input_normalizer.normalize(input_data)
        self.input_mean = self.input_normalizer.mean_labels
        self.input_std = self.input_normalizer.std_labels
        norm_labels = self.labels_normalizer.normalize(labels)
        self.labels_mean = self.labels_normalizer.mean_labels
        self.labels_std = self.labels_normalizer.std_labels
    else:
        norm_data = self.input_normalizer.normalize(input_data, calc=False)
        norm_labels = self.labels_normalizer.normalize(labels, calc=False)

    # No need to care about Magic number as loss function looks for magic num in y_true only
    norm_input_err = inputs_err / self.input_std['input']
    norm_labels_err = labels_err / self.labels_std['output']

    norm_data.update({
        "input_err": norm_input_err,
        "labels_err": norm_labels_err
    })
    norm_labels.update({"variance_output": norm_labels["output"]})

    total_num = input_data['input'].shape[0]
    # Use a single batch holding everything when there are no more samples
    # than one batch; otherwise only full batches are evaluated.
    if total_num > self.batch_size:
        eval_batchsize = self.batch_size
        steps = total_num // self.batch_size
    else:
        eval_batchsize = total_num
        steps = 1

    start_time = time.time()
    print("Starting Evaluation")

    evaluate_generator = BayesianCNNDataGenerator(
        batch_size=eval_batchsize,
        shuffle=False,
        steps_per_epoch=steps,
        data=[norm_data, norm_labels])

    # suppress pfor warning from TF, and always restore the previous level,
    # even if the evaluation itself fails
    old_level = tf.get_logger().level
    tf.get_logger().setLevel('ERROR')
    try:
        scores = self.keras_model.evaluate(evaluate_generator)
    finally:
        tf.get_logger().setLevel(old_level)

    if isinstance(scores, float):
        # make sure scores is iterable: wrap the scalar itself (converting it
        # to a string first would yield a list of characters, not the score)
        scores = [scores]
    funcname = self.keras_model.metrics_names

    print(
        f'Completed Evaluation, {(time.time() - start_time):.2f}s elapsed'
    )

    return list_to_dict(funcname, scores)
import tensorflow as tf import tensorflow.keras as keras from tensorflow.compat.v1 import ConfigProto from tensorflow.compat.v1 import InteractiveSession config = ConfigProto() config.gpu_options.allow_growth = True session = InteractiveSession(config=config) tf.get_logger().setLevel('INFO') class MLP: def __init__(self): self.model = None self.lr = 0.01 self.loss = 'categorical_crossentropy' self.metrics = ['accuracy'] self.optimizer = tf.keras.optimizers.SGD(lr=self.lr, decay=self.lr/100, momentum=0.9) def build(self): self.model = tf.keras.models.Sequential() self.model.add(tf.keras.layers.Dense(units=256, activation='relu', input_shape=(784, ))) self.model.add(tf.keras.layers.Dropout(0.2)) self.model.add(tf.keras.layers.Dense(units=128, activation='relu')) self.model.add(tf.keras.layers.Dropout(0.2))
import tensorflow as tf

# Import TensorFlow Datasets
import tensorflow_datasets as tfds
tfds.disable_progress_bar()

# Helper libraries
import math
import numpy as np
import matplotlib.pyplot as plt

import logging

# Keep TensorFlow quiet: only errors are reported.
logger = tf.get_logger()
logger.setLevel(logging.ERROR)

# Load Fashion-MNIST as (image, label) pairs together with its metadata.
dataset, metadata = tfds.load(
    'fashion_mnist',
    as_supervised=True,
    with_info=True,
)
train_dataset = dataset['train']
test_dataset = dataset['test']

# Human-readable names, listed in label order.
class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

num_train_examples = metadata.splits['train'].num_examples
num_test_examples = metadata.splits['test'].num_examples
print(f"Number of training examples: {num_train_examples}")
print(f"Number of test examples: {num_test_examples}")


def normalize(images, labels):
    """Cast `images` to float32 and divide by 255; `labels` pass through unchanged."""
    scaled = tf.cast(images, tf.float32) / 255
    return scaled, labels
def main(_):
    """Build a TPUEstimator from FLAGS and run the requested phases.

    Reads the model configuration from ``FLAGS.config_file``, expands the
    comma-separated glob patterns in ``FLAGS.input_file``, then trains when
    ``FLAGS.do_train`` is set and evaluates when ``FLAGS.do_eval`` is set.
    Evaluation metrics are logged and written to ``eval_results.txt`` in
    ``FLAGS.output_dir``.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    # Records handled by the TF logger are not passed on to ancestor loggers.
    tf_logger = tf.get_logger()
    tf_logger.propagate = False

    grover_config = GroverConfig.from_json_file(FLAGS.config_file)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    # Expand every comma-separated pattern into the files it matches.
    input_files = [
        matched
        for pattern in FLAGS.input_file.split(",")
        for matched in tf.gfile.Glob(pattern)
    ]

    # A cluster resolver is only needed when a named TPU is requested.
    if FLAGS.use_tpu and FLAGS.tpu_name:
        cluster = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    else:
        cluster = None

    tpu_settings = tf.contrib.tpu.TPUConfig(
        iterations_per_loop=FLAGS.iterations_per_loop,
        num_shards=FLAGS.num_tpu_cores,
        per_host_input_for_training=(
            tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2))
    run_config = tf.contrib.tpu.RunConfig(
        cluster=cluster,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_max=None,
        tpu_config=tpu_settings)

    model_fn = model_fn_builder(
        grover_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=FLAGS.num_train_steps,
        num_warmup_steps=FLAGS.num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        num_tpu_cores=FLAGS.num_tpu_cores,
        eval_batch_size=FLAGS.eval_batch_size,
    )

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        params={'model_dir': FLAGS.output_dir},
    )

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        estimator.train(
            input_fn=input_fn_builder(
                input_files=input_files,
                seq_length=FLAGS.max_seq_length,
                is_training=True),
            max_steps=FLAGS.num_train_steps)

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)
        result = estimator.evaluate(
            input_fn=input_fn_builder(
                input_files=input_files,
                seq_length=FLAGS.max_seq_length,
                is_training=False,
            ),
            steps=FLAGS.max_eval_steps)

        # Metrics go to the log and to a text file, one "key = value" per line.
        results_path = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(results_path, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for metric in sorted(result):
                rendered = str(result[metric])
                tf.logging.info(" %s = %s", metric, rendered)
                writer.write("%s = %s\n" % (metric, rendered))
def main():
    """Set up logging, build the model template and save the CDF-transform samples.

    Relies on the module-level ``args`` namespace (``in_data``, ``ood_data``,
    ``ckpt_file``). The only live computation in the visible code is the
    "UNIF" step, which stores the result of ``get_cdf_transform`` on the
    in-distribution test split as a ``.npy`` file under ``intermediate/``.
    The commented-out sections (LR, TT, WN) are earlier experiments kept for
    reference.

    NOTE(review): ``initializer`` is assigned but not used in the visible
    part of this function -- confirm whether code further down consumes it.
    """
    # # write results to file
    # out_dir = os.path.join(args.exp + '_save_dir', 'results')
    # from pathlib import Path
    # Path(out_dir).mkdir(exist_ok=True)
    # out_f = os.path.join(out_dir, 'run%s.txt' % args.suffix)

    # load model
    tf.logging.log(tf.logging.INFO, 'starting the run')
    # Turn on the most verbose logging through every available knob.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
    tf.get_logger().setLevel('INFO')
    tf.logging.set_verbosity(tf.logging.INFO)
    # Identifier used in output file names: in-data, OOD-data and the
    # directory part of the checkpoint path, joined by underscores.
    # NOTE(review): the directory part may itself contain path separators,
    # which would then end up inside the file name below -- confirm.
    name = "_".join(
        [args.in_data, args.ood_data, os.path.dirname(args.ckpt_file)])
    model = tf.make_template('model', model_spec)
    tf.logging.log(tf.logging.INFO, 'initializing')
    initializer = tf.global_variables_initializer()
    tf.logging.log(tf.logging.INFO, 'initialized model')

    # log_probs_in, ar_in, cdfs_in = get_preds(model, args, args.in_data, 'test', log_prob_func)
    # log_probs_ood, ar_ood, cdfs_ood = get_preds(model, args, args.ood_data, 'test', log_prob_func)
    # log_probs_train, ar_train, cdfs_train = get_preds(model, args, args.in_data, 'train', log_prob_func)
    #
    # # LR
    # log_probs_pixel_in = get_log_probs(model, args, args.in_data, 'test')  # (N,32,32,3)
    # log_probs_in = np.mean(log_probs_pixel_in, axis=(1, 2))
    # np.save('intermediate/' + name + '_log_probs_in.npy', log_probs_in)
    # log_probs_pixel_ood = get_log_probs(model, args, args.ood_data, 'test')
    # log_probs_ood = np.mean(log_probs_pixel_ood, axis=(1, 2))
    # np.save('intermediate/' + name + '_log_probs_ood.npy', log_probs_ood)
    # complexity_in = get_complexity(args, args.in_data, 'test')
    # complexity_ood = get_complexity(args, args.ood_data, 'test')
    # np.save('intermediate/' + name + '_complexity_ood.npy', complexity_ood)
    # np.save('intermediate/' + name + '_complexity_ood.npy', complexity_ood)
    # print(log_probs_in, log_probs_ood)
    # print(len(log_probs_in),len(log_probs_ood),len(complexity_in), len(complexity_ood))
    # auc, auc_llr = compute_auc_llr(log_probs_in, log_probs_ood, complexity_in, complexity_ood)
    # tf.logging.log(tf.logging.INFO, f'LL: {auc}')
    # tf.logging.log(tf.logging.INFO, f'LR: {auc_llr}')
    # with open(f'results/{name}.txt', 'a') as f:
    #     f.write(f'LL: {auc}\n')
    #     f.write(f'LR: {auc_llr}\n')
    #
    #
    # # TT
    # log_probs_pixel_train = get_log_probs(model, args, args.in_data, 'train')
    # log_probs_train = np.mean(log_probs_pixel_train, axis=(1, 2))
    # np.save('intermediate/' + name + '_log_probs_train.npy', log_probs_train)
    # train_entropy = get_entropy(log_probs_train)
    # typical_ts_in = list(map(abs, log_probs_in - train_entropy))
    # typical_ts_ood = list(map(abs, log_probs_ood - train_entropy))
    # print('before', typical_ts_in, typical_ts_ood)
    # # want higher to be better
    # print('-1', np.array(typical_ts_in) * -1, np.array(typical_ts_ood) * -1)
    # auc_tt = compute_auc(np.array(typical_ts_in) * -1, np.array(typical_ts_ood) * -1)
    # tf.logging.log(tf.logging.INFO, f'TT: {auc_tt}')
    # with open(f'results/{name}.txt', 'a') as f:
    #     f.write(f'TT: {auc_tt}')
    #
    # # WN
    # ar_in = get_ar(model, args, args.in_data, 'test')
    # ar_ood = get_ar(model, args, args.ood_data, 'test')
    # ar_train = get_ar(model, args, args.in_data, 'train')
    # wn_in, wn_ood = time_series_test(np.array(ar_train), np.array(ar_in), np.array(ar_ood), 'bp')
    # print(len(wn_in), len(wn_ood))
    # auc_wn = compute_auc(wn_in * -1, wn_ood * -1)
    # print(f'WN: {auc_wn}')
    # with open(f'results/{name}.txt', 'a') as f:
    #     f.write(f'UNIF: {auc_wn}')

    # UNIF
    # The trailing shape note is the original author's; the leading axis is
    # presumably the number of samples -- TODO confirm.
    unifs_in = get_cdf_transform(model, args, args.in_data, 'test')  # (B,32,32,3)
    np.save('intermediate/' + name + '_unifs_samples.npy', unifs_in)
eval_spec = tf.estimator.EvalSpec( input_fn.eval_input_fn(feature_config), steps=None if FLAGS.eval_steps < 0 else FLAGS.eval_steps, throttle_secs=FLAGS.throttle_secs) run_config = tf.estimator.RunConfig( model_dir=FLAGS.model_path, save_checkpoints_steps=FLAGS.checkpoint_steps) estimator = tf.estimator.Estimator(model_fn=model_fn.model_fn, config=run_config, params={ 'feature_config': feature_config, }) tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec) _, shard_id = shard_info.get_shard_info() if 'TF_CONFIG' not in os.environ or shard_id == 0: logging.info("begin the final evaluation:") metrics = estimator.evaluate(input_fn.eval_input_fn(feature_config)) print(metrics) estimator.export_saved_model(FLAGS.model_path, input_fn.build_serving_fn(feature_config)) if __name__ == '__main__': tf.get_logger().setLevel("INFO") app.run(main)
#!/usr/bin/env python # -*- coding: UTF-8 -*- # REF [site] >> # http://opennmt.net/ # https://github.com/OpenNMT/OpenNMT-tf import logging import tensorflow as tf import tensorflow_addons as tfa import opennmt as onmt tf.get_logger().setLevel(logging.INFO) # REF [file] >> ${OpenNMT-tf_HOME}/examples/library/minimal_transformer_training.py def minimal_transformer_training_example(): run_type = 'train' # Run type: 'train' or 'translate'. train_features_filepath = './toy-ende/src-train.txt' # Path to the source file. train_labels_filepath = './toy-ende/tgt-train.txt' # Path to the target file. eval_features_filepath = './toy-ende/src-val.txt' # Path to the source file. eval_labels_filepath = './toy-ende/tgt-val.txt' # Path to the target file. source_vocabulary_filepath = './toy-ende/src-vocab.txt' # Path to the source vocabulary. target_vocabulary_filepath = './toy-ende/tgt-vocab.txt' # Path to the target vocabulary. model_dir_path = './checkpoint' # Directory where checkpoint are written. # See http://opennmt.net/OpenNMT-tf/configuration.html for a complete specification of the configuration. config = { 'model_dir': model_dir_path, 'data': { 'source_vocabulary': source_vocabulary_filepath,
def block_print(log_level):
    """Silence ``print`` when the TF logger is less verbose than `log_level`.

    If the effective level of the TensorFlow logger is strictly greater than
    `log_level`, ``sys.stdout`` is replaced by a handle on the null device;
    otherwise nothing happens.
    """
    current_level = tf.get_logger().getEffectiveLevel()
    if current_level <= log_level:
        return
    # The handle is intentionally left open: it stands in for stdout.
    sys.stdout = open(os.devnull, 'w')
from collections import defaultdict from datetime import datetime from pathlib import Path from pprint import pprint from typing import List, Union, Dict, Optional, Any import numpy as np import tensorflow as tf from tqdm import tqdm # See https://github.com/tensorflow/tensorflow/issues/32180 # once this issue will be fixed I can use tf.lite.TFLiteConverter.from_keras_model from keras_detection.utils.tflite_converter_shitfix import from_keras_model LOGGER = tf.get_logger() keras = tf.keras TFLITE_SUFFIX = ".tflite" # No quantization, float32 TFLITE_F32_SUFFIX = ".f32.tflite" # Dynamic range quantized TFLITE_DR_SUFFIX = ".dr.quantized.tflite" # Fixed range quantized TFLITE_FR_SUFFIX = ".fr.quantized.tflite" class TFLiteModel: def __init__(self, tflite_model: Union[bytes, str, Path]): self.tflite_model = tflite_model interpreter, predict_fn = create_tflite_predict_fn(tflite_model) self.interpreter = interpreter
def get_suggestions(self, client_id: Text):
    """Requests suggested Trials from the service.

    Args:
        client_id: An ID that identifies the `Tuner` requesting a `Trial`.
            `Tuners` that should run the same trial (for instance, when
            running a multi-worker model) should have the same ID. If
            multiple suggestTrialsRequests have the same tuner_id, the
            service will return the identical suggested trial if the trial
            is PENDING, and provide a new trial if the last suggest trial
            was completed.

    Returns:
        The response dictionary of the completed suggestion operation; the
        suggested trials, if any, are under its "trials" key. The key may
        be missing, for instance when a finite search space has been
        exhausted. An empty dictionary is returned when the service answers
        with HTTP 429, i.e. the maximum number of trials has been reached.

    Raises:
        SuggestionInactiveError: Indicates that a suggestion was requested
            from an inactive study. Note that this is NOT raised when a
            finite Study runs out of suggestions. In such a case, a
            response without "trials" is returned.
        errors.HttpError: For any HTTP failure other than status 429.
    """
    # Requests a trial.
    try:
        resp = (
            self.service_client.projects()
            .locations()
            .studies()
            .trials()
            .suggest(
                parent=self._make_study_name(),
                body={
                    "client_id": client_id,
                    "suggestion_count": constants.SUGGESTION_COUNT_PER_REQUEST,
                },
            )
            .execute()
        )
    except errors.HttpError as e:
        if e.resp.status == 429:
            # Status 429 'RESOURCE_EXAUSTED' is raised when trials more than
            # the maximum limit (1000) of the Optimizer service for a study
            # are requested, or the number of finite search space.
            # For distributed tuning, a tuner worker may request the 1001th
            # trial, while the other tuner worker has not completed training
            # the 1000th trial, and triggers this error.
            tf.get_logger().info("Reached max number of trials.")
            return {}
        tf.get_logger().info("SuggestTrial failed.")
        # Bare raise keeps the original traceback intact.
        raise

    # Polls the suggestion of long-running operations.
    tf.get_logger().info("CreateTrial: polls the suggestions.")
    operation = self._obtain_long_running_operation(resp)

    suggestions = operation["response"]

    # A response without trials is only an error when the study is
    # inactive. ``.get`` is used so that a response lacking "studyState" is
    # returned to the caller instead of failing with a KeyError here.
    if (
        "trials" not in suggestions
        and suggestions.get("studyState") == "INACTIVE"
    ):
        raise SuggestionInactiveError(
            "The study is stopped due to an internal error."
        )
    return suggestions
def create_or_load_study(
    project_id: Text,
    region: Text,
    study_id: Text,
    study_config: Optional[Dict[Text, Any]] = None,
) -> _OptimizerClient:
    """Creates a CAIP Optimizer study, or opens it if it already exists.

    A create request is always sent first. When the service answers with
    HTTP 409 (the study already exists), the study is fetched instead,
    retrying once per second up to `constants.NUM_TRIES_FOR_STUDIES` times.
    An existing study is never modified by this function, which makes it
    suitable for distributed jobs that call it nearly simultaneously with
    the same `study_config`: they all end up pointing to the same study.

    Args:
        project_id: A GCP project id.
        region: A GCP region. e.g. 'us-central1'.
        study_id: An identifier of the study. The full study name will be
            projects/{project_id}/locations/{region}/studies/{study_id}.
        study_config: Study configuration for CAIP Optimizer service.

    Returns:
        An _OptimizerClient object with the specified study created or
        loaded.

    Raises:
        errors.HttpError: If creating the study fails with a status other
            than 409.
        RuntimeError: If an existing study cannot be fetched within the
            allowed number of tries.
    """
    # Build the API client.
    # The Optimizer service is exposed as a regional endpoint, so its client
    # is built from a bundled discovery document rather than the default.
    with open(constants.OPTIMIZER_API_DOCUMENT_FILE) as document_file:
        api_document = json.load(document_file)
    service_client = discovery.build_from_document(
        service=api_document,
        requestBuilder=google_api_client.TFCloudHttpRequest,
    )

    # Creates or loads a study.
    study_parent = f"projects/{project_id}/locations/{region}"
    studies_api = service_client.projects().locations().studies()
    create_request = studies_api.create(
        body={"study_config": study_config},
        parent=study_parent,
        studyId=study_id,
    )
    try:
        creation_response = create_request.execute()
        tf.get_logger().info(creation_response)
    except errors.HttpError as e:
        # Anything but 409 is a genuine failure; 409 means the study exists.
        if e.resp.status != 409:
            raise e

        tf.get_logger().info("Study already existed. Load existing study...")
        # Get study
        study_name = f"{study_parent}/studies/{study_id}"
        tries = 1
        while True:
            try:
                service_client.projects().locations().studies().get(
                    name=study_name
                ).execute()
                break
            except errors.HttpError as err:
                if tries >= constants.NUM_TRIES_FOR_STUDIES:
                    raise RuntimeError(
                        "GetStudy wasn't successful after {0} tries: "
                        "{1!s}".format(constants.NUM_TRIES_FOR_STUDIES, err)
                    )
                tries += 1
                # wait 1 second before trying to get the study again
                time.sleep(1)

    return _OptimizerClient(service_client, project_id, region, study_id)
# Convert the inputs to a Dataset. dataset = tf.data.Dataset.from_tensor_slices(inputs) # Batch the examples dataset = dataset.batch(self.batch_size) # Return the dataset. return dataset if __name__ == '__main__': from tensorflow_estimator import estimator from sklearn.datasets import load_iris tf.get_logger().setLevel(2) CSV_COLUMN_NAMES = [ 'SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth' ] # 'Species' X, y = load_iris(True) X = pd.DataFrame(X, columns=CSV_COLUMN_NAMES) # Feature columns describe how to use the input. my_feature_columns = list( map(tf.feature_column.numeric_column, CSV_COLUMN_NAMES)) clf = estimator.DNNClassifier(hidden_units=[10, 10], feature_columns=my_feature_columns, n_classes=3, batch_norm=True)
def train_and_evaluate_dist(
    table_id: str,
    job_dir: str,
    bucket_name: str,
    prefix: str,
    params: dict,
    job_name=None,
    task_index=-1,
    num_workers=1,
):
    """
    Train and evaluate the estimator under a ParameterServerStrategy.

    The arguments are first published to module-level globals
    (``global_table_id``, ``global_params``, ``JOB_NAME``, ``TASK_INDEX``,
    ``NUM_WORKERS``, ``BUCKET_NAME``, ``PREFIX``), then an Estimator is
    built and handed to ``tf.estimator.train_and_evaluate``. A checkpoint
    is saved every quarter of an epoch.

    :param table_id: table used to count the 'train' and 'validation' samples
    :param job_dir: base directory passed to ``make_job_output`` to derive the model directory
    :param bucket_name: stored in the ``BUCKET_NAME`` global
    :param prefix: stored in the ``PREFIX`` global
    :param params: settings; this function reads 'batch_size', 'epochs',
        'log_step_count_steps', 'summary_write_steps',
        'no_generated_job_path' and 'data_source' ('bigquery' or 'avro')
    :param job_name: stored in the ``JOB_NAME`` global
    :param task_index: stored in the ``TASK_INDEX`` global
    :param num_workers: stored in the ``NUM_WORKERS`` global
    :return: None
    :raises ValueError: if ``params['data_source']`` is neither 'bigquery' nor 'avro'
    """
    global global_table_id
    global global_params
    global TASK_INDEX
    global NUM_WORKERS
    global JOB_NAME
    global BUCKET_NAME
    global PREFIX

    global_table_id = table_id
    # params['batch_size'] = params['batch_size'] * NUM_WORKERS
    global_params = params
    JOB_NAME = job_name
    TASK_INDEX = task_index
    NUM_WORKERS = num_workers
    BUCKET_NAME = bucket_name
    PREFIX = prefix

    # strategy = tf.distribute.MirroredStrategy()
    # strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
    strategy = tf.distribute.experimental.ParameterServerStrategy()
    tf.get_logger().info("NTC_DEBUG: Number of devices in strategy: {}".format(
        strategy.num_replicas_in_sync))

    tf.summary.trace_on(graph=False, profiler=False)

    train_steps_per_epoch = math.ceil(
        data.get_sample_count(table_id, partition='train') /
        params['batch_size'])

    config = tf.estimator.RunConfig(
        log_step_count_steps=global_params['log_step_count_steps'],
        save_summary_steps=global_params['summary_write_steps'],
        # Evaluate every quarter through the epoch
        save_checkpoints_steps=math.floor(train_steps_per_epoch * .25),
        # session_config=get_session_config(job_name, task_index),
        train_distribute=strategy,
        eval_distribute=strategy)

    classifier = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=make_job_output(job_dir,
                                  global_params['no_generated_job_path']),
        config=config)

    # Pick the input functions matching the configured data source. An
    # unknown source is rejected explicitly; it would otherwise surface
    # later as an unbound local variable.
    data_source = global_params['data_source']
    if data_source == 'bigquery':
        input_fn_train = input_fn_train_bq
        input_fn_eval = input_fn_eval_bq
    elif data_source == 'avro':
        input_fn_train = input_fn_train_avro
        input_fn_eval = input_fn_eval_avro
    else:
        raise ValueError(
            "Unsupported data_source {!r}; expected 'bigquery' or "
            "'avro'".format(data_source))

    tf.estimator.train_and_evaluate(
        classifier,
        train_spec=tf.estimator.TrainSpec(
            input_fn=input_fn_train,
            max_steps=train_steps_per_epoch * params['epochs']),
        eval_spec=tf.estimator.EvalSpec(
            input_fn=input_fn_eval,
            steps=math.ceil(
                data.get_sample_count(table_id, partition='validation') /
                params['batch_size']),
            # throttle_secs=60,
        ))
def custom_transformer_training_example():
    """Build a Transformer from individual OpenNMT components and train or translate.

    All paths and the run type are hard-coded below. The latest checkpoint
    found in the model directory, if any, is restored before running.
    """
    # Run type: 'train' or 'translate'.
    run_type = 'train'

    # Source/target files for training and evaluation.
    train_features_filepath = './toy-ende/src-train.txt'
    train_labels_filepath = './toy-ende/tgt-train.txt'
    eval_features_filepath = './toy-ende/src-val.txt'
    eval_labels_filepath = './toy-ende/tgt-val.txt'
    # Source/target vocabularies.
    source_vocabulary_filepath = './toy-ende/src-vocab.txt'
    target_vocabulary_filepath = './toy-ende/tgt-vocab.txt'
    # Directory where checkpoint are written.
    model_dir_path = './checkpoint'

    # See http://opennmt.net/OpenNMT-tf/configuration.html for a complete specification of the configuration.
    data_config = {
        'source_vocabulary': source_vocabulary_filepath,
        'target_vocabulary': target_vocabulary_filepath,
    }

    #--------------------
    # Define the model.
    # For the purpose of this example, the model components (encoder, decoder, etc.) will be called separately.
    # Encoder and decoder are configured with the same hyper-parameters.
    stack_options = dict(
        num_layers=6,
        num_units=512,
        num_heads=8,
        ffn_inner_dim=2048,
        dropout=0.1,
        attention_dropout=0.1,
        ffn_dropout=0.1,
    )
    model = onmt.models.SequenceToSequence(
        source_inputter=onmt.inputters.WordEmbedder(embedding_size=512),
        target_inputter=onmt.inputters.WordEmbedder(embedding_size=512),
        encoder=onmt.encoders.SelfAttentionEncoder(**stack_options),
        decoder=onmt.decoders.SelfAttentionDecoder(**stack_options))

    # Define the learning rate schedule and the optimizer.
    learning_rate = onmt.schedules.NoamDecay(
        scale=2.0, model_dim=512, warmup_steps=8000)
    optimizer = tfa.optimizers.LazyAdam(learning_rate)

    # Track the model and optimizer weights.
    checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)

    #--------------------
    model.initialize(data_config)

    checkpoint_manager = tf.train.CheckpointManager(
        checkpoint, model_dir_path, max_to_keep=5)
    latest = checkpoint_manager.latest_checkpoint
    if latest is not None:
        tf.get_logger().info('Restoring parameters from %s', latest)
        checkpoint.restore(latest)

    if run_type == 'train':
        train(model, optimizer, learning_rate, train_features_filepath,
              train_labels_filepath, checkpoint_manager)
        return
    if run_type == 'translate':
        translate(model, train_features_filepath)
import cv2 import numpy as np from bamot.config import CONFIG as config from bamot.core.base_types import (CameraParameters, Feature, FeatureMatcher, Landmark, Match, StereoCamera) from g2o import AngleAxis from PIL import Image, ImageDraw if TYPE_CHECKING: import tensorflow as tf if config.FEATURE_MATCHER != "orb": import bamot.thirdparty.SuperPoint.superpoint.match_features_demo as sp import tensorflow as tf tf.get_logger().setLevel(logging.ERROR) # surpress TF1 -> TF2 warnings tf.config.threading.set_inter_op_parallelism_threads( 2 ) # s.t. extraction can run in parallel if tf.config.list_physical_devices("GPU"): LOADED = tf.saved_model.load(config.SUPERPOINT_WEIGHTS_PATH) MODEL = LOADED.signatures["serving_default"] LOGGER = logging.getLogger("UTIL:CV") class TriangulationError(Exception): pass def get_oobbox_vec(pos: np.ndarray, yaw: np.ndarray, dims: np.ndarray) -> np.ndarray:
def train(
        model,
        optimizer,
        learning_rate,
        source_file,
        target_file,
        checkpoint_manager,
        maximum_length=100,
        shuffle_buffer_size=-1,  # Uniform shuffle.
        train_steps=100000,
        save_every=1000,
        report_every=100):
    """Runs the training loop.

    Args:
      model: The model to train. Its ``examples_inputter``,
        ``features_inputter``, ``labels_inputter``, ``encoder`` and
        ``decoder`` attributes are used.
      optimizer: The optimizer. It computes and applies the gradients, and
        its ``iterations`` counter is used as the training step.
      learning_rate: A callable mapping a step to a learning rate value; only
        used for reporting.
      source_file: The source training file.
      target_file: The target training file.
      checkpoint_manager: The checkpoint manager.
      maximum_length: Filter sequences longer than this.
      shuffle_buffer_size: How many examples to load for shuffling.
      train_steps: Train until the optimizer reaches this many iterations.
      save_every: Save a checkpoint every this many iterations.
      report_every: Report training progress every this many iterations.
    """

    # Create the training dataset.
    dataset = model.examples_inputter.make_training_dataset(
        source_file,
        target_file,
        batch_size=3072,
        batch_type='tokens',
        shuffle_buffer_size=shuffle_buffer_size,
        # Bucketize sequences by the same length for efficiency.
        length_bucket_width=1,
        maximum_features_length=maximum_length,
        maximum_labels_length=maximum_length)

    @tf.function(input_signature=dataset.element_spec)
    def training_step(source, target):
        # Run the encoder.
        source_inputs = model.features_inputter(source, training=True)
        encoder_outputs, _, _ = model.encoder(
            source_inputs, source['length'], training=True)

        # Run the decoder.
        target_inputs = model.labels_inputter(target, training=True)
        decoder_state = model.decoder.initial_state(
            memory=encoder_outputs,
            memory_sequence_length=source['length'])
        logits, _, _ = model.decoder(
            target_inputs,
            target['length'],
            state=decoder_state,
            training=True)

        # Compute the cross entropy loss.
        loss_num, loss_den, _ = onmt.utils.cross_entropy_sequence_loss(
            logits,
            target['ids_out'],
            target['length'],
            label_smoothing=0.1,
            average_in_time=True,
            training=True)
        loss = loss_num / loss_den

        # Compute and apply the gradients.
        variables = model.trainable_variables
        gradients = optimizer.get_gradients(loss, variables)
        optimizer.apply_gradients(list(zip(gradients, variables)))
        return loss

    # Runs the training loop.
    for source, target in dataset:
        loss = training_step(source, target)
        step = optimizer.iterations.numpy()
        if step % report_every == 0:
            tf.get_logger().info(
                'Step = %d ; Learning rate = %f ; Loss = %f',
                step, learning_rate(step), loss)
        if step % save_every == 0:
            tf.get_logger().info('Saving checkpoint for step %d', step)
            checkpoint_manager.save(checkpoint_number=step)
        # ">=" rather than "==": when the optimizer was restored from a
        # checkpoint that is already past train_steps, an equality test would
        # never match and the loop would not terminate on its own.
        if step >= train_steps:
            break
def main(_):
    """Train a ``ModelMLossHead`` model on ``Cifar`` data using a YAML config.

    Reads ``FLAGS.gpu``, ``FLAGS.cfg_path`` and ``FLAGS.mode``. Weights are
    loaded from the latest checkpoint under ``./checkpoints/<sub_name>`` when
    one exists. Training then runs either as a hand-written eager loop
    (``FLAGS.mode == 'eager_tf'``) or through Keras ``fit_generator``.

    NOTE(review): the indentation of this function was reconstructed from a
    whitespace-collapsed source. Confirm the nesting at the places marked
    below.

    Args:
        _: Unused.
    """
    # Environment must be configured before TensorFlow touches the devices.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    # Silence TensorFlow's Python logger.
    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    model = ModelMLossHead(size=cfg['input_size'],
                           embd_shape=cfg['embd_shape'],
                           backbone_type=cfg['backbone_type'],
                           training=True, # here equal false, just get the model without acrHead, to load the model trained by arcface
                           cfg=cfg)
    cifar = Cifar(cfg['batch_size'])
    train_dataset = cifar.build_training_data()
    # NOTE(review): val_dataset is built but never used in this function.
    val_dataset = cifar.build_validation_data()
    dataset_len = cfg['num_samples']
    steps_per_epoch = dataset_len // cfg['batch_size']
    learning_rate = tf.constant(cfg['base_lr'])
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    # optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
    # optimiser = tf.train.MomentumOptimizer(learning_rate,momentum=0.9, )
    for x in model.trainable_weights:
        print("trainable:",x.name)
    # NOTE(review): assumed to run once after the loop, not per weight.
    print('\n')
    model.summary(line_length=80)

    # Resume from the latest checkpoint if there is one; otherwise start the
    # epoch and step counters at 1.
    ckpt_path = tf.train.latest_checkpoint('./checkpoints/' + cfg['sub_name'])
    if ckpt_path is not None:
        print("[*] load ckpt from {}".format(ckpt_path))
        model.load_weights(ckpt_path)
        epochs, steps = get_ckpt_inf(ckpt_path, steps_per_epoch)
    else:
        print("[*] training from scratch.")
        epochs, steps = 1, 1

    if FLAGS.mode == 'eager_tf':
        # Eager mode is great for debugging
        # Non eager graph mode is recommended for real training
        summary_writer = tf.summary.create_file_writer(
            './logs/' + cfg['sub_name'])

        train_dataset = iter(train_dataset)

        while epochs <= cfg['epochs']:
            # Time only the steps that are going to be reported.
            if steps % 5 == 0:
                start = time.time()

            inputs, labels = next(train_dataset) #print(inputs[0][1][:]) labels[2][:]

            with tf.GradientTape() as tape:
                logist = model((inputs, labels), training=True)
                # The optimized loss is only the sum of model.losses;
                # pred_loss is a constant 0.0 that is logged but not added.
                reg_loss = tf.cast(tf.reduce_sum(model.losses),tf.double)
                pred_loss = 0.0
                # logist = tf.cast(logist,tf.double)
                total_loss = reg_loss

            grads = tape.gradient(total_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            if steps % 5 == 0:
                end = time.time()
                verb_str = "Epoch {}/{}: {}/{}, loss={:.2f}, lr={:.4f}, time per step={:.2f}s, remaining time 4 this epoch={:.2f}min"
                print(verb_str.format(epochs, cfg['epochs'],
                                      steps % steps_per_epoch,
                                      steps_per_epoch,
                                      total_loss.numpy(),
                                      learning_rate.numpy(),end - start,(steps_per_epoch -(steps % steps_per_epoch)) * (end - start) /60.0))

                # NOTE(review): summaries are assumed to be written only on
                # reported steps (nested in the "if" above) - confirm.
                with summary_writer.as_default():
                    tf.summary.scalar(
                        'loss/total loss', total_loss, step=steps)
                    tf.summary.scalar(
                        'loss/pred loss', pred_loss, step=steps)
                    tf.summary.scalar(
                        'loss/reg loss', reg_loss, step=steps)
                    tf.summary.scalar(
                        'learning rate', optimizer.lr, step=steps)

            if steps % cfg['save_steps'] == 0:
                print('[*] save ckpt file!')
                model.save_weights('checkpoints/{}/e_{}_b_{}.ckpt'.format(
                    cfg['sub_name'], epochs, steps % steps_per_epoch))

            steps += 1
            # The epoch counter is 1-based and derived from the step counter.
            epochs = steps // steps_per_epoch + 1
    else:
        # NOTE(review): this message is printed, yet the Keras training path
        # below still runs.
        print("[*] only support eager_tf!")
        model.compile(optimizer=optimizer, loss=None)

        mc_callback = ModelCheckpoint(
            'checkpoints/' + cfg['sub_name'] + '/e_{epoch}_b_{batch}.ckpt',
            save_freq=cfg['save_steps'] * cfg['batch_size'], verbose=1,
            save_weights_only=True)
        tb_callback = TensorBoard(log_dir='logs/'+ cfg['sub_name'],
                                  update_freq=cfg['batch_size'] * 5,
                                  profile_batch=0)
        # Seed the TensorBoard callback counters with the resumed step.
        tb_callback._total_batches_seen = steps
        tb_callback._samples_seen = steps * cfg['batch_size']
        callbacks = [mc_callback, tb_callback]

        def batch_generator(train_dataset):
            # Endless generator yielding [inputs, labels] pairs.
            train_dataset = iter(train_dataset)
            while True:
                inputs, labels = next(train_dataset) #print(inputs[0][1][:]) labels[2][:]
                yield [inputs, labels]

        model.fit_generator(batch_generator(train_dataset),
                            epochs=cfg['epochs'],
                            steps_per_epoch=steps_per_epoch,
                            callbacks=callbacks,
                            initial_epoch=epochs - 1)

    print("[*] training done!")
def __call__(
        self,
        dataset,
        max_step=None,
        accum_steps=1,
        report_steps=100,
        save_steps=5000,
        evaluator=None,
        eval_steps=5000,
        moving_average_decay=None,
):
    """Runs the training.

    Args:
      dataset: A ``tf.data.Dataset`` or a function taking a
        ``tf.distribute.InputContext`` instance and returning a
        ``tf.data.Dataset``.
      max_step: The final training step.
      accum_steps: The number of gradient accumulation steps.
      report_steps: Report status every this many steps.
      save_steps: Save a checkpoint every this many steps.
      evaluator: A :class:`opennmt.evaluation.Evaluator` instance to call for
        evaluation.
      eval_steps: Evaluate every this many steps.
      moving_average_decay: If set, maintain an exponential moving average of
        the model variables using this decay value (usually close to 1, e.g.
        0.9999). See
        https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage.

    Returns:
      A dictionary with various training statistics.

    Raises:
      RuntimeError: If ``max_step`` was already reached, if the evaluator
        reports that training should stop before it starts, if the loss
        becomes NaN, or if no training step was executed.
    """
    # NOTE(review): indentation was reconstructed from a whitespace-collapsed
    # source; confirm the nesting at the places marked below.

    # Refuse to start when there is nothing left to do.
    if max_step is not None and self._optimizer.iterations.numpy(
    ) >= max_step:
        raise RuntimeError(
            "The training already reached max_step (%d). If you "
            "want to continue the training, you should increase the "
            "max_step value in the training parameters." % max_step)
    if evaluator is not None and evaluator.should_stop():
        raise RuntimeError(
            "The early stopping conditions are already met. If you "
            "want to continue the training, you should update your "
            "early stopping parameters.")

    self._gradient_accumulator.reset()

    with self._summary_writer.as_default():
        self._training_stats = TrainingStats(
            self._model, self._optimizer, reduce_fn=self._all_reduce_sum)
        iterations = self._optimizer.iterations
        tf.summary.experimental.set_step(iterations)

        # "step" stays None if the loop body never runs; checked below.
        step = None
        moving_average = None
        for loss in self._steps(dataset,
                                accum_steps=accum_steps,
                                report_steps=report_steps):
            if tf.math.is_nan(loss):
                raise RuntimeError("Model diverged with loss = NaN.")

            if moving_average_decay is not None and self._is_master:
                # The moving average object is created lazily on first use.
                if moving_average is None:
                    moving_average = MovingAverage(
                        self._model.trainable_variables,
                        iterations,
                        decay=moving_average_decay,
                    )
                # NOTE(review): assumed to run on every step in which the
                # moving average is enabled, not only on creation - confirm.
                self._update_moving_average(moving_average)

            step = iterations.numpy()

            # Set when this step did reporting, saving or evaluation; the
            # throughput statistics are then reset at the end of the step.
            reset_throughput = False
            self._training_stats.update_on_step(step, loss)
            if step % report_steps == 0:
                self._training_stats.log(self._is_master)
                reset_throughput = True
            # A checkpoint is always saved at step 1.
            if step == 1 or (save_steps is not None
                             and step % save_steps == 0):
                self._save_checkpoint(step, moving_average=moving_average)
                reset_throughput = True
            if eval_steps is not None and step % eval_steps == 0:
                early_stop = self._evaluate(evaluator,
                                            step,
                                            moving_average=moving_average)
                reset_throughput = True
                if early_stop:
                    tf.get_logger().warning(
                        "Early stopping conditions are met. Exiting.")
                    break
            if step == max_step:
                break
            if reset_throughput:
                self._training_stats.reset_throughput()

        if step is None:
            raise RuntimeError(
                "No training steps were executed. This usually means the "
                "training file is empty or all examples were filtered out. "
                "For the latter, verify that the maximum_*_length values are "
                "consistent with your data.")

        # Final report, checkpoint and evaluation at the last executed step.
        self._training_stats.log_final(self._is_master)
        summary = self._training_stats.get_global_summary()
        self._save_checkpoint(step, moving_average=moving_average)
        self._evaluate(evaluator, step, moving_average=moving_average)
        return summary
def __init__(self, config_builder):
    """Constructor for WitWidgetBase.

    Builds the configuration, stores the callables and estimator specs it
    carries as attributes, and keeps the remaining entries in ``self.config``.

    Args:
      config_builder: WitConfigBuilder object containing settings for WIT.
    """
    tf.get_logger().setLevel(logging.WARNING)
    config = config_builder.build()

    def _dict_or_empty(key):
        # Copy of the mapping stored under ``key``; empty dict when absent.
        return dict(config.get(key)) if key in config else {}

    self.estimator_and_spec = _dict_or_empty('estimator_and_spec')
    self.compare_estimator_and_spec = _dict_or_empty(
        'compare_estimator_and_spec')

    # User-supplied hooks; each one is None when not configured.
    self.custom_predict_fn = config.get('custom_predict_fn')
    self.compare_custom_predict_fn = config.get('compare_custom_predict_fn')
    self.custom_distance_fn = config.get('custom_distance_fn')
    self.adjust_prediction_fn = config.get('adjust_prediction')
    self.compare_adjust_prediction_fn = config.get(
        'compare_adjust_prediction')
    self.adjust_example_fn = config.get('adjust_example')
    self.compare_adjust_example_fn = config.get('compare_adjust_example')
    self.adjust_attribution_fn = config.get('adjust_attribution')
    self.compare_adjust_attribution_fn = config.get(
        'compare_adjust_attribution')

    # The stored config is a copy without the entries extracted above.
    stripped = dict(config)
    for key in (
            'estimator_and_spec',
            'compare_estimator_and_spec',
            'custom_predict_fn',
            'compare_custom_predict_fn',
            'adjust_prediction',
            'compare_adjust_prediction',
            'adjust_example',
            'compare_adjust_example',
            'adjust_attribution',
            'compare_adjust_attribution',
    ):
        stripped.pop(key, None)
    # The distance function is replaced by a flag recording its presence.
    if 'custom_distance_fn' in stripped:
        del stripped['custom_distance_fn']
        stripped['uses_custom_distance_fn'] = True

    self.set_examples(config['examples'])
    del stripped['examples']

    self.config = stripped

    # If using AI Platform for prediction, set the correct custom prediction
    # functions.
    if self.config.get('use_aip'):
        self.custom_predict_fn = self._predict_aip_model
    if self.config.get('compare_use_aip'):
        self.compare_custom_predict_fn = self._predict_aip_compare_model

    # With JSON input (not Example protos), custom predict functions receive
    # the examples converted to JSON.
    if not self.config.get('uses_json_input'):
        return

    if self.custom_predict_fn is not None:
        primary_predict = self.custom_predict_fn

        def wrapped_custom_predict_fn(examples):
            return primary_predict(self._json_from_tf_examples(examples))

        self.custom_predict_fn = wrapped_custom_predict_fn

    if self.compare_custom_predict_fn is not None:
        secondary_predict = self.compare_custom_predict_fn

        def wrapped_compare_custom_predict_fn(examples):
            return secondary_predict(self._json_from_tf_examples(examples))

        self.compare_custom_predict_fn = wrapped_compare_custom_predict_fn
def __call__(self, step):
    """Runs the evaluator.

    Args:
      step: The current training step.

    Returns:
      A dictionary of evaluation metrics.

    Raises:
      RuntimeError: If the evaluation dataset yielded no examples.
    """
    tf.get_logger().info("Running evaluation for step %d", step)
    output_file = None
    output_path = None
    if self._save_predictions:
        output_path = os.path.join(self._eval_dir,
                                   "predictions.txt.%d" % step)
        output_file = tf.io.gfile.GFile(output_path, "w")

        def write_fn(prediction):
            return self._model.print_prediction(prediction,
                                                stream=output_file)

        def index_fn(prediction):
            return prediction.get("index")

        # Predictions are pushed through an OrderRestorer keyed on "index".
        ordered_writer = misc.OrderRestorer(index_fn, write_fn)

    loss_num = 0
    loss_den = 0
    metrics = self._model.get_metrics()
    try:
        for source, target in self._dataset:
            loss, predictions = self._eval_fn(source, target)
            # The loss is either a (numerator, denominator) pair or a single
            # value that counts for one unit.
            if isinstance(loss, tuple):
                loss_num += loss[0]
                loss_den += loss[1]
            else:
                loss_num += loss
                loss_den += 1
            if metrics:
                self._model.update_metrics(metrics, predictions, target)
            if output_file is not None:
                predictions = {k: v.numpy() for k, v in predictions.items()}
                for prediction in misc.extract_batches(predictions):
                    ordered_writer.push(prediction)
    finally:
        # Close the predictions file on every path, including failures in
        # the loop above, so the handle is never leaked.
        if output_file is not None:
            output_file.close()

    if loss_den == 0:
        raise RuntimeError("No examples were evaluated")
    loss = loss_num / loss_den

    results = dict(loss=loss, perplexity=tf.math.exp(loss))
    if metrics:
        for name, metric in metrics.items():
            results[name] = metric.result()
    if self._save_predictions:
        tf.get_logger().info("Evaluation predictions saved to %s",
                             output_path)
        # Scorers read the predictions back from the (now closed) file.
        for scorer in self._scorers:
            score = scorer(self._labels_file, output_path)
            if isinstance(score, dict):
                results.update(score)
            else:
                results[scorer.name] = score

    # Convert tensors to plain values before recording.
    for name, value in results.items():
        if isinstance(value, tf.Tensor):
            results[name] = value.numpy()

    self._record_results(step, results)
    self._maybe_export(step, results)
    self._maybe_garbage_collect_exports()
    return results
#!/usr/bin/env python3
# Copyright (c) 2020 Graphcore Ltd. All rights reserved.

# The filter and the environment variable are set before TensorFlow is
# imported, so the statement order below is kept as is.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
tf.get_logger().setLevel('ERROR')
import argparse
import numpy as np
import random
import subprocess
import sys


def print_beam(output_sequence, probability):
    """Print a beam as space-separated items, then its P and Log(P)."""
    rendered = " ".join(map(str, output_sequence))
    log_probability = np.log(probability)
    print("{o}".format(o=rendered))
    print("P = {p:.4f}".format(p=probability))
    print("Log(P) = {p:.4f}".format(p=log_probability))


def beam_search_tf(softmax_input, beam_width=4, top_paths=1):
    print("tensorflow:")
    sess = tf.Session()
    # Shape : [max_time, batch_size, num_classes]
def main(_argv):
    """Run a restored RetinaFace model over the test set and save detections.

    For every image listed in ``label.txt`` of the configured test folder, a
    text file with one line per detection is written under
    ``FLAGS.save_folder``. When ``FLAGS.save_image`` is set, the annotated
    image is also written under ``./results/<sub_name>``. Exits when no
    checkpoint is found.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    tf_logger = tf.get_logger()
    tf_logger.disabled = True
    tf_logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if not latest_ckpt:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        exit()
    checkpoint.restore(latest_ckpt)
    print("[*] load ckpt from {}.".format(latest_ckpt))

    # evaluation on testing dataset
    testset_list = os.path.join(cfg['testing_dataset_path'], 'label.txt')
    img_paths, _ = load_info(testset_list)
    total = len(img_paths)
    for position, img_path in enumerate(img_paths, start=1):
        print(" [{} / {}] det {}".format(position, total, img_path))

        img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img_height_raw, img_width_raw, _ = img_raw.shape

        # Images that are 720 pixels high are upscaled by 2.
        scale = 2.0 if img_height_raw == 720 else 1.0
        img = cv2.resize(np.float32(img_raw.copy()), None, None,
                         fx=scale, fy=scale,
                         interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, pad_params = pad_input_image(img, max_steps=max(cfg['steps']))

        # run model, then recover padding effect
        outputs = recover_pad_output(model(img[np.newaxis, ...]).numpy(),
                                     pad_params)

        # write results
        img_name = os.path.basename(img_path)
        sub_dir = os.path.basename(os.path.dirname(img_path))
        txt_dir = os.path.join(FLAGS.save_folder, sub_dir)
        save_name = os.path.join(txt_dir, img_name.replace('.jpg', '.txt'))
        pathlib.Path(txt_dir).mkdir(parents=True, exist_ok=True)

        with open(save_name, "w") as result_file:
            bboxs = outputs[:, :4]
            clsids = outputs[:, 14]
            confs = outputs[:, -1]

            # Header: image name, then the number of boxes.
            result_file.write(img_name + "\n")
            result_file.write(str(len(bboxs)) + "\n")
            for box, conf, clsid in zip(bboxs, confs, clsids):
                # Box corners are scaled by the raw image size; the line
                # stores x, y, width, height.
                x = int(box[0] * img_width_raw)
                y = int(box[1] * img_height_raw)
                w = int(box[2] * img_width_raw) - x
                h = int(box[3] * img_height_raw) - y
                fields = [str(x), str(y), str(w), str(h),
                          str(conf), str(clsid)]
                result_file.write(" ".join(fields) + " \n")

        # save images
        vis_dir = os.path.join('./results', cfg['sub_name'], sub_dir)
        pathlib.Path(vis_dir).mkdir(parents=True, exist_ok=True)
        if FLAGS.save_image:
            for detection in outputs:
                # Column 14 selects which drawing routine is used.
                if detection[14] == 1:
                    draw_bbox_landm(img_raw, detection,
                                    img_height_raw, img_width_raw)
                else:
                    draw_bbox_landm_mask(img_raw, detection,
                                         img_height_raw, img_width_raw)
            cv2.imwrite(os.path.join(vis_dir, img_name), img_raw)
#!/usr/bin/env python
# coding: utf-8
"""
Object Detection (On Image) From TF2 Saved Model
=====================================
"""

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'    # Suppress TensorFlow logging (1)
import pathlib
import tensorflow as tf
import cv2
import argparse

tf.get_logger().setLevel('ERROR')           # Suppress TensorFlow logging (2)

# Command-line options; every option has a default, so none is required.
parser = argparse.ArgumentParser()
parser.add_argument('--model',
                    help='Folder that the Saved Model is Located In',
                    default='exported-models/my_model')
parser.add_argument('--labels',
                    help='Where the Labelmap is Located',
                    default='annotations/label_map.pbtxt')
parser.add_argument('--image',
                    help='Name of the single image to perform detection on',
                    default='image-test/cam06_655.jpg')
# type=float: without it a threshold given on the command line is parsed as
# a string, while the default stays a float.
parser.add_argument('--threshold',
                    help='Minimum confidence threshold for displaying detected objects',
                    type=float,
                    default=0.5)

args = parser.parse_args()

# Enable GPU dynamic memory allocation
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
def main(_):
    """Train a RetinaFace model according to the YAML config in FLAGS.

    Restores the latest checkpoint under ``./checkpoints/<sub_name>`` when
    one exists, trains for the remaining steps, logs the losses and the
    learning rate to ``./logs/<sub_name>``, and saves checkpoints every
    ``cfg['save_steps']`` steps and once more at the end.
    """
    # init
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    tf_logger = tf.get_logger()
    tf_logger.disabled = True
    tf_logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=True)
    model.summary(line_length=80)

    # define prior box
    input_hw = (cfg['input_size'], cfg['input_size'])
    priors = prior_box(input_hw, cfg['min_sizes'], cfg['steps'], cfg['clip'])

    # load dataset
    train_dataset = load_dataset(cfg, priors, shuffle=True)

    # define optimizer; epoch-based settings are converted to step counts
    steps_per_epoch = cfg['dataset_len'] // cfg['batch_size']
    decay_steps = [e * steps_per_epoch for e in cfg['lr_decay_epoch']]
    learning_rate = MultiStepWarmUpLR(
        initial_learning_rate=cfg['init_lr'],
        lr_steps=decay_steps,
        lr_rate=cfg['lr_rate'],
        warmup_steps=cfg['warmup_epoch'] * steps_per_epoch,
        min_lr=cfg['min_lr'])
    optimizer = tf.keras.optimizers.SGD(
        learning_rate=learning_rate, momentum=0.9, nesterov=True)

    # define losses function
    multi_box_loss = MultiBoxLoss()

    # load checkpoint
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(step=tf.Variable(0, name='step'),
                                     optimizer=optimizer,
                                     model=model)
    manager = tf.train.CheckpointManager(checkpoint=checkpoint,
                                         directory=checkpoint_dir,
                                         max_to_keep=3)
    latest_ckpt = manager.latest_checkpoint
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print('[*] load ckpt from {} at step {}.'.format(
            latest_ckpt, checkpoint.step.numpy()))
    else:
        print("[*] training from scratch.")

    # define training step function
    @tf.function
    def train_step(inputs, labels):
        with tf.GradientTape() as tape:
            predictions = model(inputs, training=True)

            reg_loss = tf.reduce_sum(model.losses)
            loc_loss, landm_loss, class_loss = multi_box_loss(
                labels, predictions)
            losses = {
                'reg': reg_loss,
                'loc': loc_loss,
                'landm': landm_loss,
                'class': class_loss,
            }
            total_loss = tf.add_n(list(losses.values()))

        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        return total_loss, losses

    # training loop
    summary_writer = tf.summary.create_file_writer('./logs/' + cfg['sub_name'])
    start_step = checkpoint.step.numpy()
    remain_steps = max(steps_per_epoch * cfg['epoch'] - start_step, 0)
    prog_bar = ProgressBar(steps_per_epoch, start_step % steps_per_epoch)

    for inputs, labels in train_dataset.take(remain_steps):
        # The step counter is advanced before the step is run.
        checkpoint.step.assign_add(1)
        steps = checkpoint.step.numpy()

        total_loss, losses = train_step(inputs, labels)

        # Progress output and summaries are written every 100 steps.
        if steps % 100 == 0:
            current_epoch = ((steps - 1) // steps_per_epoch) + 1
            prog_bar.update("epoch={}/{}, loss={:.4f}, lr={:.1e}".format(
                current_epoch, cfg['epoch'], total_loss.numpy(),
                optimizer.lr(steps).numpy()))

            with summary_writer.as_default():
                tf.summary.scalar('loss/total_loss', total_loss, step=steps)
                for loss_name, loss_value in losses.items():
                    tf.summary.scalar('loss/{}'.format(loss_name),
                                      loss_value, step=steps)
                tf.summary.scalar('learning_rate', optimizer.lr(steps),
                                  step=steps)

        if steps % cfg['save_steps'] == 0:
            manager.save()
            print("\n[*] save ckpt file at {}".format(
                manager.latest_checkpoint))

    manager.save()
    print("\n[*] training done! save ckpt file at {}".format(
        manager.latest_checkpoint))
'uniform', 'uniform-at', 'user', 'user-at', 'item', 'item-at', 'both', 'both-at', 'nb', 'nb-at', 'nb_true', 'nb_true-at' ] parser = argparse.ArgumentParser() parser.add_argument('--data', '-d', type=str, required=True) parser.add_argument('--model_name', '-m', type=str, choices=possible_model_names, required=True) parser.add_argument('--tuning', '-t', action='store_true') if __name__ == "__main__": warnings.filterwarnings("ignore") tf.get_logger().setLevel("ERROR") args = parser.parse_args() # hyper-parameters config = yaml.safe_load(open('../config.yaml', 'r')) eta = config['eta'] batch_size = config['batch_size'] max_iters = config['max_iters'] pre_iters = config['pre_iters'] post_steps = config['post_steps'] post_iters = config['post_iters'] num_sims = config['num_sims'] n_trials = config['n_trials'] model_name = args.model_name tuning = args.tuning data = args.data
logger = logging.getLogger(__name__.split(".")[0])
logger.setLevel(_log_level)

# Create and format the log handler
_console_handler = logging.StreamHandler()
_console_handler.setLevel(_log_level)
_console_format = logging.Formatter("[%(levelname)s] (%(name)s) %(message)s")
_console_handler.setFormatter(_console_format)
logger.addHandler(_console_handler)

# pdfflow options set, now import tensorflow to prepare convenience wrappers
# and set any options that we need
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "1")
import tensorflow as tf

tf.get_logger().setLevel(_tf_log_level)


def run_eager(flag=True):
    """Wrapper around `run_functions_eagerly`.

    When enabled, no function is compiled.

    Args:
        flag: Value forwarded to TensorFlow's eager-execution switch.
    """
    # Detect the API instead of comparing version strings: as strings,
    # "2.10.0" < "2.3.0" is True, which would select the old experimental
    # name on recent TensorFlow releases.
    if hasattr(tf.config, "run_functions_eagerly"):
        tf.config.run_functions_eagerly(flag)
    else:
        tf.config.experimental_run_functions_eagerly(flag)


# set the precision type
if _float_env == "64":
    DTYPE = tf.float64
def predict(self, input_data, inputs_err=None):
    """
    Test model, High performance version designed for fast variational inference on GPU

    The array passed as ``inputs_err`` is not modified; it is scaled into a
    new array. The TensorFlow logger level is lowered during prediction and
    restored afterwards, including when prediction raises.

    :param input_data: Data to be inferred with neural network
    :type input_data: ndarray
    :param inputs_err: Error for input_data, same shape with input_data.
    :type inputs_err: Union([NoneType, ndarray])
    :return: prediction and prediction uncertainty
    :raises AttributeError: if ``mc_num`` is smaller than 2 or the task is unknown
    :History:
        | 2018-Jan-06 - Written - Henry Leung (University of Toronto)
        | 2018-Apr-12 - Updated - Henry Leung (University of Toronto)
    """
    self.has_model_check()

    if gpu_availability() is False and self.mc_num > 25:
        warnings.warn(
            f'You are using CPU version Tensorflow, doing {self.mc_num} times Monte Carlo Inference can '
            f'potentially be very slow! \n '
            f'A possible fix is to decrease the mc_num parameter of the model to do less MC Inference \n'
            f'This is just a warning, and will not shown if mc_num < 25 on CPU'
        )
    if self.mc_num < 2:
        raise AttributeError("mc_num cannot be smaller than 2")

    # if no error array then just zeros
    if inputs_err is None:
        inputs_err = np.zeros_like(input_data)
    else:
        # Divide into a new array: np.atleast_2d can return a view of the
        # caller's array, so an in-place "/=" would rescale the caller's data.
        inputs_err = np.atleast_2d(inputs_err) / self.input_std['input']

    input_data = {"input": input_data, "input_err": inputs_err}
    input_data = self.pre_testing_checklist_master(input_data)

    if self.input_normalizer is not None:
        input_array = self.input_normalizer.normalize(input_data, calc=False)
    else:
        # Prevent shallow copy issue
        # NOTE(review): input_data is a dict here while the code below calls
        # input_array.keys(); this branch looks unused - confirm.
        input_array = np.array(input_data)
        input_array -= self.input_mean['input']
        input_array /= self.input_std['input']

    total_test_num = input_data['input'].shape[0]  # Number of testing data

    # for number of training data smaller than batch_size
    batch_size = min(total_test_num, self.batch_size)

    # Due to the nature of how generator works, no overlapped prediction
    data_gen_shape = (total_test_num // batch_size) * batch_size
    remainder_shape = total_test_num - data_gen_shape  # Remainder from generator

    # Split every input into the part covered by whole batches and the rest.
    norm_data_main = {}
    norm_data_remainder = {}
    for name in input_array.keys():
        norm_data_main[name] = input_array[name][:data_gen_shape]
        norm_data_remainder[name] = input_array[name][data_gen_shape:]

    # Data Generator for prediction
    with tqdm(total=total_test_num, unit="sample") as pbar:
        pbar.set_postfix({'Monte-Carlo': self.mc_num})
        # suppress pfor warning from TF
        old_level = tf.get_logger().level
        tf.get_logger().setLevel('ERROR')
        try:
            prediction_generator = BayesianCNNPredDataGenerator(
                batch_size=batch_size,
                shuffle=False,
                steps_per_epoch=data_gen_shape // batch_size,
                data=[norm_data_main],
                pbar=pbar)

            new = FastMCInference(self.mc_num)(self.keras_model_predict)

            result = np.asarray(new.predict(prediction_generator))

            if remainder_shape != 0:  # deal with remainder
                remainder_generator = BayesianCNNPredDataGenerator(
                    batch_size=remainder_shape,
                    shuffle=False,
                    steps_per_epoch=1,
                    data=[norm_data_remainder],
                    pbar=pbar)
                remainder_result = np.asarray(
                    new.predict(remainder_generator))
                if remainder_shape == 1:
                    remainder_result = np.expand_dims(remainder_result,
                                                      axis=0)
                result = np.concatenate((result, remainder_result))
        finally:
            # Restore the logger level even when prediction fails.
            tf.get_logger().setLevel(old_level)

    # in case only 1 test data point, in such case we need to add a dimension
    if result.ndim < 3 and batch_size == 1:
        result = np.expand_dims(result, axis=0)

    # result.shape[1] is guarantee an even number, otherwise sth is wrong
    half_first_dim = result.shape[1] // 2

    predictions = result[:, :half_first_dim, 0]  # mean prediction
    mc_dropout_uncertainty = result[:, :half_first_dim, 1] * (
        self.labels_std['output']**2)  # model uncertainty
    predictions_var = np.exp(result[:, half_first_dim:, 0]) * (
        self.labels_std['output']**2)  # predictive uncertainty

    if self.labels_normalizer is not None:
        predictions = self.labels_normalizer.denormalize(
            list_to_dict([self.keras_model.output_names[0]], predictions))
        predictions = predictions['output']
    else:
        predictions *= self.labels_std['output']
        predictions += self.labels_mean['output']

    if self.task == 'regression':
        # Predictive variance
        pred_var = predictions_var + mc_dropout_uncertainty  # epistemic plus aleatoric uncertainty
        pred_uncertainty = np.sqrt(pred_var)  # Convert back to std error

        # final correction from variance to standard derivation
        mc_dropout_uncertainty = np.sqrt(mc_dropout_uncertainty)
        predictive_uncertainty = np.sqrt(predictions_var)

    elif self.task == 'classification':
        # we want entropy for classification uncertainty
        predicted_class = np.argmax(predictions, axis=1)
        mc_dropout_uncertainty = np.ones_like(predicted_class, dtype=float)
        predictive_uncertainty = np.ones_like(predicted_class, dtype=float)

        # center variance
        predictions_var -= 1.
        for i in range(predicted_class.shape[0]):
            all_prediction = np.array(predictions[i, :])
            mc_dropout_uncertainty[i] = -np.sum(
                all_prediction * np.log(all_prediction))
            predictive_uncertainty[i] = predictions_var[i, predicted_class[i]]

        pred_uncertainty = mc_dropout_uncertainty + predictive_uncertainty
        # We only want the predicted class back
        predictions = predicted_class

    elif self.task == 'binary_classification':
        # we want entropy for classification uncertainty, so need prediction in logits space
        mc_dropout_uncertainty = -np.sum(predictions * np.log(predictions),
                                         axis=0)
        # need to activate before round to int so that the prediction is always 0 or 1
        predictions = np.rint(sigmoid(predictions))
        predictive_uncertainty = predictions_var
        pred_uncertainty = mc_dropout_uncertainty + predictions_var

    else:
        raise AttributeError('Unknown Task')

    return predictions, {
        'total': pred_uncertainty,
        'model': mc_dropout_uncertainty,
        'predictive': predictive_uncertainty
    }
def train(FLAGS):
    """Train a YOLOv3 model on the backbone selected in ``FLAGS``.

    Exactly one of three runs is performed per call:

    * ``FLAGS['prune']`` truthy: the model is wrapped for magnitude pruning,
      trained, stripped of the pruning wrappers, saved and zipped.
    * ``FLAGS['freeze'] is True``: the model is compiled with ``lr[0]`` and
      trained for the first-stage epoch count (the stage the original
      comments describe as training with frozen layers).
    * otherwise: every layer is marked trainable and the model is trained
      with ``lr[1]`` from epoch ``freeze_step`` onwards.

    Weights are written below the log directory in every case.

    Args:
        FLAGS: Mapping of run options. Keys read here: ``prune``, ``opt``,
            ``backbone``, ``log_directory``, ``batch_size``,
            ``train_dataset``, ``val_dataset``, ``test_dataset``, ``freeze``,
            ``epochs`` (two entries: first-stage and second-stage epoch
            counts), ``classes_path``, ``anchors_path``, ``input_size``,
            ``model``, ``learning_rate`` (two entries), ``tpu_address``,
            ``gpus``, plus ``alpha`` (MobileNetV2 only) and ``model_name``
            (EfficientNet only).

    Raises:
        ValueError: If ``FLAGS['backbone']`` is not one of the backbones
            handled below.
    """
    prune = FLAGS['prune']
    opt = FLAGS['opt']
    backbone = FLAGS['backbone']
    # Second dot-separated component of str(backbone), lower-cased; it is
    # used as a prefix for the log directory and every weight file name.
    backbone_name = str(backbone).split('.')[1].lower()
    log_dir = FLAGS['log_directory'] or os.path.join(
        'logs', backbone_name + str(datetime.date.today()))
    if not tf.io.gfile.exists(log_dir):
        # makedirs (unlike mkdir) also creates the missing 'logs' parent of
        # the default path.
        tf.io.gfile.makedirs(log_dir)
    batch_size = FLAGS['batch_size']
    train_dataset_glob = FLAGS['train_dataset']
    val_dataset_glob = FLAGS['val_dataset']
    test_dataset_glob = FLAGS['test_dataset']
    freeze = FLAGS['freeze']
    freeze_step, train_step = FLAGS['epochs'][0], FLAGS['epochs'][1]

    if opt == OPT.DEBUG:
        tf.config.experimental_run_functions_eagerly(True)
        tf.debugging.set_log_device_placement(True)
        # A level name keeps this independent of the tf.logging module.
        tf.get_logger().setLevel('DEBUG')
    elif opt == OPT.XLA:
        # NOTE(review): ConfigProto/Session look like TF 1.x-style session
        # APIs -- confirm this path against the TF version in use.
        config = tf.ConfigProto()
        config.graph_options.optimizer_options.global_jit_level = (
            tf.OptimizerOptions.ON_1)
        sess = tf.Session(config=config)
        tf.keras.backend.set_session(sess)

    class_names = get_classes(FLAGS['classes_path'])
    num_classes = len(class_names)
    anchors = get_anchors(FLAGS['anchors_path'])
    num_anchors_per_output = len(anchors) // 3
    input_shape = FLAGS['input_size']  # multiple of 32, hw
    model_path = FLAGS['model']
    if model_path and not model_path.endswith('.h5'):
        # Anything that is not an .h5 file is treated as a checkpoint dir.
        model_path = tf.train.latest_checkpoint(model_path)
    lr = FLAGS['learning_rate']

    tpu_address = FLAGS['tpu_address']
    if tpu_address is not None:
        cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            tpu=tpu_address)
        tf.config.experimental_connect_to_host(cluster_resolver.master())
        tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
        strategy = tf.distribute.experimental.TPUStrategy(cluster_resolver)
    else:
        strategy = tf.distribute.MirroredStrategy(devices=FLAGS['gpus'])
    # From here on batch_size is the global batch across all replicas.
    batch_size = batch_size * strategy.num_replicas_in_sync

    train_dataset_builder = Dataset(train_dataset_glob, batch_size, anchors,
                                    num_classes, input_shape)
    train_dataset, train_num = train_dataset_builder.build()
    val_dataset_builder = Dataset(val_dataset_glob,
                                  batch_size,
                                  anchors,
                                  num_classes,
                                  input_shape,
                                  mode=DATASET_MODE.VALIDATE)
    val_dataset, val_num = val_dataset_builder.build()
    steps_per_epoch = max(1, train_num // batch_size)
    validation_steps = max(1, val_num // batch_size)

    map_callback = MAPCallback(test_dataset_glob, input_shape, anchors,
                               class_names)
    # Named `tensorboard` so the stdlib `logging` module name is not shadowed.
    tensorboard = tf.keras.callbacks.TensorBoard(write_graph=False,
                                                 log_dir=log_dir,
                                                 write_images=True)
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        os.path.join(
            log_dir,
            'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
        monitor='val_loss',
        save_weights_only=True,
        save_best_only=True,
        period=3)
    # Build the schedule once; the callback evaluates it relative to the
    # first second-stage epoch.
    cosine_decay = tf.keras.experimental.CosineDecay(lr[1], train_step)
    cos_lr = tf.keras.callbacks.LearningRateScheduler(
        lambda epoch, _: cosine_decay(epoch - freeze_step).numpy(),
        verbose=1)
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0,
        patience=(freeze_step + train_step) // 10,
        verbose=1)
    fine_tune_callbacks = [
        checkpoint, cos_lr, tensorboard, map_callback, early_stopping
    ]

    if tf.version.VERSION.startswith('1.'):
        # Under TF 1.x YoloLoss is called directly with (y_true, output).
        loss = [
            lambda y_true, yolo_output: YoloLoss(
                y_true, yolo_output, 0, anchors, print_loss=True),
            lambda y_true, yolo_output: YoloLoss(
                y_true, yolo_output, 1, anchors, print_loss=True),
            lambda y_true, yolo_output: YoloLoss(
                y_true, yolo_output, 2, anchors, print_loss=True)
        ]
    else:
        loss = [
            YoloLoss(idx, anchors, print_loss=False)
            for idx in range(num_anchors_per_output)
        ]

    with strategy.scope():
        factory = ModelFactory(tf.keras.layers.Input(shape=(*input_shape, 3)),
                               weights_path=model_path)
        if backbone == BACKBONE.MOBILENETV2:
            model = factory.build(mobilenetv2_yolo_body,
                                  155,
                                  num_anchors_per_output,
                                  num_classes,
                                  alpha=FLAGS['alpha'])
        elif backbone == BACKBONE.DARKNET53:
            model = factory.build(darknet_yolo_body, 185,
                                  num_anchors_per_output, num_classes)
        elif backbone == BACKBONE.EFFICIENTNET:
            model = factory.build(efficientnet_yolo_body,
                                  499,
                                  FLAGS['model_name'],
                                  num_anchors_per_output,
                                  batch_norm_momentum=0.9,
                                  batch_norm_epsilon=1e-3,
                                  num_classes=num_classes,
                                  drop_connect_rate=0.2,
                                  data_format="channels_first")
        else:
            # Fail here instead of with an unbound `model` further down.
            raise ValueError('Unsupported backbone: {}'.format(backbone))

    if prune:
        from tensorflow_model_optimization.python.core.api.sparsity import keras as sparsity
        end_step = np.ceil(1.0 * train_num / batch_size).astype(
            np.int32) * train_step
        new_pruning_params = {
            'pruning_schedule':
                sparsity.PolynomialDecay(initial_sparsity=0.5,
                                         final_sparsity=0.9,
                                         begin_step=0,
                                         end_step=end_step,
                                         frequency=1000)
        }
        pruned_model = sparsity.prune_low_magnitude(model,
                                                    **new_pruning_params)
        pruned_model.compile(optimizer=tf.keras.optimizers.Adam(lr[0],
                                                                epsilon=1e-8),
                             loss=loss)
        pruned_model.fit(train_dataset,
                         epochs=train_step,
                         initial_epoch=0,
                         steps_per_epoch=steps_per_epoch,
                         callbacks=fine_tune_callbacks,
                         validation_data=val_dataset,
                         validation_steps=validation_steps)
        model = sparsity.strip_pruning(pruned_model)
        pruned_weights_path = os.path.join(
            log_dir, backbone_name + '_trained_weights_pruned.h5')
        model.save_weights(pruned_weights_path)
        # Also store a deflate-compressed copy next to the weights file.
        with zipfile.ZipFile(pruned_weights_path + '.zip',
                             'w',
                             compression=zipfile.ZIP_DEFLATED) as f:
            f.write(pruned_weights_path)
        return

    # Train with frozen layers first, to get a stable loss.
    # Adjust num epochs to your dataset. This step is enough to obtain a not bad model.
    if freeze is True:
        with strategy.scope():
            model.compile(optimizer=tf.keras.optimizers.Adam(lr[0],
                                                             epsilon=1e-8),
                          loss=loss)
        model.fit(train_dataset,
                  epochs=freeze_step,
                  initial_epoch=0,
                  steps_per_epoch=steps_per_epoch,
                  callbacks=[tensorboard, checkpoint],
                  validation_data=val_dataset,
                  validation_steps=validation_steps)
        model.save_weights(
            os.path.join(log_dir,
                         backbone_name + '_trained_weights_stage_1.h5'))
    # Unfreeze and continue training, to fine-tune.
    # Train longer if the result is not good.
    else:
        for layer in model.layers:
            layer.trainable = True
        with strategy.scope():
            model.compile(optimizer=tf.keras.optimizers.Adam(lr[1],
                                                             epsilon=1e-8),
                          loss=loss)  # recompile to apply the change
        print('Unfreeze all of the layers.')
        model.fit(train_dataset,
                  epochs=train_step + freeze_step,
                  initial_epoch=freeze_step,
                  steps_per_epoch=steps_per_epoch,
                  callbacks=fine_tune_callbacks,
                  validation_data=val_dataset,
                  validation_steps=validation_steps)
        model.save_weights(
            os.path.join(log_dir,
                         backbone_name + '_trained_weights_final.h5'))