Example #1
    def __init__(self):
        # Reduce verbosity of tensorflow
        tf.get_logger().setLevel("ERROR")
        default_folder = os.path.dirname(os.path.realpath(__file__))
        self.cluster_labels_file_location = "{}/{}".format(
            default_folder, CLUSTER_LABELS_FILE)

        self.model = hub.load(
            "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3"
        )

        self.candidate_cluster_names = []

        if os.path.isfile(self.cluster_labels_file_location):
            with open(self.cluster_labels_file_location, "r") as labels_file:
                self.candidate_cluster_names = labels_file.read().splitlines()
                logging.info("Found cluster labels file. %d labels loaded.",
                             len(self.candidate_cluster_names))
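Once loaded, the Universal Sentence Encoder handle is callable on a batch of strings and returns one 512-dimensional embedding per string. A minimal sketch of how the candidate cluster names above could be embedded (the label strings here are placeholders for illustration):

import numpy as np
import tensorflow_hub as hub
import tensorflow_text  # noqa: F401 - registers the SentencePiece ops the multilingual model needs

model = hub.load(
    "https://tfhub.dev/google/universal-sentence-encoder-multilingual/3")
candidate_cluster_names = ["sports", "politics", "science"]  # placeholder labels
label_embeddings = np.asarray(model(candidate_cluster_names))
print(label_embeddings.shape)  # (3, 512)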
Example #2
import os
import pickle
from typing import Any, Dict, NamedTuple, List, Iterable, Tuple

import tensorflow.compat.v2 as tf
import numpy as np
from dpu_utils.mlutils import Vocabulary
from tensorflow_core.python.keras.layers import Embedding

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"
tf.get_logger().setLevel("ERROR")


class LanguageModelLoss(NamedTuple):
    token_ce_loss: tf.Tensor
    num_predictions: tf.Tensor
    num_correct_token_predictions: tf.Tensor


class BaseModel(tf.keras.Model):
    @classmethod
    def get_default_hyperparameters(cls) -> Dict[str, Any]:
        """Get the default hyperparameter dictionary for the class."""
        return {
            "optimizer": "Adam",  # One of "SGD", "RMSProp", "Adam"
            "learning_rate": 0.01,
            "learning_rate_decay": 0.98,
            "momentum": 0.85,
            "gradient_clip_value": 1,
            "max_epochs": 500,
            "patience": 5,
Example #3
        # Set up the model just to predict audio given new conditioning
        self.model = ddsp.training.models.Autoencoder()
        self.model.restore(ckpt)

        # Build model by running a batch through it.
        start_time = time.time()
        print(f'Restoring {self.model_dir}')
        _ = self.model(self.audio_features, training=False)
        print('Restoring model took %.1f seconds' % (time.time() - start_time))

    def resynth(self):
        # Resynthesize audio.
        self.controls = self.model(self.audio_features, training=False)
        audio_gen = self.model.get_audio_from_outputs(self.controls)
        return audio_gen


'''Usage:
import IPython.display
import logging
tf.get_logger().setLevel(logging.ERROR)
warnings.filterwarnings("ignore")

clarinet = TimbreTransfer('violin','jolene-2.pkl')
clarinet.fit()
IPython.display.display(play(clarinet.audio))
IPython.display.display(play(clarinet.resynth()))

'''
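Every snippet on this page silences TensorFlow the same way; a minimal standalone version of that idiom (the C++ log level must be set before TensorFlow is imported, and tf.get_logger() returns the standard Python "tensorflow" logger):

import logging
import os
import warnings

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # hide C++-side INFO/WARNING; set before importing TF

import tensorflow.compat.v2 as tf

tf.get_logger().setLevel(logging.ERROR)   # quiet the Python-side "tensorflow" logger
warnings.filterwarnings("ignore")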
Example #4
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import pathlib
import tensorflow.compat.v2 as tf
import cv2
import argparse
import time
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import warnings
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from helpers import corner_utils, ocr_helpers

tf.get_logger().setLevel('ERROR')
warnings.filterwarnings('ignore')

# CONFIG_GPU
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# PROVIDE PATH TO MODEL DIRECTORY
PATH_TO_MODEL_DIR = '/home/pot/Desktop/web-scan/models/discharge_record'
PATH_TO_SAVED_MODEL = PATH_TO_MODEL_DIR + "/saved_model"

# PROVIDE PATH TO LABEL MAP
PATH_TO_LABELS = '/home/pot/Desktop/web-scan/models/discharge_record/label_map.pbtxt'

# PROVIDE THE MINIMUM CONFIDENCE THRESHOLD
Example #5
def main(unused_argv):

    # Set the random seed for the whole graph for reproducible experiments
    tf.random.set_seed(230)
    print("TensorFlow version: ", tf.__version__)
    assert version.parse(tf.__version__).release[0] >= 2, \
    "This notebook requires TensorFlow 2.0 or above."
    tf.get_logger().setLevel(logging.ERROR)
    # strategy = tf.compat.v2.distribute.MirroredStrategy()

    # Set the GPU (device:GPU:0)
    print("Num GPUs Available: ",
          len(tf.config.experimental.list_physical_devices('GPU')))
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        # Restrict TensorFlow to only use the first GPU
        try:
            tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
            tf.config.experimental.set_memory_growth(gpus[0], True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus),
                  "Logical GPU")
        except RuntimeError as e:
            # Visible devices must be set before GPUs have been initialized
            print(e)

    flags.mark_flag_as_required('model_dir')
    flags.mark_flag_as_required('data_dir')
    flags.mark_flag_as_required('stn_dir')

    # Load the parameters from json file
    json_path = os.path.join(FLAGS.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = Params(json_path)

    # check if the data is available
    assert os.path.exists(FLAGS.data_dir), "No data file found at {}".format(
        FLAGS.data_dir)

    # check if the log file is available
    if not os.path.exists(FLAGS.loging_dir):
        os.mkdir(FLAGS.loging_dir)

    train_data_dir = os.path.join(FLAGS.data_dir, 'train')
    eval_data_dir = os.path.join(FLAGS.data_dir, 'eval')

    # Get the filenames from the train and dev sets
    train_filenames = [
        os.path.join(train_data_dir, f) for f in os.listdir(train_data_dir)
    ]
    eval_filenames = [
        os.path.join(eval_data_dir, f) for f in os.listdir(eval_data_dir)
    ]

    # Get the train images list
    images_list_train = glob.glob(train_filenames[0] + '/*.jpg')
    images_list_eval = glob.glob(eval_filenames[0] + '/*.jpg')

    # Get the label forces
    force_list_train = load_force_txt(train_filenames[1] + '/force.txt',
                                      len(images_list_train))
    force_list_eval = load_force_txt(eval_filenames[1] + '/force.txt',
                                     len(images_list_eval))

    # Specify the sizes of the dataset we train on and evaluate on
    params.train_size = len(images_list_train)
    params.eval_size = len(images_list_eval)

    # Create the two iterators over the two datasets
    print('=================================================')
    print(
        '[INFO] Dataset is built by {0} training images and {1} eval images '.
        format(len(images_list_train), len(images_list_eval)))

    tf.debugging.set_log_device_placement(False)
    train_dataset = input_fn(True,
                             images_list_train,
                             force_list_train,
                             params=params)
    eval_dataset = input_fn(False,
                            images_list_eval,
                            force_list_eval,
                            params=params)
    print('[INFO] Data pipeline is built')

    # Define the model
    print('=================================================')
    print('[INFO] Creating the model...')
    stn_module = tf.keras.models.load_model(FLAGS.stn_dir)
    model_spec = model_fn(FLAGS.mode, params, stn_module)
    if FLAGS.verbose:
        model_spec['model'].summary()

    # Train the model
    print('=================================================')
    train_model = Train_and_Evaluate(model_spec, train_dataset, eval_dataset,
                                     FLAGS.loging_dir)
    train_model.train_and_eval(params)
    print('=================================================')
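`Params`, `load_force_txt`, `input_fn`, `model_fn`, and `Train_and_Evaluate` belong to the surrounding project and are not shown in this excerpt. For orientation only, a JSON-backed `Params` object is commonly just a thin attribute wrapper over params.json; a hypothetical minimal version:

import json

class Params:
    """Hypothetical stand-in: exposes the keys of params.json as attributes."""

    def __init__(self, json_path):
        with open(json_path) as f:
            self.__dict__.update(json.load(f))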
Example #6
def tfp_anomaly_detection(input_dataset: Input[Dataset],
                          output_dataset: Output[Dataset],
                          time_col: str = 'timestamp',
                          feature_col: str = 'value',
                          timestamp_format: str = '%Y-%m-%d %H:%M:%S',
                          anomaly_threshold: float = 0.01,
                          use_gibbs_predictive_dist: bool = True,
                          num_warmup_steps: int = 50,
                          num_samples: int = 100,
                          jit_compile: bool = False,
                          seed: int = None):
  """Uses TFP STS detect_anomalies to regularize a time series, fit a model, and predict anomalies.

  Args:
    input_dataset: Input with GCS path to input time series csv.
    output_dataset: Output with GCS path to output predictions csv.
    time_col: Name of csv column with timestamps.
    feature_col: Name of csv column with feature values.
    timestamp_format: Datetime format to serialize timestamps with.
    anomaly_threshold: Confidence level for anomaly detection.
    use_gibbs_predictive_dist: Whether the predictive distribution is derived
      from Gibbs samples of the latent level.
    num_warmup_steps: Number of steps to take before collecting samples.
    num_samples: Number of steps to take while sampling parameter values.
    jit_compile: Whether to compile the sampler with XLA.
    seed: PRNG seed.

  Returns:
    Path to output predictions csv with the following fields
      timestamp: Timestamps from the input time series.
      value: Observed values from the input time series.
      anomaly_score: Probability that the data point is an anomaly.
      tail_probability: Probability that the data point occurs.
      label: Whether the data point is predicted to be an anomaly.
      lower_limit: Lowest acceptable forecast value from model.
      mean: Mean forecast value from model.
      upper_limit: Highest acceptable forecast value from model.
  """

  import pandas as pd
  import tensorflow.compat.v2 as tf
  import tensorflow_probability as tfp
  from tensorflow_probability.python.sts import anomaly_detection as tfp_ad
  from tensorflow_probability.python.sts.anomaly_detection.anomaly_detection_lib import PredictionOutput

  logger = tf.get_logger()

  def load_data(path: str) -> pd.DataFrame:
    """Loads pandas dataframe from csv.

    Args:
      path: Path to the csv file.

    Returns:
      A time series dataframe compatible with TFP functions.
    """
    original_df = pd.read_csv(path)
    df = pd.DataFrame()
    df['timestamp'] = pd.to_datetime(original_df[time_col])
    df['value'] = original_df[feature_col].astype('float32')
    df = df.set_index('timestamp')
    return df

  def format_predictions(predictions: PredictionOutput) -> pd.DataFrame:
    """Saves predictions in a standardized csv format and fills missing values.

    Args:
      predictions: Anomaly detection output with fields times,
        observed_time_series, is_anomaly, tail_probabilities, lower_limit, mean,
        upper_limit.

    Returns:
      predictions_df: A formatted pandas DataFrame compatible with scoring on
      the Numenta Anomaly Benchmark.
    """
    anomaly_scores = 1 - predictions.tail_probabilities
    predictions_df = pd.DataFrame(
        data={
            'timestamp': predictions.times.strftime(timestamp_format).tolist(),
            'value': predictions.observed_time_series.numpy().tolist(),
            'anomaly_score': anomaly_scores.numpy().tolist(),
            'tail_probability': predictions.tail_probabilities.numpy().tolist(),
            'label': predictions.is_anomaly.numpy().astype(int).tolist(),
            'lower_limit': predictions.lower_limit.numpy().tolist(),
            'mean': predictions.mean.numpy().tolist(),
            'upper_limit': predictions.upper_limit.numpy().tolist(),
        })
    return predictions_df

  data = load_data(input_dataset.path)
  logger.info(
      'Input dataset has {0} rows. If you run out of memory you should increase set_memory_limit in your pipeline.'
      .format(len(data)))
  predictions = tfp_ad.detect_anomalies(data, anomaly_threshold,
                                        use_gibbs_predictive_dist,
                                        num_warmup_steps, num_samples,
                                        jit_compile, seed)
  predictions_df = format_predictions(predictions)
  predictions_df.to_csv(output_dataset.path)
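The component reads a CSV whose columns are named by `time_col` and `feature_col`; a small hypothetical example of producing a compatible input file with pandas (the file name and values are made up):

import pandas as pd

df = pd.DataFrame({
    "timestamp": ["2024-01-01 00:00:00", "2024-01-01 01:00:00",
                  "2024-01-01 02:00:00", "2024-01-01 03:00:00"],
    "value": [1.0, 1.1, 0.9, 5.0],
})
df.to_csv("input_series.csv", index=False)  # point input_dataset.path at a file like this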
Example #7
import tensorflow.compat.v2 as tf
tf.enable_v2_behavior()
tf.get_logger().propagate = False
#import tensorflow as tf
import tensorflow_transform as tft

import rdi_constants

_DENSE_FLOAT_FEATURE_KEYS = rdi_constants.DENSE_FLOAT_FEATURE_KEYS
_VOCAB_FEATURE_KEYS = rdi_constants.VOCAB_FEATURE_KEYS
_VOCAB_SIZE = rdi_constants.VOCAB_SIZE
_OOV_SIZE = rdi_constants.OOV_SIZE
_FEATURE_BUCKET_COUNT = rdi_constants.FEATURE_BUCKET_COUNT
_BUCKET_FEATURE_KEYS = rdi_constants.BUCKET_FEATURE_KEYS
_CATEGORICAL_FEATURE_KEYS = rdi_constants.CATEGORICAL_FEATURE_KEYS
_FARE_KEY = rdi_constants.FARE_KEY
_LABEL_KEY = rdi_constants.LABEL_KEY
_transformed_name = rdi_constants.transformed_name


def preprocessing_fn(inputs):
  """tf.transform's callback function for preprocessing inputs.
  Args:
    inputs: map from feature keys to raw not-yet-transformed features.
  Returns:
    Map from string feature key to transformed feature operations.
  """
  outputs = {}
  for key in _DENSE_FLOAT_FEATURE_KEYS:
    # Preserve this feature as a dense float, setting nan's to the mean.
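    # Hypothetical continuation (not in the original listing), modeled on the
    # stock TFX taxi preprocessing_fn; _fill_in_missing is a helper defined
    # elsewhere in that example which densifies sparse features before scaling:
    outputs[_transformed_name(key)] = tft.scale_to_z_score(
        _fill_in_missing(inputs[key]))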
Example #8
    else:
        if FLAGS.use_tpu:
            # TPU is automatically inferred if tpu_name is None and
            # we are running under cloud ai-platform.
            resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
                FLAGS.tpu_name)
            tf.config.experimental_connect_to_cluster(resolver)
            tf.tpu.experimental.initialize_tpu_system(resolver)
            strategy = tf.distribute.experimental.TPUStrategy(resolver)
        elif FLAGS.num_workers > 1:
            strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
        else:
            strategy = tf.compat.v2.distribute.MirroredStrategy()

        with strategy.scope():
            model_lib_v2.train_loop(
                pipeline_config_path=FLAGS.pipeline_config_path,
                model_dir=FLAGS.model_dir,
                train_steps=FLAGS.num_train_steps,
                use_tpu=FLAGS.use_tpu,
                checkpoint_every_n=FLAGS.checkpoint_every_n,
                record_summaries=FLAGS.record_summaries,
                checkpoint_max_to_keep=FLAGS.checkpoint_max_to_keep,
                save_best=FLAGS.save_best)


if __name__ == '__main__':
    logger = tf.get_logger()
    logger.propagate = False
    tf.compat.v1.app.run()
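This snippet assumes the flags defined earlier in the Object Detection API's model_main_tf2.py (plus project-specific ones such as save_best, which are not shown). An abbreviated sketch of how such flags are declared with absl:

from absl import flags

flags.DEFINE_bool('use_tpu', False, 'Whether the job runs on a TPU.')
flags.DEFINE_string('tpu_name', None, 'Name of the Cloud TPU for cluster resolvers.')
flags.DEFINE_integer('num_workers', 1,
                     'With num_workers > 1, MultiWorkerMirroredStrategy is used.')
flags.DEFINE_string('pipeline_config_path', None, 'Path to the pipeline config file.')
flags.DEFINE_string('model_dir', None, 'Path to the output model directory.')
flags.DEFINE_integer('num_train_steps', None, 'Number of training steps.')

FLAGS = flags.FLAGS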