Example #1
def tf_seq2seq_sine(request):
    # create artificial sine time series
    X = np.sin(np.linspace(-50, 50, 10000)).astype(np.float32)

    # init model
    decoder_net_, n_features = tests_seq2seq[request.param]
    encoder_net = EncoderLSTM(latent_dim)
    threshold_net = tf.keras.Sequential([
        InputLayer(input_shape=(seq_len, latent_dim)),
        Dense(10, activation=tf.nn.relu)
    ])
    model = Seq2Seq(encoder_net, decoder_net_, threshold_net, n_features)

    # reshape data
    shape = (-1, seq_len, n_features)
    y = np.roll(X, -1, axis=0).reshape(shape)
    X = X.reshape(shape)

    # predict with untrained model, train and predict with trained model
    X_recon_untrained = model(X)
    assert X_recon_untrained.shape == X.shape
    model_weights = model.weights[1].numpy().copy()
    trainer(model,
            tf.keras.losses.mse,
            X,
            y_train=y,
            epochs=2,
            verbose=False,
            batch_size=64)
    X_recon = model(X).numpy()
    assert (model_weights != model.weights[1].numpy()).any()
    assert np.sum((X - X_recon_untrained)**2) > np.sum((X - X_recon)**2)
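The fixture above builds its teacher-forcing targets by shifting the sine series one step with np.roll, so each target equals the next observation. A small self-contained illustration of that shift (toy values, not the fixture's data):

import numpy as np

X = np.arange(6, dtype=np.float32)   # [0., 1., 2., 3., 4., 5.]
y = np.roll(X, -1, axis=0)           # [1., 2., 3., 4., 5., 0.] -> y[t] is X[t + 1], wrapping at the end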
Example #2
    def fit(self,
            X: np.ndarray,
            loss_fn: tf.keras.losses = elbo,
            optimizer: tf.keras.optimizers = tf.keras.optimizers.Adam(learning_rate=1e-3),
            cov_elbo: dict = dict(sim=.05),
            epochs: int = 20,
            batch_size: int = 64,
            verbose: bool = True,
            log_metric: Tuple[str, "tf.keras.metrics"] = None,
            callbacks: tf.keras.callbacks = None,
            ) -> None:
        """
        Train VAE model.

        Parameters
        ----------
        X
            Training batch.
        loss_fn
            Loss function used for training.
        optimizer
            Optimizer used for training.
        cov_elbo
            Dictionary with covariance matrix options in case the elbo loss function is used.
            Either use the full covariance matrix inferred from X (dict(cov_full=None)),
            only the variance (dict(cov_diag=None)) or a float representing the same standard deviation
            for each feature (e.g. dict(sim=.05)).
        epochs
            Number of training epochs.
        batch_size
            Batch size used for training.
        verbose
            Whether to print training progress.
        log_metric
            Additional metrics whose progress will be displayed if verbose equals True.
        callbacks
            Callbacks used during training.
        """
        # train arguments
        args = [self.vae, loss_fn, X]
        kwargs = {'optimizer': optimizer,
                  'epochs': epochs,
                  'batch_size': batch_size,
                  'verbose': verbose,
                  'log_metric': log_metric,
                  'callbacks': callbacks}

        # initialize covariance matrix if elbo loss fn is used
        use_elbo = loss_fn.__name__ == 'elbo'
        cov_elbo_type, cov = [*cov_elbo][0], [*cov_elbo.values()][0]
        if use_elbo and cov_elbo_type in ['cov_full', 'cov_diag']:
            cov = tfp.stats.covariance(X.reshape(X.shape[0], -1))
            if cov_elbo_type == 'cov_diag':  # infer standard deviation from covariance matrix
                cov = tf.math.sqrt(tf.linalg.diag_part(cov))
        if use_elbo:
            kwargs['loss_fn_kwargs'] = {cov_elbo_type: tf.dtypes.cast(cov, tf.float32)}

        # train
        trainer(*args, **kwargs)
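The cov_elbo docstring above allows three forms of the covariance option. A minimal usage sketch, assuming od is an instance of the detector class exposing this fit method (od and X_train are hypothetical names):

od.fit(X_train, cov_elbo=dict(sim=.05))        # one shared standard deviation (0.05) for every feature
od.fit(X_train, cov_elbo=dict(cov_diag=None))  # per-feature variance inferred from X_train
od.fit(X_train, cov_elbo=dict(cov_full=None))  # full covariance matrix inferred from X_train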
Example #3
    def fit(
        self,
        X: np.ndarray,
        loss_fn: tf.keras.losses = tf.keras.losses.mse,
        optimizer: tf.keras.optimizers = tf.keras.optimizers.Adam(
            learning_rate=1e-3),
        epochs: int = 20,
        batch_size: int = 64,
        verbose: bool = True,
        log_metric: Tuple[str, "tf.keras.metrics"] = None,
        callbacks: tf.keras.callbacks = None,
    ) -> None:
        """
        Train Seq2Seq model.

        Parameters
        ----------
        X
            Univariate or multivariate time series.
            Shape equals (batch, features) or (batch, sequence length, features).
        loss_fn
            Loss function used for training.
        optimizer
            Optimizer used for training.
        epochs
            Number of training epochs.
        batch_size
            Batch size used for training.
        verbose
            Whether to print training progress.
        log_metric
            Additional metrics whose progress will be displayed if verbose equals True.
        callbacks
            Callbacks used during training.
        """
        # targets for teacher-forcing
        if len(X.shape) == 2:
            y = np.roll(X, -1, axis=0).reshape(self.shape)
            X = X.reshape(self.shape)
        else:
            y = np.roll(X.reshape((-1, self.shape[-1])), -1,
                        axis=0).reshape(self.shape)

        # train arguments
        args = [self.seq2seq, loss_fn, X]
        kwargs = {
            'y_train': y,
            'optimizer': optimizer,
            'epochs': epochs,
            'batch_size': batch_size,
            'verbose': verbose,
            'log_metric': log_metric,
            'callbacks': callbacks
        }

        # train
        trainer(*args, **kwargs)
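Per the docstring, X can be passed either as (batch, features) or already windowed as (batch, sequence length, features). A minimal sketch, assuming od exposes this fit method and was configured with a sequence length of 50 and a single feature (all names hypothetical):

import numpy as np

X_flat = np.random.rand(5000, 1).astype(np.float32)  # (batch, features); reshaped internally to self.shape
od.fit(X_flat, epochs=5)
X_seq = X_flat.reshape(-1, 50, 1)                     # (batch, sequence length, features)
od.fit(X_seq, epochs=5)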
Example #4
    def fit(self,
            X: np.ndarray,
            loss_fn: tf.keras.losses = loss_aegmm,
            w_energy: float = .1,
            w_cov_diag: float = .005,
            optimizer: tf.keras.optimizers = tf.keras.optimizers.Adam(learning_rate=1e-4),
            epochs: int = 20,
            batch_size: int = 64,
            verbose: bool = True,
            log_metric: Tuple[str, "tf.keras.metrics"] = None,
            callbacks: tf.keras.callbacks = None,
            ) -> None:
        """
        Train AEGMM model.

        Parameters
        ----------
        X
            Training batch.
        loss_fn
            Loss function used for training.
        w_energy
            Weight on sample energy loss term if default `loss_aegmm` loss fn is used.
        w_cov_diag
            Weight on covariance regularizing loss term if default `loss_aegmm` loss fn is used.
        optimizer
            Optimizer used for training.
        epochs
            Number of training epochs.
        batch_size
            Batch size used for training.
        verbose
            Whether to print training progress.
        log_metric
            Additional metrics whose progress will be displayed if verbose equals True.
        callbacks
            Callbacks used during training.
        """
        # train arguments
        args = [self.aegmm, loss_fn, X]
        kwargs = {'optimizer': optimizer,
                  'epochs': epochs,
                  'batch_size': batch_size,
                  'verbose': verbose,
                  'log_metric': log_metric,
                  'callbacks': callbacks,
                  'loss_fn_kwargs': {'w_energy': w_energy,
                                     'w_cov_diag': w_cov_diag}
                  }

        # train
        trainer(*args, **kwargs)

        # set GMM parameters
        x_recon, z, gamma = self.aegmm(X)
        self.phi, self.mu, self.cov, self.L, self.log_det_cov = gmm_params(z, gamma)
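With the default loss_aegmm, the weights w_energy and w_cov_diag are forwarded to the loss via loss_fn_kwargs, and the GMM parameters (phi, mu, cov, L, log_det_cov) are set from a final pass over X once training finishes. A minimal usage sketch, with od a hypothetical instance exposing this method:

od.fit(X_train, w_energy=.2, w_cov_diag=.01, epochs=30, batch_size=128)
# after fit, od.phi, od.mu and od.cov hold the fitted mixture parameters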
Example #5
def test_trainer(trainer_params):
    X_train, y_train, loss_fn_kwargs, verbose = trainer_params
    trainer(model,
            categorical_crossentropy,
            X_train,
            y_train=y_train,
            loss_fn_kwargs=loss_fn_kwargs,
            epochs=2,
            verbose=verbose)
    assert (model.weights[0].numpy() != check_model_weights).any()
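In this test, loss_fn_kwargs is simply forwarded to the loss on every batch. A minimal sketch of what it could hold for categorical_crossentropy (illustrative values, not the test's fixtures):

loss_fn_kwargs = {'from_logits': True}  # tf.keras.losses.categorical_crossentropy also accepts e.g. label_smoothing
trainer(model, categorical_crossentropy, X_train, y_train=y_train,
        loss_fn_kwargs=loss_fn_kwargs, epochs=2, verbose=False)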
Example #6
    def fit(self,
            X: np.ndarray,
            loss_fn: tf.keras.losses = loss_distillation,
            optimizer: tf.keras.optimizers = tf.keras.optimizers.Adam(
                learning_rate=1e-3),
            epochs: int = 20,
            batch_size: int = 128,
            verbose: bool = True,
            log_metric: Tuple[str, "tf.keras.metrics"] = None,
            callbacks: tf.keras.callbacks = None,
            preprocess_fn: Callable = None) -> None:
        """
        Train ModelDistillation detector.

        Parameters
        ----------
        X
            Training batch.
        loss_fn
            Loss function used for training.
        optimizer
            Optimizer used for training.
        epochs
            Number of training epochs.
        batch_size
            Batch size used for training.
        verbose
            Whether to print training progress.
        log_metric
            Additional metrics whose progress will be displayed if verbose equals True.
        callbacks
            Callbacks used during training.
        preprocess_fn
            Preprocessing function applied to each training batch.
        """
        # train arguments
        args = [self.distilled_model, loss_fn, X]
        kwargs = {
            'optimizer': optimizer,
            'epochs': epochs,
            'batch_size': batch_size,
            'verbose': verbose,
            'log_metric': log_metric,
            'callbacks': callbacks,
            'preprocess_fn': preprocess_fn,
            'loss_fn_kwargs': {
                'model': self.model,
                'loss_type': self.loss_type,
                'temperature': self.temperature
            }
        }

        # train
        trainer(*args, **kwargs)
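preprocess_fn is applied to each training batch. A minimal sketch of a hypothetical preprocessing step, assuming ad is an instance exposing this fit method:

def scale_batch(x: np.ndarray) -> np.ndarray:
    # hypothetical per-batch preprocessing: scale pixel values to [0, 1]
    return x.astype(np.float32) / 255.

ad.fit(X_train, preprocess_fn=scale_batch, epochs=10, batch_size=256)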
Example #7
    def score(self,
              x: np.ndarray) -> Tuple[float, float, np.ndarray, np.ndarray]:
        """
        Compute the out-of-fold drift metric such as the accuracy from a classifier
        trained to distinguish the reference data from the data to be tested.

        Parameters
        ----------
        x
            Batch of instances.

        Returns
        -------
        p-value, a notion of distance between the trained classifier's out-of-fold performance
        and that which we'd expect under the null assumption of no drift,
        and the out-of-fold classifier model prediction probabilities on the reference and test data
        """
        x_ref, x = self.preprocess(x)
        n_ref, n_cur = len(x_ref), len(x)
        x, y, splits = self.get_splits(x_ref, x)

        # iterate over folds: train a new model for each fold and make out-of-fold (oof) predictions
        preds_oof_list, idx_oof_list = [], []
        for idx_tr, idx_te in splits:
            y_tr = np.eye(2)[y[idx_tr]]
            if isinstance(x, np.ndarray):
                x_tr, x_te = x[idx_tr], x[idx_te]
            elif isinstance(x, list):
                x_tr, x_te = [x[_] for _ in idx_tr], [x[_] for _ in idx_te]
            else:
                raise TypeError(
                    f'x needs to be of type np.ndarray or list and not {type(x)}.'
                )
            ds_tr = self.dataset(x_tr, y_tr)
            self.model = clone_model(self.original_model) if self.retrain_from_scratch \
                else self.model
            train_args = [self.model, self.loss_fn, None]
            self.train_kwargs.update({'dataset': ds_tr})
            trainer(*train_args, **self.train_kwargs)  # type: ignore
            preds = self.predict_fn(x_te, self.model)
            preds_oof_list.append(preds)
            idx_oof_list.append(idx_te)
        preds_oof = np.concatenate(preds_oof_list, axis=0)
        probs_oof = softmax(
            preds_oof, axis=-1) if self.preds_type == 'logits' else preds_oof
        idx_oof = np.concatenate(idx_oof_list, axis=0)
        y_oof = y[idx_oof]
        p_val, dist = self.test_probs(y_oof, probs_oof, n_ref, n_cur)
        probs_sort = probs_oof[np.argsort(idx_oof)]
        return p_val, dist, probs_sort[:n_ref, 1], probs_sort[n_ref:, 1]
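The per-fold targets above are one-hot encoded with the np.eye(2)[y] indexing trick. A self-contained illustration:

import numpy as np

y = np.array([0, 1, 1, 0])
np.eye(2)[y]
# array([[1., 0.],
#        [0., 1.],
#        [0., 1.],
#        [1., 0.]])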
Example #8
def test_trainer(trainer_params):
    x_train, y_train, dataset, loss_fn_kwargs, preprocess, verbose = trainer_params
    if dataset is not None and y_train is not None:
        ds = dataset(x_train, y_train)
    else:
        ds = None
    trainer(model,
            categorical_crossentropy,
            x_train,
            y_train=y_train,
            dataset=ds,
            loss_fn_kwargs=loss_fn_kwargs,
            preprocess_fn=preprocess,
            epochs=2,
            verbose=verbose)
    assert (model.weights[0].numpy() != check_model_weights).any()
Example #9
    def fit(
        self,
        X: np.ndarray,
        loss_fn: tf.keras.losses = tf.keras.losses.MeanSquaredError(),
        optimizer: tf.keras.optimizers = tf.keras.optimizers.Adam(
            learning_rate=1e-3),
        epochs: int = 20,
        batch_size: int = 64,
        verbose: bool = True,
        log_metric: Tuple[str, "tf.keras.metrics"] = None,
        callbacks: tf.keras.callbacks = None,
    ) -> None:
        """
        Train AE model.

        Parameters
        ----------
        X
            Training batch.
        loss_fn
            Loss function used for training.
        optimizer
            Optimizer used for training.
        epochs
            Number of training epochs.
        batch_size
            Batch size used for training.
        verbose
            Whether to print training progress.
        log_metric
            Additional metrics whose progress will be displayed if verbose equals True.
        callbacks
            Callbacks used during training.
        """
        # train arguments
        args = [self.ae, loss_fn, X]
        kwargs = {
            'optimizer': optimizer,
            'epochs': epochs,
            'batch_size': batch_size,
            'verbose': verbose,
            'log_metric': log_metric,
            'callbacks': callbacks
        }

        # train
        trainer(*args, **kwargs)
Example #10
def tf_v_aegmm_mnist(request):
    # load and preprocess MNIST data
    (X_train, _), (X_test, _) = tf.keras.datasets.mnist.load_data()
    X = X_train.reshape(60000,
                        input_dim)[:1000]  # only train on 1000 instances
    X = X.astype(np.float32)
    X /= 255

    # init model, predict with untrained model, train and predict with trained model
    model, loss_fn = tests[request.param]
    X_recon_untrained, z, gamma = model(X)
    assert X_recon_untrained.shape == X.shape
    assert z.shape[1] == latent_dim + 2
    assert gamma.shape[1] == n_gmm
    model_weights = model.weights[1].numpy().copy()
    trainer(model, loss_fn, X, epochs=5, verbose=False, batch_size=1000)
    assert (model_weights != model.weights[1].numpy()).any()
Example #11
    def fit(self,
            X: np.ndarray,
            loss_fn: tf.keras.losses = loss_adv_ae,
            w_model: float = 1.,
            w_recon: float = 0.,
            optimizer: tf.keras.optimizers = tf.keras.optimizers.Adam(
                learning_rate=1e-3),
            epochs: int = 20,
            batch_size: int = 128,
            verbose: bool = True,
            log_metric: Tuple[str, "tf.keras.metrics"] = None,
            callbacks: tf.keras.callbacks = None,
            preprocess_fn: Callable = None) -> None:
        """
        Train Adversarial AE model.

        Parameters
        ----------
        X
            Training batch.
        loss_fn
            Loss function used for training.
        w_model
            Weight on model prediction loss term.
        w_recon
            Weight on MSE reconstruction error loss term.
        optimizer
            Optimizer used for training.
        epochs
            Number of training epochs.
        batch_size
            Batch size used for training.
        verbose
            Whether to print training progress.
        log_metric
            Additional metrics whose progress will be displayed if verbose equals True.
        callbacks
            Callbacks used during training.
        preprocess_fn
            Preprocessing function applied to each training batch.
        """
        # train arguments
        args = [self.ae, loss_fn, X]
        kwargs = {
            'optimizer': optimizer,
            'epochs': epochs,
            'batch_size': batch_size,
            'verbose': verbose,
            'log_metric': log_metric,
            'callbacks': callbacks,
            'preprocess_fn': preprocess_fn,
            'loss_fn_kwargs': {
                'model': self.model,
                'model_hl': self.model_hl,
                'w_model': w_model,
                'w_recon': w_recon,
                'w_model_hl': self.w_model_hl,
                'temperature': self.temperature
            }
        }

        # train
        trainer(*args, **kwargs)
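The weights w_model and w_recon are merged with the detector's own settings (model_hl, w_model_hl, temperature) into loss_fn_kwargs for the default loss_adv_ae. A minimal usage sketch, with ad a hypothetical instance exposing this method:

ad.fit(X_train, w_model=1., w_recon=.5, epochs=10, batch_size=256)  # keep the default loss, add a reconstruction term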
Example #12
    def fit(self,
            X: np.ndarray,
            loss_fn: tf.keras.losses = loss_vaegmm,
            w_recon: float = 1e-7,
            w_energy: float = .1,
            w_cov_diag: float = .005,
            optimizer: tf.keras.optimizers = tf.keras.optimizers.Adam(learning_rate=1e-4),
            cov_elbo: dict = dict(sim=.05),
            epochs: int = 20,
            batch_size: int = 64,
            verbose: bool = True,
            log_metric: Tuple[str, "tf.keras.metrics"] = None,
            callbacks: tf.keras.callbacks = None,
            ) -> None:
        """
        Train VAEGMM model.

        Parameters
        ----------
        X
            Training batch.
        loss_fn
            Loss function used for training.
        w_recon
            Weight on elbo loss term if default `loss_vaegmm` loss fn is used.
        w_energy
            Weight on sample energy loss term if default `loss_vaegmm` loss fn is used.
        w_cov_diag
            Weight on covariance regularizing loss term if default `loss_vaegmm` loss fn is used.
        optimizer
            Optimizer used for training.
        cov_elbo
            Dictionary with covariance matrix options in case the elbo loss function is used.
            Either use the full covariance matrix inferred from X (dict(cov_full=None)),
            only the variance (dict(cov_diag=None)) or a float representing the same standard deviation
            for each feature (e.g. dict(sim=.05)).
        epochs
            Number of training epochs.
        batch_size
            Batch size used for training.
        verbose
            Whether to print training progress.
        log_metric
            Additional metrics whose progress will be displayed if verbose equals True.
        callbacks
            Callbacks used during training.
        """
        # train arguments
        args = [self.vaegmm, loss_fn, X]
        kwargs = {'optimizer': optimizer,
                  'epochs': epochs,
                  'batch_size': batch_size,
                  'verbose': verbose,
                  'log_metric': log_metric,
                  'callbacks': callbacks,
                  'loss_fn_kwargs': {'w_recon': w_recon,
                                     'w_energy': w_energy,
                                     'w_cov_diag': w_cov_diag}
                  }

        # initialize covariance matrix if default vaegmm loss fn is used
        use_elbo = loss_fn.__name__ == 'loss_vaegmm'
        cov_elbo_type, cov = [*cov_elbo][0], [*cov_elbo.values()][0]
        if use_elbo and cov_elbo_type in ['cov_full', 'cov_diag']:
            cov = tfp.stats.covariance(X.reshape(X.shape[0], -1))
            if cov_elbo_type == 'cov_diag':  # infer standard deviation from covariance matrix
                cov = tf.math.sqrt(tf.linalg.diag_part(cov))
        if use_elbo:
            kwargs['loss_fn_kwargs'][cov_elbo_type] = tf.dtypes.cast(cov, tf.float32)

        # train
        trainer(*args, **kwargs)

        # set GMM parameters
        x_recon, z, gamma = self.vaegmm(X)
        self.phi, self.mu, self.cov, self.L, self.log_det_cov = gmm_params(z, gamma)
Example #13
    def fit(self,
            X: np.ndarray,
            mutate_fn: Callable = mutate_categorical,
            mutate_fn_kwargs: dict = {
                'rate': .2,
                'seed': 0,
                'feature_range': (0, 255)
            },
            mutate_batch_size: int = int(1e10),
            loss_fn: tf.keras.losses = None,
            loss_fn_kwargs: dict = None,
            optimizer: tf.keras.optimizers = tf.keras.optimizers.Adam(
                learning_rate=1e-3),
            epochs: int = 20,
            batch_size: int = 64,
            verbose: bool = True,
            log_metric: Tuple[str, "tf.keras.metrics"] = None,
            callbacks: tf.keras.callbacks = None) -> None:
        """
        Train semantic and background generative models.

        Parameters
        ----------
        X
            Training batch.
        mutate_fn
            Mutation function used to generate the background dataset.
        mutate_fn_kwargs
            Kwargs for the mutation function used to generate the background dataset.
            Default values set for an image dataset.
        mutate_batch_size
            Batch size used to generate the mutations for the background dataset.
        loss_fn
            Loss function used for training.
        loss_fn_kwargs
            Kwargs for loss function.
        optimizer
            Optimizer used for training.
        epochs
            Number of training epochs.
        batch_size
            Batch size used for training.
        verbose
            Whether to print training progress.
        log_metric
            Additional metrics whose progress will be displayed if verbose equals True.
        callbacks
            Callbacks used during training.
        """
        input_shape = X.shape[1:]

        # training arguments
        kwargs = {
            'epochs': epochs,
            'batch_size': batch_size,
            'verbose': verbose,
            'callbacks': callbacks
        }

        # create background data
        mutate_fn = partial(mutate_fn, **mutate_fn_kwargs)
        X_back = predict_batch(X,
                               mutate_fn,
                               batch_size=mutate_batch_size,
                               dtype=X.dtype)

        # prepare sequential data
        if self.sequential and not self.has_log_prob:
            y, y_back = X[:, 1:], X_back[:, 1:]  # type: ignore
            X, X_back = X[:, :-1], X_back[:, :-1]  # type: ignore
        else:
            y, y_back = None, None

        # check if model needs to be built
        use_build = True if self.has_log_prob and not isinstance(
            self.dist_s, tf.keras.Model) else False

        if use_build:
            # build and train semantic model
            self.model_s = build_model(self.dist_s, input_shape)[0]
            self.model_s.compile(optimizer=optimizer)
            self.model_s.fit(X, **kwargs)
            # build and train background model
            self.model_b = build_model(self.dist_b, input_shape)[0]
            self.model_b.compile(optimizer=optimizer)
            self.model_b.fit(X_back, **kwargs)
        else:
            # update training arguments
            kwargs.update({
                'optimizer': optimizer,
                'loss_fn_kwargs': loss_fn_kwargs,
                'log_metric': log_metric
            })

            # train semantic model
            args = [self.dist_s, loss_fn, X]
            kwargs.update({'y_train': y})
            trainer(*args, **kwargs)

            # train background model
            args = [self.dist_b, loss_fn, X_back]
            kwargs.update({'y_train': y_back})
            trainer(*args, **kwargs)
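For sequential models without a log_prob method, the fit above builds next-step targets by dropping the last time step from the inputs and the first time step from the targets. A self-contained illustration of that shift:

import numpy as np

X = np.array([[1, 2, 3, 4],
              [5, 6, 7, 8]])
X_in, y = X[:, :-1], X[:, 1:]
# X_in: [[1, 2, 3], [5, 6, 7]]  ->  targets y: [[2, 3, 4], [6, 7, 8]]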