def test_custom_loss_ivis_callable(model_filepath):
    iris = datasets.load_iris()
    X = iris.data

    class EuclideanDistance:
        def __init__(self, margin=1):
            self.margin = margin
            self.__name__ = self.__class__.__name__

        def _euclidean_distance(self, x, y):
            return K.sqrt(
                K.maximum(K.sum(K.square(x - y), axis=-1, keepdims=True),
                          K.epsilon()))

        def __call__(self, y_true, y_pred):
            anchor, positive, negative = tf.unstack(y_pred)
            return K.mean(
                K.maximum(
                    self._euclidean_distance(anchor, positive)
                    - self._euclidean_distance(anchor, negative)
                    + self.margin, 0))

    model = Ivis(distance=EuclideanDistance(margin=2),
                 k=15,
                 batch_size=16,
                 epochs=5)
    y_pred = model.fit_transform(X)

    # Test model saving and loading
    model.save_model(model_filepath, overwrite=True)

    model_2 = Ivis(distance=EuclideanDistance(margin=2))
    model_2.load_model(model_filepath)
    model_2.fit(X)
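# Hedged sketch (not part of the test above): the test sets self.__name__
# explicitly, which suggests ivis only needs a callable carrying a __name__
# attribute as `distance` — so a plain function should work too when the
# margin can stay at its default. Assumes the same module-level imports
# (tf, K, Ivis) as the test file.
def euclidean_triplet_loss(y_true, y_pred, margin=1):
    def dist(x, y):
        # Numerically stable Euclidean distance, as in the class above
        return K.sqrt(K.maximum(
            K.sum(K.square(x - y), axis=-1, keepdims=True), K.epsilon()))

    anchor, positive, negative = tf.unstack(y_pred)
    return K.mean(K.maximum(
        dist(anchor, positive) - dist(anchor, negative) + margin, 0))

# model = Ivis(distance=euclidean_triplet_loss, k=15, batch_size=16, epochs=5)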
def _custom_model_saving(model_filepath, save_fn, load_fn):
    iris = datasets.load_iris()
    X = iris.data
    Y = iris.target

    # Create a custom model
    inputs = tf.keras.layers.Input(shape=(X.shape[-1],))
    x = tf.keras.layers.Dense(8, activation='relu')(inputs)
    custom_model = tf.keras.Model(inputs, x)

    model = Ivis(k=15, batch_size=16, epochs=2, model=custom_model)
    model.fit(X, Y)

    save_fn(model, model_filepath)
    model_2 = load_fn(model_filepath)

    # Check that model embeddings are the same
    assert np.all(model.transform(X) == model_2.transform(X))
    # Check that supervised predictions are the same
    assert np.all(model.score_samples(X) == model_2.score_samples(X))

    _validate_network_equality(model, model_2)

    # Train a new model
    y_pred_2 = model_2.fit_transform(X, Y)
class DFIvis(BaseEstimator, TransformerMixin):
    # NOTE:
    # - Use DFIvis(embedding_dims=df.shape[1]) to retain every dimension
    def __init__(self, columns=None, prefix='ivis_', **kwargs):
        self.columns = columns
        self.prefix = prefix
        self.model = Ivis(**kwargs)
        self.transform_cols = None

    def fit(self, X, y=None):
        self.columns = X.columns if self.columns is None else self.columns
        self.transform_cols = [x for x in X.columns if x in self.columns]
        self.model.fit(X[self.transform_cols].values,
                       y.values if y is not None else y)
        return self

    def transform(self, X):
        if self.transform_cols is None:
            raise NotFittedError(
                f"This {self.__class__.__name__} instance is not fitted yet. "
                "Call 'fit' with appropriate arguments before using this estimator."
            )
        new_X = pd.DataFrame(
            self.model.transform(X[self.transform_cols].values),
            columns=[
                f'{self.prefix}{x}' for x in range(self.model.embedding_dims)
            ])
        return new_X

    def fit_transform(self, X, y=None):
        return self.fit(X, y).transform(X)
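# Hedged usage sketch for DFIvis (the DataFrame below is illustrative, not
# from the original source): reduce the iris features to two ivis dimensions
# while keeping the result as a labeled DataFrame. Assumes pandas and
# scikit-learn are importable alongside the class above.
import pandas as pd
from sklearn import datasets

iris = datasets.load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)

reducer = DFIvis(embedding_dims=2, k=15, batch_size=16, epochs=5)
embedded = reducer.fit_transform(df)  # columns: ivis_0, ivis_1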
def test_ivis_model_saving(model_filepath):
    model = Ivis(k=15, batch_size=16, n_epochs_without_progress=5)
    iris = datasets.load_iris()
    X = iris.data

    model.fit(X)
    model.save_model(model_filepath)

    model_2 = Ivis()
    model_2.load_model(model_filepath)

    # Check that model predictions are the same
    assert np.all(model.transform(X) == model_2.transform(X))
    # Serializable dict elements are the same
    assert model.__getstate__() == model_2.__getstate__()

    # Check all weights are the same
    for model_layer, model_2_layer in zip(model.encoder.layers,
                                          model_2.encoder.layers):
        model_layer_weights = model_layer.get_weights()
        model_2_layer_weights = model_2_layer.get_weights()
        for i in range(len(model_layer_weights)):
            assert np.all(model_layer_weights[i] == model_2_layer_weights[i])

    # Check optimizer weights are the same
    for model_optimizer_weights, model_2_optimizer_weights in zip(
            model.model_.optimizer.get_weights(),
            model_2.model_.optimizer.get_weights()):
        assert np.all(model_optimizer_weights == model_2_optimizer_weights)

    # Check that trying to save over an existing folder raises an Exception
    with pytest.raises(FileExistsError) as exception_info:
        model.save_model(model_filepath)
    assert isinstance(exception_info.value, FileExistsError)
class Ivis(Transformer):
    """
    This transformer scales all the vectors in an
    [EmbeddingSet][whatlies.embeddingset.EmbeddingSet] by means of the Ivis
    algorithm. We're using the implementation found
    [here](https://github.com/beringresearch/ivis).

    Important:
        This language backend might require you to manually install extra
        dependencies unless you installed via either;

        ```
        pip install whatlies[ivis]
        pip install whatlies[all]
        ```

    Arguments:
        n_components: the number of components to create/add
        kwargs: keyword arguments passed to the
            [Ivis implementation](https://bering-ivis.readthedocs.io/en/latest/hyperparameters.html)

    Usage:

    ```python
    from whatlies.language import GensimLanguage
    from whatlies.transformers import Ivis

    words = ["prince", "princess", "nurse", "doctor", "banker", "man", "woman",
             "cousin", "niece", "king", "queen", "dude", "guy", "gal", "fire",
             "dog", "cat", "mouse", "red", "blue", "green", "yellow", "water",
             "person", "family", "brother", "sister"]

    lang = GensimLanguage("wordvectors.kv")
    emb = lang[words]
    emb.transform(Ivis(3)).plot_interactive_matrix('ivis_0', 'ivis_1', 'ivis_2')
    ```
    """

    def __init__(self, n_components=2, **kwargs):
        super().__init__()
        self.n_components = n_components
        self.kwargs = kwargs
        self.kwargs["verbose"] = 0
        self.tfm = IVIS(embedding_dims=self.n_components, **self.kwargs)

    def fit(self, embset):
        names, X = embset.to_names_X()
        self.tfm.fit(X)
        self.is_fitted = True
        return self

    def transform(self, embset):
        names, X = embset.to_names_X()
        new_vecs = self.tfm.transform(X)
        names_out = names + [f"ivis_{i}" for i in range(self.n_components)]
        vectors_out = np.concatenate([new_vecs, np.eye(self.n_components)])
        new_dict = new_embedding_dict(names_out, vectors_out, embset)
        return EmbeddingSet(new_dict,
                            name=f"{embset.name}.ivis_{self.n_components}()")
def _unsupervised_model_save_test(model_filepath, save_fn, load_fn):
    model = Ivis(k=15, batch_size=16, epochs=2)
    iris = datasets.load_iris()
    X = iris.data

    model.fit(X)
    save_fn(model, model_filepath)
    model_2 = load_fn(model_filepath)

    # Check that model predictions are the same
    assert np.all(model.transform(X) == model_2.transform(X))

    _validate_network_equality(model, model_2)

    # Train a new model
    y_pred_2 = model_2.fit_transform(X)
def test_save_overwriting(model_filepath):
    model = Ivis(k=15, batch_size=16, epochs=2)
    iris = datasets.load_iris()
    X = iris.data

    model.fit(X)
    model.save_model(model_filepath)

    # Check that trying to save over an existing folder raises an Exception
    with pytest.raises(FileExistsError) as exception_info:
        model.save_model(model_filepath)
    assert isinstance(exception_info.value, FileExistsError)

    # Check that an existing model can be overwritten if requested
    model.save_model(model_filepath, overwrite=True)
def test_regression():
    (x_train, y_train), (x_test, y_test) = boston_housing.load_data()

    supervision_metric = 'mae'
    ivis_boston = Ivis(k=15, batch_size=16, epochs=5,
                       supervision_metric=supervision_metric)
    ivis_boston.fit(x_train, y_train)

    embeddings = ivis_boston.transform(x_train)
    y_pred = ivis_boston.score_samples(x_train)

    assert ivis_boston.model_.loss['supervised'] == 'mae'
    assert ivis_boston.model_.layers[-1].activation.__name__ == 'linear'
    assert ivis_boston.model_.layers[-1].output_shape[-1] == 1
def test_supervised_model_saving(model_filepath):
    model = Ivis(k=15, batch_size=16, epochs=5,
                 supervision_metric='sparse_categorical_crossentropy')
    iris = datasets.load_iris()
    X = iris.data
    Y = iris.target

    model.fit(X, Y)
    model.save_model(model_filepath, overwrite=True)

    model_2 = Ivis()
    model_2.load_model(model_filepath)

    # Check that model embeddings are the same
    assert np.all(model.transform(X) == model_2.transform(X))
    # Check that supervised predictions are the same
    assert np.all(model.score_samples(X) == model_2.score_samples(X))

    # Serializable dict elements are the same
    assert model.__getstate__() == model_2.__getstate__()

    # Check all weights are the same
    for model_layer, model_2_layer in zip(model.encoder.layers,
                                          model_2.encoder.layers):
        model_layer_weights = model_layer.get_weights()
        model_2_layer_weights = model_2_layer.get_weights()
        for i in range(len(model_layer_weights)):
            assert np.all(model_layer_weights[i] == model_2_layer_weights[i])

    # Check optimizer weights are the same
    for w1, w2 in zip(model.model_.optimizer.get_weights(),
                      model_2.model_.optimizer.get_weights()):
        assert np.all(w1 == w2)

    # Check that trying to save over an existing folder raises an Exception
    with pytest.raises(FileExistsError) as exception_info:
        model.save_model(model_filepath)
    assert isinstance(exception_info.value, FileExistsError)

    # Check that an existing model can be overwritten if requested
    model.save_model(model_filepath, overwrite=True)

    # Train a new model
    y_pred_2 = model_2.fit_transform(X, Y)
def test_custom_model_saving(model_filepath):
    iris = datasets.load_iris()
    X = iris.data
    Y = iris.target

    # Create a custom model
    inputs = tf.keras.layers.Input(shape=(X.shape[-1],))
    x = tf.keras.layers.Dense(128, activation='relu')(inputs)
    custom_model = tf.keras.Model(inputs, x)

    model = Ivis(k=15, batch_size=16, epochs=5,
                 supervision_metric='sparse_categorical_crossentropy',
                 model=custom_model)

    model.fit(X, Y)
    model.save_model(model_filepath, overwrite=True)

    model_2 = Ivis()
    model_2.load_model(model_filepath)

    # Check that model embeddings are the same
    assert np.all(model.transform(X) == model_2.transform(X))
    # Check that supervised predictions are the same
    assert np.all(model.score_samples(X) == model_2.score_samples(X))

    # Serializable dict elements are the same
    assert model.__getstate__() == model_2.__getstate__()

    # Check all weights are the same
    for model_layer, model_2_layer in zip(model.encoder.layers,
                                          model_2.encoder.layers):
        model_layer_weights = model_layer.get_weights()
        model_2_layer_weights = model_2_layer.get_weights()
        for i in range(len(model_layer_weights)):
            assert np.all(model_layer_weights[i] == model_2_layer_weights[i])

    # Check optimizer weights are the same
    for w1, w2 in zip(model.model_.optimizer.get_weights(),
                      model_2.model_.optimizer.get_weights()):
        assert np.all(w1 == w2)

    # Train a new model
    y_pred_2 = model_2.fit_transform(X, Y)
def ivis_reduce(docvecs, label, ivis_model, use_nn, **kwargs):
    if use_nn:
        if not ivis_model:
            print("Training ivis...")
            ivis_model = Ivis(embedding_dims=1,
                              k=15,
                              model="maaten",
                              n_epochs_without_progress=15,
                              verbose=0,
                              batch_size=128)
            if -1 in label.unique() and label.value_counts()[-1] == label.shape[0]:
                print("No labeled data found.")
                ivis_model = ivis_model.fit(docvecs)
            else:
                ivis_model = ivis_model.fit(docvecs, Y=label.to_numpy())
        dim_reduced_vecs = ivis_model.transform(docvecs)
        decision_scores = dim_reduced_vecs.astype(float)
        return decision_scores, ivis_model
    else:
        return docvecs, None
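# Hedged usage sketch for ivis_reduce; docvecs and label below are toy
# stand-ins for the document vectors and (possibly unlabeled) target series
# the function expects. A label series of all -1 takes the unsupervised path.
import numpy as np
import pandas as pd

docvecs = np.random.rand(256, 300)   # e.g. document embeddings
label = pd.Series([-1] * 256)        # all -1 -> treated as unlabeled
scores, trained_model = ivis_reduce(docvecs, label, ivis_model=None, use_nn=True)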
def _supervised_model_save_test(model_filepath, save_fn, load_fn):
    model = Ivis(k=15, batch_size=16, epochs=2,
                 supervision_metric='sparse_categorical_crossentropy')
    iris = datasets.load_iris()
    X = iris.data
    Y = iris.target

    model.fit(X, Y)
    save_fn(model, model_filepath)
    model_2 = load_fn(model_filepath)

    # Check that model embeddings are the same
    assert np.all(model.transform(X) == model_2.transform(X))
    # Check that supervised predictions are the same
    assert np.all(model.score_samples(X) == model_2.score_samples(X))

    _validate_network_equality(model, model_2)

    # Train a new model
    y_pred_2 = model_2.fit_transform(X, Y)
"""
iris dataset
============

Example of reducing dimensionality of the iris dataset using ivis.
"""
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from ivis import Ivis

sns.set(context='paper', style='white')

X = load_iris().data
X = MinMaxScaler().fit_transform(X)

ivis = Ivis(k=5, model='maaten', verbose=0)
ivis.fit(X)

embeddings = ivis.transform(X)

plt.figure(figsize=(5, 5), dpi=100)
plt.scatter(embeddings[:, 0], embeddings[:, 1], c=load_iris().target, s=20)
plt.xlabel('ivis 1')
plt.ylabel('ivis 2')
plt.title('ivis embeddings of the iris dataset')
plt.show()
"""
Supervised dimensionality reduction
===================================

ivis is able to make use of any provided class labels to perform supervised
dimensionality reduction. Supervised embeddings combine the distance-based
characteristics of the unsupervised ivis algorithm with clear class
boundaries between the class categories. The resulting embeddings encode
relevant class-specific information into a lower-dimensional space, making
them useful for enhancing the performance of a classifier.

To train ivis in supervised mode, simply provide the labels to the fit
method's Y parameter. These labels should be a list of 0-indexed integers,
with each integer corresponding to a class.
"""
import numpy as np
from keras.datasets import mnist
from ivis import Ivis

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# Rescale to 0-1
X_train = X_train / 255.
X_test = X_test / 255.

# Flatten images to 1D vectors
X_train = np.reshape(X_train, (len(X_train), 28 * 28))
X_test = np.reshape(X_test, (len(X_test), 28 * 28))

model = Ivis(n_epochs_without_progress=5)
model.fit(X_train, Y_train)
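# Follow-up sketch (not in the original example): once trained in supervised
# mode, the model's transform() projects unseen data into the same embedding
# space, so the held-out test set can be embedded directly.
train_embeddings = model.transform(X_train)
test_embeddings = model.transform(X_test)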
# NOTE: the opening of this constructor call was truncated in the source;
# umap.UMAP is assumed, given the fit/transform calls that follow.
umap_reducer = umap.UMAP(n_components=umap_n_components,
                         random_state=42,
                         verbose=True)
umap_reducer = umap_reducer.fit(list(docvecs))
dim_reduced_vecs = umap_reducer.transform(list(docvecs))
if not use_ivis:
    decision_scores = dim_reduced_vecs.astype(float)

# %%
# Ivis
if use_ivis:
    ivis_reducer = Ivis(embedding_dims=1,
                        k=15,
                        model="maaten",
                        n_epochs_without_progress=15)
    ivis_reducer = ivis_reducer.fit(dim_reduced_vecs)
    dim_reduced_vecs = ivis_reducer.transform(dim_reduced_vecs)
    decision_scores = dim_reduced_vecs.astype(float)

# %%
iqrout = IQROutlier(contamination=0.1)
iqrout = iqrout.fit(decision_scores)
preds = iqrout.transform(decision_scores)
scores = get_scores(dict(), df["outlier_label"], preds)
scores

# %%
# validate
df_val = pd.read_csv("/home/philipp/projects/dad4td/data/raw/amazon.csv",