Example #1

from tda.tda_logging import get_logger

import torch
from tda.devices import device
from tda.models import Architecture

logger = get_logger("AdvGen")


# Build a one-hot matrix from a tensor of class indices
def one_hot(y, num_classes=None):
    if num_classes is None:
        # Infer the number of classes from the largest label
        num_classes = int(y.max().item()) + 1
    if y.dim() > 0:
        y_ = torch.zeros(len(y), num_classes, device=y.device)
        y_.scatter_(1, y.unsqueeze(-1), 1)
    else:
        # scatter_ needs an index with the same number of dims as y_
        y_ = torch.zeros(1, num_classes, device=y.device)
        y_.scatter_(1, y.reshape(1, 1), 1)
    return y_.to(device)


def ce_loss(outputs, labels, num_classes=None):
    """
    Cross-entropy loss
    (outputs = post-softmax output of the model,
     labels = scalar classes, converted to one-hot)
    """
    labels = one_hot(labels, num_classes=num_classes)
    size = len(outputs)
    # Assumed completion of this truncated excerpt: mean negative
    # log-likelihood; the small epsilon guards against log(0).
    return -torch.sum(labels * torch.log(outputs + 1e-12)) / size
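
# Minimal usage sketch (illustrative, not part of the original file):
if __name__ == "__main__":
    probs = torch.tensor([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]]).to(device)
    targets = torch.tensor([0, 1]).to(device)
    print(one_hot(targets, num_classes=3))  # 2 x 3 one-hot matrix
    print(ce_loss(probs, targets, num_classes=3))  # scalar tensor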
Example #2

import typing

import numpy as np
import pickle

from tda.graph import Graph
from tda.tda_logging import get_logger

logger = get_logger("PersistentDiagrams")
max_float = np.finfo(np.float64).max

try:
    from dionysus import Filtration, Simplex, homology_persistence, init_diagrams
except Exception as e:
    logger.warning(e)
    Filtration = None

try:
    from persim import sliced_wasserstein as persim_sw
    from ripser import Rips
except Exception as e:
    logger.warning(e)
    persim_sw = None
    Rips = None


def _prepare_edges_for_diagram(edge_list: typing.List):
    """
    Enrich the edge list with the vertices and find their birth dates
    """

    timing_by_vertex = dict()
Example #3

from scipy.sparse import coo_matrix
from torch import Tensor
import typing
import numpy as np
from tda.models.architectures import Architecture
from tda.tda_logging import get_logger

logger = get_logger("GraphComputation")


class Graph(object):
    def __init__(self, edge_dict: typing.Dict):
        self._edge_dict = edge_dict

    @classmethod
    def from_architecture_and_data_point(cls, architecture: Architecture,
                                         x: Tensor):
        raw_edge_dict = architecture.get_graph_values(x)
        #logger.info(f"raw_edge_dict = {raw_edge_dict}")
        edge_dict = dict()
        for layer_link in raw_edge_dict:
            v = raw_edge_dict[layer_link]
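            # Rescale absolute edge weights (note: 10e5 == 1e6)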
            v = np.abs(v) * 10e5
            edge_dict[layer_link] = v

        return cls(edge_dict=edge_dict)

    def thresholdize(self, thresholds, low_pass: bool):
        for layer_link in self._edge_dict:
            v = self._edge_dict[layer_link]
            # logger.info(f"layer link {layer_link} and shape of v = {v.todense().shape}")
Example #4
from typing import Optional

from scipy.sparse import coo_matrix, csr_matrix, diags
from torch import nn
import numpy as np

from tda.tda_logging import get_logger

logger = get_logger("Layer")
import torch
from tda.precision import default_tensor_type

torch.set_default_tensor_type(default_tensor_type)


class Layer(object):
    def __init__(self, func: nn.Module, graph_layer: bool, name: Optional[str] = None):
        self.func = func.type(default_tensor_type)
        self.graph_layer = graph_layer
        self._activations = None
        self.matrix = None
        self.name = name

    def build_matrix(self) -> coo_matrix:
        raise NotImplementedError()

    def get_matrix(self):
        ret = dict()
        for parentidx in self._activations:
            activ = self._activations[parentidx].reshape(-1)
            ret[parentidx] = coo_matrix(
Example #5
import random

import numpy as np
import pytest
import torch
from functools import reduce

from tda.models import get_deep_model, cifar_lenet
from tda.models.architectures import (
    mnist_mlp,
    Architecture,
    mnist_lenet,
    svhn_lenet,
    cifar_toy_resnet,
    cifar_resnet_1,
)
from tda.dataset.datasets import Dataset
from tda.tda_logging import get_logger

logger = get_logger("test_models")


def test_get_mnist_model():
    torch.manual_seed(37)
    random.seed(38)
    np.random.seed(39)

    source_dataset = Dataset("MNIST")
    _, val_acc, test_acc = get_deep_model(
        dataset=source_dataset,
        num_epochs=1,
        architecture=mnist_lenet,
        with_details=True,
        force_retrain=True,
    )
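
    # Plausible continuation of this truncated test (an assumption, not the
    # repo's actual assertions): sanity-check the reported accuracies.
    assert 0.0 <= val_acc <= 1.0
    assert 0.0 <= test_acc <= 1.0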
Example #6
from abc import ABC
import numpy as np
from tda.tda_logging import get_logger

ClassIndex = int

logger = get_logger("CovarianceComputer")


class CovarianceStreamComputer(ABC):
    """
    Helper object to compute covariance matrices and
    mean of a stream of 1d vectors.
    """

    def append(self, x: np.ndarray, clazz: ClassIndex):
        raise NotImplementedError()

    def mean_per_class(self, y: ClassIndex) -> np.ndarray:
        raise NotImplementedError()

    def precision_root(self) -> np.ndarray:
        raise NotImplementedError()

    def precision(self) -> np.ndarray:
        root = self.precision_root()
        return np.transpose(root) @ root
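

# Illustrative sketch (not from the original file): a naive in-memory
# implementation of the interface above; the class name is an assumption.
class NaiveInMemoryComputer(CovarianceStreamComputer):
    def __init__(self):
        self._xs = []
        self._classes = []

    def append(self, x: np.ndarray, clazz: ClassIndex):
        self._xs.append(x)
        self._classes.append(clazz)

    def mean_per_class(self, y: ClassIndex) -> np.ndarray:
        return np.mean(
            [x for x, c in zip(self._xs, self._classes) if c == y], axis=0
        )

    def precision_root(self) -> np.ndarray:
        # Return R such that R.T @ R equals the precision matrix, matching
        # the contract of precision() above: with inv(cov) = L @ L.T
        # (Cholesky), R = L.T satisfies R.T @ R = L @ L.T = inv(cov).
        cov = np.cov(np.stack(self._xs), rowvar=False)
        return np.linalg.cholesky(np.linalg.inv(cov)).T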
Example #7
from typing import Dict, NamedTuple

import fwg
import numpy as np
from joblib import Parallel, delayed

from tda.embeddings.persistent_diagrams import (
    sliced_wasserstein_kernel,
    compute_dgm_from_graph,
)
from tda.embeddings.raw_graph import to_sparse_vector
from tda.graph import Graph
from tda.dataset.graph_dataset import DatasetLine
from tda.models import Architecture
from tda.tda_logging import get_logger

logger = get_logger("Embeddings")


class Embedding(NamedTuple):
    value: object
    time_taken: Dict


class EmbeddingType(object):
    PersistentDiagram = "PersistentDiagram"
    RawGraph = "RawGraph"


class KernelType(object):
    Euclidean = "Euclidean"
    RBF = "RBF"
Example #8
import os
import time
from typing import NamedTuple

from tda.embeddings import KernelType
from tda.dataset.graph_dataset import DatasetLine
from tda.tda_logging import get_logger
from tda.models import mnist_mlp, Dataset, get_deep_model
from tda.models.architectures import get_architecture, Architecture
from tda.protocol import get_protocolar_datasets, evaluate_embeddings

from tda.covariance import (
    CovarianceStreamComputer,
    NaiveCovarianceStreamComputer,
    LedoitWolfComputer,
    NaiveSVDCovarianceStreamComputer,
    GraphicalLassoComputer,
)

logger = get_logger("Mahalanobis")

start_time = time.time()

plot_path = f"{os.path.dirname(os.path.realpath(__file__))}/plots"
if not os.path.exists(plot_path):
    os.mkdir(plot_path)

# Custom types for better readability
LayerIndex = int
ClassIndex = int


class Config(NamedTuple):
    # Number of epochs for the model
    epochs: int
Example #9

import typing

import mlflow
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.svm import OneClassSVM, SVC
from sklearn.utils import check_random_state

from tda.dataset.adversarial_generation import AttackBackend
from tda.embeddings import get_gram_matrix
from tda.dataset.graph_dataset import get_sample_dataset
from tda.models import Architecture, Dataset
from tda.tda_logging import get_logger

logger = get_logger("C3PO")


def get_protocolar_datasets(
    noise: float,
    dataset: Dataset,
    succ_adv: bool,
    archi: Architecture,
    dataset_size: int,
    attack_type: str,
    all_epsilons: typing.List,
    attack_backend: str = AttackBackend.FOOLBOX,
    compute_graph: bool = False,
    transfered_attacks: bool = False,
):
    logger.info("I will produce for you the protocolar datasets !")
Example #10

from .layer import Layer
from torch import nn
from functools import reduce
from numba import njit
import numpy as np
from scipy.sparse import coo_matrix, bmat as sparse_bmat
from tda.tda_logging import get_logger
import torch
from tda.precision import default_tensor_type

torch.set_default_tensor_type(default_tensor_type)

logger = get_logger("ConvLayer")


class ConvLayer(Layer):
    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        input_shape=None,
        stride=1,
        padding=0,
        bias=False,
        activ=None,
        name=None,
        grouped_channels: bool = False,
        p=0.0,
    ):
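        # Body truncated in this excerpt. Judging from the imports above
        # (coo_matrix, sparse_bmat, njit), the layer later unrolls the
        # convolution into a sparse block matrix so that activation-graph
        # edge weights can be read off as matrix entries (an assumption
        # about the omitted code, stated here for context).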
Example #11
import os
import pathlib
import typing

import numpy as np
import torch

from tda.cache import cached
from tda.dataset.adversarial_generation import AttackBackend, adversarial_generation
from tda.devices import device
from tda.graph import Graph
from tda.tda_logging import get_logger
from tda.models.architectures import Architecture, mnist_mlp
from tda.dataset.datasets import Dataset
from tda.rootpath import rootpath
from tda.models import get_deep_model

logger = get_logger("GraphDataset")


def saved_adv_path():
    directory = f"{rootpath}/saved_adversaries/"
    pathlib.Path(directory).mkdir(exist_ok=True, parents=True)
    return str(directory)


def process_sample(
    sample: typing.Tuple,
    adversarial: bool,
    noise: float = 0,
    epsilon: float = 0,
    model: typing.Optional[Architecture] = None,
    attack_type: str = "FGSM",
Example #12
import torch
import os

from tda.tda_logging import get_logger

logger = get_logger("Devices")

nb_cuda_devices = torch.cuda.device_count()

logger.info(f"Found {nb_cuda_devices} devices compatible with CUDA")

if nb_cuda_devices > 0:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

if os.environ.get("FORCE_CPU", "0") == "1":
    device = torch.device("cpu")

logger.info(f"Device is {device}")
Example #13
import inspect
import os
import torch
import pathlib
import socket

from tda.rootpath import rootpath
from tda.tda_logging import get_logger

logger = get_logger("Cache")

if os.path.exists("/var/opt/data/user_data"):
    # We are on gpu
    cache_root = f"/var/opt/data/user_data/tda/"
elif "mesos" in socket.gethostname():
    # We are in mozart
    cache_root = f"{os.environ['HOME']}/tda_cache/"
else:
    # Other cases (local)
    cache_root = f"{rootpath}/cache/"

logger.info(f"Cache root {cache_root}")


def cached(my_func):
    arg_spec = inspect.getfullargspec(my_func).args

    def my_func_with_cache(*args, **kw):
        kw.update({arg_spec[i]: arg for i, arg in enumerate(args)})
        base_path = f"{cache_root}/{my_func.__name__}/"
        pathlib.Path(base_path).mkdir(parents=True, exist_ok=True)
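        # Plausible continuation (an assumption, not the repo's actual code):
        # derive a cache key from the call arguments and memoize via pickle.
        import pickle

        cache_path = base_path + str(hash(frozenset(kw.items()))) + ".pkl"
        if os.path.exists(cache_path):
            with open(cache_path, "rb") as f:
                return pickle.load(f)
        result = my_func(**kw)
        with open(cache_path, "wb") as f:
            pickle.dump(result, f)
        return result

    return my_func_with_cache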
Example #14
from random import seed  # assumed source of the `seed` call below

import numpy as np
import torch
from sklearn import datasets
from torchvision import transforms

from tda.devices import device
from tda.tda_logging import get_logger

_trans = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.to(device)),
    transforms.Normalize((0.0, ), (1.0, )),
])
_trans_BandW = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.to(device)),
    transforms.Normalize((0.0, ), (1.0, )),
])
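
# Note: Normalize((0.0,), (1.0,)) subtracts mean 0 and divides by std 1, so it
# leaves pixel values unchanged; the pipelines above effectively only convert
# images to tensors on `device` (and to grayscale for the B&W variant).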

torch.manual_seed(1)
seed(1)

logger = get_logger("Datasets")


class dsetsCircleToy(torch.utils.data.Dataset):
    def __init__(self, n_samples=5000, noise=0.05, factor=0.5):
        X_, Y_ = datasets.make_circles(n_samples=n_samples,
                                       shuffle=True,
                                       noise=noise,
                                       factor=factor)
        X_ = [(x_ + 1.3) / 2.6 for x_ in X_]
        Y__ = np.reshape(Y_, len(Y_))
        self.X = torch.tensor(X_, dtype=torch.float)
        self.Y = torch.tensor(Y__, dtype=torch.long)
        self.n_samples = n_samples

    def __len__(self):
        return self.n_samples


Example #15

import os
import time
from typing import NamedTuple

import torch
from sklearn.metrics.pairwise import euclidean_distances
import mlflow

from tda.dataset.adversarial_generation import AttackType
from tda.dataset.graph_dataset import DatasetLine
from tda.embeddings import KernelType
from tda.models import Dataset, get_deep_model, mnist_lenet
from tda.models.layers import SoftMaxLayer
from tda.models.architectures import get_architecture, Architecture
from tda.protocol import get_protocolar_datasets, evaluate_embeddings
from tda.tda_logging import get_logger

mlflow.set_experiment("tda_adv_detection")

logger = get_logger("LID")

start_time = time.time()

plot_path = f"{os.path.dirname(os.path.realpath(__file__))}/plots"
if not os.path.exists(plot_path):
    os.mkdir(plot_path)


class Config(NamedTuple):
    # Noise to consider for the noisy samples
    noise: float
    # Number of epochs for the model
    epochs: int
    # Dataset we consider (MNIST, SVHN)
    dataset: str
Example #16
import time
from typing import NamedTuple

import mlflow

from tda.embeddings import (
    get_embedding,
    EmbeddingType,
    KernelType,
    ThresholdStrategy,
    Embedding,
)
from tda.embeddings.raw_graph import identify_active_indices, featurize_vectors
from tda.graph_stats import get_quantiles_helpers
from tda.models import get_deep_model, Dataset
from tda.models.architectures import mnist_mlp, get_architecture
from tda.protocol import get_protocolar_datasets, evaluate_embeddings
from tda.tda_logging import get_logger
from tda.threshold_underoptimized_edges import process_thresholds_underopt

logger = get_logger("Detector")
start_time = time.time()

mlflow.set_experiment("tda_adv_detection")


class Config(NamedTuple):
    # Type of embedding to use
    embedding_type: str
    # Type of kernel to use on the embeddings
    kernel_type: str
    # High threshold for the edges of the activation graph
    thresholds: str
    # Which thresholding strategy should we use
    threshold_strategy: str
    # Noise to consider for the noisy samples
    noise: float
Example #17
import os
import time
from typing import Dict, Tuple
from functools import reduce

import numpy as np
import torch
import mlflow
from numpy.random import Generator, PCG64

from tda.embeddings import ThresholdStrategy
from tda.models import Architecture
from tda.models.layers import ConvLayer
from tda.tda_logging import get_logger

logger = get_logger("Thresholds Underoptimized")


def _process_raw_quantiles(raw_quantiles: str) -> Dict[int, Tuple]:

    ret = dict()
    for raw_quantile in raw_quantiles.split("_"):
        layer_idx, value_low, value_up = raw_quantile.split(":")
        ret[int(layer_idx)] = (float(value_low), float(value_up))
        mlflow.log_metric(f"edges_quant_low_{int(layer_idx)}",
                          float(value_low))
        mlflow.log_metric(f"edges_quant_up_{int(layer_idx)}", float(value_up))
    return ret
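
# Illustrative usage (format inferred from the parser above): underscore-
# separated "layer:low:high" triplets, e.g.
#   _process_raw_quantiles("0:0.1:0.9_2:0.05:0.95")
#   -> {0: (0.1, 0.9), 2: (0.05, 0.95)}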


def underopt_edges(quantiles: Dict, method: str, model: Architecture,
Example #18

import pathlib

import mlflow
import torch
from torch.optim.lr_scheduler import _LRScheduler

from tda.models.architectures import (
    svhn_resnet_1,
    toy_mlp,
    toy_mlp2,
    toy_mlp3,
    toy_mlp4,
    efficientnet,
)
from tda.dataset.datasets import Dataset
from tda.models.layers import ConvLayer, LinearLayer
from tda.rootpath import rootpath
from tda.tda_logging import get_logger
from tda.precision import default_tensor_type

torch.set_default_tensor_type(default_tensor_type)

logger = get_logger("Models")

mlflow.set_experiment("tda_adv_detection")

pathlib.Path("/tmp/tda/trained_models").mkdir(parents=True, exist_ok=True)


class GradualWarmupScheduler(_LRScheduler):
    """ Gradually warm-up(increasing) learning rate in optimizer.
    Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'.
    Args:
        optimizer (Optimizer): Wrapped optimizer.
        multiplier: target learning rate = base lr * multiplier
        total_epoch: target learning rate is reached at total_epoch, gradually
        after_scheduler: after target_epoch, use this scheduler(eg. ReduceLROnPlateau)
    """

Example #19

import time
import typing

import mlflow
import torch

import matplotlib.pyplot as plt
import numpy as np

from tda.dataset.graph_dataset import get_sample_dataset, AttackBackend
from tda.models import Dataset, get_deep_model
from tda.models.architectures import Architecture
from tda.models.architectures import get_architecture, svhn_lenet
from tda.tda_logging import get_logger
from tda.rootpath import rootpath

start_time = time.time()

logger = get_logger("GraphStats")

mlflow.set_experiment("tda_adv_detection")


class Config(typing.NamedTuple):
    # Noise to consider for the noisy samples
    noise: float
    # Number of epochs for the model
    epochs: int
    # Dataset we consider (MNIST, SVHN)
    dataset: str
    # Name of the architecture
    architecture: str
    # Noise to be added during the training of the model
    train_noise: float
Example #20

from typing import Callable, List, Tuple

import torch
from torch import nn

# from art.classifiers import PyTorchClassifier
import foolbox as fb
from cached_property import cached_property

from tda.devices import device
from tda.models.layers import (
    Layer,
    SoftMaxLayer,
)
from tda.rootpath import model_dir
from tda.tda_logging import get_logger
from tda.precision import default_tensor_type

torch.set_default_tensor_type(default_tensor_type)
logger = get_logger("Architecture")

#################
# Architectures #
#################


class Architecture(nn.Module):
    def __init__(
        self,
        layers: List[Layer],
        preprocess: Callable = None,
        layer_links: List[Tuple[int, int]] = None,
        name: str = "",
    ):