Example #1
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sacred import Ingredient

cleaner_ingredient = Ingredient("data_cleaning")
cleaner_ingredient.add_config("config.yaml")


class ApplicationCleaning(BaseEstimator, TransformerMixin):
    """
    Cleaning of data from the application_train / application_test source.

    Parameters
    ----------
    fill_missing: bool, optional, default = False
        Flag for filling missing values. Optional; by default, missing
        values are not filled.

    fill_value: float, optional, default = 0
        Value used to fill missing entries.

    copy: bool, optional, default = True
        If True, the transformation is applied to a copy of the data;
        otherwise the original dataset is modified. Optional; a copy is
        used by default.

    """
    def __init__(self,
                 fill_missing: bool = False,
                 fill_value: float = 0,
                 copy: bool = True) -> None:
        self.fill_missing = fill_missing
        self.fill_value = fill_value
        self.copy = copy
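
For context, a minimal sketch of how this ingredient is typically attached to an experiment. The experiment name and the config.yaml keys are assumptions, and the sketch presumes the full class also defines fit/transform (the excerpt stops at __init__):

from sacred import Experiment

ex = Experiment("cleaning_demo", ingredients=[cleaner_ingredient])  # name assumed


@ex.automain
def main(data_cleaning):
    # sacred injects the ingredient's config dict under its name.
    cleaner = ApplicationCleaning(
        fill_missing=data_cleaning.get("fill_missing", False),  # assumed key
        fill_value=data_cleaning.get("fill_value", 0),          # assumed key
    )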
Example #2

from run_sacred import data_ingredient, method_ingredient, optim_ingredient, get_feature_of
from sacred import Experiment, Ingredient
from sacred_wrap import MongoExtractor

from utils import get_split_samplers, SplitBatchSampler
from utils import get_split_datasets
from utils import flatten_dict
from divergence import CMD, pairwise_divergence


classifier_ingredient = Ingredient('classifier')
classifier_ingredient.add_config({
    'pretrain': False,
    'finetune_g': False,
    'use_c_enc': False,
    'finetune_c': False,
    'hiddens': None,
    'auxiliary': 0.0,
    'label_size': 1.0,
})

classifier_optim_ingredient = Ingredient('classifier_optim')
classifier_optim_ingredient.add_config({
    'lr': 0.001,
    'num_batch': 30000,
    'batch_size': 128,
    'monitor_per': 100,
})


def get_classifier(model, num_classes, finetune_g, use_c_enc, finetune_c, hiddens, **kwargs):
Example #3

import os

import torch
import torch.nn as nn
import torch.optim as optim
from sacred import Ingredient

from ingredients.quantize import batch_dither, quantize

train_ingredient = Ingredient('train')
train_ingredient.add_config('config.json')
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


@train_ingredient.capture
def train(trained_models_dir,
          epochs,
          lr_steps,
          model,
          loader,
          _run,
          momentum,
          weight_decay,
          opt,
          model_name,
          bitdepth=8,
          dither=False):

    scheduler = optim.lr_scheduler.StepLR(opt, step_size=lr_steps, gamma=0.1)
    n_batches = len(loader)
    for epoch in range(epochs):
        # Assumed completion: the excerpt ends at this loop; the body below
        # mirrors the quantize/dither logic of the companion test()
        # ingredient in Example #5.
        model.train()
        for X, y in loader:
            X = X.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            if bitdepth != 8:
                X = batch_dither(X, bitdepth) if dither else quantize(X, bitdepth)

            opt.zero_grad()
            loss = nn.CrossEntropyLoss()(model(X), y)
            loss.backward()
            opt.step()
        scheduler.step()
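
A sketch of how such a captured trainer is typically driven from an experiment script. The experiment name, module path, and toy model/data are assumptions made purely to show the call shape; everything captured (epochs, lr_steps, momentum, weight_decay, trained_models_dir) comes from config.json:

import torch
import torch.nn as nn
import torch.optim as optim
from sacred import Experiment
from torch.utils.data import DataLoader, TensorDataset

from ingredients.train import device, train, train_ingredient  # path assumed

ex = Experiment('quantize_train', ingredients=[train_ingredient])  # name assumed


@ex.automain
def main():
    model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10)).to(device)
    loader = DataLoader(
        TensorDataset(torch.rand(64, 3, 32, 32), torch.randint(0, 10, (64,))),
        batch_size=16)
    opt = optim.SGD(model.parameters(), lr=0.1)
    # Only runtime objects are passed by hand; the rest is captured.
    train(model=model, loader=loader, opt=opt, model_name='toy')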
Example #4
from typing import Optional
import pandas as pd
from sacred import Ingredient

dataset_ingredient = Ingredient("dataset")
dataset_ingredient.add_config("config.yaml")


@dataset_ingredient.capture
def get_input(train_data_path: str,
              test_data_path: Optional[str] = None,
              target_name: Optional[str] = None):
    """
    Data loading function.

    Parameters
    ----------
    train_data_path: str
        Path to the training dataset.

    test_data_path: str, optional, default = None
        Path to the test dataset. Optional; by default it is not used,
        i.e. no test set is loaded.

    target_name: str, optional, default = None
        Name of the target variable.

    Returns
    -------
    train, target: Tuple[pd.DataFrame, pd.Series]
        Tuple whose first element is the feature matrix and whose second
        is the target vector.

    """
    # Assumed minimal body: the excerpt stops at the docstring, so this
    # just follows the documented contract (CSV input is an assumption).
    train = pd.read_csv(train_data_path)
    target = train.pop(target_name) if target_name is not None else None
    return train, target
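
A sketch of the wiring on the experiment side; the experiment name is an assumption, and the path values would come from config.yaml:

from sacred import Experiment

ex = Experiment("demo", ingredients=[dataset_ingredient])  # name assumed


@ex.automain
def main():
    # train_data_path / test_data_path / target_name are filled from config.yaml
    train, target = get_input()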
Example #5
import os

import torch
import torch.nn as nn
from sacred import Ingredient
from tqdm import tqdm

from ingredients.quantize import batch_dither, quantize

test_ingredient = Ingredient('test')
test_ingredient.add_config('config.json')
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


@test_ingredient.capture
def test(model, loader, bitdepth=8, dither=False):
    total_loss, total_err = 0.0, 0.0
    iterator = tqdm(loader)
    with torch.no_grad():
        for X, y in iterator:
            X = X.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            if bitdepth != 8 and dither is False:
                X = quantize(X, bitdepth)
            elif bitdepth != 8 and dither is True:
                X = batch_dither(X, bitdepth)

            X = X.to(device, non_blocking=True)
            yp = model(X)
            loss = nn.CrossEntropyLoss()(yp, y)

            # Assumed completion: accumulate the running metrics declared
            # above and return dataset-level averages.
            total_err += (yp.max(dim=1)[1] != y).sum().item()
            total_loss += loss.item() * X.shape[0]

    return total_err / len(loader.dataset), total_loss / len(loader.dataset)
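
Because bitdepth and dither have defaults, a caller can sweep them per call. A sketch continuing in the same module, with a toy model and random data purely to show the call shapes (a real run would pass a trained model and a CIFAR-10 loader):

from torch.utils.data import DataLoader, TensorDataset

toy_model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10)).to(device)
toy_loader = DataLoader(
    TensorDataset(torch.rand(32, 3, 32, 32), torch.randint(0, 10, (32,))),
    batch_size=16)

err, loss = test(toy_model, toy_loader)  # bitdepth/dither defaults apply
err4, loss4 = test(toy_model, toy_loader, bitdepth=4, dither=True)  # per-call override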
"""
Attack code is extracted from Madry Lab and NIPS 2018 tutorial
https://github.com/MadryLab/robustness_lib
https://adversarial-ml-tutorial.org/
"""
import torch
import torch.nn as nn
from sacred import Ingredient

attacks_ingredient = Ingredient('attacks')
attacks_ingredient.add_config('config.json')


@attacks_ingredient.capture
def pgd_linf2(model, X, y, epsilon, alpha):
    # epsilon is given in 0-255 pixel units; rescale to the [0, 1] range.
    epsilon = epsilon / 255
    # Note: the captured alpha is overridden by a fixed step size of 2/255.
    alpha = 2 / 255
    delta = torch.zeros_like(X, requires_grad=True)

    X_prime = X + delta
    for t in range(20):  # 20 PGD steps
        loss = nn.CrossEntropyLoss()(model(X_prime), y)
        loss.backward()
        # Gradient-sign ascent step, projected back onto the l-inf ball.
        delta.data = (delta + alpha * delta.grad.detach().sign()).clamp(
            -epsilon, epsilon
        )
        delta.grad.zero_()
        # Keep the perturbed input a valid image in [0, 1].
        X_prime = torch.clamp(X + delta, 0, 1)
    return X_prime.detach()
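
A sketch of evaluating robustness with this attack, continuing in the same module (toy model and tensors are placeholders; epsilon is passed in 0-255 units because the function divides by 255):

toy_model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10))
X = torch.rand(8, 3, 32, 32)
y = torch.randint(0, 10, (8,))

X_adv = pgd_linf2(toy_model, X, y, epsilon=8, alpha=2)
adv_err = (toy_model(X_adv).argmax(dim=1) != y).float().mean().item()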

Example #7
from sacred import Ingredient


def verify_dataset(config, command_name, logger):
    # Config hook: reject invalid method parameters before the run starts.
    if config['method']['name'] == 'CPC':
        REGISTERED_PARAM = {
            'sampler_mode': ['random', 'diff', 'same'],
        }
        for key, valid_list in REGISTERED_PARAM.items():
            assert config['method'][key] in valid_list, "Invalid {} {}".format(
                key, config['method'][key])
    return config


data_ingredient = Ingredient('dataset')
data_ingredient.add_config({
    "name": 'oppG',
    'validation': 'ADL4-ADL5',
    'test_domain': 'S1',
    'L': 12,
    'K': 5,
})
data_ingredient.config_hook(verify_dataset)

method_ingredient = Ingredient('method')
method_ingredient.add_config({
    'name': 'CPC',
    'hidden': 1600,
    'context': 800,
    'num_gru': 1,
    'sampler_mode': 'random',
    'num_negative': 1,
    'cont_type': 'sigmoid',
    'mask_size': 1.0,
})
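
The config hook registered above makes invalid CLI overrides fail fast, before the run starts; for example (script name assumed):

# python run_sacred.py with method.sampler_mode=bogus
#   -> AssertionError: Invalid sampler_mode bogus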
Example #8

from sacred import Ingredient
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

data_ingredient = Ingredient('dataset')
data_ingredient.add_config('config.json')


@data_ingredient.capture
def load_cifar10(data_dir, batch_size, num_workers):
    transform_train = transforms.Compose(
        [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ]
    )
    transform_test = transforms.Compose([transforms.ToTensor()])

    train_set = datasets.CIFAR10(
        root=data_dir, train=True, download=True,
        transform=transform_train)
    test_set = datasets.CIFAR10(
        root=data_dir, train=False, download=True,
        transform=transform_test)

    train_loader = DataLoader(train_set, batch_size,
                              shuffle=True, pin_memory=True,
                              num_workers=num_workers)
    test_loader = DataLoader(test_set, batch_size,
                             shuffle=False, pin_memory=True,
                             num_workers=num_workers)

    # Assumed: return both loaders, mirroring the train loader above.
    return train_loader, test_loader
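
Wiring this loader into an experiment is then one line in the main script (experiment name assumed; data_dir, batch_size, and num_workers come from config.json):

from sacred import Experiment

ex = Experiment('cifar10', ingredients=[data_ingredient])  # name assumed


@ex.automain
def main():
    train_loader, test_loader = load_cifar10()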
Example #9
from sacred import Ingredient

# from dataset.tdisc import load_tdata
from dataset.sprites import load_sprites
from dataset.shapes3d import load_shapes3d
from dataset.mpi import load_mpi3d
from dataset.transforms import Triplets

import configs.datasplits as splits

dataset = Ingredient('dataset')
load_sprites = dataset.capture(load_sprites)
load_shapes3d = dataset.capture(load_shapes3d)
load_mpi3d = dataset.capture(load_mpi3d)
load_composition = dataset.capture(Triplets)

dataset.add_config(setting='unsupervised')
dataset.add_named_config('unsupervised', setting='unsupervised')
dataset.add_named_config('supervised', setting='supervised')


@dataset.capture
def get_dataset(dataset):
    if dataset == 'dsprites':
        dataset_loader = load_sprites
    elif dataset == 'shapes3d':
        dataset_loader = load_shapes3d
    elif dataset == 'mpi3d':
        dataset_loader = load_mpi3d
    elif dataset == 'composition':
        dataset_loader = load_composition
    else:
        # Assumed completion: fail loudly on an unknown dataset name.
        raise ValueError('Unknown dataset: {}'.format(dataset))
    return dataset_loader()
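
The named configs registered above can then be toggled from the command line; a sketch of the experiment wiring plus the CLI forms (experiment and file names assumed):

from sacred import Experiment

ex = Experiment('disentanglement', ingredients=[dataset])  # name assumed


@ex.automain
def main():
    data = get_dataset()  # the `dataset` key is set via config or CLI

# Assumed CLI usage:
#   python train.py with dataset.supervised
#   python train.py with "dataset.dataset=shapes3d"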