import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sacred import Ingredient

cleaner_ingredient = Ingredient("data_cleaning")
cleaner_ingredient.add_config("config.yaml")


class ApplicationCleaning(BaseEstimator, TransformerMixin):
    """
    Cleaning of data from the application_train / application_test source.

    Parameters
    ----------
    fill_missing: bool, optional, default = False
        Missing-value imputation flag. Optional; disabled by default.

    fill_value: float, optional, default = 0
        Value used to fill in missing entries.

    copy: bool, optional, default = True
        If True, the transformation is applied to a copy of the data,
        otherwise to the original dataset. Optional; a copy is used
        by default.

    """
    def __init__(self,
                 fill_missing: bool = False,
                 fill_value: float = 0,
                 copy: bool = True) -> None:
        self.fill_missing = fill_missing
        self.fill_value = fill_value
        self.copy = copy
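
# Hypothetical usage sketch (not in the original file): wiring the cleaner
# into a Sacred captured function, assuming the class's fit/transform
# methods (not shown in this fragment) follow the usual sklearn protocol,
# and that `fill_missing`/`fill_value` live in the "data_cleaning" section
# of config.yaml.
@cleaner_ingredient.capture
def clean_application(data: pd.DataFrame,
                      fill_missing: bool,
                      fill_value: float) -> pd.DataFrame:
    cleaner = ApplicationCleaning(fill_missing=fill_missing,
                                  fill_value=fill_value)
    return cleaner.fit_transform(data)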
from run_sacred import data_ingredient, method_ingredient, optim_ingredient, get_feature_of
from sacred import Experiment, Ingredient
from sacred_wrap import MongoExtractor
from utils import get_split_samplers, SplitBatchSampler
from utils import get_split_datasets
from utils import flatten_dict
from divergence import CMD, pairwise_divergence

classifier_ingredient = Ingredient('classifier')
classifier_ingredient.add_config({
    'pretrain': False,
    'finetune_g': False,
    'use_c_enc': False,
    'finetune_c': False,
    'hiddens': None,
    'auxiliary': 0.0,
    'label_size': 1.0,
})

classifier_optim_ingredient = Ingredient('classifier_optim')
classifier_optim_ingredient.add_config({
    'lr': 0.001,
    'num_batch': 30000,
    'batch_size': 128,
    'monitor_per': 100,
})


def get_classifier(model, num_classes, finetune_g, use_c_enc, finetune_c,
                   hiddens, **kwargs):
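    # Hypothetical stub (not in the original file) so the truncated module
    # parses; the real body builds the classifier head on top of `model`
    # according to the finetune/encoder flags above.
    raise NotImplementedError


# Hypothetical wiring sketch (the experiment name is an assumption): Sacred
# injects each ingredient's config into captured functions as a dict named
# after the ingredient.
ex = Experiment('classifier_experiment',
                ingredients=[data_ingredient, method_ingredient, optim_ingredient,
                             classifier_ingredient, classifier_optim_ingredient])


@ex.automain
def main(classifier, classifier_optim):
    print(classifier['pretrain'], classifier_optim['lr'])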
import os

import torch
import torch.nn as nn
import torch.optim as optim
from sacred import Ingredient

from ingredients.quantize import batch_dither, quantize

train_ingredient = Ingredient('train')
train_ingredient.add_config('config.json')

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


@train_ingredient.capture
def train(trained_models_dir, epochs, lr_steps, model, loader, _run,
          momentum, weight_decay, opt, model_name, bitdepth=8, dither=False):
    scheduler = optim.lr_scheduler.StepLR(opt, step_size=lr_steps, gamma=0.1)
    n_batches = len(loader)
    for epoch in range(epochs):
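        # Hypothetical sketch (not in the original file) of the truncated
        # epoch body, assuming it mirrors the quantize/dither handling in
        # test() and logs through Sacred's `_run`. The filename pattern used
        # for the checkpoint is also an assumption.
        for X, y in loader:
            X = X.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)
            if bitdepth != 8:
                X = batch_dither(X, bitdepth) if dither else quantize(X, bitdepth)
            opt.zero_grad()
            loss = nn.CrossEntropyLoss()(model(X), y)
            loss.backward()
            opt.step()
        scheduler.step()
        _run.log_scalar('train.loss', loss.item())
    torch.save(model.state_dict(),
               os.path.join(trained_models_dir, '{}.pt'.format(model_name)))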
from typing import Optional

import pandas as pd
from sacred import Ingredient

dataset_ingredient = Ingredient("dataset")
dataset_ingredient.add_config("config.yaml")


@dataset_ingredient.capture
def get_input(train_data_path: str,
              test_data_path: Optional[str] = None,
              target_name: Optional[str] = None):
    """
    Data-loading function.

    Parameters
    ----------
    train_data_path: str
        Path to the training set.

    test_data_path: str, optional, default = None
        Path to the test set. Optional. Not used by default,
        i.e. no test set is loaded.

    target_name: str, optional, default = None
        Name of the target variable.

    Returns
    -------
    train, target: Tuple[pd.DataFrame, pd.Series]
        A tuple whose first element is the feature matrix and whose
        second is the target vector.

    """
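    # Hypothetical sketch (not in the original file) of a body consistent
    # with the docstring: load the training data and split off the target.
    # The CSV format is an assumption.
    data = pd.read_csv(train_data_path)
    target = data[target_name]
    train = data.drop(columns=[target_name])
    return train, target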
import os

import torch
import torch.nn as nn
from sacred import Ingredient
from tqdm import tqdm

from ingredients.quantize import batch_dither, quantize

test_ingredient = Ingredient('test')
test_ingredient.add_config('config.json')

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


@test_ingredient.capture
def test(model, loader, bitdepth=8, dither=False):
    total_loss, total_err = 0.0, 0.0
    iterator = tqdm(loader)
    with torch.no_grad():
        for X, y in iterator:
            X = X.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)
            # Re-quantize inputs below 8 bits, with optional dithering.
            if bitdepth != 8 and not dither:
                X = quantize(X, bitdepth)
            elif bitdepth != 8 and dither:
                X = batch_dither(X, bitdepth)
            # Ensure the (possibly re-quantized) batch is on the device.
            X = X.to(device, non_blocking=True)
            yp = model(X)
            loss = nn.CrossEntropyLoss()(yp, y)
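            # Hypothetical sketch (not in the original file): accumulate the
            # running error/loss that the function initializes above and
            # report dataset-level averages.
            total_err += (yp.argmax(dim=1) != y).sum().item()
            total_loss += loss.item() * X.size(0)
    return total_err / len(loader.dataset), total_loss / len(loader.dataset)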
""" Attack code is extracted from Madry Lab and NIPS 2018 tutorial https://github.com/MadryLab/robustness_lib https://adversarial-ml-tutorial.org/ """ import torch import torch.nn as nn from sacred import Ingredient attacks_ingredient = Ingredient('attacks') attacks_ingredient.add_config('config.json') @attacks_ingredient.capture def pgd_linf2(model, X, y, epsilon, alpha): epsilon = epsilon / 255 alpha = 2 / 255 delta = torch.zeros_like(X, requires_grad=True) X_prime = X + delta for t in range(20): loss = nn.CrossEntropyLoss()(model(X_prime), y) loss.backward() delta.data = (delta + alpha * delta.grad.detach().sign()).clamp( -epsilon, epsilon ) delta.grad.zero_() X_prime = torch.clamp(X+delta, 0, 1) return X_prime.detach()
from sacred import Ingredient


def verify_dataset(config, command_name, logger):
    # Reconstructed hook header: Sacred config hooks take
    # (config, command_name, logger) and return the (possibly updated) config.
    if config['method']['name'] == 'CPC':
        REGISTERED_PARAM = {
            'sampler_mode': ['random', 'diff', 'same'],
        }
        for key, valid_list in REGISTERED_PARAM.items():
            assert config['method'][key] in valid_list, "Invalid {} {}".format(
                key, config['method'][key])
    return config


data_ingredient = Ingredient('dataset')
data_ingredient.add_config({
    "name": 'oppG',
    'validation': 'ADL4-ADL5',
    'test_domain': 'S1',
    'L': 12,
    'K': 5,
})
data_ingredient.config_hook(verify_dataset)

method_ingredient = Ingredient('method')
method_ingredient.add_config({
    'name': 'CPC',
    'hidden': 1600,
    'context': 800,
    'num_gru': 1,
    'sampler_mode': 'random',
    'num_negative': 1,
    'cont_type': 'sigmoid',
    'mask_size': 1.0,
})
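
# Hypothetical sketch (not in the original file; the experiment name is an
# assumption): these ingredients would be attached to an Experiment, and the
# hook re-validates `method.sampler_mode` on every run, including
# command-line overrides such as `with method.sampler_mode=diff`; an
# unregistered value trips the AssertionError above.
from sacred import Experiment

ex = Experiment('cpc', ingredients=[data_ingredient, method_ingredient])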
from sacred import Ingredient
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

data_ingredient = Ingredient('dataset')
data_ingredient.add_config('config.json')


@data_ingredient.capture
def load_cifar10(data_dir, batch_size, num_workers):
    transform_train = transforms.Compose(
        [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ]
    )
    transform_test = transforms.Compose([transforms.ToTensor()])
    train_set = datasets.CIFAR10(
        root=data_dir, train=True, download=True, transform=transform_train)
    test_set = datasets.CIFAR10(
        root=data_dir, train=False, download=True, transform=transform_test)
    train_loader = DataLoader(train_set, batch_size, shuffle=True,
                              pin_memory=True, num_workers=num_workers)
    test_loader = DataLoader(test_set, batch_size, shuffle=False,
                             pin_memory=True, num_workers=num_workers)
    return train_loader, test_loader
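
# Hypothetical sketch (not in the original repo): a config.json consistent
# with load_cifar10's captured arguments. The values are illustrative;
# configs added directly to an ingredient live in that ingredient's
# namespace, so the keys are flat:
#
#   {
#       "data_dir": "./data",
#       "batch_size": 128,
#       "num_workers": 4
#   }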
from sacred import Ingredient

# from dataset.tdisc import load_tdata
from dataset.sprites import load_sprites
from dataset.shapes3d import load_shapes3d
from dataset.mpi import load_mpi3d
from dataset.transforms import Triplets
import configs.datasplits as splits

dataset = Ingredient('dataset')

load_sprites = dataset.capture(load_sprites)
load_shapes3d = dataset.capture(load_shapes3d)
load_mpi3d = dataset.capture(load_mpi3d)
load_composition = dataset.capture(Triplets)

dataset.add_config(setting='unsupervised')
dataset.add_named_config('unsupervised', setting='unsupervised')
dataset.add_named_config('supervised', setting='supervised')


@dataset.capture
def get_dataset(dataset):
    if dataset == 'dsprites':
        dataset_loader = load_sprites
    elif dataset == 'shapes3d':
        dataset_loader = load_shapes3d
    elif dataset == 'mpi3d':
        dataset_loader = load_mpi3d
    elif dataset == 'composition':
        dataset_loader = load_composition
    else:
        # Reconstructed fallback for the truncated else branch.
        raise ValueError('Unknown dataset: {}'.format(dataset))
    return dataset_loader  # reconstructed: return the selected loader
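
# Hypothetical sketch (not in the original file; the experiment name is an
# assumption): named configs are selected per ingredient on the command
# line, e.g. `python main.py with dataset.supervised`, which flips
# `setting` from its default 'unsupervised' before get_dataset dispatches.
from sacred import Experiment

ex = Experiment('disentanglement', ingredients=[dataset])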