Example #1
def test_circular_dependency_raises():
    # create experiment with circular dependency
    ing = Ingredient('ing')
    ex = Experiment('exp', ingredients=[ing])
    ex.main(lambda: None)
    ing.ingredients.append(ex)

    # run and see if it raises
    with pytest.raises(CircularDependencyError, match='exp->ing->exp'):
        ex.run()
Example #2
def test_format_named_configs():
    ingred = Ingredient('ingred')
    ex = Experiment(name='experiment', ingredients=[ingred])

    @ingred.named_config
    def named_config1():
        pass

    @ex.named_config
    def named_config2():
        """named config with doc"""
        pass

    dict_config = dict(v=42)
    ingred.add_named_config('dict_config', dict_config)

    named_configs_text = _format_named_configs(
        OrderedDict(ex.gather_named_configs()))
    assert named_configs_text.startswith('Named Configurations (' + COLOR_DOC +
                                         'doc' + ENDC + '):')
    assert 'named_config2' in named_configs_text
    assert '# named config with doc' in named_configs_text
    assert 'ingred.named_config1' in named_configs_text
    assert 'ingred.dict_config' in named_configs_text
Example #3
def test_format_named_configs():
    ingred = Ingredient("ingred")
    ex = Experiment(name="experiment", ingredients=[ingred])

    @ingred.named_config
    def named_config1():
        pass

    @ex.named_config
    def named_config2():
        """named config with doc"""
        pass

    dict_config = dict(v=42)
    ingred.add_named_config("dict_config", dict_config)

    named_configs_text = _format_named_configs(
        OrderedDict(ex.gather_named_configs()))
    assert named_configs_text.startswith("Named Configurations (" + COLOR_DOC +
                                         "doc" + ENDC + "):")
    assert "named_config2" in named_configs_text
    assert "# named config with doc" in named_configs_text
    assert "ingred.named_config1" in named_configs_text
    assert "ingred.dict_config" in named_configs_text
Example #4
def test_named_config_and_ingredient():
    ing = Ingredient("foo")

    @ing.config
    def cfg():
        a = 10

    ex = Experiment(ingredients=[ing])

    @ex.config
    def default():
        b = 20

    @ex.named_config
    def named():
        b = 30

    @ex.main
    def main():
        pass

    r = ex.run(named_configs=["named"])
    assert r.config["b"] == 30
    assert r.config["foo"] == {"a": 10}
Example #5
def test_named_config_and_ingredient():
    ing = Ingredient('foo')

    @ing.config
    def cfg():
        a = 10

    ex = Experiment(ingredients=[ing])

    @ex.config
    def default():
        b = 20

    @ex.named_config
    def named():
        b = 30

    @ex.main
    def main():
        pass

    r = ex.run(named_configs=['named'])
    assert r.config['b'] == 30
    assert r.config['foo'] == {'a': 10}
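Both variants above select a named config defined on the experiment itself. As a complement, here is a minimal sketch of selecting a named config defined on an ingredient, addressed by its dotted path; the ingredient name foo and config name big_a are illustrative, not taken from the original snippets.

from sacred import Experiment, Ingredient

ing = Ingredient('foo')


@ing.config
def cfg():
    a = 10


@ing.named_config
def big_a():
    a = 100


ex = Experiment('exp', ingredients=[ing])


@ex.main
def main():
    pass


# ingredient named configs are addressed by their dotted path
r = ex.run(named_configs=['foo.big_a'])
assert r.config['foo']['a'] == 100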
Example #6
import numpy as np
from sacred import Ingredient

config_ingredient = Ingredient("cfg")


@config_ingredient.config
def cfg():
    # Base configuration
    model_config = {
        "musdb_path":
        "C:/Users/Joaquin/Documents/GitHub/Wave-U-Net/Datasets/MUSDB18/",  # SET MUSDB PATH HERE, AND SET CCMIXTER PATH IN CCMixter.xml
        "estimates_path":
        "C:/Users/Joaquin/Documents/GitHub/Wave-U-Net/Source_Estimates",  # SET THIS PATH TO WHERE YOU WANT SOURCE ESTIMATES PRODUCED BY THE TRAINED MODEL TO BE SAVED. Folder itself must exist!
        "data_path":
        "data",  # Set this to where the preprocessed dataset should be saved
        "model_base_dir": "checkpoints",  # Base folder for model checkpoints
        "log_dir": "logs",  # Base folder for logs files
        "batch_size": 16,  # Batch size
        "init_sup_sep_lr":
        1e-4,  # Supervised separator learning rate (originally 1e-4)
        "epoch_it":
        10,  # Number of supervised separator steps per epoch (originally 2000)
        'cache_size':
        4000,  # Number of audio snippets buffered in the random shuffle queue. Larger is better, since workers put multiple examples of one song into this queue. The number of different songs that is sampled from with each batch equals cache_size / num_snippets_per_track. Set as high as your RAM allows.
        'num_workers':
        4,  # Number of processes used for each TF map operation used when loading the dataset
        "num_snippets_per_track":
        100,  # Number of snippets that should be extracted from each song at a time after loading it. Higher values make data loading faster, but can reduce the batches song diversity (originally 100)
        'num_layers': 12,  # How many U-Net layers
        'filter_size':
Example #7
"""Module containing sacred functions for handling ML models."""
import inspect

from sacred import Ingredient

from src import models

ingredient = Ingredient('model')


@ingredient.config
def cfg():
    """Model configuration."""
    name = ''
    parameters = {
    }


@ingredient.named_config
def TopologicalSurrogateAutoencoder():
    """TopologicalSurrogateAutoencoder."""
    name = 'TopologicalSurrogateAutoencoder'
    parameters = {
        'd_latent': 8*2*2,
        'batch_size': 32,
        'arch': [256, 256, 256, 256]
    }


@ingredient.named_config
def Vanilla():
Example #8
from sacred import Ingredient

experiment_config = Ingredient('config')


@experiment_config.config
def cfg():
    data_fetch_config = {
        "data_dir": "./images/dset1",
        "batch_size": 128,
        "train_valid_split_ratio": 0.9
    }

    AlexNet_config = {
        "num_class": 65,
        "stop_gradient_layer": "fc7",
        "dropout_prob": 0.5,
        "stddev": 1e-4,
        "weight_decay": 0.0,
        "NUM_EPOCHES_PER_DECAY": 10,
        "INITIAL_LEARNING_RATE": 0.005,
        "LEARNING_RATE_DECAY_FACTOR": 0.9,
        "MAX_EPOCHES": 1000,
        "VALID_STEPS": 100,
        "batch_size": data_fetch_config['batch_size']
    }
Example #9
from sacred import Ingredient, Experiment

# ================== Dataset Ingredient =======================================
# could be in a separate file

data_ingredient = Ingredient('dataset')


@data_ingredient.config
def cfg1():
    filename = 'my_dataset.npy'  # dataset filename
    normalize = True  # normalize dataset


@data_ingredient.capture
def load_data(filename, normalize):
    print("loading dataset from '{}'".format(filename))
    if normalize:
        print("normalizing dataset")
        return 1
    return 42


@data_ingredient.command
def stats(filename, foo=12):
    print('Statistics for dataset "{}":'.format(filename))
    print('mean = 42.23')
    print('foo=', foo)


# ================== Experiment ===============================================
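The snippet is cut off at the experiment section. A minimal sketch of how the dataset ingredient is typically attached to an experiment follows; the experiment name and the body of run are illustrative, not part of the original file.

ex = Experiment('my_experiment', ingredients=[data_ingredient])


@ex.automain
def run():
    # captured parameters of load_data() are filled in from the 'dataset' config
    data = load_data()
    print('loaded:', data)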
Example #10
import os
import numpy as np
import pandas as pd
import gc
from common.utils import pprint, robust_zscore, download_http_resource
from loaders.dataset_v3 import Dataset
from tqdm import tqdm
import copy
from sacred import Ingredient
import pickle

DATA_SERVER = os.environ.get('DATA_SERVER', 'http://10.150.144.154:10020')

data_ingredient = Ingredient('daily_loader_v3')

EPS = 1e-12


@data_ingredient.config
def data_config():
    dset = 'day_csi300'  # dataset
    label_id = 0  # LABEL$i
    log_transform = False

    train_start_date = '2007-01-01'
    train_end_date = '2014-12-31'
    valid_start_date = '2015-01-01'
    valid_end_date = '2016-12-31'
    test_start_date = '2017-01-01'
    test_end_date = '2019-06-18'
Example #11
from sacred import Ingredient
from schnetpack.datasets import ANI1, ISO17, QM9, MD17, MaterialsProject
from schnetpack.data import AtomsData, AtomsDataError
from schnetpack.atomistic import Properties

dataset_ingredient = Ingredient("dataset")


@dataset_ingredient.config
def cfg():
    """
    Base configuration for the dataset.

    """
    dbpath = None
    dataset = 'CUSTOM'
    property_mapping = {}


@dataset_ingredient.named_config
def qm9():
    """
    Default configuration for the QM9 dataset.

    """
    dbpath = './data/qm9.db'
    dataset = 'QM9'
    property_mapping = {
        Properties.energy: QM9.U0,
        Properties.dipole_moment: QM9.mu,
        Properties.iso_polarizability: QM9.alpha
Example #12
import logging

import keras
import pandas as pd
from keras.callbacks import EarlyStopping

from sacred import Ingredient
from sklearn import preprocessing
from sklearn.cross_validation import KFold

from pypagai.util.class_loader import ClassLoader

tb_callback = keras.callbacks.TensorBoard(log_dir='.log/',
                                          histogram_freq=0,
                                          write_graph=True,
                                          write_images=True)

LOG = logging.getLogger('pypagai-logger')
model_ingredient = Ingredient('model_default_cfg')


@model_ingredient.config
def default_model_configuration():
    """
    Model configuration
    """
    model = 'pypagai.models.model_lstm.SimpleLSTM'  # Path to the ML model
    verbose = False  # True to print info about train


class BaseModel:
    """
    Base model is the class used by all model classes in the experiment framework
Example #13
from typing import Optional, Tuple, List

import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import CuDNNLSTM as LSTM
from tensorflow.keras.layers import Embedding, Dropout
from sacred import Ingredient

from rinokeras.layers import Stack
from .AbstractTapeModel import AbstractTapeModel


lstm_hparams = Ingredient('lstm')


@lstm_hparams.config
def configure_lstm():
    n_units = 1024  # noqa: F841
    n_layers = 3  # noqa: F841
    dropout = 0.1  # noqa: F841


class BidirectionalLSTM(AbstractTapeModel):

    @lstm_hparams.capture
    def __init__(self,
                 n_symbols: int,
                 n_units: int = 1024,
                 n_layers: int = 3,
                 dropout: Optional[float] = 0.1) -> None:
        super().__init__(n_symbols)
Example #14
from sacred import Ingredient
from schnetpack.md.system import System
from schnetpack.sacred.initializer_ingredient import initializer_ing, \
    build_initializer

system_ingredient = Ingredient('system', ingredients=[initializer_ing])


@system_ingredient.config
def config():
    """configuration for the system ingredient"""
    n_replicas = 1
    path_to_molecules = 'ethanol.xyz'


@system_ingredient.named_config
def ring_polymer():
    """configuration for the system ingredient"""
    n_replicas = 4
    path_to_molecules = 'ethanol.xyz'


@system_ingredient.capture
def build_system(_log, n_replicas, device, path_to_molecules):
    initializer_object = build_initializer()
    _log.info(f'Setting up system with {n_replicas} replicas')
    system = System(n_replicas, device, initializer=initializer_object)

    _log.info(f'Loading molecules from {path_to_molecules}...')
    system.load_molecules_from_xyz(path_to_molecules)
    _log.info(f'Found {system.n_molecules} molecules...')
Example #15
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sacred import Ingredient

cleaner_ingredient = Ingredient("data_cleaning")
cleaner_ingredient.add_config("config.yaml")


class ApplicationCleaning(BaseEstimator, TransformerMixin):
    """
    Cleaning of data from the application_train / application_test source.

    Parameters
    ----------
    fill_missing: bool, optional, default = False
        Whether to fill missing values. Optional; missing values are left as-is by default.

    fill_value: float, optional, default = 0
        Value used to fill missing entries.

    copy: bool, optional, default = True
        If True, the transformation is applied to a copy of the data; otherwise the
        original dataset is modified. Optional; a copy is used by default.

    """
    def __init__(self,
                 fill_missing: bool = False,
                 fill_value: float = 0,
                 copy: bool = True) -> None:
        self.fill_missing = fill_missing
Example #16
- https://github.com/hassony2/torch_videovision
- https://github.com/YU-Zhiyang/opencv_transforms_torchvision
"""

import numbers
import random

import cv2
import numpy as np
import PIL
import scipy
import torch
import torchvision
from sacred import Ingredient

training_ingredient = Ingredient('transforms')


@training_ingredient.config
def training_config():
    preprocess_fns = [
        {
            'fn': 'resize_clip',
            'args': {
                'size': (224, 224)
            }
        },
        {
            'fn': 'normalize',
            'args': {}
        },
Example #17
from torch.utils import data
from tensorboardX import SummaryWriter

from label_predict import Classifier, validate_label_prediction
from run_sacred import get_model, get_dataset
from run_sacred import data_ingredient, method_ingredient, optim_ingredient, get_feature_of
from sacred import Experiment, Ingredient
from sacred_wrap import MongoExtractor

from utils import get_split_samplers, SplitBatchSampler
from utils import get_split_datasets
from utils import flatten_dict
from divergence import CMD, pairwise_divergence


classifier_ingredient = Ingredient('classifier')
classifier_ingredient.add_config({
    'pretrain': False,
    'finetune_g': False,
    'use_c_enc': False,
    'finetune_c': False,
    'hiddens': None,
    'auxiliary': 0.0,
    'label_size': 1.0,
})

classifier_optim_ingredient = Ingredient('classifier_optim')
classifier_optim_ingredient.add_config({
    'lr': 0.001,
    'num_batch': 30000,
    'batch_size': 128,
Example #18
from sacred import Ingredient

from hibashi.data.datasets.datasets import Dataset
from hibashi.models.finetune.data import datasets, augmentations

train = Ingredient('train')
train_data = Ingredient('train_data')
val_data = Ingredient('val_data')


@train.config
def train_cfg():
    connect_mongo_db = False
    connect_slack = False
    n_epochs = 160
    metadata_path = '/home/ubuntu/tensorboard_logs'  # '/Users/elias/Downloads' #'/home/ubuntu/tensorboard_logs'
    log_interval = 100
    img_log_interval = 1000
    eval_interval = 125  # Run evaluator every n iterations
    save_interval = 1
    save_n_last = 5
    overwrite_id_with = '17-ce_weighted-lr_schedule_Affine_RandomResizedCropFlip-imagenet_pretrained'


@train_data.config
def train_data_cfg():
    name = 'FashionFinetuneTrain'

    ds_params = Dataset.get_dataset_params(name)
    ds_params['base_data_path'] = '/mnt/ramdisk/fashion-dataset' # '/Users/elias/Google Drive/datasets/fashion-dataset' # '/mnt/ramdisk/fashion-dataset'
    ds_params['aug_names'] = ('Affine', 'RandomResizedCropFlip')
Example #19
import json

from sacred import Ingredient

from ingredients.corpus import ing as corpus_ingredient, read_jsonl
from models import AbstractSummarizer

# TODO Putting corpus ingredient here does not feel right. When summarizing, we do not need
# the corpus. Any jsonl file will do. What we need here is the `read_jsonl` function and its
# preprocessing. That might be best put in a separate ingredient.
ing = Ingredient('summ', ingredients=[corpus_ingredient])


@ing.config
def cfg():
    # path to the JSONL file to summarize
    path = 'test.jsonl'
    # extract at most this number of sentences as summary
    size = 3


@ing.capture
def run_summarization(model: AbstractSummarizer, path, size=3):
    for doc in read_jsonl(path):
        summary = set(model.summarize(doc, size=size))
        sent_id = 0
Example #20
from typing import Optional, Tuple, List

import numpy as np
from sacred import Ingredient

import rinokeras as rk
from rinokeras.models.transformer import TransformerInputEmbedding, TransformerEncoder

from .AbstractTapeModel import AbstractTapeModel

transformer_hparams = Ingredient('transformer')


@transformer_hparams.config
def configure_transformer():
    n_layers = 12  # noqa: F841
    n_heads = 8  # noqa: F841
    d_model = 512  # noqa: F841
    d_filter = 4 * d_model  # noqa: F841
    dropout = 0.1  # noqa: F841
    layer_dropout = 0.  # noqa: F841
    kernel_regularizer = None  # noqa: F841


class Transformer(AbstractTapeModel):
    @transformer_hparams.capture
    def __init__(self,
                 n_symbols: int,
                 n_layers: int = 12,
                 n_heads: int = 8,
                 d_model: int = 512,
Example #21
from math import sqrt

from sacred import Ingredient
from torch.optim.lr_scheduler import LambdaLR

lr_schedule_ingredient = Ingredient("lr_schedule")


@lr_schedule_ingredient.config
def config():
    """
    Possible schedule types:
        constant - Is fixed at base_lr
        linear - Linearly interpolates between scheduled_lrs over durations specified in
            lr_durations and remains constant at the last lr
        fixed_schedule - Iterates through scheduled_lrs over durations specified in
            lr_durations and remains constant at the last lr
    """
    schedule_type = "fixed_schedule"
    base_lr = 0.001
    fixed_embedding_lr = 50
    if schedule_type == "linear" or schedule_type == "fixed_schedule":
        base_lr = 1

    scheduled_lrs = [0.001, 0.0005]
    lr_durations = [1]
    decay_rate = 20


@lr_schedule_ingredient.capture
def get_lr_scheduler(optimizer, schedule_type, base_lr, scheduled_lrs,
Example #22
from typing import Optional, Tuple, List

import tensorflow as tf
from tensorflow.keras.layers import Embedding, Lambda
import numpy as np
from sacred import Ingredient

import rinokeras as rk
from rinokeras.layers import Stack, ResidualBlock, PaddedConv, PositionEmbedding

from .AbstractTapeModel import AbstractTapeModel

vae_hparams = Ingredient('vae')


@vae_hparams.config
def configure_vae():
    n_layers = 35  # noqa: F841
    filters = 256  # noqa: F841
    kernel_size = 9  # noqa: F841
    layer_norm = False  # noqa: F841
    activation = 'relu'  # noqa: F841
    dilation_rate = 2  # noqa: F841
    dropout = 0.1  # noqa: F841
    latent_size = 2


class VAE(AbstractTapeModel):
    @vae_hparams.capture
    def __init__(self,
                 n_symbols: int,
Example #23
import os
import time
from collections import OrderedDict
from sacred import Ingredient
from tqdm import tqdm
import numpy as np
import torch
from torch import nn, optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from ignite.engine import create_supervised_trainer, create_supervised_evaluator, Events
from ignite.metrics import Accuracy, Loss, RunningAverage
from ignite.handlers import ModelCheckpoint, EarlyStopping
from ignite.contrib.metrics.roc_auc import ROC_AUC
from nets.losses import RMSELoss, PvarLossWrapper

train_ingredient = Ingredient('train')


@train_ingredient.config
def config():
    optimizer_name = 'adam'
    loss_str = 'ce'
    lr = None
    max_epochs = 1000
    metrics = ['loss']
    val_metric_to_monitor = 'loss'
    epoch_per_metric = 1
    print_freq = 5
    plateau_patience = 15
    plateau_terminate = 60
    gpu_if_available = True
Example #24
import pandas as pd
from sklearn.model_selection import train_test_split
from sacred import Ingredient

train_data_ingredient = Ingredient('train_dataset')


@train_data_ingredient.config
def cfg():
    filename = 'data/train.csv'
    target = 'Survived'
    split_size = .75


@train_data_ingredient.capture
def load_data(filename, target, split_size):
    data = pd.read_csv(filename)
    features = [i for i in data.columns if i != target]
    return train_test_split(data[features],
                            data[target],
                            train_size=split_size)
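A minimal sketch of consuming this ingredient from an experiment and overriding one of its values with a nested config update; the experiment name 'titanic' and the 0.8 split are illustrative assumptions, not part of the original snippet.

from sacred import Experiment

ex = Experiment('titanic', ingredients=[train_data_ingredient])


@ex.main
def main():
    # filename, target and split_size are injected from the 'train_dataset' config
    X_train, X_test, y_train, y_test = load_data()
    return len(X_train), len(X_test)


# override an ingredient value for a single run
ex.run(config_updates={'train_dataset': {'split_size': 0.8}})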
Example #25
from CMerModel import CMerModel, ppr_factory, FunctionHolder, arrayHasher
from CMerModel import SetArrayHasher
from CMerModel import ArrayPerfectHasher
from CMerModel import DictArrayHasher
import numpy as np
import pandas as pd
from sacred import Ingredient
from itertools import combinations
from scipy import sparse

cf_ingredient = Ingredient('cf')
prunner = ppr_factory()
# imap = arrayHasher()
# imap = SetArrayHasher()
imap = DictArrayHasher()


@cf_ingredient.config
def cfg():
    use_counts = False
    mers = 1


@cf_ingredient.capture
def gen_indices_map(cf_counts_dfs, use_counts, mers):
    # type: (list, bool, int) -> object
    if use_counts:
        for df in cf_counts_dfs:
            imap.hashArrayWithRep(df['feature'].values)
    else:
        for df in cf_counts_dfs:
Example #26
import json

from sacred import Ingredient
from sklearn.metrics import confusion_matrix, f1_score, precision_recall_fscore_support

from ingredients.corpus import ing as corpus_ingredient, read_train_corpus, read_dev_corpus, \
    read_test_corpus
from utils import SACRED_OBSERVE_FILES

ing = Ingredient('eval', ingredients=[corpus_ingredient])


@ing.config
def cfg():
    # which set of the corpus to evaluate on [train, dev, test]
    which = 'test'
    # where to serialize the full evaluation result
    path = None
    # where to save the confusion matrix
    cm_path = None
    # whether to use weighted macro-averaged F1
    weighted = True


@ing.capture
def evaluate_fully(gold_labels, pred_labels, path, _log, _run, result=None):
    if result is None:
        result = {}

    all_labels = list(set(gold_labels + pred_labels))
    prec, rec, f1, _ = precision_recall_fscore_support(gold_labels,
Example #27
from .utils import get_mi, get_cond_entropy, get_entropy, get_one_hot
from tqdm import tqdm
from sacred import Ingredient
import torch
import time

tim_ingredient = Ingredient('tim')


@tim_ingredient.config
def config():
    temp = 15
    loss_weights = [0.1, 1.0, 0.1]  # [Xent, H(Y), H(Y|X)]
    lr = 1e-4
    iter = 150
    alpha = 1.0


class TIM(object):
    @tim_ingredient.capture
    def __init__(self, temp, loss_weights, iter, model):
        self.temp = temp
        self.loss_weights = loss_weights.copy()
        self.iter = iter
        self.model = model
        self.init_info_lists()

    def init_info_lists(self):
        self.timestamps = []
        self.mutual_infos = []
        self.entropy = []
Example #28
import os
# set numpy environment variables
os.environ["OMP_NUM_THREADS"] = "1"  # export OMP_NUM_THREADS=4
os.environ["OPENBLAS_NUM_THREADS"] = "1"  # export OPENBLAS_NUM_THREADS=4
os.environ["MKL_NUM_THREADS"] = "1"  # export MKL_NUM_THREADS=6
os.environ["VECLIB_MAXIMUM_THREADS"] = "1"  # export VECLIB_MAXIMUM_THREADS=4
os.environ["NUMEXPR_NUM_THREADS"] = "1"  # export NUMEXPR_NUM_THREADS=6

import logging
import sys

import numpy as np
import torch
from sacred import Ingredient

dataset_dist_ingred = Ingredient('dataset_dist')
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
logger = logging.getLogger("Dataset Distribution")


@dataset_dist_ingred.config
def cfg():
    M = 10
    client_size_factor = 0
    class_balance_factor = 0
    dataset_seed = None


@dataset_dist_ingred.capture
def generate_dataset_distribution_func(_run, M, client_size_factor,
                                       class_balance_factor, dataset_seed):
Example #29
from sacred import Ingredient
import os
import torch

from schnetpack.md.calculators import SchnetPackCalculator
from schnetpack.md.utils import MDUnits

calculator_ingradient = Ingredient('calculator')


@calculator_ingradient.config
def config():
    """configuration for the calculator ingredient"""
    calculator = 'schnet_calculator'
    required_properties = ['y', 'dydx']
    force_handle = 'dydx'
    position_conversion = 1.0 / MDUnits.angs2bohr
    force_conversion = 1.0 / MDUnits.auforces2aseforces
    property_conversion = {}

    model_path = 'eth_ens_01.model'
    # If model is a directory, search for best_model file
    if os.path.isdir(model_path):
        model_path = os.path.join(model_path, 'best_model')


@calculator_ingradient.capture
def load_model(_log, model_path, device):
    _log.info('Loaded model from {:s}'.format(model_path))
    model = torch.load(model_path).to(device)
    return model
Example #30
import time
import tqdm
import torch
import pandas as pd
from sacred import Ingredient

from dfp.utils import save_model

evaluator = Ingredient('evaluator')


@evaluator.capture
def evaluate_policy(env, model, policy, n_eval_episodes, epoch, run_dir, _log,
                    _run):

    logger = _log

    # EVALUATION
    logger.info(f"Evaluating ...")
    eval_tic = time.time()
    model.eval()

    eval_metrics = []
    with torch.no_grad():

        for _ in tqdm.trange(n_eval_episodes):
            obs = env.reset()
            done = False
            episode_reward = 0
            episode_steps = 0
            while not done: