def test_circular_dependency_raises():
    # create experiment with circular dependency
    ing = Ingredient('ing')
    ex = Experiment('exp', ingredients=[ing])
    ex.main(lambda: None)
    ing.ingredients.append(ex)

    # run and see if it raises
    with pytest.raises(CircularDependencyError, match='exp->ing->exp'):
        ex.run()
def test_format_named_configs():
    ingred = Ingredient("ingred")
    ex = Experiment(name="experiment", ingredients=[ingred])

    @ingred.named_config
    def named_config1():
        pass

    @ex.named_config
    def named_config2():
        """named config with doc"""
        pass

    dict_config = dict(v=42)
    ingred.add_named_config("dict_config", dict_config)

    named_configs_text = _format_named_configs(
        OrderedDict(ex.gather_named_configs()))
    assert named_configs_text.startswith(
        "Named Configurations (" + COLOR_DOC + "doc" + ENDC + "):")
    assert "named_config2" in named_configs_text
    assert "# named config with doc" in named_configs_text
    assert "ingred.named_config1" in named_configs_text
    assert "ingred.dict_config" in named_configs_text
def test_named_config_and_ingredient():
    ing = Ingredient("foo")

    @ing.config
    def cfg():
        a = 10

    ex = Experiment(ingredients=[ing])

    @ex.config
    def default():
        b = 20

    @ex.named_config
    def named():
        b = 30

    @ex.main
    def main():
        pass

    r = ex.run(named_configs=["named"])
    assert r.config["b"] == 30
    assert r.config["foo"] == {"a": 10}
import numpy as np
from sacred import Ingredient

config_ingredient = Ingredient("cfg")

@config_ingredient.config
def cfg():
    # Base configuration
    model_config = {
        # SET MUSDB PATH HERE, AND SET CCMIXTER PATH IN CCMixter.xml
        "musdb_path": "C:/Users/Joaquin/Documents/GitHub/Wave-U-Net/Datasets/MUSDB18/",
        # SET THIS PATH TO WHERE YOU WANT SOURCE ESTIMATES PRODUCED BY THE TRAINED
        # MODEL TO BE SAVED. Folder itself must exist!
        "estimates_path": "C:/Users/Joaquin/Documents/GitHub/Wave-U-Net/Source_Estimates",
        "data_path": "data",              # Where the preprocessed dataset should be saved
        "model_base_dir": "checkpoints",  # Base folder for model checkpoints
        "log_dir": "logs",                # Base folder for log files
        "batch_size": 16,                 # Batch size
        "init_sup_sep_lr": 1e-4,          # Supervised separator learning rate (originally 1e-4)
        "epoch_it": 10,                   # Number of supervised separator steps per epoch (originally 2000)
        # Number of audio snippets buffered in the random shuffle queue. Larger is
        # better, since workers put multiple examples of one song into this queue.
        # The number of different songs sampled from with each batch equals
        # cache_size / num_snippets_per_track. Set as high as your RAM allows.
        "cache_size": 4000,
        "num_workers": 4,  # Number of processes used for each TF map operation when loading the dataset
        # Number of snippets extracted from each song at a time after loading it.
        # Higher values make data loading faster, but can reduce the batch's song
        # diversity (originally 100).
        "num_snippets_per_track": 100,
        "num_layers": 12,  # How many U-Net layers
        "filter_size":
"""Module containing sacred functions for handling ML models.""" import inspect from sacred import Ingredient from src import models ingredient = Ingredient('model') @ingredient.config def cfg(): """Model configuration.""" name = '' parameters = { } @ingredient.named_config def TopologicalSurrogateAutoencoder(): """TopologicalSurrogateAutoencoder.""" name = 'TopologicalSurrogateAutoencoder' parameters = { 'd_latent': 8*2*2, 'batch_size': 32, 'arch': [256, 256, 256, 256] } @ingredient.named_config def Vanilla():
from sacred import Ingredient

experiment_config = Ingredient('config')

@experiment_config.config
def cfg():
    data_fetch_config = {
        "data_dir": "./images/dset1",
        "batch_size": 128,
        "train_valid_split_ratio": 0.9
    }
    AlexNet_config = {
        "num_class": 65,
        "stop_gradient_layer": "fc7",
        "dropout_prob": 0.5,
        "stddev": 1e-4,
        "weight_decay": 0.0,
        "NUM_EPOCHES_PER_DECAY": 10,
        "INITIAL_LEARNING_RATE": 0.005,
        "LEARNING_RATE_DECAY_FACTOR": 0.9,
        "MAX_EPOCHES": 1000,
        "VALID_STEPS": 100,
        "batch_size": data_fetch_config['batch_size']
    }
from sacred import Ingredient, Experiment

# ================== Dataset Ingredient =======================================
# could be in a separate file

data_ingredient = Ingredient('dataset')

@data_ingredient.config
def cfg1():
    filename = 'my_dataset.npy'  # dataset filename
    normalize = True  # normalize dataset

@data_ingredient.capture
def load_data(filename, normalize):
    print("loading dataset from '{}'".format(filename))
    if normalize:
        print("normalizing dataset")
        return 1
    return 42

@data_ingredient.command
def stats(filename, foo=12):
    print('Statistics for dataset "{}":'.format(filename))
    print('mean = 42.23')
    print('foo=', foo)

# ================== Experiment ===============================================
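# The "Experiment" banner above suggests the snippet continues with an
# experiment that attaches the ingredient. A minimal continuation sketch
# (experiment name and main body are illustrative, not from the original):
ex = Experiment('my_experiment', ingredients=[data_ingredient])

@ex.automain
def run():
    data = load_data()  # captured args are filled in from the 'dataset' config
    print('data = {}'.format(data))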
import os
import numpy as np
import pandas as pd
import gc

from common.utils import pprint, robust_zscore, download_http_resource
from loaders.dataset_v3 import Dataset
from tqdm import tqdm
import copy
from sacred import Ingredient
import pickle

DATA_SERVER = os.environ.get('DATA_SERVER', 'http://10.150.144.154:10020')

data_ingredient = Ingredient('daily_loader_v3')

EPS = 1e-12

@data_ingredient.config
def data_config():
    dset = 'day_csi300'  # dataset
    label_id = 0  # LABEL$i
    log_transform = False
    train_start_date = '2007-01-01'
    train_end_date = '2014-12-31'
    valid_start_date = '2015-01-01'
    valid_end_date = '2016-12-31'
    test_start_date = '2017-01-01'
    test_end_date = '2019-06-18'
from sacred import Ingredient

from schnetpack.datasets import ANI1, ISO17, QM9, MD17, MaterialsProject
from schnetpack.data import AtomsData, AtomsDataError
from schnetpack.atomistic import Properties

dataset_ingredient = Ingredient("dataset")

@dataset_ingredient.config
def cfg():
    """Base configuration for the dataset."""
    dbpath = None
    dataset = 'CUSTOM'
    property_mapping = {}

@dataset_ingredient.named_config
def qm9():
    """Default configuration for the QM9 dataset."""
    dbpath = './data/qm9.db'
    dataset = 'QM9'
    property_mapping = {
        Properties.energy: QM9.U0,
        Properties.dipole_moment: QM9.mu,
        Properties.iso_polarizability: QM9.alpha
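# For reference, a named config such as `qm9` is activated through the parent
# experiment. A sketch, assuming an experiment that attaches the ingredient
# (the experiment name and main body are illustrative):
from sacred import Experiment

ex = Experiment('train', ingredients=[dataset_ingredient])

@ex.main
def main(dataset):
    print(dataset['dbpath'])  # './data/qm9.db' once the named config is applied

# equivalent to `python train.py with dataset.qm9` on the command line
ex.run(named_configs=['dataset.qm9'])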
import logging

import keras
import pandas as pd
from keras.callbacks import EarlyStopping
from sacred import Ingredient
from sklearn import preprocessing
from sklearn.cross_validation import KFold

from pypagai.util.class_loader import ClassLoader

tb_callback = keras.callbacks.TensorBoard(log_dir='.log/', histogram_freq=0,
                                          write_graph=True, write_images=True)

LOG = logging.getLogger('pypagai-logger')

model_ingredient = Ingredient('model_default_cfg')

@model_ingredient.config
def default_model_configuration():
    """Model configuration"""
    model = 'pypagai.models.model_lstm.SimpleLSTM'  # Path to the ML model
    verbose = False  # True to print info about train

class BaseModel:
    """
    Base model is the class used by all model classes in the experiment framework
from typing import Optional, Tuple, List

import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import CuDNNLSTM as LSTM
from tensorflow.keras.layers import Embedding, Dropout
from sacred import Ingredient

from rinokeras.layers import Stack

from .AbstractTapeModel import AbstractTapeModel

lstm_hparams = Ingredient('lstm')

@lstm_hparams.config
def configure_lstm():
    n_units = 1024  # noqa: F841
    n_layers = 3  # noqa: F841
    dropout = 0.1  # noqa: F841

class BidirectionalLSTM(AbstractTapeModel):

    @lstm_hparams.capture
    def __init__(self,
                 n_symbols: int,
                 n_units: int = 1024,
                 n_layers: int = 3,
                 dropout: Optional[float] = 0.1) -> None:
        super().__init__(n_symbols)
from sacred import Ingredient

from schnetpack.md.system import System
from schnetpack.sacred.initializer_ingredient import initializer_ing, \
    build_initializer

system_ingredient = Ingredient('system', ingredients=[initializer_ing])

@system_ingredient.config
def config():
    """configuration for the system ingredient"""
    n_replicas = 1
    path_to_molecules = 'ethanol.xyz'

@system_ingredient.named_config
def ring_polymer():
    """configuration for the system ingredient"""
    n_replicas = 4
    path_to_molecules = 'ethanol.xyz'

@system_ingredient.capture
def build_system(_log, n_replicas, device, path_to_molecules):
    initializer_object = build_initializer()
    _log.info(f'Setting up system with {n_replicas} replicas')
    system = System(n_replicas, device, initializer=initializer_object)
    _log.info(f'Loading molecules from {path_to_molecules}...')
    system.load_molecules_from_xyz(path_to_molecules)
    _log.info(f'Found {system.n_molecules} molecules...')
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sacred import Ingredient

cleaner_ingredient = Ingredient("data_cleaning")
cleaner_ingredient.add_config("config.yaml")

class ApplicationCleaning(BaseEstimator, TransformerMixin):
    """
    Cleaning of data from the application_train / application_test source.

    Parameters
    ----------
    fill_missing: bool, optional, default = False
        Flag for filling missing values. Optional parameter; disabled by default.

    fill_value: float, optional, default = 0
        Value used to fill missing entries.

    copy: bool, optional, default = True
        If True, the transformation operates on a copy of the data;
        otherwise, on the original dataset. Optional parameter; a copy
        is used by default.
    """
    def __init__(self,
                 fill_missing: bool = False,
                 fill_value: float = 0,
                 copy: bool = True) -> None:
        self.fill_missing = fill_missing
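# A hypothetical usage sketch; the CSV filename is illustrative, and the
# fit/transform behaviour is assumed from the TransformerMixin contract
# rather than shown in the truncated class above:
df = pd.read_csv("application_train.csv")
cleaner = ApplicationCleaning(fill_missing=True, fill_value=0)
df_clean = cleaner.fit_transform(df)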
"""
- https://github.com/hassony2/torch_videovision
- https://github.com/YU-Zhiyang/opencv_transforms_torchvision
"""
import numbers
import random

import cv2
import numpy as np
import PIL
import scipy
import torch
import torchvision
from sacred import Ingredient

training_ingredient = Ingredient('transforms')

@training_ingredient.config
def training_config():
    preprocess_fns = [
        {
            'fn': 'resize_clip',
            'args': {
                'size': (224, 224)
            }
        },
        {
            'fn': 'normalize',
            'args': {}
        },
from torch.utils import data
from tensorboardX import SummaryWriter

from label_predict import Classifier, validate_label_prediction
from run_sacred import get_model, get_dataset
from run_sacred import data_ingredient, method_ingredient, optim_ingredient, get_feature_of
from sacred import Experiment, Ingredient
from sacred_wrap import MongoExtractor

from utils import get_split_samplers, SplitBatchSampler
from utils import get_split_datasets
from utils import flatten_dict
from divergence import CMD, pairwise_divergence

classifier_ingredient = Ingredient('classifier')
classifier_ingredient.add_config({
    'pretrain': False,
    'finetune_g': False,
    'use_c_enc': False,
    'finetune_c': False,
    'hiddens': None,
    'auxiliary': 0.0,
    'label_size': 1.0,
})

classifier_optim_ingredient = Ingredient('classifier_optim')
classifier_optim_ingredient.add_config({
    'lr': 0.001,
    'num_batch': 30000,
    'batch_size': 128,
from sacred import Ingredient

from hibashi.data.datasets.datasets import Dataset
from hibashi.models.finetune.data import datasets, augmentations

train = Ingredient('train')
train_data = Ingredient('train_data')
val_data = Ingredient('val_data')

@train.config
def train_cfg():
    connect_mongo_db = False
    connect_slack = False
    n_epochs = 160
    metadata_path = '/home/ubuntu/tensorboard_logs'  # '/Users/elias/Downloads'
    log_interval = 100
    img_log_interval = 1000
    eval_interval = 125  # Run evaluator every n iterations
    save_interval = 1
    save_n_last = 5
    overwrite_id_with = '17-ce_weighted-lr_schedule_Affine_RandomResizedCropFlip-imagenet_pretrained'

@train_data.config
def train_data_cfg():
    name = 'FashionFinetuneTrain'
    ds_params = Dataset.get_dataset_params(name)
    ds_params['base_data_path'] = '/mnt/ramdisk/fashion-dataset'  # '/Users/elias/Google Drive/datasets/fashion-dataset'
    ds_params['aug_names'] = ('Affine', 'RandomResizedCropFlip')
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##########################################################################
import json

from sacred import Ingredient

from ingredients.corpus import ing as corpus_ingredient, read_jsonl
from models import AbstractSummarizer

# TODO Putting corpus ingredient here does not feel right. When summarizing, we do not need
# the corpus. Any jsonl file will do. What we need here is the `read_jsonl` function and its
# preprocessing. That might be best put in a separate ingredient.
ing = Ingredient('summ', ingredients=[corpus_ingredient])

@ing.config
def cfg():
    # path to the JSONL file to summarize
    path = 'test.jsonl'
    # extract at most this number of sentences as summary
    size = 3

@ing.capture
def run_summarization(model: AbstractSummarizer, path, size=3):
    for doc in read_jsonl(path):
        summary = set(model.summarize(doc, size=size))
        sent_id = 0
from typing import Optional, Tuple, List

import numpy as np
from sacred import Ingredient

import rinokeras as rk
from rinokeras.models.transformer import TransformerInputEmbedding, TransformerEncoder

from .AbstractTapeModel import AbstractTapeModel

transformer_hparams = Ingredient('transformer')

@transformer_hparams.config
def configure_transformer():
    n_layers = 12  # noqa: F841
    n_heads = 8  # noqa: F841
    d_model = 512  # noqa: F841
    d_filter = 4 * d_model  # noqa: F841
    dropout = 0.1  # noqa: F841
    layer_dropout = 0.  # noqa: F841
    kernel_regularizer = None  # noqa: F841

class Transformer(AbstractTapeModel):

    @transformer_hparams.capture
    def __init__(self,
                 n_symbols: int,
                 n_layers: int = 12,
                 n_heads: int = 8,
                 d_model: int = 512,
from math import sqrt

from sacred import Ingredient
from torch.optim.lr_scheduler import LambdaLR

lr_schedule_ingredient = Ingredient("lr_schedule")

@lr_schedule_ingredient.config
def config():
    """
    Possible schedule types:
        constant - Is fixed at base_lr
        linear - Linearly interpolates between scheduled_lrs over durations
                 specified in lr_durations and remains constant at the last lr
        fixed_schedule - Iterates through scheduled_lrs over durations
                 specified in lr_durations and remains constant at the last lr
    """
    schedule_type = "fixed_schedule"
    base_lr = 0.001
    fixed_embedding_lr = 50
    if schedule_type == "linear" or schedule_type == "fixed_schedule":
        base_lr = 1
    scheduled_lrs = [0.001, 0.0005]
    lr_durations = [1]
    decay_rate = 20

@lr_schedule_ingredient.capture
def get_lr_scheduler(optimizer, schedule_type, base_lr, scheduled_lrs,
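# A minimal sketch of how the "fixed_schedule" branch could map onto LambdaLR;
# the semantics of scheduled_lrs / lr_durations are assumed from the docstring
# above, since the original function body is truncated. With base_lr = 1 the
# lambda's return value is the effective learning rate itself.
def make_fixed_schedule(optimizer, scheduled_lrs, lr_durations):
    boundaries = []
    total = 0
    for duration in lr_durations:
        total += duration
        boundaries.append(total)

    def lr_lambda(epoch):
        # Return the lr scheduled for this epoch; stay at the last lr afterwards.
        for boundary, lr in zip(boundaries, scheduled_lrs):
            if epoch < boundary:
                return lr
        return scheduled_lrs[-1]

    return LambdaLR(optimizer, lr_lambda)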
from typing import Optional, Tuple, List

import tensorflow as tf
from tensorflow.keras.layers import Embedding, Lambda
import numpy as np
from sacred import Ingredient

import rinokeras as rk
from rinokeras.layers import Stack, ResidualBlock, PaddedConv, PositionEmbedding

from .AbstractTapeModel import AbstractTapeModel

vae_hparams = Ingredient('vae')

@vae_hparams.config
def configure_vae():
    n_layers = 35  # noqa: F841
    filters = 256  # noqa: F841
    kernel_size = 9  # noqa: F841
    layer_norm = False  # noqa: F841
    activation = 'relu'  # noqa: F841
    dilation_rate = 2  # noqa: F841
    dropout = 0.1  # noqa: F841
    latent_size = 2

class VAE(AbstractTapeModel):

    @vae_hparams.capture
    def __init__(self,
                 n_symbols: int,
import os
import time
from collections import OrderedDict

from sacred import Ingredient
from tqdm import tqdm
import numpy as np

import torch
from torch import nn, optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

from ignite.engine import create_supervised_trainer, create_supervised_evaluator, Events
from ignite.metrics import Accuracy, Loss, RunningAverage
from ignite.handlers import ModelCheckpoint, EarlyStopping
from ignite.contrib.metrics.roc_auc import ROC_AUC

from nets.losses import RMSELoss, PvarLossWrapper

train_ingredient = Ingredient('train')

@train_ingredient.config
def config():
    optimizer_name = 'adam'
    loss_str = 'ce'
    lr = None
    max_epochs = 1000
    metrics = ['loss']
    val_metric_to_monitor = 'loss'
    epoch_per_metric = 1
    print_freq = 5
    plateau_patience = 15
    plateau_terminate = 60
    gpu_if_available = True
import pandas as pd
from sklearn.model_selection import train_test_split
from sacred import Ingredient

train_data_ingredient = Ingredient('train_dataset')

@train_data_ingredient.config
def cfg():
    filename = 'data/train.csv'
    target = 'Survived'
    split_size = .75

@train_data_ingredient.capture
def load_data(filename, target, split_size):
    data = pd.read_csv(filename)
    features = [i for i in data.columns if i != target]
    return train_test_split(data[features], data[target], train_size=split_size)
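# A minimal sketch of how this ingredient might be consumed; the experiment
# name and main body are illustrative, not part of the original file:
from sacred import Experiment

ex = Experiment('titanic', ingredients=[train_data_ingredient])

@ex.automain
def main():
    # filename, target and split_size are injected from the ingredient config
    X_train, X_test, y_train, y_test = load_data()
    print(len(X_train), len(X_test))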
from CMerModel import CMerModel, ppr_factory, FunctionHolder, arrayHasher
from CMerModel import SetArrayHasher
from CMerModel import ArrayPerfectHasher
from CMerModel import DictArrayHasher

import numpy as np
import pandas as pd
from sacred import Ingredient
from itertools import combinations
from scipy import sparse

cf_ingredient = Ingredient('cf')

prunner = ppr_factory()
# imap = arrayHasher()
# imap = SetArrayHasher()
imap = DictArrayHasher()

@cf_ingredient.config
def cfg():
    use_counts = False
    mers = 1

@cf_ingredient.capture
def gen_indices_map(cf_counts_dfs, use_counts, mers):
    # type: (list, bool, int) -> object
    if use_counts:
        for df in cf_counts_dfs:
            imap.hashArrayWithRep(df['feature'].values)
    else:
        for df in cf_counts_dfs:
import json

from sacred import Ingredient
from sklearn.metrics import confusion_matrix, f1_score, precision_recall_fscore_support

from ingredients.corpus import ing as corpus_ingredient, read_train_corpus, read_dev_corpus, \
    read_test_corpus
from utils import SACRED_OBSERVE_FILES

ing = Ingredient('eval', ingredients=[corpus_ingredient])

@ing.config
def cfg():
    # which set of the corpus to evaluate on [train, dev, test]
    which = 'test'
    # where to serialize the full evaluation result
    path = None
    # where to save the confusion matrix
    cm_path = None
    # whether to use weighted macro-averaged F1
    weighted = True

@ing.capture
def evaluate_fully(gold_labels, pred_labels, path, _log, _run, result=None):
    if result is None:
        result = {}
    all_labels = list(set(gold_labels + pred_labels))
    prec, rec, f1, _ = precision_recall_fscore_support(gold_labels,
from .utils import get_mi, get_cond_entropy, get_entropy, get_one_hot
from tqdm import tqdm
from sacred import Ingredient
import torch
import time

tim_ingredient = Ingredient('tim')

@tim_ingredient.config
def config():
    temp = 15
    loss_weights = [0.1, 1.0, 0.1]  # [Xent, H(Y), H(Y|X)]
    lr = 1e-4
    iter = 150
    alpha = 1.0

class TIM(object):

    @tim_ingredient.capture
    def __init__(self, temp, loss_weights, iter, model):
        self.temp = temp
        self.loss_weights = loss_weights.copy()
        self.iter = iter
        self.model = model
        self.init_info_lists()

    def init_info_lists(self):
        self.timestamps = []
        self.mutual_infos = []
        self.entropy = []
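# A self-contained sketch (not the repository's implementation) of how the
# three weighted terms in `loss_weights` could combine, given classifier
# logits for labelled support and unlabelled query examples:
import torch.nn.functional as F

def tim_loss(support_logits, support_labels, query_logits, loss_weights):
    w_xent, w_ent, w_cond = loss_weights
    probs = F.softmax(query_logits, dim=1)

    xent = F.cross_entropy(support_logits, support_labels)       # Xent
    marginal = probs.mean(dim=0)
    h_y = -(marginal * torch.log(marginal + 1e-12)).sum()        # H(Y)
    h_y_x = -(probs * torch.log(probs + 1e-12)).sum(1).mean()    # H(Y|X)

    # TIM maximises the mutual information H(Y) - H(Y|X) on the query set
    # while staying faithful to the support labels via the cross-entropy.
    return w_xent * xent - w_ent * h_y + w_cond * h_y_x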
import os

# limit numpy/BLAS backends to a single thread per process
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"

import logging
import sys

import numpy as np
import torch
from sacred import Ingredient

dataset_dist_ingred = Ingredient('dataset_dist')

logging.basicConfig(level=logging.INFO, stream=sys.stdout)
logger = logging.getLogger("Dataset Distribution")

@dataset_dist_ingred.config
def cfg():
    M = 10
    client_size_factor = 0
    class_balance_factor = 0
    dataset_seed = None

@dataset_dist_ingred.capture
def generate_dataset_distribution_func(_run, M, client_size_factor, class_balance_factor, dataset_seed):
from sacred import Ingredient
import os

import torch

from schnetpack.md.calculators import SchnetPackCalculator
from schnetpack.md.utils import MDUnits

calculator_ingredient = Ingredient('calculator')

@calculator_ingredient.config
def config():
    """configuration for the calculator ingredient"""
    calculator = 'schnet_calculator'
    required_properties = ['y', 'dydx']
    force_handle = 'dydx'
    position_conversion = 1.0 / MDUnits.angs2bohr
    force_conversion = 1.0 / MDUnits.auforces2aseforces
    property_conversion = {}
    model_path = 'eth_ens_01.model'
    # If model_path is a directory, point it at the best_model file inside
    if os.path.isdir(model_path):
        model_path = os.path.join(model_path, 'best_model')

@calculator_ingredient.capture
def load_model(_log, model_path, device):
    model = torch.load(model_path).to(device)
    _log.info('Loaded model from {:s}'.format(model_path))
    return model
import time

import tqdm
import torch
import pandas as pd
from sacred import Ingredient

from dfp.utils import save_model

evaluator = Ingredient('evaluator')

@evaluator.capture
def evaluate_policy(env, model, policy, n_eval_episodes, epoch, run_dir, _log, _run):
    logger = _log

    # EVALUATION
    logger.info("Evaluating ...")
    eval_tic = time.time()
    model.eval()

    eval_metrics = []
    with torch.no_grad():
        for _ in tqdm.trange(n_eval_episodes):
            obs = env.reset()
            done = False
            episode_reward = 0
            episode_steps = 0
            while not done: