Example #1
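# Note: the snippets aggregated on this page omit their imports and
# module-level context. From usage, `pt` is padertorch, `ex` is a
# sacred.Experiment, `Path` is pathlib.Path, and `FileStorageObserver`
# comes from sacred.observers; names such as JSON_BASE, experiment_name,
# path_template and get_new_folder are defined elsewhere in the
# original scripts.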
def config():
    debug = False
    batch_size = 6

    train_dataset = "mix_2_spk_min_tr"
    validate_dataset = "mix_2_spk_min_cv"

    # Partial trainer config; pt.Trainer.get_config() below fills in the remaining defaults so Sacred can track every value
    trainer = {
        "model": {
            "factory": pt.models.bss.PermutationInvariantTrainingModel,
            "dropout_input": 0.,
            "dropout_hidden": 0.,
            "dropout_linear": 0.
        },
        "storage_dir": None,
        "optimizer": {
            "factory": pt.optimizer.Adam,
            "gradient_clipping": 1
        },
        "summary_trigger": (1000, "iteration"),
        "stop_trigger": (300_000, "iteration"),
        "loss_weights": {
            "pit_ips_loss": 1.0,
            "pit_mse_loss": 0.0,
        }
    }
    pt.Trainer.get_config(trainer)
    if trainer['storage_dir'] is None:
        trainer['storage_dir'] = get_new_folder(path_template, mkdir=False)

    ex.observers.append(
        FileStorageObserver.create(Path(trainer['storage_dir']) / 'sacred'))
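
All of the config() examples on this page follow the same Sacred pattern: a config function registered on an Experiment, plus a FileStorageObserver that records each run. A minimal self-contained sketch of that pattern (the experiment name and values are illustrative, not taken from the snippets):

from pathlib import Path

from sacred import Experiment
from sacred.observers import FileStorageObserver

ex = Experiment('demo')

# Each run is written below this directory by the observer.
ex.observers.append(FileStorageObserver(str(Path('storage') / 'sacred')))


@ex.config
def config():
    batch_size = 6  # overridable from the CLI: `python demo.py with batch_size=12`
    debug = False


@ex.automain
def main(batch_size, debug):
    print(batch_size, debug)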
Example #2
def test_fs_observer_equality(dir_obs):
    basedir, obs = dir_obs
    obs2 = FileStorageObserver.create(obs.basedir)
    assert obs == obs2
    assert not obs != obs2

    assert not obs == 'foo'
    assert obs != 'foo'
Example #3
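# The assertions below exercise both __eq__ and __ne__: two observers
# built on the same basedir compare equal, and comparison against a
# non-observer value ("foo") is unequal.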
def test_fs_observer_equality(dir_obs):
    basedir, obs = dir_obs
    obs2 = FileStorageObserver.create(obs.basedir)
    assert obs == obs2
    assert not obs != obs2

    assert not obs == "foo"
    assert obs != "foo"
Example #4
def config():
    debug = False
    batch_size = 4  # Runs on 4GB GPU mem. Can safely be set to 12 on 12 GB (e.g., GTX1080)
    chunk_size = 32000  # 4s chunks @8kHz

    train_dataset = "mix_2_spk_min_tr"
    validate_dataset = "mix_2_spk_min_cv"
    target = 'speech_source'
    lr_scheduler_step = 2
    lr_scheduler_gamma = 0.98
    load_model_from = None
    database_json = None
    if database_json is None and JSON_BASE:
        database_json = Path(JSON_BASE) / 'wsj0_2mix_8k.json'

    if database_json is None:
        raise MissingConfigError(
            'You have to set the path to the database JSON!', 'database_json')
    if not Path(database_json).exists():
        raise InvalidConfigError('The database JSON does not exist!',
                                 'database_json')

    feat_size = 64
    encoder_window_size = 16
    trainer = {
        "model": {
            "factory": padertorch.contrib.examples.source_separation.tasnet.TasNet,
            'encoder': {
                'factory': padertorch.contrib.examples.source_separation.tasnet.tas_coders.TasEncoder,
                'window_length': encoder_window_size,
                'feature_size': feat_size,
            },
            'decoder': {
                'factory': padertorch.contrib.examples.source_separation.tasnet.tas_coders.TasDecoder,
                'window_length': encoder_window_size,
                'feature_size': feat_size,
            },
        },
        "storage_dir": None,
        "optimizer": {
            "factory": pt.optimizer.Adam,
            "gradient_clipping": 1
        },
        "summary_trigger": (1000, "iteration"),
        "stop_trigger": (100, "epoch"),
        "loss_weights": {
            "si-sdr": 1.0,
            "log-mse": 0.0,
            "log1p-mse": 0.0,
        }
    }
    pt.Trainer.get_config(trainer)
    if trainer['storage_dir'] is None:
        trainer['storage_dir'] = pt.io.get_new_storage_dir(experiment_name)

    ex.observers.append(
        FileStorageObserver(Path(trainer['storage_dir']) / 'sacred'))
Example #5
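# With copy_sources=False the observer must not create a _sources
# directory; the second half re-runs with copy_sources=True to confirm
# the directory would otherwise have appeared.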
def test_no_sources(tmpdir, tmpfile, sample_run):
    obs = FileStorageObserver(tmpdir, copy_sources=False)
    sample_run["ex_info"]["sources"] = [[tmpfile.name, tmpfile.md5sum]]
    obs.started_event(**sample_run)
    assert not os.path.exists(tmpdir / "_sources")

    # Test the test: that the source would otherwise have been created.
    obs = FileStorageObserver(tmpdir, copy_sources=True)
    sample_run["_id"] = sample_run["_id"] + "_2"
    obs.started_event(**sample_run)
    name, _ = os.path.splitext(os.path.basename(tmpfile.name))
    assert os.path.exists(tmpdir / "_sources")
    assert any(x.startswith(name) for x in os.listdir(tmpdir / "_sources"))
Example #6
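# Two observers sharing a basedir also share the _resources store:
# recording the same file from two runs must keep a single copy, which
# each run.json still references.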
def test_fs_observer_resource_event_does_not_duplicate(dir_obs, sample_run, tmpfile):
    basedir, obs = dir_obs
    obs2 = FileStorageObserver(obs.basedir)
    obs.started_event(**sample_run)

    obs.resource_event(tmpfile.name)
    # let's have another run from a different observer
    sample_run["_id"] = None
    _id = obs2.started_event(**sample_run)
    run_dir = basedir.join(str(_id))
    obs2.resource_event(tmpfile.name)

    res_dir = basedir.join("_resources")
    assert res_dir.exists()
    assert len(res_dir.listdir()) == 1
    assert res_dir.listdir()[0].read() == tmpfile.content

    run = json.loads(run_dir.join("run.json").read())
    assert len(run["resources"]) == 1
    assert run["resources"][0] == [tmpfile.name, res_dir.listdir()[0].strpath]
Example #7
def config():
    debug = False
    batch_size = 4  # Runs on 4GB GPU mem. Can safely be set to 12 on 12 GB (e.g., GTX1080)
    chunk_size = 32000  # 4s chunks @8kHz

    train_datasets = ["mix_2_spk_min_tr", "mix_3_spk_min_tr"]
    validate_datasets = ["mix_2_spk_min_cv", "mix_3_spk_min_cv"]
    target = 'speech_source'
    lr_scheduler_step = 2
    lr_scheduler_gamma = 0.98
    load_model_from = None
    database_jsons = []

    # if not database_jsons:
    #     raise MissingConfigError(
    #         'You have to set the path to the database JSON!', 'database_jsons')

    # Partial trainer config; pt.Trainer.get_config() below fills in the remaining defaults so Sacred can track every value
    trainer = {
        "model": {
            "factory":
            'padertorch.contrib.examples.or_pit.or_pit.OneAndRestPIT',
            "separator": {
                "factory": 'padertorch.contrib.examples.tasnet.tasnet.TasNet'
            }
        },
        "storage_dir": None,
        "optimizer": {
            "factory": pt.optimizer.Adam,
            "gradient_clipping": 1
        },
        "summary_trigger": (1000, "iteration"),
        "stop_trigger": (100_000, "iteration"),
        "loss_weights": {
            "si-sdr": 0.0,
            "log-mse": 1.0,
            "si-sdr-grad-stop": 0.0,
        }
    }
    pt.Trainer.get_config(trainer)
    if trainer['storage_dir'] is None:
        trainer['storage_dir'] = get_storage_dir()

    ex.observers.append(
        FileStorageObserver(Path(trainer['storage_dir']) / 'sacred'))
Example #8
def config():
    debug = False
    batch_size = 6
    database_json = None  # Path to WSJ0_2mix .json
    if database_json is None and JSON_BASE:
        database_json = Path(JSON_BASE) / 'wsj0_2mix_8k.json'

    if database_json is None:
        raise MissingConfigError(
            'You have to set the path to the database JSON!', 'database_json')
    if not Path(database_json).exists():
        raise InvalidConfigError('The database JSON does not exist!',
                                 'database_json')
    train_dataset = "mix_2_spk_min_tr"
    validate_dataset = "mix_2_spk_min_cv"

    # Dict describing the model parameters, so they can be changed from the command line.
    # Configurable automatically inserts the default values of unspecified parameters into config.json.
    trainer = {
        "model": {
            "factory": pt.contrib.examples.source_separation.pit.model.
            PermutationInvariantTrainingModel,
            "dropout_input": 0.,
            "dropout_hidden": 0.,
            "dropout_linear": 0.
        },
        "storage_dir": None,
        "optimizer": {
            "factory": pt.optimizer.Adam,
            "gradient_clipping": 1
        },
        "summary_trigger": (1000, "iteration"),
        "stop_trigger": (300_000, "iteration"),
        "loss_weights": {
            "pit_ips_loss": 1.0,
            "pit_mse_loss": 0.0,
        }
    }
    pt.Trainer.get_config(trainer)
    if trainer['storage_dir'] is None:
        trainer['storage_dir'] = pt.io.get_new_storage_dir(experiment_name)

    ex.observers.append(
        FileStorageObserver(Path(trainer['storage_dir']) / 'sacred'))
Example #9
def test_fs_observer_resource_event_does_not_duplicate(dir_obs, sample_run,
                                                       tmpfile):
    basedir, obs = dir_obs
    obs2 = FileStorageObserver.create(obs.basedir)
    obs.started_event(**sample_run)

    obs.resource_event(tmpfile.name)
    # let's have another run from a different observer
    sample_run['_id'] = None
    _id = obs2.started_event(**sample_run)
    run_dir = basedir.join(str(_id))
    obs2.resource_event(tmpfile.name)

    res_dir = basedir.join('_resources')
    assert res_dir.exists()
    assert len(res_dir.listdir()) == 1
    assert res_dir.listdir()[0].read() == tmpfile.content

    run = json.loads(run_dir.join('run.json').read())
    assert len(run['resources']) == 1
    assert run['resources'][0] == [tmpfile.name, res_dir.listdir()[0].strpath]
Example #10
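# Unlike the JSON_BASE variants above, this config falls back to the
# WSJ0_2MIX environment variable for the database path.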
def config():
    debug = False
    batch_size = 6
    database_json = ""  # Path to WSJ0_2mix .json
    if "WSJ0_2MIX" in os.environ:
        database_json = os.environ.get("WSJ0_2MIX")
    assert len(database_json) > 0, 'Set the path to the database JSON on the command line or set the environment variable WSJ0_2MIX'
    train_dataset = "mix_2_spk_min_tr"
    validate_dataset = "mix_2_spk_min_cv"

    # Dict describing the model parameters, so they can be changed from the command line.
    # Configurable automatically inserts the default values of unspecified parameters into config.json.
    trainer = {
        "model": {
            "factory": pt.contrib.examples.pit.model.PermutationInvariantTrainingModel,
            "dropout_input": 0.,
            "dropout_hidden": 0.,
            "dropout_linear": 0.
        },
        "storage_dir": None,
        "optimizer": {
            "factory": pt.optimizer.Adam,
            "gradient_clipping": 1
        },
        "summary_trigger": (1000, "iteration"),
        "stop_trigger": (300_000, "iteration"),
        "loss_weights": {
            "pit_ips_loss": 1.0,
            "pit_mse_loss": 0.0,
        }
    }
    pt.Trainer.get_config(trainer)
    if trainer['storage_dir'] is None:
        trainer['storage_dir'] = get_new_folder(path_template, mkdir=False)

    ex.observers.append(
        FileStorageObserver.create(Path(trainer['storage_dir']) / 'sacred'))
Example #11
def test_observer_equality(tmpdir):
    observer_1 = FileStorageObserver.create(str(tmpdir / "a"))
    observer_2 = FileStorageObserver.create(str(tmpdir / "b"))
    observer_3 = FileStorageObserver.create(str(tmpdir / "a"))
    assert observer_1 == observer_3
    assert observer_1 != observer_2
Example #12
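# dir_obs is consumed as a pytest fixture by the tests above (`tmpdir`
# is pytest's built-in temporary-directory fixture); the variants below
# differ only in whether the observer is rooted in tmpdir itself or in
# a 'file_storage' subdirectory.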
def dir_obs(tmpdir):
    basedir = tmpdir.join("file_storage")
    return basedir, FileStorageObserver.create(basedir.strpath)
Example #13
def dir_obs(tmpdir):
    return tmpdir, FileStorageObserver.create(tmpdir.strpath)
Example #14
def dir_obs(tmpdir):
    basedir = tmpdir.join('file_storage')
    return basedir, FileStorageObserver.create(basedir.strpath)
Example #15
from zeiss_umbrella.fundus.setting_parser import get_baseline, get_loss
from zeiss_umbrella.fundus.data import get_fundus_train
from zeiss_umbrella.fundus.train import test_model
import torch
import json
import os
from zeiss_umbrella.config import FILE_OBSERVER_BASE_PATH, FILE_OBSERVER_RESOURCE_PATH, FILE_OBSERVER_SOURCE_PATH
import sacred
from sacred import Experiment
from sacred.observers.file_storage import FileStorageObserver

ex = Experiment('fundus training')
template = "efficientnetb0"
ex.observers.append(
    FileStorageObserver(FILE_OBSERVER_BASE_PATH, FILE_OBSERVER_RESOURCE_PATH,
                        FILE_OBSERVER_SOURCE_PATH, template))


@ex.config
def my_config():
    experiments_path = '/home/jiwu/interpretable-fundus/fundus_experiments'
    exp_dir = 'corruption_experiments/efficientnetb0_corruption_imbalance_3'
    weights_dir = 'corruption_experiments/efficientnetb0_corruption_imbalance_3/train_efficientnetb0_normalize_baseline_unfreezed_crossentropy_parallel_corrupted'
    device = 'cuda:0'
    exp_dir = os.path.join(experiments_path, exp_dir)
    weights_dir = os.path.join(experiments_path, weights_dir)
    with open(os.path.join(exp_dir, 'config.json')) as f:
        config = json.load(f)
    valid_corruption = True

Example #16
import os

import sacred
from sacred.utils import apply_backspaces_and_linefeeds
from sacred.observers.file_storage import FileStorageObserver

GRAMTOOLS_INSTALL_PATH = '/home/robyn/Documents/gramtools'
GENERATE_PRG_SCRIPT_PATH = os.path.join(GRAMTOOLS_INSTALL_PATH,
                                        'utils/vcf_to_linear_prg.pl')
GENERATE_KMERS_SCRIPT_PATH = os.path.join(GRAMTOOLS_INSTALL_PATH,
                                          'utils/variantKmers.py')
MAP_READS_PATH = os.path.join(GRAMTOOLS_INSTALL_PATH, 'bin', 'gramtools')

experiment = sacred.Experiment()
experiment.captured_out_filter = apply_backspaces_and_linefeeds

file_observer = FileStorageObserver.create('gramtools_runs')
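# Monkey-patch: replace save_sources with a no-op so this observer
# skips copying source files into the run directory.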
file_observer.save_sources = lambda x: None
experiment.observers.append(file_observer)


def generate_paths(vcf_path, fasta_path):
    """Generate and return all file paths associated with experiment."""
    vcf_path = os.path.abspath(vcf_path)
    file_observer.run_entry['artifacts'].append(vcf_path)

    fasta_path = os.path.abspath(fasta_path)
    file_observer.run_entry['artifacts'].append(fasta_path)

    run_path = os.path.abspath(file_observer.dir)
    data_path = os.path.join(run_path, 'data')
Example #17
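# copy_artifacts=False is expected to suppress the _resources directory
# even for resource_event; the second observer checks the opposite flag.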
def test_no_duplicate(tmpdir, sample_run):
    obs = FileStorageObserver(tmpdir, copy_artifacts=False)
    file = Path(str(tmpdir / "koko.txt"))
    file.touch()
    obs.started_event(**sample_run)
    obs.resource_event(str(file))
    assert not os.path.exists(tmpdir / "_resources")

    # Test the test: that the resource would otherwise have been created.
    obs = FileStorageObserver(tmpdir, copy_artifacts=True)
    sample_run["_id"] = sample_run["_id"] + "_2"
    obs.started_event(**sample_run)
    obs.resource_event(str(file))
    assert os.path.exists(tmpdir / "_resources")
    assert any(x.startswith("koko") for x in os.listdir(tmpdir / "_resources"))
Example #18
def config():
    debug = False
    batch_size = 4  # Runs on 4GB GPU mem. Can safely be set to 12 on 12 GB (e.g., GTX1080)
    chunk_size = 32000  # 4s chunks @8kHz

    train_datasets = ["mix_2_spk_min_tr", "mix_3_spk_min_tr"]
    validate_datasets = ["mix_2_spk_min_cv", "mix_3_spk_min_cv"]
    target = 'speech_source'
    lr_scheduler_step = 2
    lr_scheduler_gamma = 0.98
    load_model_from = None
    database_jsons = []

    if len(database_jsons) == 0 and JSON_BASE:
        database_jsons = [
            Path(JSON_BASE) / 'wsj0_2mix_8k.json',
            Path(JSON_BASE) / 'wsj0_3mix_8k.json',
        ]

    # if not database_jsons:
    #     raise MissingConfigError(
    #         'You have to set the path to the database JSON!', 'database_jsons')

    # Partial trainer config; pt.Trainer.get_config() below fills in the remaining defaults so Sacred can track every value
    trainer = {
        "model": {
            "factory":
            pt.contrib.examples.source_separation.or_pit.OneAndRestPIT,
            "separator": {
                "factory": pt.contrib.examples.source_separation.tasnet.TasNet,
                'encoder': {
                    'factory':
                    pt.contrib.examples.source_separation.tasnet.tas_coders.
                    TasEncoder,
                    'window_length':
                    16,
                    'feature_size':
                    64,
                },
                'separator': {
                    'factory': pt.modules.dual_path_rnn.DPRNN,
                    'input_size': 64,
                    'rnn_size': 128,
                    'window_length': 100,
                    'hop_size': 50,
                    'num_blocks': 6,
                },
                'decoder': {
                    'factory':
                    pt.contrib.examples.source_separation.tasnet.tas_coders.
                    TasDecoder,
                    'window_length':
                    16,
                    'feature_size':
                    64,
                },
            }
        },
        "storage_dir": None,
        "optimizer": {
            "factory": pt.optimizer.Adam,
            "gradient_clipping": 1
        },
        "summary_trigger": (1000, "iteration"),
        "stop_trigger": (100_000, "iteration"),
        "loss_weights": {
            "si-sdr": 0.0,
            "log-mse": 1.0,
            "si-sdr-grad-stop": 0.0,
        }
    }
    pt.Trainer.get_config(trainer)
    if trainer['storage_dir'] is None:
        trainer['storage_dir'] = get_new_storage_dir(experiment_name)

    ex.observers.append(
        FileStorageObserver(Path(trainer['storage_dir']) / 'sacred'))
Example #19
def dir_obs(tmpdir):
    return tmpdir, FileStorageObserver.create(tmpdir.strpath)
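
As a usage sketch (assumed, not shown on this page): in the original test suite these helpers are registered as pytest fixtures, which is what lets tests such as Example #2 receive dir_obs as an argument:

import pytest
from sacred.observers import FileStorageObserver


@pytest.fixture
def dir_obs(tmpdir):
    # tmpdir is pytest's built-in py.path-based temporary directory.
    basedir = tmpdir.join('file_storage')
    return basedir, FileStorageObserver(basedir.strpath)


def test_observer_basedir(dir_obs):
    basedir, obs = dir_obs
    assert obs.basedir == basedir.strpath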