Example #1
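These snippets are excerpts and omit their imports. A plausible shared prologue for the test examples below, assuming mag's public module layout:

import os

import pytest

from mag.config import Config
from mag.experiment import Experiment
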
def test_experiment_restoration(nested_dict_config, tmpdir):

    experiments_dir = tmpdir.join("experiments").strpath

    # create an experiment
    experiment = Experiment(nested_dict_config,
                            experiments_dir=experiments_dir)
    experiment.register_directory("temp")

    with pytest.raises(ValueError):
        # since the experiment with the same identifier has been
        # already created, experiment raises an error
        experiment = Experiment(nested_dict_config,
                                experiments_dir=experiments_dir)

    # test restoration from identifier
    experiment = Experiment(resume_from=experiment.config.identifier,
                            experiments_dir=experiments_dir)

    assert experiment.config.to_dict() == nested_dict_config
    # test that `temp` is registered after restoration
    assert os.path.isdir(experiment.temp)

    # test restoration from directory
    experiment = Experiment(resume_from=os.path.join(
        experiments_dir, experiment.config.identifier))

    assert experiment.config.to_dict() == nested_dict_config
    # test that `temp` is registered after restoration
    assert os.path.isdir(experiment.temp)
Example #2
def test_experiment_register_directory(nested_dict_config, tmpdir):

    experiments_dir = tmpdir.join("experiments").strpath

    experiment = Experiment(nested_dict_config,
                            experiments_dir=experiments_dir)

    experiment.register_directory("temp")
    target = os.path.join(experiment.experiment_dir, "temp")

    assert os.path.isdir(target)
    assert experiment.temp == target
Example #3
def do_exp(model_name, params, _dir, preproc_name, padding):
    """Perform an experiment using the specified parameters.

    Args:
        params (dict): specific hyperparameter set to use.
    Returns:
        (dict): score found using specified hyperparameters.
    """
    model = get_model(model_name)
    preproc = get_filter(preproc_name)
    X_train, y_train = get_data(dataset='train',
                                preprocessor=preproc,
                                TIMIT_root='../../TIMIT/TIMIT',
                                padding=padding)
    X_test, y_test = get_data(dataset='val',
                              preprocessor=preproc,
                              TIMIT_root='../../TIMIT/TIMIT',
                              padding=padding)
    try:
        with Experiment(config=params, experiments_dir=_dir) as experiment:
            score = run_model(model, X_train, y_train, X_test, y_test, params)
            experiment.register_result('score', score)
    except ValueError:
        # if something breaks, return the worst score possible
        return 0
    return score
Example #4
def test_experiment_commit_hash_saving(nested_dict_config, tmpdir):

    experiments_dir = tmpdir.join("experiments").strpath

    experiment = Experiment(nested_dict_config,
                            experiments_dir=experiments_dir)

    assert os.path.isfile(
        os.path.join(experiment.experiment_dir, "commit_hash"))
Example #5
def test_experiment_initialization(nested_dict_config, tmpdir):

    experiments_dir = tmpdir.join("experiments").strpath

    experiment = Experiment(nested_dict_config,
                            experiments_dir=experiments_dir)

    config = Config.from_json(
        os.path.join(experiments_dir, experiment.config.identifier,
                     "config.json"))

    assert config.to_dict() == nested_dict_config
Example #6
def test_experiment_logging(nested_dict_config, tmpdir):

    experiments_dir = tmpdir.join("experiments").strpath

    with Experiment(nested_dict_config,
                    experiments_dir=experiments_dir) as experiment:

        print("test")

    with open(experiment.log_file, "r") as f:
        assert f.readlines()[-1].strip() == "test"

    print("test2")
    # check that nothing is logged when print is called
    # outside with block
    with open(experiment.log_file, "r") as f:
        assert f.readlines()[-1].strip() == "test"
Example #7
def single_experiment(model, data, params):
    """Apply the model to the data and store the results using mag.

    Args:
        model (str): name of a callable model constructor in the current namespace.
        data (str): which TIMIT data set to use; if given, must be one of {'full', 'toy'}.
        params (dict): dictionary with parameters for model.
    """
    # prepare the experiment directory
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d-%H-%M-%S')
    _dir = "./results/" + model + "_" + st
    if data is not None and data.lower() == 'toy':
        _dir = "./results/TOY_" + model + "_" + st + '/'
    else:
        _dir = "./results/" + model + "_" + st + '/'

    # select the model to be used
    model = get_model(model)

    # get the specified dataset
    if data is None:
        data = "full"
    if data.lower() == "toy":
        X_train, y_train = get_data(dataset='toy',
                                    preprocessor=mel,
                                    TIMIT_root='../TIMIT/TIMIT',
                                    use_cache=True)
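        # presumably a quick smoke test: the toy run validates on its own training data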
        X_test, y_test = X_train, y_train
    elif data == "full":
        X_train, y_train = get_data(dataset='train',
                                    preprocessor=mel,
                                    TIMIT_root='../TIMIT/TIMIT',
                                    use_cache=True)
        X_test, y_test = get_data(dataset='val',
                                  preprocessor=mel,
                                  TIMIT_root='../TIMIT/TIMIT',
                                  use_cache=True)
    else:
        raise ValueError("data must be one of {'toy', 'full'}")

    with Experiment(config=params, experiments_dir=_dir) as experiment:
        score = run_model(model, X_train, y_train, X_test, y_test, params)
        experiment.register_result('score', score)
Example #8
def test_experiment_register_result(simple_dict_config, tmpdir):

    experiments_dir = tmpdir.join("experiments").strpath

    experiment = Experiment(simple_dict_config,
                            experiments_dir=experiments_dir)

    experiment.register_result("fold1.accuracy", 0.97)
    experiment.register_result("fold2.accuracy", 0.99)
    experiment.register_result("fold1.loss", 0.03)
    experiment.register_result("fold2.loss", 0.01)
    experiment.register_result("overall_accuracy", 0.98)

    results = experiment.results.to_dict()

    assert results["fold1"]["accuracy"] == 0.97
    assert results["fold2"]["accuracy"] == 0.99
    assert results["fold1"]["loss"] == 0.03
    assert results["fold2"]["loss"] == 0.01
    assert results["overall_accuracy"] == 0.98
Example #9
def do_exp(params, _dir, X_train, y_train, X_test, y_test, result_dict):
    """Perform an experiment using the specified parameters.
    
    Args:
        params (dict): specific hyperparameter set to use.
    Returns:
        (float): score found using specified hyperparameters.
    """
    try:
        with Experiment(config=params, experiments_dir=_dir) as experiment:
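            # NOTE: `model` is not a parameter of this function; it is
            # assumed to come from the enclosing scope.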
            score = run_model(model, X_train, y_train, X_test, y_test, params)
            # save the params and score
            for k in params.keys():
                result_dict[k].append(params[k])
            experiment.register_result('score', score)
    except ValueError:
        # if something breaks, return the worst score possible
        return np.inf
    return -score  # pylint: disable=invalid-unary-operand-type
Example #10
    "--num_workers",
    type=int,
    default=4,
    help="number of workers for data loader",
)
parser.add_argument(
    "--label",
    type=str,
    default="finetuned_hierarchical_cnn_classifier",
    help="optional label",
)
args = parser.parse_args()

class_map = load_json(args.classmap)

pretrained = Experiment(resume_from=args.pretrained_model)

with Experiment({
        "network": {
            "num_conv_blocks": pretrained.config.network.num_conv_blocks,
            "start_deep_supervision_on":
            pretrained.config.network.start_deep_supervision_on,
            "conv_base_depth": pretrained.config.network.conv_base_depth,
            "growth_rate": pretrained.config.network.growth_rate,
            "dropout": args.dropout,
            "output_dropout": args.output_dropout,
        },
        "data": {
            "_n_folds": args.n_folds,
            "_kfold_seed": args.kfold_seed,
            "n_fft": pretrained.config.data.n_fft,
Example #11
    "bert_model": args.bert_model.replace("-", "_"),
    "batch_accumulation": args.batch_accumulation,
    "batch_size": args.batch_size,
    "warmup": args.warmup,
    "lr": args.lr,
    "folds": args.folds,
    "max_sequence_length": args.max_sequence_length,
    "max_title_length": args.max_title_length,
    "max_question_length": args.max_question_length,
    "max_answer_length": args.max_answer_length,
    "head_tail": args.head_tail,
    "label": args.label,
    "_pseudo_file": args.pseudo_file,
    "model_type": args.model_type,
}
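# implicit_resuming presumably lets the run attach to an existing experiment
# directory instead of raising an error (compare the ValueError in Example #1)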
experiment = Experiment(config, implicit_resuming=args.use_folds is not None)
experiment.register_directory("checkpoints")
experiment.register_directory("predictions")


def seed_everything(seed: int):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


logging.getLogger("transformers").setLevel(logging.ERROR)
seed_everything(args.seed)
Example #12
    "_bert_model": args.bert_model,
    "batch_accumulation": args.batch_accumulation,
    "batch_size": args.batch_size,
    "warmup": args.warmup,
    "lr": args.lr,
    "folds": args.folds,
    "max_sequence_length": args.max_sequence_length,
    "max_title_length": args.max_title_length,
    "max_question_length": args.max_question_length,
    "max_answer_length": args.max_answer_length,
    "head_tail": args.head_tail,
    "label": args.label,
    "split_pseudo": args.split_pseudo,
    "_pseudo_file": args.pseudo_file,
}
experiment = Experiment(config)
experiment.register_directory("checkpoints")
experiment.register_directory("predictions")


def seed_everything(seed: int):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


logging.getLogger("transformers").setLevel(logging.ERROR)
seed_everything(args.seed)
Example #13
with Experiment(
    {
        "network": {
            "num_conv_blocks": args.num_conv_blocks,
            "start_deep_supervision_on": args.start_deep_supervision_on,
            "conv_base_depth": args.conv_base_depth,
            "growth_rate": args.growth_rate,
            "output_dropout": args.output_dropout,
            "aggregation_type": args.aggregation_type
        },
        "data": {
            "features": args.features,
            "_n_folds": args.n_folds,
            "_kfold_seed": args.kfold_seed,
            "_input_dim": audio_transform.n_features,
            "p_mixup": args.p_mixup,
            "p_aug": args.p_aug,
            "max_audio_length": args.max_audio_length,
            "_train_df": args.train_df,
            "_train_data_dir": args.train_data_dir
        },
        "train": {
            "accumulation_steps": args.accumulation_steps,
            "batch_size": args.batch_size,
            "learning_rate": args.lr,
            "scheduler": args.scheduler,
            "optimizer": args.optimizer,
            "epochs": args.epochs,
            "_save_every": args.save_every,
            "weight_decay": args.weight_decay,
            "switch_off_augmentations_on": args.switch_off_augmentations_on
        },
        "label": args.label
    },
        implicit_resuming=args.resume) as experiment:
Example #14
    help="How frequenlty to plot samples from current distribution."
)
parser.add_argument(
    "--plot_points", type=int, default=1000,
    help="How many to points to generate for one plot."
)

args = parser.parse_args()

torch.manual_seed(42)


with Experiment({
    "batch_size": 40,
    "iterations": 10000,
    "initial_lr": 0.01,
    "lr_decay": 0.999,
    "flow_length": 16,
    "name": "planar"
}) as experiment:

    config = experiment.config
    experiment.register_directory("samples")
    experiment.register_directory("distributions")

    flow = NormalizingFlow(dim=2, flow_length=config.flow_length)
    bound = FreeEnergyBound(density=p_z)
    optimizer = optim.RMSprop(flow.parameters(), lr=config.initial_lr)
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, config.lr_decay)

    plot_density(p_z, directory=experiment.distributions)
                    help="whether to train on cuda or cpu",
                    choices=("cuda", "cpu"))
parser.add_argument(
    "--num_workers",
    type=int,
    default=4,
    help="number of workers for data loader",
)

args = parser.parse_args()

class_map = load_json(args.classmap)

train_df = pd.read_csv(args.train_df)

with Experiment(resume_from=args.experiment) as experiment:

    config = experiment.config

    audio_transform = AudioFeatures(config.data.features)

    splits = list(
        train_validation_data_stratified(train_df.fname, train_df.labels,
                                         class_map, config.data._n_folds,
                                         config.data._kfold_seed))

    all_labels = np.zeros(shape=(len(train_df), len(class_map)),
                          dtype=np.float32)
    all_predictions = np.zeros(shape=(len(train_df), len(class_map)),
                               dtype=np.float32)
Example #16
audio_transform = AudioFeatures(args.features)

with Experiment({
        "network": {
            "prediction_steps": args.prediction_steps,
            "rnn_size": args.rnn_size,
            "rnn_layers": args.rnn_layers
        },
        "data": {
            "features": args.features,
            "_n_folds": args.n_folds,
            "_kfold_seed": args.kfold_seed,
            "_input_dim": audio_transform.n_features,
            "p_aug": args.p_aug,
            "max_audio_length": args.max_audio_length
        },
        "train": {
            "_proj_interval": args.proj_interval,
            "accumulation_steps": args.accumulation_steps,
            "batch_size": args.batch_size,
            "learning_rate": args.lr,
            "scheduler": args.scheduler,
            "optimizer": args.optimizer,
            "epochs": args.epochs,
            "_save_every": args.save_every,
            "weight_decay": args.weight_decay,
            "switch_off_augmentations_on": args.switch_off_augmentations_on
        },
        "label": args.label
}) as experiment:

    config = experiment.config
Example #17
# imports reconstructed from usage below; the mag import path is assumed
import argparse
import logging

import mag
import pandas as pd
from mag.experiment import Experiment
from torch.utils.data import DataLoader
from transformers import BertTokenizer, RobertaTokenizer

mag.use_custom_separator("-")

parser = argparse.ArgumentParser()

parser.add_argument("--experiment", type=str, required=True)
parser.add_argument("--checkpoint", type=str, required=True)
parser.add_argument("--bert_model", type=str, required=True)
parser.add_argument("--dataframe", type=str, required=True)
parser.add_argument("--output_dir", type=str, required=True)

args = parser.parse_args()

experiment = Experiment(resume_from=args.experiment)
config = experiment.config

logging.getLogger("transformers").setLevel(logging.ERROR)

test_df = pd.read_csv(args.dataframe)

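# rebuild the training-time argument namespace from the restored config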
original_args = argparse.Namespace(
    folds=config.folds,
    lr=config.lr,
    batch_size=config.batch_size,
    seed=config._seed,
    bert_model=args.bert_model,
    num_classes=30,
    target_columns=target_columns,
    input_columns=input_columns,
Example #18
args = parser.parse_args()

svm_config = {
    "model": {
        "C": args.C,
        "gamma": args.gamma
    },
    "crossval": {
        "n_folds": args.cv,
        "_random_seed": args.cv_random_seed
    }
}

iris = load_iris()

with Experiment(config=svm_config) as experiment:

    config = experiment.config

    model = SVC(C=config.model.C, gamma=config.model.gamma)

    score = cross_val_score(
        model, X=iris.data, y=iris.target, scoring="accuracy",
        cv=StratifiedKFold(
            config.crossval.n_folds,
            shuffle=True,
            random_state=config.crossval._random_seed),
    ).mean()

    print("Accuracy is", round(score, 4))
    experiment.register_result("accuracy", score)
Example #19
                        default=0.9,
                        help="Momentum value used in optimizer.")

    args = parser.parse_args()

    with Experiment({
            "_n_classes": 10,
            "network": {
                "n_layers": args.n_layers,
                "hidden_units": args.hidden_units,
                "activation": args.activation
            },
            "train":
        {
            "batch_size": args.batch_size,
            "n_epochs": args.n_epochs,
            # to exclude the parameter from the identifier,
            # start its name from the underscore
            "_buffer_size": 128,
            "learning_rate": args.lr,
            "momentum": args.momentum
        },
            "validation": {
                "_batch_size": 128
            }
    }) as experiment:

        classifier = MnistClassifier(mnist, experiment)
        classifier.fit()

        print("Finished!")  # will be logged to a file
Example #20
with Experiment({
        "data": {
            "_input_dim": 64,
            "_kfold_seed": 42,
            "_n_folds": 5,
            "_train_data_dir": "data/Training_Data/",
            "_train_df": "data/train_df.csv",
            "features": "mel_1024_512_64",
            "max_audio_length": 3,
            "p_aug": 0.3,
            "p_mixup": 0.0
        },
        "label": "2d_cnn",
        "network": {
            "aggregation_type": "max",
            "conv_base_depth": 32,
            "growth_rate": 1.3,
            "num_conv_blocks": 5,
            "output_dropout": 0.5,
            "start_deep_supervision_on": 2
        },
        "train": {
            "_save_every": 5,
            "accumulation_steps": 1,
            "batch_size": 50,
            "epochs": 7,
            "learning_rate": 0.001,
            "optimizer": "adam",
            "scheduler": "1cycle_0.0001_0.005",
            "switch_off_augmentations_on": 6,
            "weight_decay": 0.0
        }
}) as experiment:
Example #21
with Experiment(
    {
        "network": {
            "backbone": args.backbone,
            "output_dropout": args.output_dropout,
        },
        "data": {
            "features": args.features,
            "_n_folds": args.n_folds,
            "_kfold_seed": args.kfold_seed,
            "_input_dim": audio_transform.n_features,
            "_n_classes": len(class_map),
            "_holdout_size": args.holdout_size,
            "p_mixup": args.p_mixup,
            "p_aug": args.p_aug,
            "max_audio_length": args.max_audio_length,
            "noisy": args.noisy_train_df is not None,
            "_train_df": args.train_df,
            "_train_data_dir": args.train_data_dir,
            "_noisy_train_df": args.noisy_train_df,
            "_noisy_train_data_dir": args.noisy_train_data_dir,
            "_share_noisy": args.share_noisy
        },
        "train": {
            "accumulation_steps": args.accumulation_steps,
            "batch_size": args.batch_size,
            "learning_rate": args.lr,
            "scheduler": args.scheduler,
            "optimizer": args.optimizer,
            "epochs": args.epochs,
            "_save_every": args.save_every,
            "weight_decay": args.weight_decay,
            "switch_off_augmentations_on": args.switch_off_augmentations_on
        },
        "label": args.label
    },
        implicit_resuming=args.resume) as experiment: