Example #1
def test_without_comet(path_to_yml):
    api_key = os.environ.pop('COMET_API_KEY', None)
    config, exp, path_to_yml = load_experiment(path_to_yml)
    save_experiment(config, exp)
    if api_key:
        os.environ['COMET_API_KEY'] = api_key

    # Test that it doesn't make a new experiment the second time around.
    load_experiment(path_to_yml)
Example #2
def sweep_experiment(path_to_yml_file):
    base_experiment = load_yaml(path_to_yml_file)
    sweep = base_experiment.pop('sweep', [])
    experiments = []
    cache_experiments = []

    for k, _sweep in enumerate(sweep):
        lists = []
        keys = []
        for key in _sweep:
            if isinstance(_sweep[key], list):
                keys.append(key)
                lists.append(_sweep[key])

        _combos = list(itertools.product(*lists))
        combos = []
        for c in _combos:
            combos.append({keys[i]: c[i] for i in range(len(c))})

        if _sweep['populate_cache']:
            cache_config, cache_exp, cache_path_to_yml_file = load_experiment(
                path_to_yml_file)
            cache_config.pop('sweep')
            logging_str = (
                f"Creating cache population experiment 1/{len(combos)} "
                f"for sweep {k+1}/{len(sweep)}")
            this_experiment = update_config_with_sweep(cache_config, _sweep,
                                                       combos[0], logging_str)
            this_experiment['train_config']['num_epochs'] = 0
            this_experiment['dataset_config']['overwrite_cache'] = True

            if 'num_cache_workers' in _sweep:
                this_experiment['train_config']['num_workers'] = (
                    _sweep['num_cache_workers'])
            cache_experiments.append(
                save_experiment(this_experiment, cache_exp))

        for j, c in enumerate(combos):
            config, exp, _path_to_yml_file = load_experiment(path_to_yml_file)
            config.pop('sweep')

            logging_str = (f"\n\tCreating experiment {j+1}/{len(combos)} "
                           f"for sweep {k+1}/{len(sweep)}")
            this_experiment = update_config_with_sweep(config, _sweep, c,
                                                       logging_str)
            experiments.append(save_experiment(this_experiment, exp))

    return experiments, cache_experiments
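
The sweep section consumed above is a list of dictionaries; every list-valued entry is expanded with itertools.product into one experiment per combination. A minimal sketch of a parsed sweep, with hypothetical hyperparameter keys (only populate_cache and num_cache_workers are read explicitly by the function):

# Sketch of a parsed `sweep` section. The hyperparameter key names are
# assumptions; only 'populate_cache' and 'num_cache_workers' are read
# directly by sweep_experiment above.
sweep = [
    {
        'populate_cache': True,
        'num_cache_workers': 8,
        # hypothetical list-valued keys to sweep over
        'train_config/learning_rate': [1e-3, 1e-4],
        'model_config/num_layers': [2, 4],
    }
]
# 2 learning rates x 2 layer counts -> 4 generated experiments, plus one
# cache-population experiment whose num_epochs is forced to 0.
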
Example #3
def main(path_to_yml_file):
    config, exp, path_to_yml_file = load_experiment(path_to_yml_file)

    paths = glob.glob(os.path.join(config['info']['output_folder'], 'results',
                                   '**.yml'),
                      recursive=True)

    results = []

    for _path in paths:
        data = load_yaml(_path, [])
        for _data in data:
            keys = sorted(list(_data.keys()))
            keys.remove('permutation')
            for key in keys:
                flattened = {
                    'experiment_key': config['info']['experiment_key'],
                    'notes': config['info']['notes'],
                    'file_name': _path,
                    'dataset': config['datasets']['test']['folder'],
                    'source_name': key.split('/')[-1],
                }

                flattened.update(flatten(config))

                for metric in _data[key]:
                    flattened[metric] = np.mean(_data[key][metric])

                results.append(flattened)

    results = pd.DataFrame(results)
    return results, config, exp
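
The loop above assumes each results .yml file parses to a list of dicts: one 'permutation' entry plus one entry per estimated source, mapping metric names to per-window values that get averaged with np.mean. A sketch with assumed source and metric names:

# Hypothetical parsed contents of one results .yml file. The source paths and
# metric names (SDR, SIR) are assumptions; the structure matches what the
# loop above expects.
data = [
    {
        'permutation': [0, 1],
        'estimates/vocals': {'SDR': [5.1, 4.8], 'SIR': [9.2, 8.7]},
        'estimates/accompaniment': {'SDR': [11.3, 10.9], 'SIR': [14.0, 13.2]},
    }
]
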
Example #4
def main(path_to_yml_file):
    config, exp, path_to_yml_file = load_experiment(path_to_yml_file)

    train_class = config['train_config'].pop('class')
    TrainerClass = getattr(train, train_class)

    if 'train' not in config['datasets']:
        logging.error('Train dataset must be specified!')

    _datasets = {}

    for key in ['train', 'val']:
        if key in config['datasets']:
            _datasets[key] = loaders.load_dataset(
                config['datasets'][key]['class'],
                config['datasets'][key]['folder'], config['dataset_config'])
        else:
            _datasets[key] = None

    _model = loaders.load_model(config['model_config'])
    _trainer = TrainerClass(config['info']['output_folder'],
                            _datasets['train'],
                            _model,
                            config['train_config'],
                            validation_data=_datasets['val'],
                            use_tensorboard=config['train_config'].pop(
                                'use_tensorboard', False),
                            experiment=exp)
    _trainer.fit()
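
This entry point only touches a handful of configuration sections. A sketch of the expected key structure, with illustrative values (the class names and paths are assumptions, not taken from the repository):

# Assumed shape of the configuration consumed by the training entry point.
config = {
    'info': {'output_folder': 'experiments/my_run/'},
    'train_config': {'class': 'Trainer', 'num_epochs': 100,
                     'num_workers': 4, 'use_tensorboard': True},
    'dataset_config': {},   # handed to loaders.load_dataset as-is
    'model_config': {},     # handed to loaders.load_model as-is
    'datasets': {
        'train': {'class': 'MixSourceFolder', 'folder': 'data/train/'},
        'val':   {'class': 'MixSourceFolder', 'folder': 'data/val/'},
    },
}
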
Example #5
def main(path_to_yml_file):
    config, exp, path_to_yml_file = load_experiment(path_to_yml_file)

    if 'test' not in config['datasets']:
        logging.error('Test dataset must be specified!')

    test_classes = config['test_config']['testers']
    testers = []
    for key in test_classes:
        TestClass = getattr(test, key)
        args = test_classes[key]
        testers.append((TestClass, args))

    _datasets = {}

    for key in ['test']:
        if key in config['datasets']:
            _datasets[key] = loaders.load_dataset(
                config['datasets'][key]['class'],
                config['datasets'][key]['folder'], config['dataset_config'])
        else:
            _datasets[key] = None

    _tester = test.EvaluationRunner(
        testers,
        config['algorithm_config'],
        _datasets['test'],
        config['info']['output_folder'],
        max_workers=config['test_config']['num_workers'],
        use_blocking_executor=config['test_config']['use_blocking_executor'])
    _tester.run()
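
Each entry under test_config['testers'] names a class in the test module together with its arguments, and every (class, args) pair is handed to test.EvaluationRunner. A sketch of the relevant configuration, with assumed class names:

# Assumed shape of the test-related configuration. The tester and algorithm
# class names are placeholders, not taken from the repository.
config_fragment = {
    'test_config': {
        'testers': {'SomeTester': {'compute_permutation': True}},
        'num_workers': 4,
        'use_blocking_executor': True,
    },
    'algorithm_config': {'class': 'SomeAlgorithm', 'args': {}},
}
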
Example #6
def evaluate(path_to_yml_file, eval_keys=['test']):
    """
    Evaluates an experiment across all of the data for each key in eval_keys. The key
    must correspond to a dataset included in the experiment configuration. This uses
    :py:class:`src.test.EvaluationRunner` to evaluate the performance of the model on
    each dataset.
    
    Args:
        path_to_yml_file (str): Path to the yml file that defines the experiment. The
            corresponding test configuration for the experiment will be used to evaluate
            the experiment across all of the audio files in the test dataset.
        eval_keys (list): All of the keys to be used to evaluate the experiment. Will
            run the evaluation on each eval_key in sequence. Defaults to ['test'].
    """
    config, exp, path_to_yml_file = load_experiment(path_to_yml_file)

    if 'seed' in config['info']:
        seed(config['info']['seed'])

    if 'test' not in config['datasets']:
        logging.error('Test dataset must be specified!')

    test_classes = config['test_config']['testers']
    testers = []
    for key in test_classes:
        TestClass = getattr(test, key)
        args = test_classes[key]
        testers.append((TestClass, args))

    _datasets = {}

    for key in eval_keys:
        if key in config['datasets']:
            _datasets[key] = loaders.load_dataset(
                config['datasets'][key]['class'],
                config['datasets'][key]['folder'], config['dataset_config'])
        else:
            _datasets[key] = None

    for key in eval_keys:
        _tester = test.EvaluationRunner(
            testers,
            config['algorithm_config'],
            _datasets[key],
            config['info']['output_folder'],
            max_workers=config['test_config']['num_workers'],
            use_blocking_executor=(
                config['test_config']['use_blocking_executor']),
        )
        _tester.run()
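
A minimal usage sketch, assuming the experiment configuration defines both a 'val' and a 'test' dataset (the path is hypothetical):

# Each key in eval_keys is loaded and evaluated in sequence.
evaluate('experiments/my_run/config.yml', eval_keys=['val', 'test'])
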
Example #7
def train_experiment(path_to_yml_file, **kwargs):
    """
    Starts a training job for the experiment defined at the path specified and fits
    the model accordingly. Any extra keyword arguments are merged into the
    experiment's train_config, which is the options dictionary passed to the
    Trainer class.

    Args:
        path_to_yml_file (str): Path to the configuration for the experiment that
            is getting trained. The script will take the configuration and launch a
            training job for the experiment.
    """
    config, exp, path_to_yml_file = load_experiment(path_to_yml_file)
    config['train_config'].update(kwargs)

    if 'seed' in config['info']:
        seed(config['info']['seed'])

    train_class = config['train_config'].pop('class')
    TrainerClass = getattr(train, train_class)

    if 'train' not in config['datasets']:
        logging.error('Train dataset must be specified!')

    _datasets = {}

    for key in ['train', 'val']:
        if key in config['datasets']:
            _datasets[key] = loaders.load_dataset(
                config['datasets'][key]['class'],
                config['datasets'][key]['folder'], config['dataset_config'])
        else:
            _datasets[key] = None

    _model = loaders.load_model(config['model_config'])
    _trainer = TrainerClass(config['info']['output_folder'],
                            _datasets['train'],
                            _model,
                            config['train_config'],
                            validation_data=_datasets['val'],
                            use_tensorboard=config['train_config'].pop(
                                'use_tensorboard', False),
                            experiment=exp)
    _trainer.fit()
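
Because keyword arguments are merged into train_config before training starts, individual settings can be overridden at call time. A usage sketch (the path and the overridden keys are assumptions):

# Override a couple of train_config entries for this run only.
train_experiment('experiments/my_run/config.yml',
                 num_epochs=50,
                 num_workers=8)
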
Example #8
def test_with_comet(path_to_yml):
    os.environ['ARTIFACTS_DIRECTORY'] = 'tests/out/_test_experiment_utils/'
    config, exp, path_to_yml_file = load_experiment(path_to_yml)
    save_experiment(config, exp)
Example #9
def analyze(path_to_yml_file, use_gsheet=False, upload_source_metrics=False):
    """
    Analyzes the metrics for all the files that were evaluated in the experiment.
    
    Args:
        path_to_yml_file (str): Path to the yml file that defines the experiment. The
            corresponding results folder for the experiment is what will be analyzed and put
            into a Pandas dataframe.
        use_gsheet (bool, optional): Whether or not to upload to the Google Sheet.
            Defaults to False.
        upload_source_metrics (bool): Uploads metrics for each source if True. Defaults to False.
            Can run into the Google Sheets API rate limit: if there are too many
            sources, the limit will be hit and the script will break.
    
    Returns:
        tuple: 3-element tuple containing

            - results (:class:`pandas.DataFrame`): DataFrame containing all of the results 
              for every file evaluated in the experiment. The DataFrame also has every
              key in the experiment configuration in flattened format.
              
              For example, model_config_recurrent_stack_args_embedding_size is a column in the DataFrame.

            - config (*dict*): A dictionary containing the configuration of the experiment.

            - exp (:class:`comet_ml.Experiment`): An instantiated experiment if comet.ml is needed,
              otherwise it is None.
    """
    config, exp, path_to_yml_file = load_experiment(path_to_yml_file)

    paths = glob.glob(os.path.join(config['info']['output_folder'], 'results',
                                   '**.yml'),
                      recursive=True)

    results = []

    for _path in paths:
        data = load_yaml(_path, [])
        for _data in data:
            keys = sorted(list(_data.keys()))
            keys.remove('permutation')
            for key in keys:
                flattened = {
                    'experiment_key': config['info']['experiment_key'],
                    'notes': config['info']['notes'],
                    'file_name': _path,
                    'dataset': config['datasets']['test']['folder'],
                    'source_name': key.split('/')[-1],
                }

                flattened.update(flatten(config))

                for metric in _data[key]:
                    flattened[metric] = np.mean(_data[key][metric])

                results.append(flattened)

    results = pd.DataFrame(results)

    logging.info(results.mean())
    logging.info(config['info']['experiment_key'])

    if use_gsheet:
        upload_to_gsheet(results, config, exp, upload_source_metrics)

    return results, config, exp
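
A sketch of downstream use of the returned DataFrame. The metric column name 'SDR' is an assumption; the flattened configuration column comes from the docstring example above:

results, config, exp = analyze('experiments/my_run/config.yml', use_gsheet=False)
# Average an assumed metric column per separated source.
print(results.groupby('source_name')['SDR'].mean())
# Every flattened configuration key is available as a column.
print(results['model_config_recurrent_stack_args_embedding_size'].unique())
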
Example #10
def visualize(path_to_yml_file, file_names=[], eval_keys=['test']):
    """
    Takes in a path to a yml file containing an experiment configuration and runs
    the algorithm specified in the experiment on a random file from the test
    dataset specified in the experiment. If the algorithm has plotting available, its
    plot method is used to visualize the algorithm's output and save it to a figure.
    The associated audio is also saved.

    Args:
        path_to_yml_file (str): Path to the yml file that defines the experiment. The
            visualization will be placed into a "viz" folder in the same directory
            as the yml file.
        file_names (list, optional): Specific audio files to visualize. If empty, a
            random file from the dataset is chosen instead. Defaults to [].
        eval_keys (list): All of the dataset keys to be used to visualize the experiment.
            Will visualize for each eval_key in sequence. Defaults to ['test'].
    """
    config, exp, path_to_yml_file = load_experiment(path_to_yml_file)
    algorithm_config = config['algorithm_config']
    AlgorithmClass = getattr(algorithms, algorithm_config['class'])
    args = inspect.getfullargspec(AlgorithmClass)[0]
    if 'extra_modules' in args:
        algorithm_config['args']['extra_modules'] = model.extras
    if 'use_cuda' in args:
        algorithm_config['args']['use_cuda'] = torch.cuda.is_available()
    _datasets = {}

    for key in eval_keys:
        if key in config['datasets']:
            _datasets[key] = loaders.load_dataset(
                config['datasets'][key]['class'],
                config['datasets'][key]['folder'], config['dataset_config'])

    for key in _datasets:
        i = np.random.randint(len(_datasets[key]))
        file_names = file_names if file_names else [_datasets[key].files[i]]

        for file_name in file_names:
            try:
                logging.info(f'Visualizing {file_name}')
                folder = os.path.splitext(os.path.basename(file_name))[0]
                output_folder = os.path.join(config['info']['output_folder'],
                                             'viz', key, folder)
                os.makedirs(output_folder, exist_ok=True)

                mixture = _datasets[key].load_audio_files(file_name)[0]

                logging.info(mixture)

                _algorithm = AlgorithmClass(mixture,
                                            **algorithm_config['args'])
                _algorithm.run()
                estimates = _algorithm.make_audio_signals()

                try:
                    plt.figure(figsize=(20, 10))
                    _algorithm.plot()
                    plt.savefig(os.path.join(output_folder, 'viz.png'),
                                bbox_inches='tight',
                                dpi=100)
                except Exception:
                    logging.error('Unable to plot.')

                mixture.write_audio_to_file(
                    os.path.join(output_folder, f'mixture.wav'))
                for i, e in enumerate(estimates):
                    e.write_audio_to_file(
                        os.path.join(output_folder, f'source{i}.wav'))
            except Exception:
                logging.error('File name not found.')
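
For each visualized file, the code above writes everything under the experiment's output folder with this layout:

<output_folder>/viz/<eval_key>/<file_basename>/
    viz.png                     # only if the algorithm's plot() call succeeds
    mixture.wav
    source0.wav, source1.wav, ...
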
Example #11
def create_experiments(path_to_yml_file):
    """
    The main logic of this script. Takes the path to the base experiment file and
    loads the configuration. It then goes through the sweep dictionary kept in that
    base experiment file. The sweep dictionary tells how to update the configuration.
    The Cartesian product of all the possible settings specified by sweep is taken.
    Each experiment is updated accordingly. The length of the Cartesian product of
    the sweep is the number of experiments that get created. 
    
    Args:
        path_to_yml_file (str): Path to base experiment file.
    
    Returns:
        tuple: 2-element tuple containing

            - experiments (*list*):  List of paths to .yml files that define the generated
                experiments.
            - cache_experiments (*list*):  List of paths to .yml files that define the 
                experiments used for creating caches if any.
    """
    base_experiment = load_yaml(path_to_yml_file)
    sweep = base_experiment.pop('sweep', [])
    experiments = []
    cache_experiments = []

    for k, _sweep in enumerate(sweep):
        lists = []
        keys = []
        for key in _sweep:
            if isinstance(_sweep[key], list):
                keys.append(key)
                lists.append(_sweep[key])

        _combos = list(itertools.product(*lists))
        combos = []
        for c in _combos:
            combos.append({keys[i]: c[i] for i in range(len(c))})

        if _sweep['populate_cache']:
            # Create a single experiment for creating dataset caches.
            cache_config, cache_exp, cache_path_to_yml_file = load_experiment(
                path_to_yml_file)
            cache_config.pop('sweep')
            this_experiment = update_config_with_sweep(cache_config, _sweep,
                                                       combos[0])
            this_experiment['train_config']['num_epochs'] = 0
            this_experiment['dataset_config']['overwrite_cache'] = True

            if 'num_cache_workers' in _sweep:
                this_experiment['train_config']['num_workers'] = (
                    _sweep['num_cache_workers'])
            cache_experiments.append(
                save_experiment(this_experiment, cache_exp))

        for j, c in enumerate(combos):
            # Sweep across all the possible combinations and update.
            config, exp, _path_to_yml_file = load_experiment(path_to_yml_file)
            config.pop('sweep')

            this_experiment = update_config_with_sweep(config, _sweep, c)
            experiments.append(save_experiment(this_experiment, exp))

    return experiments, cache_experiments
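
A usage sketch for consuming the returned lists: run the cache-population experiments first (they only rebuild the dataset cache, since num_epochs is forced to 0), then train each generated experiment. The base path is hypothetical, and train_experiment is the training entry point shown earlier:

experiments, cache_experiments = create_experiments('experiments/base_sweep.yml')
for path in cache_experiments + experiments:
    train_experiment(path)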