Example #1
0
def simple_model(simple_sine_data):
    """Build a small embedding model on the sine fixture and train it briefly.

    Returns a tuple ``(trained_model, untrained_model, item, tensor_data)``
    so callers can compare the trained network against a fresh copy.
    """
    item, tensor_data = simple_sine_data

    n_freq = 129  # number of frequencies in STFT
    emb_dim = 20  # how many sources to estimate
    emb_activation = ['sigmoid', 'unit_norm']  # activation function for embedding
    n_channels = 1  # number of audio channels

    config = {
        'modules': {
            'mix_magnitude': {},
            'log_spectrogram': {
                'class': 'AmplitudeToDB'
            },
            'normalization': {
                'class': 'BatchNorm',
            },
            'embedding': {
                'class': 'Embedding',
                'args': {
                    'num_features': n_freq,
                    'hidden_size': n_freq,
                    'embedding_size': emb_dim,
                    'activation': emb_activation,
                    'num_audio_channels': n_channels,
                    # embed the frequency dimension (2) for all audio channels (3)
                    'dim_to_embed': [2, 3],
                },
            },
        },
        'connections': [
            ['log_spectrogram', ['mix_magnitude', ]],
            ['normalization', ['log_spectrogram', ]],
            ['embedding', ['normalization', ]],
        ],
        'output': ['embedding'],
    }

    model = ml.SeparationModel(config).to(DEVICE)
    untrained = ml.SeparationModel(config).to(DEVICE)

    loss_dictionary = {
        'EmbeddingLoss': {
            'class': 'WhitenedKMeansLoss'
        }
    }
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    closure = ml.train.closures.TrainClosure(loss_dictionary, optimizer, model)

    # Take a handful of optimization steps so `model` diverges from `untrained`.
    for _ in range(10):
        closure(None, tensor_data)

    return model, untrained, item, tensor_data
Example #2
0
def test_ml_alias():
    """An Alias module must pass its input tensor through unchanged."""
    config = {
        'modules': {
            'split': {
                'class': 'Split',
                'args': {
                    'split_sizes': (3, 7),
                    'dim': -1
                }
            },
            'split_zero': {
                'class': 'Alias',
            },
        },
        'connections': [
            ('split', ('data', )),
            ('split_zero', ('split:0', )),
        ],
        'output': ['split:0', 'split_zero'],
    }

    model = ml.SeparationModel(config)
    output = model({'data': torch.randn(100, 10)})

    assert 'split_zero' in output
    # the aliased output must be identical to the tensor it aliases
    assert torch.allclose(output['split:0'], output['split_zero'])
Example #3
0
def overfit_model(scaper_folder):
    """Overfit a small chimera model to a single Scaper item.

    Yields ``(path_to_best_checkpoint, processed_item)``.
    """
    nussl.utils.seed(0)

    transform = datasets.transforms.Compose([
        datasets.transforms.PhaseSensitiveSpectrumApproximation(),
        datasets.transforms.MagnitudeWeights(),
        datasets.transforms.ToSeparationModel(),
        datasets.transforms.GetExcerpt(100)
    ])
    dataset = datasets.Scaper(scaper_folder, transform=transform)
    # restrict the dataset to one item so the model can memorize it
    dataset.items = [dataset.items[5]]
    dataloader = torch.utils.data.DataLoader(dataset)

    num_features = dataset[0]['mix_magnitude'].shape[1]
    config = ml.networks.builders.build_recurrent_chimera(
        num_features, 50, 1, True, 0.3, 20, 'sigmoid', 2, 'sigmoid',
        normalization_class='InstanceNorm')
    model = ml.SeparationModel(config).to(DEVICE)

    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    loss_dictionary = {
        'DeepClusteringLoss': {
            'weight': 0.2
        },
        'PermutationInvariantLoss': {
            'args': ['L1Loss'],
            'weight': 0.8
        }
    }

    train_closure = ml.train.closures.TrainClosure(
        loss_dictionary, optimizer, model)
    trainer, _ = ml.train.create_train_and_validation_engines(
        train_closure, device=DEVICE)

    with tempfile.TemporaryDirectory() as tmpdir:
        _dir = fix_dir if fix_dir else tmpdir

        ml.train.add_stdout_handler(trainer)
        ml.train.add_validate_and_checkpoint(
            _dir, model, optimizer, dataset, trainer)

        trainer.run(dataloader, max_epochs=1, epoch_length=EPOCH_LENGTH)

        model_path = os.path.join(
            trainer.state.output_folder, 'checkpoints', 'best.model.pth')
        yield model_path, dataset.process_item(dataset.items[0])
Example #4
0
def overfit_audio_model(scaper_folder):
    """Overfit an end-to-end (waveform in/out) model to a single Scaper item.

    Yields ``(path_to_best_checkpoint, processed_item)``.
    """
    nussl.utils.seed(0)

    transform = datasets.transforms.Compose([
        datasets.transforms.GetAudio(),
        datasets.transforms.ToSeparationModel(),
        datasets.transforms.GetExcerpt(
            32000, time_dim=1, tf_keys=['mix_audio', 'source_audio'])
    ])
    dataset = datasets.Scaper(scaper_folder, transform=transform)
    # train on a single item so the model can memorize it
    dataset.items = [dataset.items[5]]
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1)

    config = ml.networks.builders.build_recurrent_end_to_end(
        256, 256, 64, 'sqrt_hann', 50, 2,
        True, 0.3, 2, 'sigmoid', num_audio_channels=1,
        mask_complex=False, rnn_type='lstm',
        mix_key='mix_audio')
    model = ml.SeparationModel(config).to(DEVICE)

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    loss_dictionary = {
        'PermutationInvariantLoss': {
            'args': ['SISDRLoss'],
            'weight': 1.0,
            'keys': {'audio': 'estimates', 'source_audio': 'targets'}
        }
    }

    train_closure = ml.train.closures.TrainClosure(
        loss_dictionary, optimizer, model)
    trainer, _ = ml.train.create_train_and_validation_engines(
        train_closure, device=DEVICE
    )

    with tempfile.TemporaryDirectory() as tmpdir:
        _dir = os.path.join(fix_dir, 'dae') if fix_dir else tmpdir

        ml.train.add_stdout_handler(trainer)
        ml.train.add_validate_and_checkpoint(
            _dir, model, optimizer, dataset, trainer)
        ml.train.add_progress_bar_handler(trainer)

        trainer.run(dataloader, max_epochs=1, epoch_length=EPOCH_LENGTH)

        model_path = os.path.join(
            trainer.state.output_folder, 'checkpoints', 'best.model.pth')
        yield model_path, dataset.process_item(dataset.items[0])
Example #5
0
# DataLoader over the validation set; batch selection is delegated to
# `val_sampler` (defined elsewhere in this script).
val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                             num_workers=NUM_WORKERS,
                                             batch_size=BATCH_SIZE,
                                             sampler=val_sampler)

# Number of frequency bins, read from the first item's magnitude spectrogram.
n_features = dataset[0]['mix_magnitude'].shape[1]
# builds a baseline model with 4 recurrent layers, 600 hidden units, bidirectional
config = ml.networks.builders.build_recurrent_mask_inference(
    n_features,
    600,
    4,
    True,
    0.3,
    2, ['sigmoid'],
    normalization_class='BatchNorm')
model = ml.SeparationModel(config).to(DEVICE)
logging.info(model)

optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# Halve the learning rate once the monitored metric plateaus for PATIENCE epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                 factor=0.5,
                                                 patience=PATIENCE)

# set up the loss function
loss_dictionary = {
    'PermutationInvariantLoss': {
        'args': ['L1Loss'],
        'weight': 1.0
    }
}
Example #6
0
def test_overfit_a(mix_source_folder):
    """Overfit a small mask-inference model on a cached dataset, checkpoint
    the best model, reload it, and plot the per-epoch loss history.

    Fix: each matplotlib figure is now closed after ``savefig`` — the
    original opened one figure per history key and never closed any,
    leaking figures across the loop.
    """
    tfms = datasets.transforms.Compose([
        datasets.transforms.PhaseSensitiveSpectrumApproximation(),
        datasets.transforms.ToSeparationModel(),
        datasets.transforms.Cache('~/.nussl/tests/cache', overwrite=True),
        datasets.transforms.GetExcerpt(400)
    ])
    dataset = datasets.MixSourceFolder(mix_source_folder, transform=tfms)

    # populate the cache up front, then serve items from it
    ml.train.cache_dataset(dataset)
    dataset.cache_populated = True

    dataloader = torch.utils.data.DataLoader(dataset,
                                             shuffle=True,
                                             batch_size=len(dataset),
                                             num_workers=2)

    # create the model, based on the first item in the dataset
    # second bit of the shape is the number of features
    n_features = dataset[0]['mix_magnitude'].shape[1]
    mi_config = ml.networks.builders.build_recurrent_mask_inference(
        n_features,
        50,
        1,
        False,
        0.0,
        2,
        'sigmoid',
    )

    model = ml.SeparationModel(mi_config)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # longer epochs on GPU, shorter on CPU to keep the test fast
    epoch_length = 100 if device == 'cuda' else 10
    model = model.to(device)
    # create optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    loss_dictionary = {'L1Loss': {'weight': 1.0}}

    train_closure = ml.train.closures.TrainClosure(loss_dictionary, optimizer,
                                                   model)
    val_closure = ml.train.closures.ValidationClosure(loss_dictionary, model)

    with tempfile.TemporaryDirectory() as tmpdir:
        _dir = fix_dir if fix_dir else tmpdir
        os.makedirs(os.path.join(_dir, 'plots'), exist_ok=True)

        trainer, validator = ml.train.create_train_and_validation_engines(
            train_closure, val_closure, device=device)

        # add handlers to engine
        ml.train.add_stdout_handler(trainer, validator)
        ml.train.add_validate_and_checkpoint(_dir,
                                             model,
                                             optimizer,
                                             dataset,
                                             trainer,
                                             val_data=dataloader,
                                             validator=validator)
        ml.train.add_tensorboard_handler(_dir, trainer)

        # run engine
        trainer.run(dataloader, max_epochs=5, epoch_length=epoch_length)

        # reload the best checkpoint (mapped to CPU) into the live model
        model_path = os.path.join(trainer.state.output_folder, 'checkpoints',
                                  'best.model.pth')
        state_dict = torch.load(model_path,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(state_dict['state_dict'])

        history = state_dict['metadata']['trainer.state.epoch_history']

        for key in history:
            fig = plt.figure(figsize=(10, 4))
            plt.title(f"epoch:{key}")
            plt.plot(np.array(history[key]).reshape(-1, ))
            plt.savefig(
                os.path.join(trainer.state.output_folder, 'plots',
                             f"epoch:{key.replace('/', ':')}.png"))
            # release the figure so figures don't accumulate across keys
            plt.close(fig)
Example #7
0
def test_create_engine(mix_source_folder):
    """Exercise engine creation, checkpointing, metadata serialization,
    and resuming a trainer from a saved state, using a dummy closure."""
    # load dataset with transforms
    tfms = datasets.transforms.Compose([
        datasets.transforms.PhaseSensitiveSpectrumApproximation(),
        datasets.transforms.ToSeparationModel(),
        datasets.transforms.Cache(os.path.join(fix_dir, 'cache'))
    ])
    dataset = datasets.MixSourceFolder(mix_source_folder, transform=tfms)

    # create the model, based on the first item in the dataset
    # second bit of the shape is the number of features
    n_features = dataset[0]['mix_magnitude'].shape[1]
    mi_config = ml.networks.builders.build_recurrent_mask_inference(
        n_features,
        50,
        2,
        True,
        0.3,
        2,
        'softmax',
    )

    model = ml.SeparationModel(mi_config)
    # create optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # dummy function for processing a batch through the model
    def train_batch(engine, data):
        # loss is -iteration, so it strictly decreases and the 'best'
        # checkpoint is refreshed on every iteration
        loss = -engine.state.iteration
        return {'loss': loss}

    # building the training and validation engines and running them
    # the validation engine runs within the training engine run
    with tempfile.TemporaryDirectory() as tmpdir:
        _dir = fix_dir if fix_dir else tmpdir
        # _dir = tmpdir
        trainer, validator = ml.train.create_train_and_validation_engines(
            train_batch, train_batch)

        # add handlers to engine
        ml.train.add_stdout_handler(trainer, validator)
        ml.train.add_validate_and_checkpoint(_dir,
                                             model,
                                             optimizer,
                                             dataset,
                                             trainer,
                                             dataset,
                                             validator,
                                             save_by_epoch=1)
        ml.train.add_tensorboard_handler(_dir, trainer, every_iteration=True)
        ml.train.add_progress_bar_handler(trainer)

        # run engine
        trainer.run(dataset, max_epochs=3)

        # latest/best model and optimizer checkpoints must all exist
        assert os.path.exists(trainer.state.output_folder)
        assert os.path.exists(
            os.path.join(trainer.state.output_folder, 'checkpoints',
                         'latest.model.pth'))
        assert os.path.exists(
            os.path.join(trainer.state.output_folder, 'checkpoints',
                         'best.model.pth'))
        assert os.path.exists(
            os.path.join(trainer.state.output_folder, 'checkpoints',
                         'latest.optimizer.pth'))
        assert os.path.exists(
            os.path.join(trainer.state.output_folder, 'checkpoints',
                         'best.optimizer.pth'))

        # save_by_epoch=1 should have produced one checkpoint per epoch
        for i in range(1, 4):
            assert os.path.exists(
                os.path.join(trainer.state.output_folder, 'checkpoints',
                             f'epoch{i}.model.pth'))

        assert len(trainer.state.epoch_history['train/loss']) == 3
        assert len(trainer.state.iter_history['loss']) == 10

        # try resuming
        model_path = os.path.join(trainer.state.output_folder, 'checkpoints',
                                  'latest.model.pth')
        optimizer_path = os.path.join(trainer.state.output_folder,
                                      'checkpoints', 'latest.optimizer.pth')

        # map_location keeps the load on CPU regardless of where it was saved
        opt_state_dict = torch.load(optimizer_path,
                                    map_location=lambda storage, loc: storage)
        state_dict = torch.load(model_path,
                                map_location=lambda storage, loc: storage)

        optimizer.load_state_dict(opt_state_dict)
        model.load_state_dict(state_dict['state_dict'])

        # make sure the cache got removed in saved transforms bc it's not a portable
        # transform
        for t in state_dict['metadata']['train_dataset'][
                'transforms'].transforms:
            assert not isinstance(t, datasets.transforms.Cache)

        new_trainer, new_validator = (
            ml.train.create_train_and_validation_engines(train_batch))

        # add handlers to engine
        ml.train.add_stdout_handler(new_trainer)
        ml.train.add_validate_and_checkpoint(trainer.state.output_folder,
                                             model, optimizer, dataset,
                                             new_trainer)
        ml.train.add_tensorboard_handler(trainer.state.output_folder,
                                         new_trainer)

        # restore the saved engine state, then continue training from it
        new_trainer.load_state_dict(
            state_dict['metadata']['trainer.state_dict'])
        assert new_trainer.state.epoch == trainer.state.epoch
        new_trainer.run(dataset, max_epochs=3)
Example #8
0
def test_trainer_data_parallel(mix_source_folder):
    """Engines and checkpoint handlers should work when the model is
    wrapped in ``torch.nn.DataParallel``."""
    transform = datasets.transforms.Compose([
        datasets.transforms.PhaseSensitiveSpectrumApproximation(),
        datasets.transforms.ToSeparationModel()
    ])
    dataset = datasets.MixSourceFolder(mix_source_folder, transform=transform)

    # model width follows the feature dimension of the first item
    num_features = dataset[0]['mix_magnitude'].shape[1]
    mi_config = ml.networks.builders.build_recurrent_mask_inference(
        num_features, 50, 2, True, 0.3, 2, 'softmax',
    )

    model = torch.nn.DataParallel(ml.SeparationModel(mi_config))
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    def train_batch(engine, data):
        # dummy closure: the engines only need a dict containing 'loss'
        return {'loss': np.random.rand()}

    # build the engines and run them; validation runs inside the training run
    with tempfile.TemporaryDirectory() as tmpdir:
        _dir = fix_dir if fix_dir else tmpdir
        trainer, validator = ml.train.create_train_and_validation_engines(
            train_batch, train_batch)

        ml.train.add_stdout_handler(trainer, validator)
        ml.train.add_validate_and_checkpoint(_dir, model, optimizer, dataset,
                                             trainer, dataset, validator)
        ml.train.add_tensorboard_handler(_dir, trainer)

        trainer.run(dataset, max_epochs=3)

        assert os.path.exists(trainer.state.output_folder)
        checkpoints = os.path.join(trainer.state.output_folder, 'checkpoints')
        for fname in ('latest.model.pth', 'best.model.pth',
                      'latest.optimizer.pth', 'best.optimizer.pth'):
            assert os.path.exists(os.path.join(checkpoints, fname))

        assert len(trainer.state.epoch_history['train/loss']) == 3
        assert len(trainer.state.iter_history['loss']) == 10
Example #9
0
def test_gradients(mix_source_folder):
    """For several model architectures, verify that a single batched
    forward/backward pass produces the same outputs and (mean) gradients
    as accumulating per-item passes — i.e. items in a minibatch do not
    leak information into each other through the model or the loss.
    Gradient-flow plots are saved under tests/local/ for inspection."""
    os.makedirs('tests/local/', exist_ok=True)

    utils.seed(0)

    tfms = datasets.transforms.Compose([
        datasets.transforms.GetAudio(),
        datasets.transforms.PhaseSensitiveSpectrumApproximation(),
        datasets.transforms.MagnitudeWeights(),
        datasets.transforms.ToSeparationModel(),
        datasets.transforms.GetExcerpt(50),
        datasets.transforms.GetExcerpt(3136,
                                       time_dim=1,
                                       tf_keys=['mix_audio', 'source_audio'])
    ])
    dataset = datasets.MixSourceFolder(mix_source_folder, transform=tfms)

    # create the model, based on the first item in the dataset
    # second bit of the shape is the number of features
    n_features = dataset[0]['mix_magnitude'].shape[1]

    # make some configs
    names = [
        'dpcl', 'mask_inference_l1', 'mask_inference_mse_loss', 'chimera',
        'open_unmix', 'end_to_end', 'dual_path'
    ]
    # these configs use batch norm, whose statistics depend on batch size,
    # so exact batched-vs-per-item agreement is not expected for them
    config_has_batch_norm = ['open_unmix', 'dual_path']
    configs = [
        ml.networks.builders.build_recurrent_dpcl(
            n_features,
            50,
            1,
            True,
            0.0,
            20, ['sigmoid'],
            normalization_class='InstanceNorm'),
        ml.networks.builders.build_recurrent_mask_inference(
            n_features,
            50,
            1,
            True,
            0.0,
            2, ['softmax'],
            normalization_class='InstanceNorm'),
        ml.networks.builders.build_recurrent_mask_inference(
            n_features,
            50,
            1,
            True,
            0.0,
            2, ['softmax'],
            normalization_class='InstanceNorm'),
        ml.networks.builders.build_recurrent_chimera(
            n_features,
            50,
            1,
            True,
            0.0,
            20, ['sigmoid'],
            2, ['softmax'],
            normalization_class='InstanceNorm'),
        ml.networks.builders.build_open_unmix_like(
            n_features,
            50,
            1,
            True,
            .4,
            2,
            1,
            add_embedding=True,
            embedding_size=20,
            embedding_activation=['sigmoid', 'unit_norm'],
        ),
        ml.networks.builders.build_recurrent_end_to_end(
            256,
            256,
            64,
            'sqrt_hann',
            50,
            2,
            True,
            0.0,
            2,
            'sigmoid',
            num_audio_channels=1,
            mask_complex=False,
            rnn_type='lstm',
            mix_key='mix_audio',
            normalization_class='InstanceNorm'),
        ml.networks.builders.build_dual_path_recurrent_end_to_end(
            64,
            16,
            8,
            60,
            30,
            50,
            2,
            True,
            25,
            2,
            'sigmoid',
        )
    ]

    # one loss dictionary per config above, in the same order as `names`
    loss_dictionaries = [
        {
            'DeepClusteringLoss': {
                'weight': 1.0
            }
        },
        {
            'L1Loss': {
                'weight': 1.0
            }
        },
        {
            'MSELoss': {
                'weight': 1.0
            }
        },
        {
            'DeepClusteringLoss': {
                'weight': 0.2
            },
            'PermutationInvariantLoss': {
                'args': ['L1Loss'],
                'weight': 0.8
            }
        },
        {
            'DeepClusteringLoss': {
                'weight': 0.2
            },
            'PermutationInvariantLoss': {
                'args': ['L1Loss'],
                'weight': 0.8
            }
        },
        {
            'SISDRLoss': {
                'weight': 1.0,
                'keys': {
                    'audio': 'estimates',
                    'source_audio': 'references'
                }
            }
        },
        {
            'SISDRLoss': {
                'weight': 1.0,
                'keys': {
                    'audio': 'estimates',
                    'source_audio': 'references'
                }
            }
        },
    ]

    def append_keys_to_model(name, model):
        # the end-to-end models need extra output keys exposed so the
        # per-key output comparison below can see them
        if name == 'end_to_end':
            model.output_keys.extend(
                ['audio', 'recurrent_stack', 'mask', 'estimates'])
        elif name == 'dual_path':
            model.output_keys.extend(
                ['audio', 'mixture_weights', 'dual_path', 'mask', 'estimates'])

    for name, config, loss_dictionary in zip(names, configs,
                                             loss_dictionaries):
        loss_closure = ml.train.closures.Closure(loss_dictionary)

        # reseed before each model build so both copies get identical weights
        utils.seed(0, set_cudnn=True)
        model_grad = ml.SeparationModel(config, verbose=True).to(DEVICE)
        append_keys_to_model(name, model_grad)

        # concatenate all dataset items into one batch along dim 0
        all_data = {}
        for data in dataset:
            for key in data:
                if torch.is_tensor(data[key]):
                    data[key] = data[key].float().unsqueeze(0).contiguous().to(
                        DEVICE)
                    if key not in all_data:
                        all_data[key] = data[key]
                    else:
                        all_data[key] = torch.cat([all_data[key], data[key]],
                                                  dim=0)

        # do a forward pass in batched mode
        output_grad = model_grad(all_data)
        _loss = loss_closure.compute_loss(output_grad, all_data)
        # do a backward pass in batched mode
        _loss['loss'].backward()

        plt.figure(figsize=(10, 10))
        utils.visualize_gradient_flow(model_grad.named_parameters())
        plt.tight_layout()
        plt.savefig(f'tests/local/{name}:batch_gradient.png')

        utils.seed(0, set_cudnn=True)
        model_acc = ml.SeparationModel(config).to(DEVICE)
        append_keys_to_model(name, model_acc)

        for i, data in enumerate(dataset):
            for key in data:
                if torch.is_tensor(data[key]):
                    data[key] = data[key].float().unsqueeze(0).contiguous().to(
                        DEVICE)
            # do a forward pass on each item individually
            output_acc = model_acc(data)
            for key in output_acc:
                # make sure the forward pass in batch and forward pass individually match
                # if they don't, then items in a minibatch are talking to each other
                # somehow...
                _data_a = output_acc[key]
                _data_b = output_grad[key][i].unsqueeze(0)
                if name not in config_has_batch_norm:
                    assert torch.allclose(_data_a, _data_b, atol=1e-3)

            _loss = loss_closure.compute_loss(output_acc, data)
            # do a backward pass on each item individually
            # scale by dataset size so accumulated grads match the batch mean
            _loss['loss'] = _loss['loss'] / len(dataset)
            _loss['loss'].backward()

        plt.figure(figsize=(10, 10))
        utils.visualize_gradient_flow(model_acc.named_parameters())
        plt.tight_layout()
        plt.savefig(f'tests/local/{name}:accumulated_gradient.png')

        # make sure the gradients match between batched and accumulated gradients
        # if they don't, then the items in a batch are talking to each other in the loss
        for param1, param2 in zip(model_grad.parameters(),
                                  model_acc.parameters()):
            assert torch.allclose(param1, param2)
            if name not in config_has_batch_norm:
                if param1.requires_grad and param2.requires_grad:
                    assert torch.allclose(param1.grad.mean(),
                                          param2.grad.mean(),
                                          atol=1e-3)