Example #1
def test_filter_without_keys():
    examples = get_examples()
    ds = lazy_dataset.from_list(list(examples.values()))

    ds_slice = ds[:2]
    example_ids = [e['example_id'] for e in ds_slice]
    assert example_ids == ['example_id_1', 'example_id_2']

    ds_filter = ds.filter(lambda e: int(e['example_id'].split('_')[-1]) < 2)

    example_ids = [e['example_id'] for e in ds_filter]
    assert example_ids == ['example_id_1']
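
The test relies on a get_examples() helper defined in the test module; a minimal sketch of what it is assumed to return (hypothetical, since these filter tests only rely on the 'example_id' field) could look like:

def get_examples():
    # Hypothetical fixture: maps example ids to example dicts.
    return {
        f'example_id_{i}': {'example_id': f'example_id_{i}'}
        for i in range(1, 5)
    }
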
Example #2
def test_filter_with_reshuffle():
    examples = get_examples()
    ds = lazy_dataset.from_list(list(examples.values()))

    ds_slice = ds[:2]
    example_ids = [e['example_id'] for e in ds_slice]
    assert example_ids == ['example_id_1', 'example_id_2']

    def filter_fn(e):
        return int(e['example_id'].split('_')[-1]) < 2

    ds_filter = ds.shuffle().filter(filter_fn)

    example_ids = [e['example_id'] for e in ds_filter]
    assert example_ids == ['example_id_1']

    with pytest.raises(RuntimeError):
        ds_filter = ds.shuffle(reshuffle=True).filter(filter_fn, lazy=False)
Example #3
def main(
    _run,
    out,
    mask_estimator,
    Observation,
    beamformer,
    postfilter,
    normalize_audio=True,
):
    if dlp_mpi.IS_MASTER:
        from sacred.commands import print_config
        print_config(_run)

    ds = get_dataset()

    data = []

    out = Path(out)

    for ex in dlp_mpi.split_managed(ds.sort(), allow_single_worker=True):

        if mask_estimator is None:
            mask = None
        elif mask_estimator == 'cacgmm':
            mask = get_mask_from_cacgmm(ex)
        else:
            mask = get_mask_from_oracle(ex, mask_estimator)

        metric, score = get_scores(
            ex,
            mask,
            Observation=Observation,
            beamformer=beamformer,
            postfilter=postfilter,
        )

        est0, est1 = metric.speech_prediction_selection
        dump_audio(est0,
                   out / ex['dataset'] / f"{ex['example_id']}_0.wav",
                   normalize=normalize_audio)
        dump_audio(est1,
                   out / ex['dataset'] / f"{ex['example_id']}_1.wav",
                   normalize=normalize_audio)

        data.append(
            dict(
                example_id=ex['example_id'],
                value=score,
                dataset=ex['dataset'],
            ))

        # print(score, repr(score))

    data = dlp_mpi.gather(data)

    if dlp_mpi.IS_MASTER:
        data = [entry for worker_data in data for entry in worker_data]

        data = {  # itertools.groupby would require sorted input
            dataset: list(subset)
            for dataset, subset in from_list(data).groupby(
                lambda ex: ex['dataset']).items()
        }

        for dataset, sub_data in data.items():
            score_file = out / f'{dataset}_scores.json'
            print(f'Write details to {score_file}.')
            dump_json(sub_data, score_file)

        for dataset, sub_data in data.items():
            summary = {}
            for k in sub_data[0]['value'].keys():
                m = np.mean([d['value'][k] for d in sub_data])
                print(dataset, k, m)
                summary[k] = m
            dump_json(summary, out / f'{dataset}_summary.json')
Example #4
def test_run(
    trainer: 'pt.Trainer',
    train_iterator,
    validation_iterator,
    device=0 if torch.cuda.is_available() else 'cpu',
    test_with_known_iterator_length=False,
    temporary_directory=None,
    *,
    deterministic_atol=1e-5,
    deterministic_rtol=1e-5,
):
    """

    Run a test on the trainer instance (i.e. model test).

    Does not work with layers that update their state, such as BatchNorm.

    Tests:
     - forward (train and validate)
     - deterministic output in eval
     - simple review dict test

    Args:
        trainer:
        train_iterator:
        validation_iterator:
        device:
        test_with_known_iterator_length:
        temporary_directory:
            Specify a path as an alternative to tempfile.TemporaryDirectory().
            Note: This directory will not be deleted and is expected to be
            empty.
            Use case: Fast debugging of the reports to tensorboard.
                      After the test run you can start tensorboard and
                      inspect the reported values.
        deterministic_atol:
            Absolute tolerance for the determinism checks of the outputs and
            reviews of the two training runs.
        deterministic_rtol:
            Relative tolerance for the determinism checks of the outputs and
            reviews of the two training runs.

    Returns:

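    Example:
        A minimal usage sketch (assumes trainer is a configured pt.Trainer
        and both iterators yield example dicts accepted by the model; the
        names here are illustrative):

            test_run(trainer, train_iterator, validation_iterator,
                     device='cpu')
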
    """
    print('Start test run')

    @contextlib.contextmanager
    def backup_state_dict(trainer: pt.Trainer):
        state_dict = copy.deepcopy(trainer.state_dict())
        try:
            yield
        finally:
            # pass
            trainer.load_state_dict(state_dict)

    with contextlib.ExitStack() as exit_stack:
        if temporary_directory is None:
            storage_dir = Path(
                exit_stack.enter_context(
                    tempfile.TemporaryDirectory())).expanduser().resolve()
        else:
            storage_dir = temporary_directory.expanduser().resolve()
            assert list(storage_dir.glob('*')) == [], list(
                storage_dir.glob('*'))
        exit_stack.enter_context(
            mock.patch.object(
                trainer,
                'iteration',
                new=-1,
            ))
        exit_stack.enter_context(mock.patch.object(
            trainer,
            'epoch',
            new=-1,
        ))

        class SpyMagicMock(mock.MagicMock):
            def __init__(self, *args, **kw):
                super().__init__(*args, **kw)
                self.spyed_return_values = []

            def __call__(self, *args, **kw):
                ret = super().__call__(*args, **kw)
                self.spyed_return_values += [ret]
                return ret

        # Spy on trainer.step, optimizer.step and trainer.validate to check
        # the output values and/or the number of calls later.
        trainer_step_mock = exit_stack.enter_context(
            mock.patch.object(
                trainer,
                'step',
                wraps=trainer.step,
                new_callable=SpyMagicMock,
            ))
        optimizer_step = pb.utils.nested.nested_op(
            lambda x: (exit_stack.enter_context(
                mock.patch.object(
                    x,
                    'step',
                    wraps=x.step,
                    new_callable=SpyMagicMock,
                )) if x is not None else None), trainer.optimizer)

        validate_mock = exit_stack.enter_context(
            mock.patch.object(
                trainer,
                'validate',
                wraps=trainer.validate,
                new_callable=SpyMagicMock,
            ))

        class Iterable:
            def __init__(self, data):
                self.data = data

            def __iter__(self):
                yield from self.data

            def __len__(self):
                raise TypeError(
                    f'object of type {self.__class__.__name__} has no len()')

        virtual_minibatch_size = trainer.virtual_minibatch_size

        sub_train_iterator = list(
            itertools.islice(train_iterator, 2 * virtual_minibatch_size))
        sub_validation_iterator = list(itertools.islice(
            validation_iterator, 2))

        if test_with_known_iterator_length:
            sub_train_iterator = lazy_dataset.from_list(sub_train_iterator)
            sub_validation_iterator = lazy_dataset.from_list(
                sub_validation_iterator)
        else:
            sub_train_iterator = Iterable(sub_train_iterator)
            sub_validation_iterator = Iterable(sub_validation_iterator)

        @contextlib.contextmanager
        def ensure_unchanged_parameter(trainer):
            parameters_before = {
                name: parameter.detach().cpu().numpy().copy()
                for name, parameter in trainer.model.named_parameters()
            }

            yield

            parameters_after = {
                name: parameter.detach().cpu().numpy().copy()
                for name, parameter in trainer.model.named_parameters()
            }

            assert parameters_before.keys() == parameters_after.keys(), (
                parameters_before.keys(), parameters_after.keys())
            for k in parameters_before.keys():
                np.testing.assert_equal(
                    parameters_before[k],
                    parameters_after[k],
                )

        # ================ Train Call ===================
        with ensure_unchanged_parameter(trainer):
            hooks = [
                SummaryHook((1, 'epoch')),
                CheckpointHook((1, 'epoch')),
                BackOffValidationHook((1, 'epoch'),
                                      sub_validation_iterator,
                                      max_checkpoints=None),
                StopTrainingHook((1, 'epoch'))
            ]
            exit_stack.enter_context(
                mock.patch.object(
                    trainer,
                    'hooks',
                    new=hooks,
                ))
            with backup_state_dict(trainer):

                exit_stack.enter_context(
                    mock.patch.object(
                        trainer,
                        'storage_dir',
                        new=storage_dir,
                    ))

                trainer.train(sub_train_iterator, device=device)
            with backup_state_dict(trainer):
                storage_dir_2 = Path(
                    exit_stack.enter_context(
                        tempfile.TemporaryDirectory())).expanduser().resolve()
                exit_stack.enter_context(
                    mock.patch.object(
                        trainer,
                        'storage_dir',
                        new=storage_dir_2,
                    ))

                trainer.train(
                    sub_train_iterator,
                    device=device,
                )

        def assert_step(x):
            if x is not None:
                assert x.call_count == 4, x.call_count

        pb.utils.nested.nested_op(assert_step, optimizer_step)

        # validate is called before and after training; two trainings -> 4
        assert validate_mock.call_count == 4, validate_mock.call_count

        assert trainer_step_mock.call_count == (
            4 * virtual_minibatch_size + 8), (trainer_step_mock.call_count,
                                              virtual_minibatch_size)

        def trainer_step_mock_to_inputs_output_review(review_mock):
            # sig = inspect.signature(review_mock._mock_wraps)
            for call, (loss, inputs, output,
                       review) in zip(review_mock.call_args_list,
                                      review_mock.spyed_return_values):
                # args, kwargs = tuple(call)
                # model, example, timer, device
                # _, inputs, timer, _ = sig.bind(*args, **kwargs).arguments.values()
                yield dict(inputs=inputs,
                           output=output,
                           review=review,
                           loss=loss)

        # trainer_step_mock_to_inputs_output_review
        step_returns = trainer_step_mock_to_inputs_output_review(
            trainer_step_mock
        )
        step_returns = list(step_returns)
        step_returns_1 = step_returns[:len(step_returns) // 2]
        step_returns_2 = step_returns[len(step_returns) // 2:]

        if virtual_minibatch_size == 1:
            dt1, dt2, tr1, tr2, dt3, dt4 = step_returns_1
            dt5, dt6, tr3, tr4, dt7, dt8 = step_returns_2
        else:
            dt1, dt2 = step_returns_1[:2]
            dt3, dt4 = step_returns_1[-2:]
            dt5, dt6 = step_returns_2[:2]
            dt7, dt8 = step_returns_2[-2:]

        nested_test_assert_allclose(dt1['output'],
                                    dt5['output'],
                                    atol=deterministic_atol,
                                    rtol=deterministic_rtol)
        nested_test_assert_allclose(dt2['output'],
                                    dt6['output'],
                                    atol=deterministic_atol,
                                    rtol=deterministic_rtol)
        nested_test_assert_allclose(dt1['review'],
                                    dt5['review'],
                                    atol=deterministic_atol,
                                    rtol=deterministic_rtol)
        nested_test_assert_allclose(dt2['review'],
                                    dt6['review'],
                                    atol=deterministic_atol,
                                    rtol=deterministic_rtol)

        # Cannot test these, because dropout makes them unequal
        # nested_test_assert_allclose(dt3['output'], dt7['output'])
        # nested_test_assert_allclose(dt4['output'], dt8['output'])
        # nested_test_assert_allclose(dt3['review'], dt7['review'])
        # nested_test_assert_allclose(dt4['review'], dt8['review'])

        # Expect that the initial loss is equal for two runs
        nested_test_assert_allclose(dt1['loss'],
                                    dt5['loss'],
                                    rtol=1e-6,
                                    atol=1e-6)
        nested_test_assert_allclose(dt2['loss'],
                                    dt6['loss'],
                                    rtol=1e-6,
                                    atol=1e-6)
        try:
            with np.testing.assert_raises(AssertionError):
                # Expect that the loss changes after training.
                nested_test_assert_allclose(dt1['loss'],
                                            dt3['loss'],
                                            rtol=1e-6,
                                            atol=1e-6)
                nested_test_assert_allclose(dt2['loss'],
                                            dt4['loss'],
                                            rtol=1e-6,
                                            atol=1e-6)
                nested_test_assert_allclose(dt5['loss'],
                                            dt7['loss'],
                                            rtol=1e-6,
                                            atol=1e-6)
                nested_test_assert_allclose(dt6['loss'],
                                            dt8['loss'],
                                            rtol=1e-6,
                                            atol=1e-6)
        except AssertionError:
            raise AssertionError(
                'The loss of the model did not change between two validations.'
                '\n'
                'This is usually caused by a zero gradient or a loss that is '
                'independent of the parameters.')

        allowed_summary_keys = ({'loss', 'losses'} | set(
            pt.trainer.SummaryHook.empty_summary_dict().keys()))
        if 0 != len(set(dt1['review'].keys()) - set(allowed_summary_keys)):
            got = set(dt1['review'].keys())
            raise ValueError(f'Found keys: {got}\n'
                             f'Allowed: {allowed_summary_keys}\n'
                             f'Delta: {got - allowed_summary_keys}')
        # end trainer_step_mock_to_inputs_output_review

        # Test that the summary is empty
        for hook in hooks:
            summary = getattr(hook, 'summary', {})
            assert all([len(s) == 0
                        for s in summary.values()]), (hook, summary)

        files = list(storage_dir.glob('*'))
        assert len(files) == 2, files

        for file in files:
            if 'tfevents' in file.name:
                pass
            elif file.name == 'checkpoints':
                checkpoint_names = {f.name for f in file.glob('*')}
                expect = {
                    'ckpt_latest.pth',
                    'ckpt_best_loss.pth',
                    'ckpt_0.pth',
                    # f'ckpt_{2*virtual_minibatch_size}.pth',
                    'ckpt_2.pth',
                }
                if checkpoint_names != expect:
                    os.system(f'ls -lha {file}')
                    raise AssertionError((checkpoint_names, expect))

                ckpt_best = (file / 'ckpt_best_loss.pth').resolve().name
                ckpt_last = (file / 'ckpt_latest.pth').resolve().name

                # This check does not always work because it is not guaranteed
                # that the training improves the loss on the validation data.
                # assert ckpt_best == 'ckpt_2.pth', ckpt_best

                expected_ckpt_last = 'ckpt_2.pth'
                assert ckpt_last == expected_ckpt_last, (ckpt_last,
                                                         expected_ckpt_last)

                # ckpt_state = pb.io.load_json(file / 'ckpt_state.json')
                # assert ckpt_state == {
                #     'latest_checkpoint_path':
                #         '/tmp/tmp_h0sygfv/checkpoints/ckpt_4.pth',
                #     'metrics': {
                #         'loss': {
                #             'criterion': 'min',
                #             'key': 'loss',
                #             'paths': ['ckpt_2.pth'],
                #             'values': [2.5040305852890015],
                #         }
                #     }
                # }, ckpt_state

    print('Successfully finished test run')