Example #1
0
def run(_run, chime6, test_run=False):
    """Run the enhancement experiment, distributing sessions over MPI workers.

    Args:
        _run: Sacred run object, used for printing the config on the master.
        chime6: If True, use the CHiME-6 enhancer instead of the default one.
        test_run: Forwarded as ``dataset_slice`` to limit the work for tests.
    """
    # Only the master prints the config and selects the experiment directory;
    # the chosen directory is then shared with every worker.
    if dlp_mpi.IS_MASTER:
        print_config(_run)
        experiment_dir = get_dir()
        print('Experiment dir:', experiment_dir)
    else:
        experiment_dir = None

    experiment_dir = dlp_mpi.bcast(experiment_dir, dlp_mpi.MASTER)

    enhancer = get_enhancer_chime6() if chime6 else get_enhancer()

    if test_run:
        print('Database', enhancer.db)

    session_ids = get_session_ids()
    if dlp_mpi.IS_MASTER:
        print('Enhancer:', enhancer)
        print(session_ids)

    enhancer.enhance_session(
        session_ids,
        experiment_dir / 'audio',
        dataset_slice=test_run,
        audio_dir_exist_ok=True,
    )
    if dlp_mpi.IS_MASTER:
        print('Finished experiment dir:', experiment_dir)
Example #2
0
def rirs(
    database_path,
    datasets,
    sample_rate,
    filter_length,
):
    """Generate and store room impulse responses for a scenario database.

    Reads ``scenarios.json`` below ``database_path``, generates one RIR set
    per example with ``generate_rir``, and writes one wav file per source.
    The per-example work is distributed over MPI workers.

    Args:
        database_path: Root directory that contains ``scenarios.json``.
        datasets: Iterable of dataset names; a sub folder is created for each.
        sample_rate: Sample rate (Hz) used for generation and the wav files.
        filter_length: Number of taps of each generated RIR.
    """
    database_path = Path(database_path)

    # Only the master reads the scenario description and creates the dataset
    # folders; the parsed json is then distributed to all workers.
    if dlp_mpi.IS_MASTER:
        scenario_json = database_path / "scenarios.json"
        with scenario_json.open() as f:
            database = json.load(f)
        for dataset in datasets:
            dataset_path = database_path / dataset
            dataset_path.mkdir(parents=True, exist_ok=True)
    else:
        database = None
    # The bcast also acts as a synchronization point: workers do not proceed
    # before the master has created the dataset folders above.
    database = dlp_mpi.bcast(database)

    for dataset_name, dataset in database['datasets'].items():
        print(f'RANK={dlp_mpi.RANK}, SIZE={dlp_mpi.SIZE}:'
              f' Starting {dataset_name}.')

        # Sort for a deterministic example order before splitting the work
        # across the MPI workers.
        for _example_id, example in dlp_mpi.split_managed(
                list(sorted(dataset.items())),
                progress_bar=True,
                is_indexable=True,
        ):
            h = generate_rir(room_dimensions=example['room_dimensions'],
                             source_positions=example['source_position'],
                             sensor_positions=example['sensor_position'],
                             sound_decay_time=example['sound_decay_time'],
                             sample_rate=sample_rate,
                             filter_length=filter_length,
                             sensor_orientations=None,
                             sensor_directivity=None,
                             sound_velocity=343)
            assert not np.any(np.isnan(
                h)), f"{np.sum(np.isnan(h))} values of {h.size} are NaN."

            # h appears to be (sources K, sensors D, taps T): one file per k
            # is written below with D channels -- confirm with generate_rir.
            K, D, T = h.shape
            directory = database_path / dataset_name / _example_id
            # exist_ok=False: every example id must be fresh; a collision
            # indicates a rerun into a non-empty database.
            directory.mkdir(parents=False, exist_ok=False)

            for k in range(K):
                # Although storing as np.float64 does not allow every reader
                # to access the files, it does not require normalization and
                # we are unsure how much precision is needed for RIRs.
                with soundfile.SoundFile(str(directory / f"h_{k}.wav"),
                                         subtype='DOUBLE',
                                         samplerate=sample_rate,
                                         mode='w',
                                         channels=h.shape[1]) as f:
                    f.write(h[k, :, :].T)

        dlp_mpi.barrier()

        print(f'RANK={dlp_mpi.RANK}, SIZE={dlp_mpi.SIZE}:'
              f' Finished {dataset_name}.')
Example #3
0
    def from_file(
        cls,
        config_path: Path,
        in_config_path: str = '',
        consider_mpi=False,
    ):
        """Instantiate the module from given config_file.

        Args:
            config_path: Path to a ``.json`` or ``.yaml`` config file.
            in_config_path: Dotted key path (e.g. ``'trainer.model'``) that
                selects a sub-config inside the loaded file. The empty string
                selects the whole file.
            consider_mpi:
                If True and mpi is used, only read config_path and
                checkpoint_path once and broadcast the content with mpi.
                Reduces the io load.

        Returns:
            The instantiated module, i.e. ``cls.from_config(...)``.

        Raises:
            ValueError: If the config file suffix is neither json nor yaml.
        """
        config_path = Path(config_path).expanduser().resolve()

        assert config_path.is_file(), f'Expected {config_path} is file.'

        def load_config(config_path):
            # Dispatch on the suffix; only json and yaml are supported.
            if config_path.suffix == '.json':
                import json
                with config_path.open() as fp:
                    configurable_config = json.load(fp)
            elif config_path.suffix == '.yaml':
                import yaml
                with config_path.open() as fp:
                    configurable_config = yaml.safe_load(fp)
            else:
                raise ValueError(config_path)
            return configurable_config

        if consider_mpi:
            import dlp_mpi
            # Only the master reads the file; everyone else gets the parsed
            # content via broadcast.
            if dlp_mpi.IS_MASTER:
                configurable_config = load_config(config_path=config_path)
            else:
                configurable_config = None
            configurable_config = dlp_mpi.bcast(configurable_config)
        else:
            configurable_config = load_config(config_path=config_path)

        # Bug fix: descend only when a sub-path was given. The original
        # tested `config_path != ''`, which is always true for a Path, so an
        # empty `in_config_path` produced split('.') == [''] and raised a
        # KeyError on ''.
        if in_config_path != '':
            for part in in_config_path.split('.'):
                configurable_config = configurable_config[part]
        return cls.from_config(configurable_config)
Example #4
0
    def load_checkpoint(
        self,
        checkpoint_path: (Path, str),
        in_checkpoint_path: str = 'model',
        map_location='cpu',
        consider_mpi=False,
    ) -> 'Module':
        """Update the module parameters from the given checkpoint.

        Args:
            checkpoint_path: Path of the torch checkpoint file to load.
            in_checkpoint_path: Dotted key path that selects this module's
                state dict inside the checkpoint, e.g. 'model'.
            map_location: Forwarded to `torch.load`, e.g. 'cpu'.
            consider_mpi:
                If True and mpi is used, only read config_path and
                checkpoint_path once and broadcast the content with mpi.
                Reduces the io load.

        Returns:
            self, with parameters updated from the checkpoint.

        Raises:
            ValueError: If a part of `in_checkpoint_path` is missing in the
                loaded checkpoint.
        """
        checkpoint_path = Path(checkpoint_path).expanduser().resolve()

        assert checkpoint_path.is_file(), checkpoint_path

        # Load weights
        if consider_mpi:
            import dlp_mpi
            # Only the master touches the filesystem; the raw bytes are
            # broadcast and then deserialized locally on every worker.
            if dlp_mpi.IS_MASTER:
                checkpoint_path_content = Path(checkpoint_path).read_bytes()
            else:
                checkpoint_path_content = None
            checkpoint_path_content = dlp_mpi.bcast(checkpoint_path_content)

            checkpoint = torch.load(
                io.BytesIO(checkpoint_path_content),
                map_location=map_location,
            )
        else:
            checkpoint = torch.load(checkpoint_path, map_location=map_location)

        if in_checkpoint_path:
            # Descend one dotted key at a time. deflatten with maxdepth=1
            # presumably regroups flat dotted keys ('a.b') by one level so
            # `part` can be indexed -- verify against deflatten's docs.
            for part in in_checkpoint_path.split('.'):
                try:
                    checkpoint = deflatten(checkpoint, maxdepth=1)
                    checkpoint = checkpoint[part]
                except KeyError:
                    raise ValueError(part, in_checkpoint_path, checkpoint)
        self.load_state_dict(checkpoint)

        return self
Example #5
0
def write_wavs(dst_dir: Path, wsj0_root: Path, wsj1_root: Path, sample_rate):
    """Copy the WSJ annotation files and convert the nist audio to wav.

    Only the MPI master copies the (small) annotation files and validates
    the file counts; the (large) audio conversion is distributed over all
    MPI workers.

    Args:
        dst_dir: Target directory; must not exist yet (no overwriting).
        wsj0_root: Root directory of the WSJ0 part of the corpus.
        wsj1_root: Root directory of the WSJ1 part of the corpus.
        sample_rate: Sample rate of the written wav files.

    Raises:
        RuntimeError: If only one 'pl' file is found, which indicates that
            only WSJ0 (instead of WSJ complete) is available.
    """
    wsj0_root = Path(wsj0_root).expanduser().resolve()
    wsj1_root = Path(wsj1_root).expanduser().resolve()
    dst_dir = Path(dst_dir).expanduser().resolve()
    assert wsj0_root.exists(), wsj0_root
    assert wsj1_root.exists(), wsj1_root

    assert not dst_dir == wsj0_root, (wsj0_root, dst_dir)
    assert not dst_dir == wsj1_root, (wsj1_root, dst_dir)
    # Expect, that the dst_dir does not exist to make sure to not overwrite.
    if dlp_mpi.IS_MASTER:
        dst_dir.mkdir(parents=True, exist_ok=False)

    if dlp_mpi.IS_MASTER:
        # Search for CD numbers, e.g. "13-34.1"
        # CD stands for compact disk.
        cds_0 = list(wsj0_root.rglob("*-*.*"))
        cds_1 = list(wsj1_root.rglob("*-*.*"))
        cds = set(cds_0 + cds_1)

        # Known file counts of the full WSJ0+1 corpus, used as sanity check.
        expected_number_of_files = {
            'pl': 3,
            'ndx': 106,
            'ptx': 3547,
            'dot': 3585,
            'txt': 256
        }
        number_of_written_files = dict()
        for suffix in expected_number_of_files.keys():
            files_0 = list(wsj0_root.rglob(f"*.{suffix}"))
            files_1 = list(wsj1_root.rglob(f"*.{suffix}"))
            files = set(files_0 + files_1)
            # Filter files that do not have a folder that matches "*-*.*".
            files = {
                file
                for file in files
                if any([fnmatch.fnmatch(part, "*-*.*") for part in file.parts])
            }

            # the readme.txt file in the parent directory is not copied
            print(f"About to write ca. {len(files)} {suffix} files.")
            # Copy the annotation files, preserving the directory layout
            # relative to each CD's parent folder.
            for cd in cds:
                cd_files = list(cd.rglob(f"*.{suffix}"))
                for file in cd_files:
                    target = dst_dir / file.relative_to(cd.parent)
                    target.parent.mkdir(parents=True, exist_ok=True)
                    if not target.is_file():
                        shutil.copy(file, target.parent)
            number_of_written_files[suffix] = len(
                list(dst_dir.rglob(f"*.{suffix}")))
            print(f"Writing {number_of_written_files[suffix]} {suffix} files.")
            print(
                f'Expected {expected_number_of_files[suffix]} {suffix} files.')

        for suffix in expected_number_of_files.keys():
            # Bug fix: interpolate the count for this suffix instead of the
            # whole `number_of_written_files` dict.
            message = (f'Expected that '
                       f'{expected_number_of_files[suffix]} '
                       f'files with the {suffix} are written. '
                       f'But only {number_of_written_files[suffix]} '
                       f'are written. ')
            if (number_of_written_files[suffix] !=
                    expected_number_of_files[suffix]):
                warnings.warn(message)

            if suffix == 'pl' and number_of_written_files[suffix] == 1:
                raise RuntimeError(
                    'Found only one pl file although we expected three. '
                    'A typical reason is having only WSJ0. '
                    'Please make sure you have WSJ0+1 = WSJ COMPLETE.')

    if dlp_mpi.IS_MASTER:
        # Ignore .wv2 files since they are not referenced in our database
        # anyway
        wsj_nist_files = [(cd, nist_file) for cd in cds
                          for nist_file in cd.rglob("*.wv1")]
        print(f"About to write {len(wsj_nist_files)} wav files.")
    else:
        wsj_nist_files = None

    # Distribute the (cd, nist_file) pairs to all workers.
    wsj_nist_files = dlp_mpi.bcast(wsj_nist_files)

    for nist_file_tuple in dlp_mpi.split_managed(wsj_nist_files):
        cd, nist_file = nist_file_tuple
        assert isinstance(nist_file, Path), nist_file
        signal = read_nist_wsj(nist_file, expected_sample_rate=16000)
        file = nist_file.with_suffix('.wav')
        target = dst_dir / file.relative_to(cd.parent)
        assert not target == nist_file, (nist_file, target)
        target.parent.mkdir(parents=True, exist_ok=True)
        signal = resample_with_sox(signal, rate_in=16000, rate_out=sample_rate)
        # normalization to mean 0:
        signal = signal - np.mean(signal)
        # normalization:
        #   Correction, because the allowed values are in the range [-1, 1).
        #       => "1" is not a valid value
        correction = (2**15 - 1) / (2**15)
        signal = signal * (correction / np.amax(np.abs(signal)))
        with soundfile.SoundFile(
                str(target),
                samplerate=sample_rate,
                channels=1,
                subtype='FLOAT',
                mode='w',
        ) as f:
            f.write(signal.T)

    dlp_mpi.barrier()
    if dlp_mpi.IS_MASTER:
        # Sanity check: every nist file should have produced one wav file.
        created_files = list(set(list(dst_dir.rglob("*.wav"))))
        print(f"Written {len(created_files)} wav files.")
        assert len(wsj_nist_files) == len(created_files), (len(wsj_nist_files),
                                                           len(created_files))
Example #6
0
def write_wavs(dst_dir: Path, wsj0_root: Path, wsj1_root: Path, sample_rate):
    """Copy WSJ annotation files and convert the nist audio to wav.

    Only the MPI master copies the (small) annotation files; the (large)
    audio conversion is distributed over all MPI workers.

    Args:
        dst_dir: Target directory; must not exist yet (no overwriting).
        wsj0_root: Root directory of the WSJ0 part of the corpus.
        wsj1_root: Root directory of the WSJ1 part of the corpus.
        sample_rate: Sample rate of the written wav files.
    """
    wsj0_root = Path(wsj0_root).expanduser().resolve()
    wsj1_root = Path(wsj1_root).expanduser().resolve()
    dst_dir = Path(dst_dir).expanduser().resolve()
    assert wsj0_root.exists(), wsj0_root
    assert wsj1_root.exists(), wsj1_root

    assert not dst_dir == wsj0_root, (wsj0_root, dst_dir)
    assert not dst_dir == wsj1_root, (wsj1_root, dst_dir)
    # Expect, that the dst_dir does not exist to make sure to not overwrite.
    if dlp_mpi.IS_MASTER:
        dst_dir.mkdir(parents=True, exist_ok=False)

    if dlp_mpi.IS_MASTER:
        # Search for CD folders, e.g. "13-34.1" (compact disk numbering).
        cds_0 = list(wsj0_root.rglob("*-*.*"))
        cds_1 = list(wsj1_root.rglob("*-*.*"))
        cds = set(cds_0 + cds_1)
        # Copy the annotation files, preserving the directory layout
        # relative to each CD's parent folder.
        for suffix in 'pl ndx ptx dot txt'.split():
            files_0 = list(wsj0_root.rglob(f"*.{suffix}"))
            files_1 = list(wsj1_root.rglob(f"*.{suffix}"))
            files = set(files_0 + files_1)
            # the readme.txt file in the parent directory is not copied
            print(f"About to write ca. {len(files)} {suffix} files.")
            for cd in cds:
                cd_files = list(cd.rglob(f"*.{suffix}"))
                for file in cd_files:
                    target = dst_dir / file.relative_to(cd.parent)
                    target.parent.mkdir(parents=True, exist_ok=True)
                    if not target.is_file():
                        shutil.copy(file, target.parent)
            written_files = list(dst_dir.rglob(f"*.{suffix}"))
            print(f"Writing {len(written_files)} {suffix} files.")
            # assert len(written_files) == len(files), (files, written_files)

    if dlp_mpi.IS_MASTER:
        # Ignore .wv2 files since they are not referenced in our database
        # anyway
        wsj_nist_files = [(cd, nist_file) for cd in cds
                          for nist_file in cd.rglob("*.wv1")]
        print(f"About to write {len(wsj_nist_files)} wav files.")
    else:
        wsj_nist_files = None

    # Distribute the (cd, nist_file) pairs to all workers.
    wsj_nist_files = dlp_mpi.bcast(wsj_nist_files)

    for nist_file_tuple in dlp_mpi.split_managed(wsj_nist_files):
        cd, nist_file = nist_file_tuple
        assert isinstance(nist_file, Path), nist_file
        signal = read_nist_wsj(nist_file, expected_sample_rate=16000)
        file = nist_file.with_suffix('.wav')
        target = dst_dir / file.relative_to(cd.parent)
        assert not target == nist_file, (nist_file, target)
        target.parent.mkdir(parents=True, exist_ok=True)
        # normalization:
        #   Correction, because the allowed values are in the range [-1, 1).
        #       => "1" is not a valid value
        signal = resample_with_sox(signal, rate_in=16000, rate_out=sample_rate)
        correction = (2 ** 15 - 1) / (2 ** 15)
        signal = signal * (correction / np.amax(np.abs(signal)))
        with soundfile.SoundFile(
                str(target), samplerate=sample_rate, channels=1,
                subtype='FLOAT', mode='w',
        ) as f:
            f.write(signal.T)

    dlp_mpi.barrier()
    if dlp_mpi.IS_MASTER:
        # Sanity check: every nist file should have produced one wav file.
        created_files = list(set(list(dst_dir.rglob("*.wav"))))
        print(f"Written {len(created_files)} wav files.")
        assert len(wsj_nist_files) == len(created_files), (len(wsj_nist_files), len(created_files))
Example #7
0
def get_new_folder(
    basedir,
    try_id=None,
    dry_run=False,
    mkdir=True,
    consider_mpi=False,
):
    """Select (and usually create) a new sub folder of `basedir`.

    The core source code if copied from the FileStorageObserver in sacred.

    Without `try_id` the folder names are running integers (largest existing
    integer plus one).  With `try_id` the name is `try_id` itself, or
    `try_id_2`, `try_id_3`, ... once it is taken.

    Args:
        basedir: Parent directory for the new sub folder.
        try_id: Suggestion for the folder name. Can be used as prefix.
            try_id=prefix with return a folder like: prefix, prefix_2, ...
        dry_run: Per default also creates the directory to be thread safe.
        mkdir: With mkdir this function is thread and process safe.
        consider_mpi: If True only the master selects a folder and syncs the
            folder with the slaves.

    Returns:
        pathlib.Path of the new sub folder.
    """
    if consider_mpi:
        import dlp_mpi
        if not dlp_mpi.IS_MASTER:
            # Workers block here until the master broadcasts its choice.
            return dlp_mpi.bcast(None)

    basedir = Path(basedir).expanduser()

    def next_id():
        # Compute the next candidate folder name from the current content
        # of basedir (recomputed on every retry).
        entries = os.listdir(str(basedir))
        if try_id is None:
            numbers = [
                int(entry) for entry in entries
                if (basedir / entry).is_dir() and entry.isdigit()
            ]
            return max(numbers + [0]) + 1
        if not (basedir / f'{try_id}').exists():
            return f'{try_id}'

        def strip(entry):
            return re.sub(f'{try_id}_?', '', str(entry))

        # The plain suggestion is taken: append a running index.
        numbers = [
            int(strip(entry)) for entry in entries
            if (basedir / entry).is_dir()
            if fnmatch.fnmatch(entry, f'{try_id}_*')
            if strip(entry).isdigit()
        ]
        return f'{try_id}_{max(numbers + [1]) + 1}'

    for attempt in range(200):
        simu_dir = basedir / str(next_id())

        try:
            if dry_run:
                print(f'dry_run: "os.mkdir({simu_dir})"')
            elif mkdir is False:
                pass
            elif mkdir is True:
                simu_dir.mkdir()
            else:
                raise ValueError(mkdir)

            if consider_mpi:
                import dlp_mpi
                assert dlp_mpi.IS_MASTER, dlp_mpi.RANK
                simu_dir = dlp_mpi.bcast(simu_dir)

            return simu_dir
        except FileExistsError:
            # A concurrent process grabbed the same id: retry with a fresh
            # candidate, but give up after many failed attempts.
            if attempt > 100:
                raise
Example #8
0
def get_new_folder(
    basedir,
    try_id=None,
    dry_run=False,
    consider_mpi=False,
    chdir=False,
    mkdir=True,
):
    """Select (and usually create) a new non-existing sub folder of basedir.

    Args:
        basedir: Parent directory in which the new folder is selected.
        try_id: Suggestion for the folder name, used as prefix:
            try_id, try_id_2, try_id_3, ...  Without it a running integer
            index is used.
        mkdir: Enables thread safety
        dry_run: Per default also creates the directory to be thread safe.
        consider_mpi: If True, only the master selects the folder and
            broadcasts it to the other workers.
        chdir: If True, change the process working directory into the new
            folder (process-global side effect).

    Returns:
        pathlib.Path of the new sub folder.
    """
    if consider_mpi:
        import dlp_mpi
        if dlp_mpi.IS_MASTER:
            pass
        else:
            # Workers wait here for the folder that the master broadcasts
            # at the end of this function.
            new_folder = None
            new_folder = dlp_mpi.bcast(new_folder)
            return new_folder

    suggested_id = try_id
    basedir = Path(basedir).expanduser().resolve()

    if Path('/net') in basedir.parents:
        # If nt filesystem, assert not in /net/home
        assert Path('/net/home') not in basedir.parents, basedir

    for i in range(200):
        if suggested_id is None:
            # Use the largest existing integer folder name plus one.
            dir_nrs = [
                int(d) for d in os.listdir(str(basedir))
                if (basedir / d).is_dir() and d.isdigit()
            ]
            _id = max(dir_nrs + [0]) + 1
        else:
            if (basedir / f'{suggested_id}').exists():
                # The plain suggestion is taken: append a running index,
                # e.g. suggested_id_2, suggested_id_3, ...
                dir_nrs = [
                    int(re.sub(f'{suggested_id}_?', '', str(d)))
                    for d in os.listdir(str(basedir))
                    if (basedir / d).is_dir()
                    if fnmatch.fnmatch(d, f'{suggested_id}_*')
                    if re.sub(f'{suggested_id}_?', '', str(d)).isdigit()
                ]
                _id = max(dir_nrs + [1]) + 1
                _id = f'{suggested_id}_{_id}'
            else:
                _id = f'{suggested_id}'

        simu_dir = basedir / str(_id)

        try:
            if dry_run:
                print(f'dry_run: "os.mkdir({simu_dir})"')
            elif mkdir is False:
                pass
            elif mkdir is True:
                # mkdir raises FileExistsError when a concurrent process
                # grabbed the same id first -> retry below.
                simu_dir.mkdir()
            else:
                raise ValueError(mkdir)

            if consider_mpi:
                import dlp_mpi
                assert dlp_mpi.IS_MASTER, dlp_mpi.RANK
                simu_dir = dlp_mpi.bcast(simu_dir)

            if chdir:
                os.chdir(simu_dir)

            return simu_dir
        except FileExistsError:
            # Catch race conditions
            if i > 100:
                # After some tries,
                # expect that something other went wrong
                raise
Example #9
0
# CLI option: location of the SMS-WSJ database description json.
parser.add_argument("--json_path",
                    default="data/sms_wsj.json",
                    help="Full path to sms_wsj.json")


def main(conf):
    """Start the sacred experiment with the parsed configuration."""
    config_updates = dict(
        json_path=conf["main_args"]["json_path"],
        **conf["mm_config"],
    )
    experiment.run(config_updates=config_updates)


if __name__ == "__main__":
    if dlp_mpi.IS_MASTER:
        # We start with opening the config file conf.yml as a dictionary from
        # which we can create parsers. Each top level key in the dictionary defined
        # by the YAML file creates a group in the parser.
        with open("local/conf.yml") as f:
            def_conf = yaml.safe_load(f)
        parser = prepare_parser_from_dict(def_conf, parser=parser)
        # Arguments are then parsed into a hierarchical dictionary (instead of
        # flat, as returned by argparse) to facilitate calls to the different
        # asteroid methods (see in main).
        # plain_args is the direct output of parser.parse_args() and contains all
        # the attributes in a non-hierarchical structure. It can be useful to also
        # have it so we included it here but it is not used.
        arg_dict, plain_args = parse_args_as_dict(parser,
                                                  return_plain_args=True)
    else:
        # Workers skip the file/argv based parsing and receive the parsed
        # arguments from the master via the broadcast below.
        arg_dict = None
    arg_dict = dlp_mpi.bcast(arg_dict, root=dlp_mpi.MASTER)
    main(arg_dict)
Example #10
0
def get_new_subdir(
    basedir: [str, Path],
    *,
    id_naming: [str, callable] = 'index',
    mkdir: bool = True,
    prefix: str = None,
    suffix: str = None,
    consider_mpi: bool = False,
    dry_run: bool = False,
):
    """Determine a new non-existent sub directory of `basedir`.

    Thread and process safe when `mkdir` is enabled: the directory is
    created immediately, so concurrent callers never receive the same id.

    Args:
        basedir: The new subdir will be created inside this directory (the
            directory itself is created when missing).
        id_naming: How the folder name is produced.
            'index': largest existing integer in basedir plus one ('1', '2', ...).
            'time': a '%Y-%m-%d-%H-%M-%S' timestamp.
            callable: each call returns a fresh name.
        mkdir: Create the dir; this is what makes the function safe against
            concurrent callers racing for the same id.
        prefix: Optional prefix for the id, '2' -> '{prefix}_2'.
        suffix: Optional suffix for the id, '2' -> '2_{suffix}'.
        consider_mpi: If True, only the master searches for a folder and
            broadcasts the result to all workers. Without this, every MPI
            process may end up with a different (or, with `mkdir=False`,
            unpredictably shared) folder.
        dry_run: Disable mkdir and print the folder name instead.

    Returns:
        pathlib.Path of the new subdir

    >>> get_new_subdir('/', dry_run=True)  # root folder usually contain no digits
    dry_run: "os.mkdir(/1)"
    PosixPath('/1')

    >>> import numpy as np
    >>> np.random.seed(0)  # This is for doctest. Never use it in practise.
    >>> get_new_subdir('/', id_naming=NameGenerator(), dry_run=True)
    dry_run: "os.mkdir(/smooth_tomato_finch)"
    PosixPath('/smooth_tomato_finch')
    """
    if consider_mpi:
        import dlp_mpi
        if not dlp_mpi.IS_MASTER:
            # Workers block here until the master broadcasts its choice.
            return dlp_mpi.bcast(None)

    basedir = Path(basedir).expanduser().resolve()
    if not basedir.exists():
        if dry_run:
            print(f'dry_run: "os.makedirs({basedir})"')
            # ToDo: Make this working.
            #       Will fail when calling os.listdir
        else:
            basedir.mkdir(parents=True)

    if Path('/net') in basedir.parents:
        # If nt filesystem, assert not in /net/home
        assert Path('/net/home') not in basedir.parents, basedir

    pre = f'{prefix}_' if prefix else ''
    suf = f'_{suffix}' if suffix else ''

    def _candidate_id(attempt):
        # Produce the candidate folder name for this attempt; recomputed on
        # every retry so concurrent creations are picked up.
        if id_naming == 'index':
            if prefix is None and suffix is None:
                numbers = [
                    int(entry) for entry in os.listdir(str(basedir))
                    if (basedir / entry).is_dir() and entry.isdigit()
                ]
                return max(numbers + [0]) + 1

            def strip_affixes(entry):
                return _removesuffix(_removeprefix(str(entry), pre), suf)

            numbers = [
                int(strip_affixes(entry))
                for entry in os.listdir(str(basedir))
                if (basedir / entry).is_dir()
                if fnmatch.fnmatch(entry, f'{pre}*{suf}')
                if strip_affixes(entry).isdigit()
            ]
            return f'{pre}{max(numbers + [0]) + 1}{suf}'
        if id_naming == 'time':
            if attempt != 0:
                # Timestamps have one second resolution: wait before retrying.
                time.sleep(1)
            stamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
            return f'{pre}{stamp}{suf}'
        if callable(id_naming):
            return id_naming()
        raise ValueError(id_naming)

    for attempt in range(200):
        simu_dir = basedir / str(_candidate_id(attempt))

        try:
            if dry_run:
                print(f'dry_run: "os.mkdir({simu_dir})"')
            elif mkdir is False:
                pass
            elif mkdir is True:
                simu_dir.mkdir()
            else:
                raise ValueError(mkdir)

            if consider_mpi:
                import dlp_mpi
                assert dlp_mpi.IS_MASTER, dlp_mpi.RANK
                simu_dir = dlp_mpi.bcast(simu_dir)

            return simu_dir
        except FileExistsError:
            # A concurrent caller grabbed the same id: retry with a fresh
            # candidate, but give up after many failed attempts.
            if attempt > 100:
                raise