Example #1
    @classmethod
    def available_weights(cls):
        """Return the pretrained weights files as a dict (name -> file)

        Returns
        -------
        weight_files : dict
            A mapping 'weights name' -> 'weights files', where the
            files are absolute paths to compressed numpy arrays (.npz
            format). The 'weights name' is either *BabelMulti*,
            *FisherMono* or *FisherTri*.

        Raises
        ------
        RuntimeError
            If the directory `shennong/share/bottleneck` is not found,
            or if all the weights files are missing in it.

        """
        # locate the directory shennong/share/bottleneck, raise if it
        # cannot be found
        directory = pkg_resources.resource_filename(
            pkg_resources.Requirement.parse('shennong'),
            'shennong/share/bottleneck')
        if not os.path.isdir(directory):  # pragma: nocover
            raise RuntimeError(f'directory not found: {directory}')

        # retrieve the weights files
        expected_files = {
            f[0]: os.path.join(directory, f[1] + '.npz')
            for f in [('BabelMulti',
                       'Babel-ML17_FBANK_HL1500_SBN80_PhnStates3096'),
                      ('FisherMono',
                       'FisherEnglish_FBANK_HL500_SBN80_PhnStates120'),
                      ('FisherTri',
                       'FisherEnglish_FBANK_HL500_SBN80_triphones2423')]
        }

        # make sure all the files are here, raise a RuntimeError if
        # all files are missing, log a warning if only one or two
        # files are missing
        files = {k: v for k, v in expected_files.items() if os.path.isfile(v)}
        if not files:  # pragma: nocover
            raise RuntimeError('no weights file found in {}'.format(directory))
        for k in expected_files.keys():
            if k not in files:  # pragma: nocover
                get_logger('bottleneck', 'warning').warning(
                    'weights file for "%s" is unavailable', k)

        return files
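
A minimal usage sketch (the BottleneckProcessor import path is an assumption and may differ between shennong versions):

from shennong.processor.bottleneck import BottleneckProcessor

# keys are 'BabelMulti', 'FisherMono' or 'FisherTri',
# values are absolute paths to the .npz weights files
for name, path in BottleneckProcessor.available_weights().items():
    print(name, '->', path)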
Example #2
    @classmethod
    def load(cls, filename, serializer=None,
             log=get_logger('serializer', 'warning')):
        """Loads a FeaturesCollection from a `filename`

        Parameters
        ----------
        filename : str
            The file to load
        serializer : str, optional
            The file serializer to use for loading, if not specified
            guess the serializer from the `filename` extension
        log : logging.Logger, optional
            Where to send log messages. Default to a logger named 'serializer'
            with a 'warning' level.

        Returns
        -------
        features : :class:`~shennong.features.FeaturesCollection`
            The features loaded from the `filename`

        Raises
        ------
        IOError
            If the `filename` cannot be read
        ValueError
            If the `serializer` or the file extension is not supported,
            if the features loading fails.

        """
        return get_serializer(cls, filename, log, serializer).load()
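
A usage sketch (the filenames are hypothetical, and the FeaturesCollection import path plus the 'pickle' serializer name are assumptions):

from shennong import FeaturesCollection

# serializer guessed from the .h5f extension...
features = FeaturesCollection.load('features.h5f')
# ...or forced explicitly
features = FeaturesCollection.load('features.bin', serializer='pickle')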
Example #3
    def save(self, filename, serializer=None, with_properties=True,
             log=get_logger('serializer', 'warning'), **kwargs):
        """Saves a FeaturesCollection to a `filename`

        Parameters
        ----------
        filename : str
            The file to write
        serializer : str, optional
            The file serializer to use for saving, if not specified
            guess the serializer from the `filename` extension
        with_properties : bool, optional
            When False do not save the features properties, default to True.
        log : logging.Logger, optional
            Where to send log messages. Default to a logger named 'serializer'
            with a 'warning' level.
        compress : bool_or_str_or_int, optional
            Only valid for numpy (.npz), matlab (.mat) and h5features (.h5f)
            serializers. When True compress the file. Default to True.
        scp : bool, optional
            Only valid for kaldi (.ark) serializer. When True writes a .scp
            file along with the .ark file. Default to False.

        Raises
        ------
        IOError
            If the file `filename` already exists
        ValueError
            If the `serializer` or the file extension is not supported,
            if the features saving fails.

        """
        get_serializer(self.__class__, filename, log, serializer).save(
            self, with_properties=with_properties, **kwargs)
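
The matching save calls, using the serializer-specific options documented above (filenames are hypothetical):

features.save('features.npz', compress=True)  # compressed numpy
features.save('features.ark', scp=True)  # kaldi, also writes features.scp
features.save('features.h5f', with_properties=False)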
Example #4
def main():
    # parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'data_directory', help='input/output data directory', type=pathlib.Path)
    parser.add_argument(
        'config_file', help='YAML configuration file', type=pathlib.Path)
    parser.add_argument(
        'corpus', choices=['english', 'xitsonga'], help='corpus to process')
    parser.add_argument(
        '--do-vtln', action='store_true',
        help='extract warped features from pre-trained VTLN')
    parser.add_argument(
        '-j', '--njobs', type=int, default=4, metavar='<int>',
        help='number of parallel jobs (default to %(default)s)')
    parser.add_argument(
        '-v', '--verbose', action='store_true', help='increase log level')
    args = parser.parse_args()

    # check and setup arguments
    data_directory = args.data_directory
    if not data_directory.is_dir():
        raise ValueError(f'directory not found: {data_directory}')

    config = args.config_file
    if not config.is_file():
        raise ValueError(f'file not found: {config}')

    warps = None
    if args.do_vtln:
        warps_file = data_directory / f'{args.corpus}.warps'
        if not warps_file.is_file():
            raise ValueError(f'file not found: {warps_file}')
        warps = {spk: float(warp) for spk, warp in (
            line.strip().split() for line in open(warps_file, 'r'))}

    (data_directory / 'features').mkdir(exist_ok=True)

    log = get_logger('extraction', 'debug' if args.verbose else 'info')

    # load input utterances
    log.info('loading utterances...')
    utterances = Utterances(
        [line.strip().split(' ') for line in open(
            data_directory / f'{args.corpus}.utts', 'r')])

    # extract the features
    features = pipeline.extract_features(
        config, utterances, warps=warps, njobs=args.njobs, log=log)

    # save them
    h5f_file = data_directory / 'features' / f'{args.corpus}_{config.stem}.h5f'
    if args.do_vtln:
        # pathlib.Path has no str-like replace (Path.replace renames a
        # file on disk), so build the VTLN variant of the name instead
        h5f_file = h5f_file.with_name(h5f_file.stem + '_vtln.h5f')

    features.save(h5f_file)
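
The two input files this script reads, in the format inferred from the parsing code above (the exact utterance fields are an assumption, following one of the formats shennong's Utterances accepts):

# <corpus>.warps, one 'speaker warp' pair per line:
#   spk001 0.95
#   spk002 1.02
#
# <corpus>.utts, one utterance per line with space-separated fields:
#   utt001 /path/to/utt001.wav spk001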
Example #5
def _check_environment(njobs, log=get_logger('pipeline', 'warning')):
    if njobs == 1:
        return

    try:
        nthreads = int(os.environ['OMP_NUM_THREADS'])
    except KeyError:
        nthreads = None

    if not nthreads or nthreads > 1:
        log.warning(
            'working on %s threads but implicit parallelism is active, '
            'this may slow down the processing. Set the environment variable '
            'OMP_NUM_THREADS=1 to disable this warning', njobs)
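
When running several parallel jobs, implicit parallelism can be disabled upfront so this warning never fires; a minimal sketch:

import os

# disable OpenMP implicit threading; set this before numpy/Kaldi
# create their thread pools, i.e. before importing shennong
os.environ['OMP_NUM_THREADS'] = '1'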
Example #6
    def __init__(self,
                 config,
                 utterances,
                 log=get_logger('manager', 'warning')):
        self._config = config
        self._utterances = utterances
        self._warps = {}
        self.log = log

        self._check_utterances()

        # store the metadata because we need to access the sample rate
        # for processors instantiation
        audio_files = set(utt.audio_file for utt in utterances)
        self._audio_metadata = {}
        for audio in audio_files:
            log.debug('scanning %s', audio)
            self._audio_metadata[audio] = Audio.scan(audio)

        # make sure all the audio files are compatible with the pipeline
        log.info('scanning %s utterances...', len(self._utterances))
        self._check_audio_files()

        # the features type to be extracted
        self.features = [
            k for k in self.config.keys() if k in self.valid_features
        ][0]

        # get some framing parameters constant for all processors
        # (retrieve them from a features processor instance)
        proc = self.get_features_processor(next(iter(self.utterances)))
        self.frame_length = proc.frame_length
        self.frame_shift = proc.frame_shift

        # if CMVN by speaker, instantiate a CMVN processor per speaker
        # here, else instantiate a processor per utterance
        if 'cmvn' in self.config:
            if self.config['cmvn']['by_speaker']:
                self._cmvn_processors = {
                    spk: self.get_processor_class('cmvn')(proc.ndims)
                    for spk in set(utt.speaker for utt in self.utterances)
                }
            else:
                self._cmvn_processors = {
                    utt.name: self.get_processor_class('cmvn')(proc.ndims)
                    for utt in self.utterances
                }
Example #7
def command_extract(args):
    """Execute the 'speech-features extract' command"""
    # setup the logger (level given by -q/-v arguments)
    if args.quiet:
        log = utils.null_logger()
        level = 'error'
    else:
        if args.verbose == 0:
            level = 'warning'
        elif args.verbose == 1:
            level = 'info'
        else:  # verbose >= 2
            level = 'debug'
        log = logger.get_logger(name='speech-features', level=level)

    # make sure the output file does not already exist and has a
    # valid extension
    output_file = args.output_file
    if os.path.exists(output_file):
        log.error('output file already exists: %s', output_file)
        return
    output_ext = os.path.splitext(output_file)[1]
    if output_ext not in supported_extensions().keys():
        log.error(
            'output file has an unsupported extension "%s", must be in %s',
            output_ext, ", ".join(supported_extensions().keys()))
        return

    # make sure the input config and utterances files exist
    for filename in (args.config, args.utterances):
        if not os.path.exists(filename):
            log.error('input file not found: %s', filename)
            return

    # read the utterances file
    utterances = Utterances.load(args.utterances)

    # run the pipeline
    features = pipeline.extract_features(args.config,
                                         utterances,
                                         njobs=args.njobs,
                                         log=log)

    # save the features
    log.info('saving the features to %s', output_file)
    features.save(output_file)
Example #8
    def set_logger(self,
                   level,
                   formatter='%(levelname)s - %(name)s - %(message)s'):
        """Change level and/or format of the processor's logger

        Parameters
        ----------
        level : str
            The minimum log level handled by the logger (any message above this
            level will be ignored). Must be 'debug', 'info', 'warning' or
            'error'.
        formatter : str, optional
            A string to format the log messages, see
            https://docs.python.org/3/library/logging.html#formatter-objects.
            By default display level and message. Use '%(asctime)s -
            %(levelname)s - %(name)s - %(message)s' to display time, level,
            name and message.

        """
        self._logger = get_logger(self.name, level=level, formatter=formatter)
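
A usage sketch on a processor instance (the MfccProcessor import follows shennong's layout but is an assumption):

from shennong.processor import MfccProcessor

processor = MfccProcessor()
processor.set_logger('debug')  # more verbose output
processor.set_logger(
    'info',
    formatter='%(asctime)s - %(levelname)s - %(name)s - %(message)s')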
Example #9
import shennong.pipeline as pipeline
from shennong.logger import get_logger

ENGLISH_ITEM = ('https://raw.githubusercontent.com/bootphon/ABXpy/'
                'zerospeech2015/resources/english.item')

XITSONGA_ITEM = ('https://raw.githubusercontent.com/bootphon/ABXpy/'
                 'zerospeech2015/resources/xitsonga.item')

ENGLISH_FILES_LIST = ('https://raw.githubusercontent.com/bootphon/'
                      'Zerospeech2015/master/english_files.txt')

XITSONGA_FILES_LIST = ('https://raw.githubusercontent.com/bootphon/'
                       'Zerospeech2015/master/xitsonga_files.txt')

log = get_logger('data setup', 'info')


def setup_data(data_directory, buckeye_directory, xitsonga_directory):
    """Setup a data directory with all input data required

    * creates the ``data_directory``
    * makes a symlink to ``buckeye_directory`` and ``xitsonga_directory`` in it
    * downloads the ABX item files for buckeye and xitsonga
    * creates the list of utterances for both corpora
    * creates the configuration files for features extraction

    """
    # basic checks
    if not buckeye_directory.is_dir():
        raise ValueError(f'directory does not exist: {buckeye_directory}')
Example #10
def main():
    # parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('data_directory',
                        type=pathlib.Path,
                        help='input/output data directory')
    parser.add_argument('conf',
                        choices=['only', 'nocmvn', 'full'],
                        help='pipeline configuration')
    parser.add_argument('warps', type=pathlib.Path, help='VTLN warps to use')
    parser.add_argument('-o',
                        '--output-file',
                        type=pathlib.Path,
                        help='features file')
    parser.add_argument(
        '-j',
        '--njobs',
        type=int,
        default=4,
        metavar='<int>',
        help='number of parallel jobs (default to %(default)s)')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='increase log level')
    args = parser.parse_args()

    # check and setup arguments
    data_directory = args.data_directory
    if not data_directory.is_dir():
        raise ValueError(f'directory not found: {data_directory}')

    config = data_directory / 'config' / f'mfcc_{args.conf}.yaml'
    if not config.is_file():
        raise ValueError(f'file not found: {config}')

    warps = None
    if args.warps.is_file():
        warps = {
            spk: float(warp)
            for spk, warp in (line.strip().split()
                              for line in open(args.warps, 'r'))
        }
    else:
        # the case without VTLN
        assert str(args.warps) == 'off'

    log = get_logger('extraction', 'debug' if args.verbose else 'info')

    # load input utterances
    log.info('loading utterances...')
    utterances = Utterances.load(data_directory / 'english.utts')

    # extract the features
    features = pipeline.extract_features(config,
                                         utterances,
                                         warps=warps,
                                         njobs=args.njobs,
                                         log=log)

    # save them
    (args.output_file.parent).mkdir(exist_ok=True, parents=True)
    features.save(args.output_file)
Example #11
    def __init__(self):
        self._logger = get_logger(self.name, level='info')
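
The helper itself can be called directly: across these examples it takes a name and a level, plus an optional formatter (see Example #8):

from shennong.logger import get_logger

log = get_logger('my-module', 'debug')
log.info('logger ready')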
Example #12
    def concatenate(self,
                    other,
                    tolerance=0,
                    log=get_logger('features', 'info')):
        """Returns the concatenation of this features with `other`

        Build a new Features instance made of the concatenation of
        this instance with the other instance. Their `times` must be
        equal.

        Parameters
        ----------
        other : Features, shape = [nframes +/- tolerance, ndim2]
            The other features to concatenate at the end of this one
        tolerance : int, optional
            If the number of frames of the two features is different,
            trim the longest one up to a frame difference of
            `tolerance`, otherwise raise a ValueError. This option is
            useful when concatenating pitch with other 'standard'
            features because pitch processing includes a downsampling
            which can alter the resulting number of frames (the same
            tolerance is applied in Kaldi, e.g. in paste-feats).
            Default to 0.
        log : logging.Logger, optional
            Where to send log messages

        Returns
        -------
        features : Features, shape = [nframes +/- tolerance, ndim1 + ndim2]

        Raises
        ------
        ValueError
            If `other` cannot be concatenated because of
            inconsistencies: a difference in the number of frames
            greater than the tolerance, or unequal times values.

        """
        # check the number of frames is within the tolerance
        need_trim = False
        diff = abs(self.nframes - other.nframes)
        if diff:
            if not tolerance:
                raise ValueError('features have a different number of frames')
            if tolerance and diff > tolerance:
                raise ValueError(
                    'features differ in number of frames by more than the '
                    'tolerance: |{} - {}| > {}'.format(
                        self.nframes, other.nframes, tolerance))

            log.warning(
                'features differ in number of frames, but '
                'within tolerance (|%s - %s| <= %s), trimming the longest one',
                self.nframes, other.nframes, tolerance)
            need_trim = True

        # trim the longest features to the size of the shortest one
        data1 = self.data
        data2 = other.data
        times1 = self.times
        times2 = other.times
        if need_trim:
            if self.nframes > other.nframes:
                data1 = data1[:-diff]
                times1 = times1[:-diff]
            else:
                data2 = data2[:-diff]
                times2 = times2[:-diff]

        # ensure the time axis is shared across the two features
        if not np.allclose(times1, times2):
            raise ValueError('times are not equal')

        # merge properties of the two features
        properties = copy.deepcopy(self.properties)
        other_properties = copy.deepcopy(other.properties)
        properties.update(
            {k: v
             for k, v in other_properties.items() if k != 'pipeline'})
        if 'pipeline' not in properties:
            properties['pipeline'] = []
        if 'pipeline' in other_properties:
            for k in other_properties['pipeline']:
                properties['pipeline'].append(k)
                columns = properties['pipeline'][-1]['columns']
                properties['pipeline'][-1]['columns'] = [
                    columns[0] + self.ndims, columns[1] + self.ndims
                ]

        return Features(np.hstack((data1, data2)),
                        times1,
                        properties=properties)
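
A typical use is pasting pitch onto another feature stream, whose frame counts can differ slightly; a sketch assuming `mfcc` and `pitch` are Features instances computed on the same audio:

# allow up to a 2-frame mismatch, since pitch post-processing can
# drop a few frames at the edges
both = mfcc.concatenate(pitch, tolerance=2)
assert both.ndims == mfcc.ndims + pitch.ndims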
Example #13
def _init_config(config, log=get_logger('pipeline', 'warning')):
    try:
        if os.path.isfile(config):
            log.debug('loading configuration from %s', config)
            config = open(config, 'r').read()
    except TypeError:
        pass

    if isinstance(config, str):
        # the config is a string, try to load it as a YAML
        try:
            config = yaml.load(config, Loader=yaml.FullLoader)
        except yaml.YAMLError as err:
            raise ValueError(f'error in configuration: {err}')

    # ensure all the keys in config are known
    unknown_keys = [
        k for k in config.keys()
        if k not in list(PipelineManager.valid_processors) + ['pitch']
    ]
    if unknown_keys:
        raise ValueError('invalid keys in configuration: {}'.format(
            ', '.join(unknown_keys)))

    # ensure one and only one features processor is defined in the
    # configuration
    features = [k for k in config.keys() if k in valid_features()]
    if not features:
        raise ValueError(
            'the configuration does not define any features extraction '
            '(must have one and only one entry of {})'.format(', '.join(
                valid_features())))
    if len(features) > 1:
        raise ValueError(
            'more than one features extraction processors are defined, '
            '(must have one and only one entry of {}): {}'.format(
                ', '.join(valid_features()), ', '.join(features)))

    if 'vtln' in config and features[0] in ('spectrogram', 'bottleneck'):
        raise ValueError(f'{features[0]} features do not support VTLN')

    if 'cmvn' in config:
        # force by_speaker to False if not set
        if 'by_speaker' not in config['cmvn']:
            log.warning('by_speaker option not specified for cmvn, '
                        'assuming it is false and doing cmvn by utterance')
            config['cmvn']['by_speaker'] = False
        # force with_vad to True if not set
        if 'with_vad' not in config['cmvn']:
            config['cmvn']['with_vad'] = True

    # on pitch, make sure we have a 'postprocessing' entry
    if 'pitch' in config and 'postprocessing' not in config['pitch']:
        config['pitch']['postprocessing'] = {}

    # log message describing the pipeline configuration
    msg = []
    if 'pitch' in config:
        msg.append(f'{config["pitch"]["processor"]} pitch')
    if 'delta' in config:
        msg.append('delta')
    if 'cmvn' in config:
        msg.append('cmvn by {}{}'.format(
            'speaker' if config['cmvn']['by_speaker'] else 'utterance',
            ' with vad' if config['cmvn']['with_vad'] else ''))
    if 'vtln' in config:
        msg.append('vtln by {}'.format(
            'speaker' if config['vtln']['by_speaker'] else 'utterance'))

    log.info('pipeline configured for %s features extraction%s', features[0],
             ' with {}'.format(', '.join(msg)) if msg else '')

    return config
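
For illustration, a configuration string that passes all these checks, going through the YAML branch above (the mfcc parameter names are assumptions; the cmvn and pitch keys follow the checks in the code):

config = _init_config('''
mfcc:
  frame_length: 0.025
  frame_shift: 0.01
cmvn:
  by_speaker: true
pitch:
  processor: kaldi
''')
# -> logs: pipeline configured for mfcc features extraction
#    with kaldi pitch, cmvn by speaker with vad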
Example #14
def extract_features(configuration,
                     utterances,
                     warps=None,
                     njobs=1,
                     log=get_logger('pipeline', 'warning')):
    """Speech features extraction pipeline

    Given a pipeline ``configuration`` and ``utterances`` defining a list of
    utterances on which to extract features, this function applies the whole
    pipeline and returns the extracted features as an instance of
    :class:`~shennong.features.features.FeaturesCollection`. It uses ``njobs``
    parallel subprocesses.

    Parameters
    ----------
    configuration : dict or str
        The pipeline configuration, can be a dictionary, a path to a
        YAML file or a string formatted in YAML. To get a
        configuration example, see :func:`get_default_config`
    utterances : :class:`~shennong.utterances.Utterances`
        The list of utterances to extract the features on.
    warps : dict, optional
        A dictionary of precomputed VTLN warps coefficients to be applied on
        features. Must be a dict (str: float) of warps indexed either by
        utterances speaker or name. The ``warps`` argument and the
        config['vtln'] entry must not both be defined.
    njobs : int, optional
        The number of subprocesses to execute in parallel, a
        single process by default.
    log : logging.Logger
        A logger to display messages during pipeline execution

    Returns
    -------
    features : :class:`~shennong.features.features.FeaturesCollection`
       The extracted speech features

    Raises
    ------
    ValueError
        If the ``configuration`` or the ``utterances`` are invalid, if both the
        ``warps`` argument and the 'vtln' entry in configuration are defined or
        if something goes wrong during features extraction.

    """
    # initialize the pipeline configuration and the list of wav files
    # to process, instantiate the pipeline processors and run all the
    # checks to ensure everything is correct
    njobs = get_njobs(njobs, log=log)
    config = _init_config(configuration, log=log)

    log.info('detected format for utterances index is: %s',
             utterances.format(type=str))

    # make sure the warps are valid (not overloading 'vtln' in config and
    # either by speaker or by utterance. If defined per speaker convert them by
    # utterance)
    if warps:
        warps = _init_warps(warps, config, utterances, log)

    # check the OMP_NUM_THREADS variable for parallel computations
    _check_environment(njobs, log=log)

    # do all the computations
    return _extract_features(config, utterances, warps, njobs=njobs, log=log)
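
End-to-end usage (a sketch; `get_default_config` is referenced in the docstring above, and the import paths plus the '(name, audio-file)' utterance format are assumptions):

import shennong.pipeline as pipeline
from shennong.utterances import Utterances

config = pipeline.get_default_config('mfcc')
utterances = Utterances([('utt1', '/path/to/utt1.wav')])
features = pipeline.extract_features(config, utterances, njobs=2)
features.save('utt1_mfcc.h5f')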