Example n. 1
0
def main():
    """Train VTLN from an utterances file and save per-speaker warps to disk."""
    # command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'utterances_file', type=pathlib.Path, help='path to the utterances')
    parser.add_argument(
        '-j', '--njobs', type=int, default=get_njobs(),
        help='number of parallel jobs, default to %(default)s')
    args = parser.parse_args()

    # derive the output filename from the input one
    output_file = str(args.utterances_file)
    output_file = output_file.replace('segments', 'warps')
    output_file = output_file.replace('.utt', '.warp')
    pathlib.Path(output_file).parent.mkdir(exist_ok=True)

    # train VTLN, computing one warp factor per speaker
    utterances = Utterances.load(args.utterances_file)
    vtln = VtlnProcessor()
    vtln.set_logger('info')
    warps = vtln.process(utterances, njobs=args.njobs, group_by='speaker')

    # save the warps, one '<speaker> <warp>' entry per line
    with open(output_file, 'w') as fout:
        for speaker, warp in sorted(warps.items()):
            fout.write(f'{speaker} {warp}\n')
Example n. 2
0
def extract_features_warp(configuration, utterances, warp, log, njobs=1):
    """Speech features extraction pipeline when all features are warped
    by the same factor. Used in the
    :func:`~shennong.features.processor.vtln.VtlnProcessor.process`
    method of the :class:`~shennong.features.processor.vtln.VtlnProcessor`.

    Parameters
    ----------
    configuration : dict or str
        The pipeline configuration, forwarded to ``_init_config``.
    utterances : iterable
        The utterances on which to extract features.
    warp : float
        The single VTLN warp factor applied to every utterance.
    log : logging.Logger
        A logger to display messages during pipeline execution.
    njobs : int, optional
        Number of parallel jobs, a single job by default.

    Returns
    -------
    features : FeaturesCollection
        The features extracted for each utterance, all warped by ``warp``.

    """
    # initialize the pipeline configuration, the list of wav files to
    # process, instantiate the pipeline processors and make all the
    # checks to ensure all is correct
    njobs = get_njobs(njobs, log=log)
    config = _init_config(configuration, log=log)

    # check the OMP_NUM_THREADS variable for parallel computations
    _check_environment(njobs, log=log)

    manager = PipelineManager(config, utterances, log=log)

    # verbosity level for joblib (no joblib verbosity on debug level
    # (level <= 10) because each step is already detailed in inner
    # loops
    verbose = 8 if log.getEffectiveLevel() > 10 else 0

    # extract features for each utterance in parallel, all with the same
    # warp factor, and collect the results in a FeaturesCollection
    return FeaturesCollection(
        _Parallel(f'features extraction with warp {warp}',
                  log,
                  n_jobs=njobs,
                  verbose=verbose,
                  prefer='threads')(joblib.delayed(_extract_single_pass_warp)(
                      utterance, manager, warp, log=log)
                                    for utterance in utterances))
Example n. 3
0
    def process_all(self, utterances, njobs=None, **kwargs):
        """Returns features processed from several input `utterances`

        This function processes the features in parallel jobs.

        Parameters
        ----------
        utterances: :class:`~shennong.utterances.Utterances`
            The utterances on which to process features on.
        njobs: int, optional
            The number of parallel jobs to run in background. Default
            to the number of CPU cores available on the machine.
        **kwargs: dict, optional
            Extra arguments to be forwarded to the `process` method. Keys must
            be the same as for `utterances`.

        Returns
        -------
        features: :class:`~shennong.features_collection.FeaturesCollection`
            The computed features on each input signal. The keys of
            output `features` are the keys of the input `utterances`.

        Raises
        ------
        ValueError
            If the `njobs` parameter is <= 0 or if an entry is missing in
            optional kwargs.

        """
        # checks the number of background jobs
        njobs = get_njobs(njobs, log=self.log)

        # check the extra arguments: each one must be a dict indexed by
        # utterance name, covering exactly the names in `utterances`
        for name, value in kwargs.items():
            if not isinstance(value, dict):
                raise ValueError(f'argument "{name}" is not a dict')
            if value.keys() != utterances.by_name().keys():
                raise ValueError(
                    f'utterances and "{name}" have different names')

        def _process_one(utterance, **kwargs):
            # process a single utterance, forwarding only the extra
            # arguments relevant to it
            return utterance.name, self.process(
                utterance.load_audio(),
                **{k: v[utterance.name]
                   for k, v in kwargs.items()})

        # no joblib verbosity on debug level (level <= 10) because each
        # step is already detailed in inner loops
        verbose = 8 if self.log.getEffectiveLevel() > 10 else 0

        return FeaturesCollection(
            joblib.Parallel(n_jobs=njobs, verbose=verbose, prefer='threads')(
                joblib.delayed(_process_one)(utt, **kwargs)
                for utt in utterances))
Example n. 4
0
    def process_all(self, signals, njobs=None):
        """Returns features processed from several input `signals`

        This function processes the features in parallel jobs.

        Parameters
        ----------
        signals: dict of :class:`~shennong.audio.Audio`
            A dictionary of input audio signals to process features
            on, where the keys are item names and values are audio
            signals.
        njobs: int, optional
            The number of parallel jobs to run in background. Default
            to the number of CPU cores available on the machine.

        Returns
        -------
        features: :class:`~shennong.features.features.FeaturesCollection`
            The computed features on each input signal. The keys of
            output `features` are the keys of the input `signals`.

        Raises
        ------
        ValueError
            If the `njobs` parameter is <= 0

        """
        # checks the number of background jobs
        njobs = get_njobs(njobs, log=self._log)

        def _process_one(name, signal):
            # process a single signal, keeping track of its name
            return name, self.process(signal)

        # extract features in parallel, then build the collection from the
        # resulting (name, features) pairs
        return FeaturesCollection(
            **dict(joblib.Parallel(
                n_jobs=njobs, verbose=0, backend='threading')(
                    joblib.delayed(_process_one)(name, signal)
                    for name, signal in signals.items())))
Example n. 5
0
def main():
    """Train VTLN, extract warps and apply warped MFCC on Buckeye corpus"""
    parser = argparse.ArgumentParser()
    parser.add_argument('buckeye_corpus',
                        type=pathlib.Path,
                        help='path to the raw Buckeye Corpus')
    parser.add_argument('output_file',
                        type=pathlib.Path,
                        help='where to save the computed MFCCs')
    parser.add_argument(
        '-j',
        '--njobs',
        type=int,
        default=get_njobs(),
        help='number of parallel jobs to use, default to %(default)s')
    parser.add_argument('-d',
                        '--duration',
                        type=float,
                        default=10 * 60,
                        help=('speech duration per speaker for VTLN training, '
                              'default to %(default)s'))
    parser.add_argument('--warp-step',
                        type=float,
                        default=0.01,
                        help='VTLN warp step, default to %(default)s')
    parser.add_argument('--warp-min',
                        type=float,
                        default=0.85,
                        help='VTLN min warp, default to %(default)s')
    parser.add_argument('--warp-max',
                        type=float,
                        default=1.25,
                        help='VTLN max warp, default to %(default)s')
    args = parser.parse_args()

    # check input parameters
    if args.output_file.exists():
        raise ValueError(f'{args.output_file} already exists')
    if not args.buckeye_corpus.is_dir():
        raise ValueError(f'{args.buckeye_corpus} is not a directory')

    # generates utterances from the Buckeye corpus
    utterances = prepare_buckeye(args.buckeye_corpus)

    # extract 10m of speech per speaker to train VTLN
    vtln_utterances = utterances.fit_to_duration(args.duration)

    # compute the VTLN warps coefficients
    print(f'training VTLN on {args.duration}s per speaker '
          f'({len(vtln_utterances)} utterances)')
    processor = VtlnProcessor(warp_step=args.warp_step,
                              min_warp=args.warp_min,
                              max_warp=args.warp_max)
    processor.set_logger('info')
    warps = processor.process(vtln_utterances,
                              njobs=args.njobs,
                              group_by='speaker')

    print('VTLN warps per speaker are:')
    for spk, warp in sorted(warps.items()):
        print(f'{spk}: {warp}')

    # convert warps from speaker to utterance in the whole corpus
    warps = {utt.name: warps[utt.speaker] for utt in utterances}

    # fixed typo in user message: 'uttterances' -> 'utterances'
    print(f'computing warped MFCCs for {len(utterances)} utterances')
    features = MfccProcessor().process_all(utterances,
                                           vtln_warp=warps,
                                           njobs=args.njobs)

    print(f'writing MFCCs to {args.output_file}')
    features.save(args.output_file)
Example n. 6
0
def extract_features(configuration, utterances_index,
                     njobs=1, log=get_logger()):
    """Speech features extraction pipeline

    Given a pipeline ``configuration`` and an ``utterances_index``
    defining a list of utterances on which to extract features, this
    function applies the whole pipeline and returns the extracted
    features as an instance of
    :class:`~shennong.features.features.FeaturesCollection`. It uses
    ``njobs`` parallel subprocesses.

    The utterances in the ``utterances_index`` can be defined in one
    of the following format (the format must be homogeneous across the
    index, i.e. only one format can be used):

    * 1-uple (or str): ``<wav-file>``
    * 2-uple: ``<utterance-id> <wav-file>``
    * 3-uple: ``<utterance-id> <wav-file> <speaker-id>``
    * 4-uple: ``<utterance-id> <wav-file> <tstart> <tstop>``
    * 5-uple: ``<utterance-id> <wav-file> <speaker-id> <tstart> <tstop>``

    Parameters
    ----------
    configuration : dict or str
        The pipeline configuration, can be a dictionary, a path to a
        YAML file or a string formatted in YAML. To get a
        configuration example, see :func:`get_default_config`
    utterances_index : sequence of tuples
        The list of utterances to extract the features on.
    njobs : int, optional
        The number of subprocesses to execute in parallel, use a
        single process by default.
    log : logging.Logger
        A logger to display messages during pipeline execution

    Returns
    -------
    features : :class:`~shennong.features.features.FeaturesCollection`
       The extracted speech features

    Raises
    ------
    ValueError
        If the ``configuration`` or the ``utterances_index`` are
        invalid, or if something goes wrong during features
        extraction.

    """
    # initialize the pipeline configuration, the list of wav files to
    # process, instantiate the pipeline processors and make all the
    # checks to ensure all is correct
    njobs = get_njobs(njobs, log=log)
    config = _init_config(configuration, log=log)
    utterances = _init_utterances(utterances_index, log=log)

    # check the OMP_NUM_THREADS variable for parallel computations
    _check_environment(njobs, log=log)

    # do all the computations
    return _extract_features(
        config, utterances, njobs=njobs, log=log)
Example n. 7
0
def extract_features(configuration,
                     utterances,
                     warps=None,
                     njobs=1,
                     log=get_logger('pipeline', 'warning')):
    """Speech features extraction pipeline

    Given a pipeline ``configuration`` and ``utterances`` defining a list of
    utterances on which to extract features, this function applies the whole
    pipeline and returns the extracted features as an instance of
    :class:`~shennong.features.features.FeaturesCollection`. It uses ``njobs``
    parallel subprocesses.

    Parameters
    ----------
    configuration : dict or str
        The pipeline configuration, can be a dictionary, a path to a
        YAML file or a string formatted in YAML. To get a
        configuration example, see :func:`get_default_config`
    utterances : :class:`~shennong.utterances.Utterances`
        The list of utterances to extract the features on.
    warps : dict, optional
        A dictionary of precomputed VTLN warps coefficients to be applied on
        features. Must be a dict (str: float) of warps indexed either by
        utterances speaker or name. Both the ``warps`` argument and the
        config['vtln'] entry must not be defined together.
    njobs : int, optional
        The number of subprocesses to execute in parallel, use a
        single process by default.
    log : logging.Logger
        A logger to display messages during pipeline execution

    Returns
    -------
    features : :class:`~shennong.features.features.FeaturesCollection`
       The extracted speech features

    Raises
    ------
    ValueError
        If the ``configuration`` or the ``utterances`` are invalid, if both the
        ``warps`` argument and the 'vtln' entry in configuration are defined or
        if something goes wrong during features extraction.

    """
    # initialize the pipeline configuration, the list of wav files to
    # process, instantiate the pipeline processors and make all the
    # checks to ensure all is correct
    njobs = get_njobs(njobs, log=log)
    config = _init_config(configuration, log=log)

    log.info('detected format for utterances index is: %s',
             utterances.format(type=str))

    # make sure the warps are valid (not overloading 'vtln' in config and
    # either by speaker or by utterance. If defined per speaker convert them by
    # utterance)
    if warps:
        warps = _init_warps(warps, config, utterances, log)

    # check the OMP_NUM_THREADS variable for parallel computations
    _check_environment(njobs, log=log)

    # do all the computations
    return _extract_features(config, utterances, warps, njobs=njobs, log=log)