Example #1
    def score_corpus(self, input_corpus, targets_corpus, loss='rmse'):
        """Iteratively compute the network's likelihood or prediction error.

        input_corpus   : sequence of input data arrays
        targets_corpus : sequence of true targets arrays
        loss           : quantity to score ; either 'likelihood' of the
                         produced GMM or 'rmse' of the derived prediction

        The input and target corpora must be sequences of numpy arrays
        of values to feed to the network and to evaluate against,
        without any nested array structure.
        """
        # Add an argument unneeded by parent; pylint: disable=arguments-differ
        # Check 'loss' argument validity. Handle the rmse metric case.
        check_type_validity(loss, str, 'loss')
        if loss == 'rmse':
            return super().score_corpus(input_corpus, targets_corpus)
        if loss != 'likelihood':
            raise ValueError("Unknown loss quantity: '%s'.")
        # Handle the likelihood metric case.
        # Compute sample-wise likelihoods.
        aggregate = np.array if len(self.input_shape) == 2 else np.concatenate
        scores = aggregate([
            self.score(input_data, targets, loss='likelihood')
            for input_data, targets in zip(input_corpus, targets_corpus)
        ])
        # Gather samples' lengths. Reduce scores and return them.
        sizes = self._get_corpus_sizes(input_corpus).ravel()
        return np.sum(scores * sizes) / sizes.sum()
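
A minimal usage sketch for the method above, assuming `model` is a trained instance of the enclosing class and `inputs` / `targets` are matching lists of 2-D numpy arrays (all three names are hypothetical):

# Hypothetical names: a trained model and matching corpora of 2-D arrays.
rmse = model.score_corpus(inputs, targets, loss='rmse')
log_likelihood = model.score_corpus(inputs, targets, loss='likelihood')
print('corpus rmse: %s | mean log-likelihood: %s' % (rmse, log_likelihood))
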
Example #2
def filter_1d_signal(signal, filt):
    """Apply a filter to a one-dimensional signal.

    signal : signal to filter, of shape (signal length, n_channels)
             (or (signal length,) in case of a single channel)
    filt   : filter to apply, of shape (n_channels, filter width)
    """
    check_type_validity(signal, tf.Tensor, 'signal')
    check_type_validity(filt, tf.Tensor, 'filt')
    # Check the signal's shape and adjust it if needed.
    one_dimensional = (len(signal.shape) == 1)
    if one_dimensional:
        signal = tf.expand_dims(signal, 1)
    elif len(signal.shape) > 2:
        raise ValueError("'signal' rank is not in [1, 2].")
    # Convolve the signal and the filter.
    convolved = tf.nn.conv1d(tf.expand_dims(tf.transpose(signal), -1),
                             tf.expand_dims(tf.transpose(filt), 1),
                             stride=1,
                             padding='SAME')
    # Gather the results and return them.
    index = tf.expand_dims(tf.range(signal.shape[1], dtype=tf.int32), 1)
    filtered = tf.gather_nd(tf.transpose(convolved, [0, 2, 1]),
                            tf.concat([index, index], axis=1))
    return filtered[0] if one_dimensional else tf.transpose(filtered)
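
A minimal sketch of calling the function above under TensorFlow 1.x (which this code targets), filtering a two-channel signal with a channel-wise moving-average filter of width 3:

import numpy as np
import tensorflow as tf

# Sketch assuming TensorFlow 1.x graph mode.
signal = tf.constant(np.random.randn(100, 2), dtype=tf.float32)
filt = tf.constant(np.full((2, 3), 1 / 3), dtype=tf.float32)
filtered = filter_1d_signal(signal, filt)
with tf.Session() as session:
    print(session.run(filtered).shape)  # expected: (100, 2)
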
Example #3
File: _wav.py Project: tianchi03/ac2art
def linear_predictive_coding(frames, n_coeff=20):
    """Return linear predictive coding coefficients for each audio frame.

    frames  : 2-D numpy.ndarray where each line represents an audio frame
    n_coeff : number of LPC coefficients to generate (also equal to the
              maximum autocorrelation lag order considered)
    """
    # Check arguments validity. Adjust the number of coefficients.
    check_type_validity(frames, np.ndarray, 'frames')
    if frames.ndim != 2:
        raise ValueError('`frames` should be a 2-D np.array.')
    check_type_validity(n_coeff, int, 'n_coeff')
    if n_coeff < 1:
        raise ValueError('`n_coeff` should be a strictly positive int.')
    n_coeff = min(n_coeff, frames.shape[1] - 1)
    # Compute the frame-wise LPC coefficients.
    autocorrelations = librosa.autocorrelate(frames, n_coeff + 1)
    lpc = np.array([
        # Levinson-Durbin recursion. False positive pylint: disable=no-member
        scipy.linalg.solve_toeplitz(autocorr[:-1], autocorr[1:])
        for autocorr in autocorrelations
    ])
    # Compute the frame-wise root mean squared prediction errors.
    frame_wise_errors = np.array([
        frames[:, i] - np.sum(lpc * frames[:, i - n_coeff:i][:, ::-1], axis=1)
        for i in range(n_coeff, frames.shape[1])
    ])
    frames_rmse = np.sqrt(np.mean(np.square(frame_wise_errors), axis=0))
    # Return the LPC coefficients and error terms.
    return lpc, frames_rmse
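
A minimal sketch running the function above on synthetic frames, assuming numpy, librosa and scipy are installed; it only illustrates the expected output shapes:

import numpy as np

# 50 synthetic frames of 400 samples each.
frames = np.random.randn(50, 400)
lpc, errors = linear_predictive_coding(frames, n_coeff=20)
print(lpc.shape)     # expected: (50, 20) - one coefficient vector per frame
print(errors.shape)  # expected: (50,) - one rms prediction error per frame
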
Example #4
File: _wav.py Project: tianchi03/ac2art
    def get(self, features, n_coeff, static_only):
        """Wrap the call to any features-producing method.

        features    : kind of features to produce
                      (will call self.get_`features`)
        n_coeff     : number of coefficients to produce
        static_only : whether to return the sole static features
                      instead of adding delta and deltadelta ones
        """
        check_type_validity(features, str, 'features')
        method = getattr(self, 'get_' + features)
        return method(n_coeff, static_only)
Example #5
File: _gan.py Project: tianchi03/ac2art
 def _validate_args(self):
     """Process the initialization arguments of the instance."""
     # Validate arguments defining the generator network.
     super()._validate_args()
     # Validate the discriminator network's hidden layers config.
     check_type_validity(self.discr_config, list, 'discr_config')
     for i, config in enumerate(self.discr_config):
         validated = validate_layer_config(config)
         if issubclass(get_layer_class(validated[0]), SignalFilter):
             raise ValueError(
                 'Discriminator layers may not contain signal filters.')
         self.discr_config[i] = validated
Example #6
File: _abx.py Project: tianchi03/ac2art
    def _setup_features_loader(audio_features, ema_features, inverter,
                               dynamic_ema, articulators):
        """Build a function to load features associated with an utterance.

        See `extract_h5_features` documentation for arguments.
        """
        nonlocal load_acoustic, load_ema
        # Check that provided arguments make sense.
        if audio_features is None and ema_features is None:
            raise RuntimeError('No features were set to be included.')
        if inverter is not None:
            check_type_validity(
                inverter, (NeuralNetwork, type(None)), 'inverter')
            if audio_features is None:
                raise RuntimeError(
                    'No acoustic features specified to feed the inverter.')
            elif ema_features is not None:
                raise RuntimeError(
                    'Both ema features and an inverter were specified.')
        # Build the acoustic features loading function.
        if audio_features is not None:
            window = (0 if inverter is None or inverter.input_shape[-1] % 11
                      else 5)
            load_audio = functools.partial(load_acoustic,
                                           audio_type=audio_features,
                                           context_window=window)
            # Optionally build and return an inverter-based features loader.
            if inverter is not None:

                def invert_features(utterance):
                    """Return the features inverted from an utterance."""
                    pred = inverter.predict(load_audio(utterance))
                    return pred

                return invert_features
            if ema_features is None:
                return load_audio
        # Build the articulatory features loading function.
        if ema_features is not None:
            load_articulatory = functools.partial(load_ema,
                                                  norm_type=ema_features,
                                                  use_dynamic=dynamic_ema,
                                                  articulators=articulators)
            if audio_features is None:
                return load_articulatory
        # When appropriate, build a global features loading function.
        def load_features(utterance):
            """Load the features associated with an utterance."""
            return np.concatenate(
                [load_audio(utterance),
                 load_articulatory(utterance)], axis=1)

        return load_features
Example #7
    def __init__(self, filename):
        """Initialize the instance.

        filename : path to the target file (str)
        """
        check_type_validity(filename, str, 'filename')
        # Set up the instance's attributes.
        self.filename = os.path.abspath(filename)
        self.data = None
        self.column_names = {}
        self.time_index = None
        # Load the attributes' values from the file.
        self.load()
Example #8
    def __init__(
            self, input_data, layers_shape, batch_sizes=None,
            cell_type='lstm', activation='tanh', name='rnn', keep_prob=None
        ):
        """Instantiate the recurrent neural network.

        input_data   : input data of the network (tensorflow.Tensor),
                       either of shape [n_batches, max_time, input_size]
                       or [len_sequence, input_size]
        layers_shape : number of units per layer (int or tuple of int)
        batch_sizes  : true lengths of the batched sequences
                       (for input tensors of rank 3 only)
        cell_type    : type of recurrent cells to use (short name (str)
                       or tensorflow.nn.rnn_cell.RNNCell subclass,
                       default 'lstm', i.e. LSTMCell)
        activation   : activation function of the cell units (function
                       or function name, default 'tanh')
        name         : name of the stack (using the same name twice
                       will cause tensorflow to raise an exception)
        keep_prob    : optional Tensor recording a keep probability
                       used as a dropout parameter

        This needs overriding by subclasses to actually build the network
        out of the pre-validated arguments. The `weights` attribute should
        also be filled in by subclasses.
        """
        # Arguments serve modularity; pylint: disable=too-many-arguments
        # Check name validity.
        check_type_validity(name, str, 'name')
        self.name = name
        # Check input data validity.
        check_type_validity(input_data, tf.Tensor, 'input_data')
        if len(input_data.shape) == 2:
            self.input_data = tf.expand_dims(input_data, 0)
            self.batch_sizes = None
            self._batched = False
        elif len(input_data.shape) == 3:
            self.input_data = input_data
            if batch_sizes is None:
                raise ValueError(
                    "With rank 3 'input_data', 'batch_sizes' is mandatory.")
            self.batch_sizes = batch_sizes
            self._batched = True
        else:
            raise TypeError("Invalid 'input_data' rank: should be 2 or 3.")
        # Check layers shape validity.
        check_type_validity(layers_shape, (tuple, int), 'layers_shape')
        if isinstance(layers_shape, int):
            layers_shape = (layers_shape,)
        for value in layers_shape:
            check_positive_int(value, "layer's number of units")
        self.layers_shape = layers_shape
        # Check keep_prob validity.
        check_type_validity(keep_prob, (tf.Tensor, type(None)), 'keep_prob')
        self.keep_prob = keep_prob
        # Set up the RNN cell type and activation function.
        self.cell_type = setup_rnn_cell_type(cell_type)
        self.activation = setup_activation_function(activation)
        # Set up an attribute that needs assigning by subclasses.
        self.weights = None
Example #9
def setup_rnn_cell_type(cell_type):
    """Validate and return a tensorflow RNN cell type.

    cell_type : either an actual cell type, returned as is,
                or a cell type name, from which the actual
                type is looked for and returned.
    """
    check_type_validity(cell_type, (str, type), 'cell_type')
    if isinstance(cell_type, str):
        return get_object(cell_type, RNN_CELL_TYPES, 'RNN cell type')
    if issubclass(cell_type, tf.nn.rnn_cell.RNNCell):
        return cell_type
    raise TypeError(
        "'cell_type' is not a tensorflow.nn.rnn_cell.RNNCell subclass.")
Example #10
def build_layers_stack(input_tensor,
                       layers_config,
                       keep_prob=None,
                       batch_sizes=None,
                       check_config=True):
    """Build a stack of neural layers, rnn substacks and signal filters.

    input_tensor  : tensorflow.Tensor fed to the first layer of the stack
    layers_config : list of tuples specifying the stack's layers ; each
                    tuple should contain a layer type (or its shortname),
                    a primary argument (number of units, or cutoff frequency
                    for signal filters) and an optional dict of keyword
                    arguments used to instantiate the layer
    keep_prob     : optional tensor specifying dropout keep probability
    batch_sizes   : optional tensor specifying true sequence lengths
                    when using batches of data sequences
    check_config  : whether to check `layers_config` to be valid
                    (bool, default True)
    """
    # Optionally check the layers_config argument's validity.
    if check_config:
        check_type_validity(layers_config, list, 'layers_config')
        for i, config in enumerate(layers_config):
            layers_config[i] = validate_layer_config(config)
    # Build the layers' stack container and a type-wise layers counter.
    layers_stack = OrderedDict([])
    layers_counter = {}
    # Iteratively build the layers.
    for name, n_units, kwargs in layers_config:
        # Get the layer's class and give the layer a name.
        layer_class = get_layer_class(name)
        layer_name = kwargs.pop(
            'name', name + '_%s' % layers_counter.setdefault(name, 0))
        # Handle dropout and naming, if relevant.
        if issubclass(layer_class, (DenseLayer, AbstractRNN)):
            kwargs = kwargs.copy()
            kwargs.setdefault('keep_prob', keep_prob)
            kwargs['name'] = layer_name
            # Avoid RNN scope issues. Feed batch sizes, if any.
            if issubclass(layer_class, AbstractRNN):
                kwargs['name'] += '_%s' % int(time.time())
                kwargs['batch_sizes'] = batch_sizes
        # Instantiate the layer.
        layer = layer_class(input_tensor, n_units, **kwargs)
        # Add the layer to the stack and use its output as next input.
        layers_stack[layer_name] = layer
        layers_counter[name] += 1
        input_tensor = layer.output
    # Return the layers stack.
    return layers_stack
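
A hedged sketch of the expected `layers_config` format, assuming TensorFlow 1.x and that 'dense_layer' is one of the short names registered for `get_layer_class` (the short name itself is an assumption):

import tensorflow as tf

# Two dense layers: a 3-tuple with kwargs and a 2-tuple without any.
inputs = tf.placeholder(tf.float32, shape=[None, 40])
layers_config = [
    ('dense_layer', 300, {'activation': 'relu'}),
    ('dense_layer', 300),
]
stack = build_layers_stack(inputs, layers_config)
output = list(stack.values())[-1].output  # output tensor of the last layer
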
Example #11
    def __init__(self,
                 input_data,
                 n_units,
                 activation='relu',
                 bias=True,
                 name='DenseLayer',
                 keep_prob=None):
        """Initialize the fully-connected neural layer.

        input_data : tensorflow variable or placeholder to use as input
        n_units    : number of units of the layer
        activation : activation function or activation function name
                     (default 'relu', i.e. `tensorflow.nn.relu`)
        bias       : whether to add a bias constant to the transformed data
                     passed to the activation function (bool, default True)
        name       : optional name to give to the layer's inner operations
        keep_prob  : optional Tensor recording a keep probability to use
                     as a dropout parameter
        """
        # Arguments serve modularity; pylint: disable=too-many-arguments
        # Check the arguments' validity.
        check_type_validity(input_data, tf.Tensor, 'input_data')
        if len(input_data.shape) not in (2, 3):
            raise TypeError("`input_data` must be of rank 2 or 3.")
        check_positive_int(n_units, 'n_units')
        check_type_validity(bias, bool, 'bias')
        check_type_validity(name, str, 'name')
        check_type_validity(keep_prob, (tf.Tensor, type(None)), 'keep_prob')
        # Set up the layer's activation function.
        self.activation = setup_activation_function(activation)
        # Set up the layer's weights, adjusting their initial value.
        # note: design loosely based on Glorot, X. & Bengio, Y. (2010)
        if self.activation is tf.nn.relu:
            stddev = np.sqrt(2 / n_units)
        elif self.activation in [tf.nn.tanh, tf.nn.softmax]:
            stddev = np.sqrt(3 / n_units)
        else:
            stddev = .1
        initial = tf.truncated_normal([input_data.shape[-1].value, n_units],
                                      mean=0,
                                      stddev=stddev)
        self.weights = tf.Variable(initial, name=name + '_weight')
        # Optionally set up a learnable bias term.
        if bias:
            self.bias = tf.Variable(tf.constant(0.1, shape=[n_units]),
                                    name=name + '_bias')
        else:
            self.bias = None
        # Build the layer's processing of the inputs.
        if len(input_data.shape) == 2:
            self.output = self._feed_tensor(input_data)
        else:
            self.output = run_along_first_dim(self._feed_tensor, input_data)
        # Optionally set up dropout on top of the layer.
        self.keep_prob = keep_prob
        if self.keep_prob is not None:
            self.output = tf.nn.dropout(self.output, keep_prob=keep_prob)
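
A minimal usage sketch of the layer on a rank-2 input, assuming TensorFlow 1.x:

import tensorflow as tf

# A 64-unit tanh layer on top of a rank-2 placeholder.
inputs = tf.placeholder(tf.float32, shape=[None, 128])
layer = DenseLayer(inputs, n_units=64, activation='tanh', name='hidden_0')
print(layer.output.shape)  # expected shape: (?, 64)
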
Example #12
def copy_feats(input_file, output_file):
    """Copy data from a (set of) ark file(s) to a txt file.

    input_file  : path to a .ark, ark-like .txt file or to a .scp
                  file indexing data from one or more .ark files
                  whose data to copy
    output_file : path to a .ark, ark-like .txt file or .scp file
                  to create (.scp files are doubled with a .ark one)
    """
    # Check the arguments' validity.
    check_type_validity(input_file, str, 'input_file')
    check_type_validity(output_file, str, 'output_file')
    if not os.path.exists(input_file):
        raise FileNotFoundError("'%s' does not exist." % input_file)
    # Set up the input file's handling descriptor.
    in_ext = input_file.rsplit('.', 1)[-1]
    if in_ext == 'txt':
        input_file = 't,ark:' + input_file
    elif in_ext in ('ark', 'scp'):
        input_file = input_file[-3:] + ':' + input_file
    else:
        raise TypeError(
            'Invalid input file extension: should be ark, txt or scp.')
    # Set up the output file's handling descriptor.
    out_ext = output_file.rsplit('.', 1)[-1]
    if out_ext == 'ark':
        output_file = 'ark:' + output_file
    elif out_ext == 'scp':
        output_file = 'ark,scp:{0}.ark,{0}.scp'.format(output_file[:-4])
    elif out_ext == 'txt':
        output_file = 't,ark:' + output_file
    else:
        raise TypeError(
            'Invalid output file extension: should be ark, txt or scp.')
    # Set up the copy-feats command.
    command = os.path.join(CONSTANTS['kaldi_folder'], 'src/featbin/copy-feats')
    command += ' %s %s' % (input_file, output_file)
    # Run the copy-feats conversion.
    status = os.system(command)
    if status != 0:
        raise RuntimeError(
            'kaldi copy-feats (%s to %s) exited with error code %s.' %
            (in_ext, out_ext, status))
    print('Successfully ran %s to %s conversion with kaldi.' %
          (in_ext, out_ext))
Example #13
File: _split.py Project: tianchi03/ac2art
def store_filesets(filesets, corpus):
    """Write lists of utterances defining sub-filesets of a corpus.

    filesets : dict associating a list of utterances' names to str
               keys serving as fileset names
    corpus   : name of the corpus which the filesets concern (str)
    """
    check_type_validity(filesets, dict, 'filesets')
    # Build the output 'filesets' folder if needed.
    base_folder = CONSTANTS['%s_processed_folder' % corpus]
    output_folder = os.path.join(base_folder, 'filesets')
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    # Iteratively write the filesets to their own txt file.
    for set_name, fileset in filesets.items():
        path = os.path.join(output_folder, set_name + '.txt')
        with open(path, 'w', encoding='utf-8') as file:
            file.write('\n'.join(fileset))
Example #14
def update_scp(scp_file, folder=''):
    """Update all paths in a .scp indexing file.

    scp_file : path to the scp file to alter (str)
    folder   : path to the folder containing the input files, either
               relative or absolute (str, default '', i.e. folder
               from which kaldi commands targeting the scp file are
               to be called)
    """
    check_type_validity(folder, str, 'folder')
    os.system('mv %s %s' % (scp_file, scp_file + '.tmp'))
    with open(scp_file + '.tmp', 'r') as infile:
        with open(scp_file, 'w') as outfile:
            for row in infile:
                utterance, path = row.split(' ')
                path = os.path.join(folder, os.path.basename(path))
                outfile.write(utterance + ' ' + path)
    os.system('rm %s.tmp' % scp_file)
Example #15
    def __init__(self, signal, cutoff, learnable=True, **kwargs):
        """Initialize the filter.

        signal    : signal to filter (tensorflow.Tensor of rank 1 to 3)
        cutoff    : cutoff frequency (or frequencies) of the filter, in Hz
                    (positive float or list, array or Tensor of such values)
        learnable : whether the cutoff frequency may be adjusted, e.g. in
                    backpropagation procedures (bool, default True)

        Subclasses may pass on any filter-designing keyword arguments.

        Note: three-dimensional signals are treated as a batch of 2-D
              signals stacked along the first dimension, and are filtered
              as such, i.e. independently.
        """
        # Check signal validity and shape.
        check_type_validity(signal, tf.Tensor, 'signal')
        if len(signal.shape) not in (1, 2, 3):
            raise ValueError("'signal' rank is not in [1, 2, 3].")
        self.n_channels = signal.shape[-1] if len(signal.shape) > 1 else 1
        # Check and assign the cutoff frequencies of the filter.
        self.cutoff = None
        self._build_cutoff(cutoff)
        # Optionally set the cutoff frequencies to be learnable.
        check_type_validity(learnable, bool, 'learnable')
        self.learnable = learnable
        if self.learnable:
            self.cutoff = tf.Variable(self.cutoff)
        # Set up the actual filter.
        self.filter = None
        self._build_filter(**kwargs)
        # Compute the filter's output.
        if len(signal.shape) <= 2:
            self.output = filter_1d_signal(signal, self.filter)
        else:
            self.output = run_along_first_dim(filter_1d_signal, signal,
                                              self.filter)
        # Record the instance's configuration.
        self.configuration = {
            'class': self.__class__.__module__ + '.' + self.__class__.__name__,
            'learnable': self.learnable
        }
        self.configuration.update(kwargs)
Example #16
def sequences_to_batch(sequences, length=None):
    """Batch a set of data sequences into a three-dimensional array.

    sequences : list or array of two-dimensional numpy arrays sharing
                the same shape on their last dimension
    length    : optional size of the batched array's second dimension
                (otherwise, maximum sample length is used)

    Return a numpy.array of shape [n_sequences, length, sequences_width],
    along with the true (possibly truncated) length of each sequence.
    """
    # Check sequences argument validity.
    check_type_validity(sequences, (list, np.ndarray), 'sequences')
    if isinstance(sequences, np.ndarray):
        if len(sequences.shape) == 2:
            sequences = [sequences]
        elif len(sequences.shape) != 1:
            raise TypeError(
                "'sequences' must be a list or a flat numpy array.")
    width = sequences[0].shape[1]
    valid = all(
        isinstance(sequence, np.ndarray) and len(sequence.shape) == 2
        and sequence.shape[1] == width for sequence in sequences)
    if not valid:
        raise TypeError(
            "All sequences must be 2-D numpy arrays of same number of columns."
        )
    # Gather the length of each and every sequence, truncated if needed.
    if length is None:
        batch_sizes = np.array([len(sequence) for sequence in sequences])
        length = np.max(batch_sizes)
    else:
        check_positive_int(length, 'length')
        batch_sizes = np.array(
            [min(len(sequence), length) for sequence in sequences])
    # Zero-pad the sequences and concatenate them.
    batched = np.array([
        np.concatenate(
            [seq[:length],
             np.zeros((length - seq_length, seq.shape[1]))])
        for seq, seq_length in zip(sequences, batch_sizes)
    ])
    # Return the batched sequences and the true sequence lengths.
    return batched, batch_sizes
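
A minimal sketch of the padding behaviour on two sequences of unequal length:

import numpy as np

# Two sequences sharing their width (3), of lengths 5 and 8.
sequences = [np.ones((5, 3)), np.ones((8, 3))]
batched, true_lengths = sequences_to_batch(sequences)
print(batched.shape)  # expected: (2, 8, 3) - zero-padded to the longest
print(true_lengths)   # expected: [5 8]
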
Example #17
File: _abx.py Project: tianchi03/ac2art
    def abx_from_features(features,
                          fileset=None,
                          byspeaker=True,
                          limit_phones=False,
                          n_jobs=1):
        """Run the ABXpy pipeline on a set of pre-extracted {0} features.

        features     : name of a h5 file of {0} features created with
                       the `extract_h5_features` function (str)
        fileset      : optional name of a fileset whose utterances'
                       features to use (str)
        byspeaker    : whether to discriminate pairs from the same
                       speaker only (bool, default True)
        limit_phones : whether to aggregate some phonemes, using
                       the 'ipa_reduced' column of the {0} symbols
                       file as mapping (bool, default False)
        n_jobs       : number of CPU cores to use (positive int, default 1)
        """
        nonlocal abx_folder, corpus, make_abx_task
        check_type_validity(features, str, 'features')
        check_type_validity(fileset, (str, type(None)), 'fileset')
        check_positive_int(n_jobs, 'n_jobs')
        # Declare the path to the task file.
        task_name = get_task_name(fileset, limit_phones)
        task_name += 'byspk_' * byspeaker
        task_file = os.path.join(abx_folder, task_name + 'task.abx')
        # Declare paths to the input features and output scores files.
        features_file = os.path.join(abx_folder, features + '.features')
        scores_file = features + '_' + task_name.split('_', 1)[1] + 'abx.csv'
        scores_file = os.path.join(abx_folder, scores_file)
        # Check that the features file exists.
        if not os.path.exists(features_file):
            raise FileNotFoundError("No such file: '%s'." % features_file)
        # Build the ABX task file if necessary.
        if not os.path.isfile(task_file):
            make_abx_task(fileset, byspeaker, limit_phones)
        else:
            print('Using found %s file.' % task_file)
        # Run the ABXpy pipeline.
        abxpy_pipeline(features_file, task_file, scores_file, n_jobs)
        # Replace phone symbols with IPA ones in the scores file.
        add_ipa_symbols(scores_file)
Example #18
def interpolate_missing_values(array):
    """Fill NaN values in a 1-D numpy array by cubic spline interpolation."""
    # Check array's type validity.
    check_type_validity(array, np.ndarray, 'array')
    if array.ndim > 1:
        raise TypeError("'array' must be one-dimensional.")
    # Identify NaN values. If there aren't any, simply return the array.
    is_nan = np.isnan(array)
    if is_nan.sum() == 0:
        return array
    array = array.copy()
    not_nan = ~is_nan
    # Build a cubic spline out of non-NaN values.
    spline = scipy.interpolate.splrep(np.argwhere(not_nan).ravel(),
                                      array[not_nan],
                                      k=3)
    # Interpolate missing values and replace them.
    for i in np.argwhere(is_nan).ravel():
        array[i] = scipy.interpolate.splev(i, spline)
    return array
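
A minimal sketch of the function above restoring two missing values of a sampled sine wave:

import numpy as np

# A sine wave with two values dropped to NaN.
array = np.sin(np.linspace(0, np.pi, 20))
array[[7, 12]] = np.nan
filled = interpolate_missing_values(array)
print(np.isnan(filled).any())  # expected: False
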
Example #19
    def score(self, input_data, targets, loss='rmse'):
        """Return a given metric evaluating the network.

        input_data : input data sample on which to evaluate the model
        targets    : true targets associated with the input dataset
        loss       : quantity to score ; either 'likelihood' of the
                     produced GMM or 'rmse' of the derived prediction
        """
        # Add an argument unneeded by parents; pylint: disable=arguments-differ
        # Check 'loss' argument validity and select the associated metric.
        check_type_validity(loss, str, 'loss')
        if loss == 'rmse':
            metric = self.readouts['rmse']
        elif loss == 'likelihood':
            metric = self.readouts['mean_log_likelihood']
        else:
            raise ValueError("Unknown loss quantity: '%s'.")
        # Evaluate the selected metric.
        feed_dict = self.get_feed_dict(input_data, targets, loss=loss)
        return metric.eval(feed_dict, self.session)
Example #20
 def _validate_args(self):
     """Process the initialization arguments of the instance."""
     # Validate arguments that do not define the model's layers.
     super()._validate_args()
     # Check input_shape and use_dynamic parameters' compatibility.
     if self.use_dynamic and self.input_shape[-1] % 3:
         raise ValueError("Wrong `input_shape` with `use_dynamic=True`: "
                          "dim 1 should a divisor of 3.")
     # Validate and alter if needed the network's layers' configuration.
     for half in ('encoder', 'decoder'):
         # Check the network's half's hidden layers' configuration.
         layers_config = self._init_arguments[half + '_config']
         check_type_validity(layers_config, list, half + '_config')
         for i, config in enumerate(layers_config):
             layers_config[i] = validate_layer_config(config)
         # Check the network's half's top filter's configuration.
         top_filter = self._init_arguments[half + '_filter']
         if top_filter is not None:
             self._init_arguments[half + '_filter'] = (
                 validate_layer_config(top_filter))
Example #21
def ark_to_npy(ark_file, output_folder):
    """Extract utterances data from a (set of) ark file(s) to npy files.

    ark_file      : ark file whose data to extract - may be a .ark
                    file, a .scp file indexing one or more .ark files
                    or a .txt file created from one of the previous
    output_folder : folder in which to extract utterance-wise npy files
                    (note that existing files will be overwritten)
    """
    # Check parameters validity. Build output folder if needed.
    check_type_validity(ark_file, str, 'ark_file')
    check_type_validity(output_folder, str, 'output_folder')
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    # Iteratively extract utterances' data and write individual npy files.
    n_files = 0
    for utterance, array in read_ark_file(ark_file):
        output_file = os.path.join(output_folder, utterance + '.npy')
        np.save(output_file, array)
        n_files += 1
    print("Succesfully extracted %s utterances' data to .npy files." % n_files)
Example #22
File: _wav.py Project: tianchi03/ac2art
    def __init__(self,
                 filename,
                 sampling_rate=16000,
                 frame_time=25,
                 hop_time=10):
        """Load the .wav data and reframe it.

        filename      : path to the .wav audio file
        sampling_rate : sampling rate of the signal, in Hz; resampling
                        will be used if needed (int, default 16000)
        frame_time    : frame duration, in milliseconds (int, default 25)
        hop_time      : number of milliseconds between each frame's
                        start time (int, default 10)
        """
        check_type_validity(filename, str, 'filename')
        self.filename = os.path.abspath(filename)
        data, sampling_rate = librosa.load(self.filename, sr=sampling_rate)
        self.signal = data
        self.sampling_rate = sampling_rate
        self.frame_length = int((frame_time * sampling_rate) / 1000)
        self.hop_length = int((hop_time * sampling_rate) / 1000)
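
With the default arguments, the last two lines work out to a 400-sample frame length and a 160-sample hop length:

# Worked arithmetic for the default arguments (16 kHz, 25 ms, 10 ms).
sampling_rate, frame_time, hop_time = 16000, 25, 10
frame_length = int((frame_time * sampling_rate) / 1000)  # 400 samples
hop_length = int((hop_time * sampling_rate) / 1000)      # 160 samples
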
Example #23
 def _validate_args(self):
     """Process the initialization arguments of the instance."""
     # Validate the model's input layer shape.
     check_type_validity(self.input_shape, (tuple, list, tf.TensorShape),
                         'input_shape')
     if len(self.input_shape) not in [2, 3]:
         raise TypeError("'input_shape' must be of length 2 or 3.")
     if self.input_shape[-1] is None:
         raise ValueError("Last 'input_shape' dimension must be fixed.")
     # Validate the model's layers configuration.
     check_type_validity(self.layers_config, list, 'layers_config')
     for i, config in enumerate(self.layers_config):
         self.layers_config[i] = validate_layer_config(config)
     # Validate the model's optional top layer configuration.
     if self.top_filter is not None:
         self._init_arguments['top_filter'] = (validate_layer_config(
             self.top_filter))
     # Validate the model's number of targets and their specification.
     check_positive_int(self.n_targets, 'n_targets')
     check_type_validity(self.use_dynamic, bool, 'use_dynamic')
     check_type_validity(self.binary_tracks, (list, type(None)),
                         'binary_tracks')
     if self.binary_tracks is not None:
         if self.binary_tracks:
             invalid = not all(
                 isinstance(track, int) and 0 <= track < self.n_targets
                 for track in self.binary_tracks)
             if invalid:
                 raise TypeError(
                     "'binary_tracks' should be a list of int in [0, %s]" %
                     (self.n_targets - 1))
         else:
             self._init_arguments['binary_tracks'] = None
     # Validate the model's normalization parameters.
     norm_params = self.norm_params
     check_type_validity(norm_params, (np.ndarray, type(None)),
                         'norm_params')
     if norm_params is not None and norm_params.shape != (self.n_targets, ):
         raise TypeError("Wrong 'norm_params' shape: %s instead of (%s,)" %
                         (norm_params.shape, self.n_targets))
Example #24
def prepare_abkhazia_corpus(corpus,
                            data_folder,
                            limit_phones=True,
                            mode='w',
                            id_length=None):
    """Build or complete a corpus's data/ folder for use with abkhazia.

    corpus       : name of the corpus whose data to prepare (str)
    data_folder  : path to the 'data/' folder to build or complete
    limit_phones : whether to map the corpus' phones to a restricted set
                   of IPA phones, thus aggregating some (bool, default True)
    mode         : file writing mode (either 'w' or 'a', default 'w')
    id_length    : optional fixed length of utterances' id used internally

    Note: the `mode` and `id_length` parameters may be used to pile up
          data from multiple corpora in a single data/ folder, thus
          having abkhazia treat them as one large corpus. In this case,
          please be careful about corpus-specific phone symbols overlap.
    """
    # Check arguments validity.
    check_type_validity(corpus, str, 'corpus')
    check_type_validity(data_folder, str, 'data_folder')
    check_type_validity(limit_phones, bool, 'limit_phones')
    if mode not in ('w', 'a'):
        raise TypeError("'mode' should be a str in {'a', 'w'}.")
    check_type_validity(id_length, (int, type(None)), 'id_length')
    # Make the output directories if needed.
    wav_folder = os.path.join(data_folder, 'wavs')
    for folder in (data_folder, wav_folder):
        if not os.path.isdir(folder):
            os.makedirs(folder)
    # Gather dependency functions.
    copy_wavs, get_transcription = import_from_string(
        'ac2art.corpora.%s.abkhazia._loaders' % corpus,
        ['copy_wavs', 'get_transcription'])
    # Copy wav files to the data folder and gather the utterances list.
    utt_files = copy_wavs(wav_folder)
    utt_ids = normalize_utterance_ids(utt_files, id_length)
    # Fill the segments.txt file.
    with open(os.path.join(data_folder, 'segments.txt'), mode) as abk_file:
        abk_file.write('\n'.join(name + ' ' + name.strip('_') + '.wav'
                                 for name in utt_ids) + '\n')
    # Build the utt2spk, spk2utt, phones, silences and variants txt files.
    make_utt2spk_files(data_folder, utt_ids, mode)
    make_phones_files(data_folder, limit_phones, mode)
    # Load the corpus-specific to cross-corpus symbols conversion table.
    symbols = pd.read_csv(CONSTANTS['symbols_file'], index_col=corpus)
    symbols = symbols['common' + '_reduced' * limit_phones].to_dict()
    make_text_files(data_folder, utt_ids, get_transcription, symbols, mode)
Example #25
 def _build_cutoff(self, cutoff):
     """Assign a cutoff frequency container as the 'cutoff' attribute."""
     # Check cutoff argument type.
     check_type_validity(cutoff, (tf.Tensor, np.ndarray, list, int, float),
                         'cutoff')
     if not isinstance(cutoff, tf.Tensor):
         cutoff = tf.constant(cutoff, dtype=tf.float32)
     if cutoff.dtype != tf.float32:
         cutoff = tf.cast(cutoff, tf.float32)
     # Check cutoff tensor rank.
     if not len(cutoff.shape):  # pylint: disable=len-as-condition
         cutoff = tf.expand_dims(cutoff, 0)
     elif len(cutoff.shape) != 1:
         raise TypeError("'cutoff' rank is not in [0, 1].")
     # Check number of cutoff frequencies.
     if cutoff.shape[0] == 1:
         cutoff = tf.concat([cutoff for _ in range(self.n_channels)],
                            axis=0)
     elif cutoff.shape[0] != self.n_channels:
         raise TypeError("Invalid 'cutoff' shape: %s" % cutoff.shape)
     # Assign the (adjusted) cutoff object as an attribute.
     self.cutoff = cutoff
Example #26
def reduce_finite_mean(tensor, axis=None):
    """Compute the mean of finite elements across a tensor's dimensions.

    tensor : numeric-type tensor to reduce
    axis   : optional dimension index along which to reduce (int, default None)
    """
    # Check argument's type validity.
    check_type_validity(tensor, tf.Tensor, 'tensor')
    check_type_validity(axis, (int, type(None)), 'axis')
    # Compute the number of finite elements across the reduction axis.
    not_finite = tf.logical_not(tf.is_finite(tensor))
    if axis is None:
        length = tf.reduce_sum(tf.ones_like(tensor, dtype=tf.int32))
    elif axis == 0:
        length = tensor_length(tensor)
    else:
        perm = [{0: axis, axis: 0}.get(i, i) for i in range(len(tensor.shape))]
        length = tensor_length(tf.transpose(tensor, perm=perm))
    n_obs = length - tf.reduce_sum(tf.cast(not_finite, tf.int32), axis=axis)
    # Compute the sum of finite elements across the reduction axis.
    filled = tf.where(not_finite, tf.zeros_like(tensor), tensor)
    sums = tf.reduce_sum(filled, axis=axis)
    # Return the mean(s) across the reduction axis.
    return sums / tf.cast(n_obs, tf.float32)
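
A minimal check of the function above under TensorFlow 1.x: the NaN entry should be left out of the mean, yielding (1 + 2 + 3) / 3 = 2:

import numpy as np
import tensorflow as tf

# Sketch assuming TensorFlow 1.x graph mode.
tensor = tf.constant([1., 2., np.nan, 3.])
mean = reduce_finite_mean(tensor)
with tf.Session() as session:
    print(session.run(mean))  # expected: 2.0
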
Example #27
def load_dumped_model(filename, model=None):
    """Restore a neural network model from a .npy dump.

    filename : path to a .npy file containing a model's configuration
    model    : optional instantiated model whose weights to restore
               (default None, implying that a model is instantiated
               based on the dumped configuration and returned)
    """
    # Load the dumped model configuration and check its validity.
    config = np.load(filename).tolist()
    check_type_validity(config, dict, 'loaded configuration')
    missing_keys = [
        key for key in ('__init__', '__class__', '__rebuild_init__',
                        'architecture', 'values') if key not in config.keys()
    ]
    if missing_keys:
        raise KeyError("Invalid model dump. Missing key(s): %s." %
                       missing_keys)
    # If no model was provided, instantiate one.
    new_model = model is None
    if new_model:
        model = instantiate(config['__class__'], config['__init__'],
                            config['__rebuild_init__'])
        if 'session' not in config['__init__'].keys():
            model.reset_model()
    # Check that the provided or rebuilt model is indeed a neural network.
    check_type_validity(model, NeuralNetwork,
                        'rebuilt model' if new_model else 'model')
    # Check that the model's architecture is coherent with the dump.
    if model.architecture != config['architecture']:
        raise TypeError("Invalid network architecture.")
    # Restore the model's weights.
    for name, layer in model.layers.items():
        layer.set_values(config['values'][name], model.session)
    # If the model was instantiated within this function, return it.
    return model if new_model else None
Example #28
File: _abxpy.py Project: tianchi03/ac2art
def abxpy_pipeline(features_file, task_file, output, n_jobs=1):
    """Run the ABXpy pipeline on a set of features.

    The pipeline run consists of the distance, score and analyze modules
    of ABXpy. Intermediary files will be removed, so that this function
    solely returns a .csv file summing up computed scores.

    features_file : path to a h5 file containing the features to evaluate
    task_file     : path to a task file output by the ABXpy task module
    output        : path to the output file to write
    n_jobs        : number of CPU cores to use (positive int, default 1)
    """
    check_type_validity(output, str, 'output')
    # Assign names to intermediary files.
    distance_file = '%i.distance' % time.time()
    score_file = '%i.score' % time.time()
    # Run the ABXpy pipeline.
    abxpy_distance(features_file, task_file, distance_file, n_jobs)
    abxpy_score(distance_file, task_file, score_file)
    abxpy_analyze(score_file, task_file, output)
    # Remove intermediary files.
    os.remove(distance_file)
    os.remove(score_file)
    print("Done running ABXpy. Results were written to '%s'." % output)
Example #29
    def control_arguments(audio_forms, n_coeff, articulators_list,
                          ema_sampling_rate, audio_frames_time):
        """Control the arguments provided to extract some features.

        Build the necessary subfolders so as to store the processed data.

        Replace `audio_forms` and `articulators_list` with their
        default values when they are passed as None.

        Return the definitive values of the latter two arguments.
        """
        nonlocal default_articulators, new_folder
        # Check positive integer arguments.
        check_positive_int(ema_sampling_rate, 'ema_sampling_rate')
        check_positive_int(audio_frames_time, 'audio_frames_time')
        # Check audio_forms argument validity.
        valid_forms = ['lpc', 'lsf', 'mfcc', 'mfcc_']
        if audio_forms is None:
            audio_forms = valid_forms[:-1]
        else:
            if isinstance(audio_forms, str):
                audio_forms = [audio_forms]
            elif isinstance(audio_forms, tuple):
                audio_forms = list(audio_forms)
            else:
                check_type_validity(audio_forms, list, 'audio_forms')
            invalid = [name for name in audio_forms if name not in valid_forms]
            if invalid:
                raise ValueError("Unknown audio representation(s): %s." %
                                 invalid)
        # Check n_coeff argument validity.
        check_type_validity(n_coeff, (int, tuple, list), 'n_coeff')
        if isinstance(n_coeff, int):
            check_positive_int(n_coeff, 'single `n_coeff` value')
            n_coeff = [n_coeff] * len(audio_forms)
        elif len(n_coeff) != len(audio_forms):
            raise TypeError(
                "'n_coeff' sequence should be of same length as 'audio_forms'."
            )
        # Build necessary folders to store the processed data.
        for name in audio_forms + ['ema', 'voicing']:
            dirname = os.path.join(new_folder, name)
            if not os.path.isdir(dirname):
                os.makedirs(dirname)
        # Check articulators_list argument validity.
        if articulators_list is None:
            articulators_list = default_articulators
        else:
            check_type_validity(articulators_list, list, 'articulators_list')
        # Return potentially altered list arguments.
        return audio_forms, n_coeff, articulators_list
Example #30
def validate_layer_config(config):
    """Validate that a given object is fit as a layer's configuration.

    Return the validated input, which may be extended with an empty dict.
    """
    # Check that the configuration is a tuple of length 2 or 3.
    if not isinstance(config, tuple):
        raise TypeError("Layer configuration elements should be tuples.")
    if len(config) == 2:
        config = (*config, {})
    elif len(config) != 3:
        raise TypeError(
            "Wrong layer configuration tuple length: should be 2 or 3.")
    # Check sub-elements types.
    check_type_validity(config[0], (str, type), 'layer class')
    check_type_validity(config[1], (int, list, tuple),
                        'layer primary parameter')
    check_type_validity(config[2], dict, 'layer config kwargs')
    # Return the config tuple.
    return config
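
For illustration, a two-element configuration gets padded with an empty keyword-arguments dict (the 'dense_layer' name is just an arbitrary string here):

print(validate_layer_config(('dense_layer', 300)))
# expected output: ('dense_layer', 300, {})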