def __init__(self, signal, cutoff, learnable=True, sampling_rate=200, window=5):
    """Initialize the filter.

    signal        : signal to filter (tensorflow.Tensor of rank 1 to 3)
    cutoff        : cutoff frequency (or frequencies) of the filter, in Hz
                    (positive int or list, array or Tensor of such values)
    learnable     : whether the cutoff frequency may be adjusted, e.g. in
                    backpropagation procedures (bool, default True)
    sampling_rate : sampling rate of the signal, in Hz (int, default 200)
    window        : half-size of the filtering window (int, default 5)

    Note: three-dimensional signals are treated as a batch of 2-D signals
    stacked along the first dimension, and are filtered as such, i.e.
    independently.
    """
    check_positive_int(sampling_rate, 'sampling_rate')
    check_positive_int(window, 'window')
    super().__init__(
        signal, cutoff, learnable, sampling_rate=sampling_rate, window=window)

def __init__(self, input_data, layers_shape, batch_sizes=None,
             cell_type='lstm', activation='tanh', name='rnn',
             keep_prob=None):
    """Instantiate the recurrent neural network.

    input_data   : input data of the network (tensorflow.Tensor),
                   either of shape [n_batches, max_time, input_size]
                   or [len_sequence, input_size]
    layers_shape : number of units per layer (int or tuple of int)
    batch_sizes  : true lengths of the batched sequences
                   (for input tensors of rank 3 only)
    cell_type    : type of recurrent cells to use (short name (str) or
                   tensorflow.nn.rnn_cell.RNNCell subclass,
                   default 'lstm', i.e. LSTMCell)
    activation   : activation function of the cell units
                   (function or function name, default 'tanh')
    name         : name of the stack (using the same name twice will
                   cause tensorflow to raise an exception)
    keep_prob    : optional Tensor recording a keep probability used
                   as a dropout parameter

    This needs overriding by subclasses to actually build the network
    out of the pre-validated arguments. The `weights` attribute should
    also be filled in by subclasses.
    """
    # Arguments serve modularity; pylint: disable=too-many-arguments
    # Check name validity.
    check_type_validity(name, str, 'name')
    self.name = name
    # Check input data validity.
    check_type_validity(input_data, tf.Tensor, 'input_data')
    if len(input_data.shape) == 2:
        self.input_data = tf.expand_dims(input_data, 0)
        self.batch_sizes = None
        self._batched = False
    elif len(input_data.shape) == 3:
        self.input_data = input_data
        if batch_sizes is None:
            raise ValueError(
                "With rank 3 'input_data', 'batch_sizes' is mandatory.")
        self.batch_sizes = batch_sizes
        self._batched = True
    else:
        raise TypeError("Invalid 'input_data' rank: should be 2 or 3.")
    # Check layers shape validity.
    check_type_validity(layers_shape, (tuple, int), 'layers_shape')
    if isinstance(layers_shape, int):
        layers_shape = (layers_shape,)
    for value in layers_shape:
        check_positive_int(value, "layer's number of units")
    self.layers_shape = layers_shape
    # Check keep_prob validity.
    check_type_validity(keep_prob, (tf.Tensor, type(None)), 'keep_prob')
    self.keep_prob = keep_prob
    # Set up the RNN cell type and activation function.
    self.cell_type = setup_rnn_cell_type(cell_type)
    self.activation = setup_activation_function(activation)
    # Set up an attribute that needs assigning by subclasses.
    self.weights = None

def _validate_args(self):
    """Process the initialization arguments of the instance."""
    # Control arguments common to any multilayer perceptron.
    super()._validate_args()
    # Control n_components argument and compute n_parameters.
    check_positive_int(self.n_components, 'n_components')
    self.n_parameters = self.n_components * (1 + 2 * self.n_targets)

def __init__(self, input_data, n_units, activation='relu', bias=True,
             name='DenseLayer', keep_prob=None):
    """Initialize the fully-connected neural layer.

    input_data : tensorflow variable or placeholder to use as input
    n_units    : number of units of the layer
    activation : activation function or activation function name
                 (default 'relu', i.e. `tensorflow.nn.relu`)
    bias       : whether to add a bias constant to the transformed data
                 passed to the activation function (bool, default True)
    name       : optional name to give to the layer's inner operations
    keep_prob  : optional Tensor recording a keep probability to use
                 as a dropout parameter
    """
    # Arguments serve modularity; pylint: disable=too-many-arguments
    # Check the arguments' validity.
    check_type_validity(input_data, tf.Tensor, 'input_data')
    if len(input_data.shape) not in (2, 3):
        raise TypeError("`input_data` must be of rank 2 or 3.")
    check_positive_int(n_units, 'n_units')
    check_type_validity(bias, bool, 'bias')
    check_type_validity(name, str, 'name')
    check_type_validity(keep_prob, (tf.Tensor, type(None)), 'keep_prob')
    # Set up the layer's activation function.
    self.activation = setup_activation_function(activation)
    # Set up the layer's weights, adjusting their initial value.
    # note: design loosely based on Glorot, X. & Bengio, Y. (2010)
    if self.activation is tf.nn.relu:
        stddev = np.sqrt(2 / n_units)
    elif self.activation in [tf.nn.tanh, tf.nn.softmax]:
        stddev = np.sqrt(3 / n_units)
    else:
        stddev = .1
    initial = tf.truncated_normal(
        [input_data.shape[-1].value, n_units], mean=0, stddev=stddev)
    self.weights = tf.Variable(initial, name=name + '_weight')
    # Optionally set up a learnable bias term.
    if bias:
        self.bias = tf.Variable(
            tf.constant(0.1, shape=[n_units]), name=name + '_bias')
    else:
        self.bias = None
    # Build the layer's processing of the inputs.
    if len(input_data.shape) == 2:
        self.output = self._feed_tensor(input_data)
    else:
        self.output = run_along_first_dim(self._feed_tensor, input_data)
    # Optionally set up dropout on top of the layer.
    self.keep_prob = keep_prob
    if self.keep_prob is not None:
        self.output = tf.nn.dropout(self.output, keep_prob=keep_prob)

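# A minimal usage sketch, assuming this __init__ belongs to a class named
# DenseLayer (hypothetical name, matching the default `name` argument) and
# that tensorflow is imported as tf:
def _example_dense_layer():
    """Hypothetical helper illustrating the dense layer's usage."""
    inputs = tf.placeholder(tf.float32, shape=[None, 13])
    layer = DenseLayer(inputs, n_units=50, activation='relu')
    return layer.output  # expected to be a Tensor of shape [None, 50]
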
def sequences_to_batch(sequences, length=None):
    """Batch a set of data sequences into a three-dimensional array.

    sequences : list or flat numpy array of two-dimensional numpy arrays
                sharing the same shape on their last dimension
    length    : optional size of the batched array's second dimension
                (otherwise, maximum sample length is used)

    Return a numpy.array of shape [n_sequences, length, sequences_width]
    and a numpy.array recording the true length of each batched sequence.
    """
    # Check sequences argument validity.
    check_type_validity(sequences, (list, np.ndarray), 'sequences')
    if isinstance(sequences, np.ndarray):
        if len(sequences.shape) == 2:
            sequences = [sequences]
        elif len(sequences.shape) != 1:
            raise TypeError(
                "'sequences' must be a list or a flat numpy array.")
    width = sequences[0].shape[1]
    valid = all(
        isinstance(sequence, np.ndarray) and len(sequence.shape) == 2
        and sequence.shape[1] == width
        for sequence in sequences
    )
    if not valid:
        raise TypeError(
            "All sequences must be 2-D numpy arrays "
            "with the same number of columns."
        )
    # Gather the length of each and every sequence, truncated if needed.
    if length is None:
        batch_sizes = np.array([len(sequence) for sequence in sequences])
        length = np.max(batch_sizes)
    else:
        check_positive_int(length, 'length')
        batch_sizes = np.array(
            [min(len(sequence), length) for sequence in sequences])
    # Zero-pad the sequences and concatenate them.
    batched = np.array([
        np.concatenate(
            [seq[:length], np.zeros((length - seq_length, seq.shape[1]))])
        for seq, seq_length in zip(sequences, batch_sizes)
    ])
    # Return the batched sequences and the true sequence lengths.
    return batched, batch_sizes

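# A minimal usage sketch for `sequences_to_batch` (illustrative values only),
# assuming numpy is imported as np in this module:
def _example_sequences_to_batch():
    """Hypothetical helper illustrating `sequences_to_batch` usage."""
    # Two sequences of 3 columns and unequal lengths (4 and 6 samples).
    seqs = [np.ones((4, 3)), np.ones((6, 3))]
    batched, batch_sizes = sequences_to_batch(seqs)
    # Sequences are zero-padded to the maximum length and stacked.
    assert batched.shape == (2, 6, 3)
    assert batch_sizes.tolist() == [4, 6]
    return batched, batch_sizes
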
def abx_from_features(features, fileset=None, byspeaker=True,
                      limit_phones=False, n_jobs=1):
    """Run the ABXpy pipeline on a set of pre-extracted {0} features.

    features     : name of a h5 file of {0} features created with the
                   `extract_h5_features` function (str)
    fileset      : optional name of a fileset whose utterances' features
                   to use (str)
    byspeaker    : whether to discriminate pairs from the same speaker
                   only (bool, default True)
    limit_phones : whether to aggregate some phonemes, using the
                   'ipa_reduced' column of the {0} symbols file
                   as mapping (bool, default False)
    n_jobs       : number of CPU cores to use (positive int, default 1)
    """
    nonlocal abx_folder, corpus, make_abx_task
    check_type_validity(features, str, 'features')
    check_type_validity(fileset, (str, type(None)), 'fileset')
    check_positive_int(n_jobs, 'n_jobs')
    # Declare the path to the task file.
    task_name = get_task_name(fileset, limit_phones)
    task_name += 'byspk_' * byspeaker
    task_file = os.path.join(abx_folder, task_name + 'task.abx')
    # Declare paths to the input features and output scores files.
    features_file = os.path.join(abx_folder, features + '.features')
    scores_file = features + '_' + task_name.split('_', 1)[1] + 'abx.csv'
    scores_file = os.path.join(abx_folder, scores_file)
    # Check that the features file exists.
    if not os.path.exists(features_file):
        raise FileNotFoundError("No such file: '%s'." % features_file)
    # Build the ABX task file if necessary.
    if not os.path.isfile(task_file):
        make_abx_task(fileset, byspeaker, limit_phones)
    else:
        print('Using found %s file.' % task_file)
    # Run the ABXpy pipeline.
    abxpy_pipeline(features_file, task_file, scores_file, n_jobs)
    # Replace phone symbols with IPA ones in the scores file.
    add_ipa_symbols(scores_file)

def abxpy_distance(features_file, task_file, output, n_jobs=1):
    """Run the ABXpy distance module.

    features_file : path to a h5 file containing the features to evaluate
    task_file     : path to a task file output by the ABXpy task module
    output        : path to the output file to write
    n_jobs        : number of CPU cores to use (positive int, default 1)
    """
    check_batch_type(
        str, features_file=features_file, task_file=task_file, output=output)
    check_positive_int(n_jobs, 'n_jobs')
    distance = os.path.join(ABXPY_FOLDER, 'distance.py')
    distance += ' -n 1 -j %s' % n_jobs
    cmd = ' '.join(('python2', distance, features_file, task_file, output))
    status = os.system(cmd)
    if status != 0:
        raise RuntimeError(
            "ABXpy distance.py ended with status code %s." % status)
    print('ABXpy distance module was successfully run.')

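# A hypothetical usage sketch for `abxpy_distance`; the paths below are
# illustrative placeholders, not files from this repository:
def _example_abxpy_distance():
    """Hypothetical helper illustrating `abxpy_distance` usage."""
    abxpy_distance(
        'abx/mfcc.features',    # h5 features file (placeholder path)
        'abx/phones_task.abx',  # task file built by the ABXpy task module
        'abx/mfcc_abx.csv',     # output file to write
        n_jobs=4
    )
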
def _validate_args(self):
    """Process the initialization arguments of the instance."""
    # Validate the model's input layer shape.
    check_type_validity(
        self.input_shape, (tuple, list, tf.TensorShape), 'input_shape')
    if len(self.input_shape) not in [2, 3]:
        raise TypeError("'input_shape' must be of length 2 or 3.")
    if self.input_shape[-1] is None:
        raise ValueError("Last 'input_shape' dimension must be fixed.")
    # Validate the model's layers configuration.
    check_type_validity(self.layers_config, list, 'layers_config')
    for i, config in enumerate(self.layers_config):
        self.layers_config[i] = validate_layer_config(config)
    # Validate the model's optional top layer configuration.
    if self.top_filter is not None:
        self._init_arguments['top_filter'] = (
            validate_layer_config(self.top_filter))
    # Validate the model's number of targets and their specification.
    check_positive_int(self.n_targets, 'n_targets')
    check_type_validity(self.use_dynamic, bool, 'use_dynamic')
    check_type_validity(
        self.binary_tracks, (list, type(None)), 'binary_tracks')
    if self.binary_tracks is not None:
        if self.binary_tracks:
            invalid = not all(
                isinstance(track, int) and 0 <= track < self.n_targets
                for track in self.binary_tracks
            )
            if invalid:
                raise TypeError(
                    "'binary_tracks' should be a list of int in [0, %s]"
                    % (self.n_targets - 1)
                )
        else:
            self._init_arguments['binary_tracks'] = None
    # Validate the model's normalization parameters.
    norm_params = self.norm_params
    check_type_validity(
        norm_params, (np.ndarray, type(None)), 'norm_params')
    if norm_params is not None and norm_params.shape != (self.n_targets,):
        raise TypeError(
            "Wrong 'norm_params' shape: %s instead of (%s,)"
            % (norm_params.shape, self.n_targets)
        )

def control_arguments(audio_forms, n_coeff, articulators_list,
                      ema_sampling_rate, audio_frames_time):
    """Control the arguments provided to extract some features.

    Build the necessary subfolders so as to store the processed data.
    Replace `audio_forms` and `articulators_list` with their default
    values when they are passed as None.

    Return the definitive values of the `audio_forms`, `n_coeff`
    and `articulators_list` arguments.
    """
    nonlocal default_articulators, new_folder
    # Check positive integer arguments.
    check_positive_int(ema_sampling_rate, 'ema_sampling_rate')
    check_positive_int(audio_frames_time, 'audio_frames_time')
    # Check audio_forms argument validity.
    valid_forms = ['lpc', 'lsf', 'mfcc', 'mfcc_']
    if audio_forms is None:
        audio_forms = valid_forms[:-1]
    else:
        if isinstance(audio_forms, str):
            audio_forms = [audio_forms]
        elif isinstance(audio_forms, tuple):
            audio_forms = list(audio_forms)
        else:
            check_type_validity(audio_forms, list, 'audio_forms')
        invalid = [name for name in audio_forms if name not in valid_forms]
        if invalid:
            raise ValueError(
                "Unknown audio representation(s): %s." % invalid)
    # Check n_coeff argument validity.
    check_type_validity(n_coeff, (int, tuple, list), 'n_coeff')
    if isinstance(n_coeff, int):
        check_positive_int(n_coeff, 'single `n_coeff` value')
        n_coeff = [n_coeff] * len(audio_forms)
    elif len(n_coeff) != len(audio_forms):
        raise TypeError(
            "'n_coeff' sequence should be of same length as 'audio_forms'.")
    # Build necessary folders to store the processed data.
    for name in audio_forms + ['ema', 'voicing']:
        dirname = os.path.join(new_folder, name)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
    # Check articulators_list argument validity.
    if articulators_list is None:
        articulators_list = default_articulators
    else:
        check_type_validity(articulators_list, list, 'articulators_list')
    # Return potentially altered list arguments.
    return audio_forms, n_coeff, articulators_list

def mlpg_from_gaussian_mixture(priors, means, stds, weights, n_steps=10):
    """Generate a trajectory out of a time sequence of gaussian mixture models.

    The algorithm used is taken from Tokuda, K. et alii (2000). Speech
    Parameter Generation Algorithms for HMM-based speech synthesis. It
    aims at generating the most likely trajectory sequence based on
    gaussian mixture density parameters fitted to an input sequence.

    priors  : sequence of mixture components' priors (2-D tensor)
    means   : sequence of components' multivariate means (3-D tensor)
    stds    : sequence of components' standard deviations (3-D tensor)
    weights : matrix of weights to derive successive orders of dynamic
              features out of static ones (2-D tensor)
    n_steps : maximum number of iterations when updating the selected
              trajectory through an E-M algorithm (int, default 10)

    Each row of means must include means associated with (in that order)
    the static features, the first-order dynamic features and the
    second-order ones.
    """
    # Test arguments validity.
    tf.control_dependencies([
        tf.assert_rank(priors, 2), tf.assert_rank(means, 3),
        tf.assert_rank(stds, 3)
    ])
    check_positive_int(n_steps, 'n_steps')

    # Set up the expectation step function.
    def generate_trajectory(means_sequence, stds_sequence):
        """Generate a trajectory and density-based metrics."""
        features = mlpg_from_gaussian(means_sequence, stds_sequence, weights)
        densities = priors * tf.reduce_prod(
            gaussian_density(tf.expand_dims(features, 1), means, stds),
            axis=2)
        log_likelihood = tf.reduce_sum(
            tf.log(tf.reduce_sum(densities, axis=1) + 1e-30))
        return features, densities, log_likelihood

    # Set up the maximization step function.
    def generate_parameters(densities):
        """Generate a parameters sequence using occupancy probabilities."""
        # Compute occupancy probabilities (i.e. posterior of components).
        norm = tf.expand_dims(tf.reduce_sum(densities, axis=1), 1)
        occupancy = tf.expand_dims(densities / (norm + 1e-30), 2)
        # Derive a weighted sequence of means and standard deviations.
        return (
            tf.reduce_sum(occupancy * means, axis=1),
            tf.reduce_sum(occupancy * stds, axis=1)
        )

    # Set up a function running an E-M algorithm step.
    def run_step(index, previous_traject, previous_dens, previous_ll):
        """Run an iteration of the E-M algorithm for trajectory selection."""
        # Run the maximization and expectation steps.
        means_seq, stds_seq = generate_parameters(previous_dens)
        trajectory, densities, log_likelihood = (
            generate_trajectory(means_seq, stds_seq))
        # Either return the updated results or interrupt the process.
        return tf.cond(
            log_likelihood > previous_ll,
            lambda: (index + 1, trajectory, densities, log_likelihood),
            lambda: (n_steps, previous_traject, previous_dens, previous_ll)
        )

    # Choose an initial trajectory, using the components' priors as weights.
    init_trajectory, init_densities, init_ll = generate_trajectory(
        tf.reduce_sum(tf.expand_dims(priors, 2) * means, axis=1),
        tf.reduce_sum(tf.expand_dims(priors, 2) * stds, axis=1)
    )
    # Iteratively update the selected trajectory with an E-M algorithm.
    _, trajectory, _, _ = tf.while_loop(
        lambda i, *_: i < n_steps, run_step,
        [tf.constant(0), init_trajectory, init_densities, init_ll]
    )
    return trajectory

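# A minimal, shape-oriented sketch of calling `mlpg_from_gaussian_mixture`
# (hypothetical helper; the construction of the `weights` delta-features
# matrix is assumed to be handled elsewhere and is not shown here):
def _example_mlpg_from_gaussian_mixture(priors, means, stds, weights):
    """Recall the expected tensor shapes of the MLPG inputs.

    priors  : [n_frames, n_components] mixture priors
    means   : [n_frames, n_components, 3 * n_static] static + delta means
    stds    : [n_frames, n_components, 3 * n_static] standard deviations
    weights : 2-D matrix deriving dynamic features from static ones
    """
    return mlpg_from_gaussian_mixture(
        priors, means, stds, weights, n_steps=10)
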