def build_triphones_indexer(limit, corpus, same_speaker_data):
    """Build an index of triphones appearing in each utterance of a corpus.

    limit             : minimum number of utterances a triphone must
                        appear in to be indexed (int)
    corpus            : name of the corpus to index (str)
    same_speaker_data : whether all speakers share the same utterances
                        list (bool)

    Return a dict mapping each utterance's name (or rank) to the
    (reduced) list of triphones it comprises.
    """
    # Load the dependency functions associated with the corpus to index.
    load_phone_labels, get_utterances_list, speakers = import_from_string(
        module='ac2art.corpora.%s.raw._loaders' % corpus,
        elements=['load_phone_labels', 'get_utterances_list', 'SPEAKERS'])

    # Define an auxiliary function to read an utterance's triphones.
    def load_triphones(name):
        """Load the set of triphones contained in a given utterance."""
        labels = load_phone_labels(name)
        return {
            '_'.join([phone[1] for phone in labels[i:i + 3]])
            for i in range(len(labels) - 2)
        }

    # Gather an index of triphones contained in each utterance.
    # If utterances are identical for each speaker, read a unique
    # list and index it with ranks instead of names.
    if same_speaker_data:
        utterances = {
            i: load_triphones(name)
            for i, name in enumerate(get_utterances_list(speakers[0]))
        }
    # Otherwise, gather the utterances from each and every speaker.
    else:
        utterances = {
            name: load_triphones(name) for name in get_utterances_list()
        }
    # Gather the full set of triphones.
    all_triphones = {
        triphone for utt_triphones in utterances.values()
        for triphone in utt_triphones
    }
    # Select the triphones of interest.
    triphones = {
        triphone for triphone in all_triphones
        if sum(triphone in utt for utt in utterances.values()) >= limit
    }
    # Reduce the dict referencing triphones associated to each utterance.
    utterances_index = {
        utterance: [phone for phone in utt_triphones if phone in triphones]
        for utterance, utt_triphones in utterances.items()
    }
    return utterances_index
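
# Usage sketch (hedged example; 'mngu0' stands for whichever corpus is
# configured, and the `limit` value is illustrative):
#
#     index = build_triphones_indexer(
#         limit=10, corpus='mngu0', same_speaker_data=False)
#     # `index` maps each utterance name (or rank, when speakers share
#     # the same utterances) to its triphones passing the `limit` filter.
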
def prepare_abkhazia_corpus(
        corpus, data_folder, limit_phones=True, mode='w', id_length=None):
    """Build or complete a corpus's data/ folder for use with abkhazia.

    corpus       : name of the corpus whose data to prepare (str)
    data_folder  : path to the 'data/' folder to build or complete
    limit_phones : whether to map the corpus' phones to a restricted
                   set of IPA phones, thus aggregating some
                   (bool, default True)
    mode         : file writing mode (either 'w' or 'a', default 'w')
    id_length    : optional fixed length of utterances' id used internally

    Note: the `mode` and `id_length` parameters may be used to pile up
    data from multiple corpora in a single data/ folder, thus having
    abkhazia treat them as one large corpus. In this case, please be
    careful about corpus-specific phone symbols overlap.
    """
    # Check arguments' validity.
    check_type_validity(corpus, str, 'corpus')
    check_type_validity(data_folder, str, 'data_folder')
    check_type_validity(limit_phones, bool, 'limit_phones')
    if mode not in ('w', 'a'):
        raise TypeError("'mode' should be a str in {'a', 'w'}.")
    check_type_validity(id_length, (int, type(None)), 'id_length')
    # Make the output directories if needed.
    wav_folder = os.path.join(data_folder, 'wavs')
    for folder in (data_folder, wav_folder):
        if not os.path.isdir(folder):
            os.makedirs(folder)
    # Gather dependency functions.
    copy_wavs, get_transcription = import_from_string(
        'ac2art.corpora.%s.abkhazia._loaders' % corpus,
        ['copy_wavs', 'get_transcription'])
    # Copy wav files to the data folder and gather the utterances list.
    utt_files = copy_wavs(wav_folder)
    utt_ids = normalize_utterance_ids(utt_files, id_length)
    # Fill the segments.txt file.
    with open(os.path.join(data_folder, 'segments.txt'), mode) as abk_file:
        abk_file.write('\n'.join(
            name + ' ' + name.strip('_') + '.wav' for name in utt_ids
        ) + '\n')
    # Build the utt2spk, spk2utt, phones, silences and variants txt files.
    make_utt2spk_files(data_folder, utt_ids, mode)
    make_phones_files(data_folder, limit_phones, mode)
    # Load the corpus-specific to cross-corpus symbols conversion table.
    symbols = pd.read_csv(CONSTANTS['symbols_file'], index_col=corpus)
    symbols = symbols['common' + '_reduced' * limit_phones].to_dict()
    make_text_files(data_folder, utt_ids, get_transcription, symbols, mode)
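
# Usage sketch (hedged; paths and corpus names are placeholders).
# Two corpora may be piled up in a single data/ folder as follows:
#
#     prepare_abkhazia_corpus('mngu0', 'abkhazia/data', mode='w')
#     prepare_abkhazia_corpus('mocha', 'abkhazia/data', mode='a')
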
def build_features_extraction_functions(
        corpus, initial_sampling_rate, default_articulators,
        docstring_details):
    """Define and return a raw features extraction function for a corpus.

    corpus                : name of the corpus whose features to extract (str)
    initial_sampling_rate : initial sampling rate of the EMA data, in Hz (int)
    default_articulators  : default articulators to keep (list of str)
    docstring_details     : docstring complement for the returned function

    Return a single function:
      - extract_utterances_data
    """
    # Long but explicit function name; pylint: disable=invalid-name
    # Define auxiliary functions through wrappers.
    control_arguments = build_arguments_checker(corpus, default_articulators)
    extract_data = build_extractor(corpus, initial_sampling_rate)
    # Import the get_utterances_list dependency function.
    get_utterances_list = import_from_string(
        'ac2art.corpora.%s.raw._loaders' % corpus, 'get_utterances_list')

    # Define a function extracting features from all utterances.
    def extract_utterances_data(
            audio_forms=None, n_coeff=13, articulators_list=None,
            ema_sampling_rate=100, audio_frames_time=25):
        """Extract acoustic and articulatory data of each {0} utterance.

        audio_forms       : optional list of representations of the audio
                            data to produce, among {{'lsf', 'lpc', 'mfcc'}}
                            (list of str, default None implying all of them)
        n_coeff           : number of static coefficients to compute for each
                            representation of the audio data, either as a
                            single int or a list of int (default 13)
                            Note: dynamic features will be added to those.
        articulators_list : optional list of raw EMA data columns to keep
                            (default None, implying twelve, detailed below)
        ema_sampling_rate : sampling rate of the EMA data to use, in Hz
                            (int, default 100)
        audio_frames_time : duration of the audio frames used to compute
                            acoustic features, in milliseconds
                            (int, default 25)

        Data extraction includes the following:
          - optional resampling of the EMA data
          - framing of audio data to align acoustic and articulatory records
          - production of various acoustic features based on the audio data
          - trimming of silences at the beginning and end of each utterance

        Note: the 'mfcc' audio form will produce MFCC coefficients enriched
        with pitch features, computed using abkhazia. An alternative
        computation using `data.commons.loaders.Wav.get_mfcc()` may be
        obtained using the 'mfcc_' keyword instead.

        The produced data is stored to the '{0}_processed_folder' set in
        the json configuration file, where a subfolder is built for each
        kind of features (ema, mfcc, etc.). Each utterance is stored as
        a '.npy' file. The file names include the utterance's name,
        extended with an indicator of the kind of features it contains.

        {1}
        """
        nonlocal corpus, control_arguments, extract_data, get_utterances_list
        # Check arguments, assign default values and build output folders.
        audio_forms, n_coeff, articulators_list = control_arguments(
            audio_forms, n_coeff, articulators_list, ema_sampling_rate,
            audio_frames_time)
        # Compute mfcc coefficients using abkhazia, if relevant.
        abkhazia_mfcc = 'mfcc' in audio_forms
        if abkhazia_mfcc:
            mfcc_ix = audio_forms.index('mfcc')
            wav_to_mfcc(
                corpus, n_coeff=n_coeff[mfcc_ix], pitch=True,
                frame_time=audio_frames_time,
                hop_time=(1000 / ema_sampling_rate))
            del audio_forms[mfcc_ix]
            del n_coeff[mfcc_ix]
        # Iterate over all corpus utterances.
        for utterance in get_utterances_list():
            extract_data(
                utterance, audio_forms, n_coeff, abkhazia_mfcc,
                articulators_list, ema_sampling_rate, audio_frames_time)
            end_time = time.asctime().split(' ')[-2]
            print('%s : Done with utterance %s.' % (end_time, utterance))
            sys.stdout.write('\033[F')
        # Record the list of articulators.
        path = os.path.join(
            CONSTANTS['%s_processed_folder' % corpus], 'ema', 'articulators')
        with open(path, 'w', encoding='utf-8') as file:
            file.write('\n'.join(articulators_list))

    # Adjust the function's docstring and return it.
    extract_utterances_data.__doc__ = (
        extract_utterances_data.__doc__.format(corpus, docstring_details))
    return extract_utterances_data
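
# Usage sketch (hedged; the sampling rate, articulators and docstring
# complement are illustrative placeholders):
#
#     extract_utterances_data = build_features_extraction_functions(
#         corpus='mngu0', initial_sampling_rate=200,
#         default_articulators=['tt_x', 'tt_y'],
#         docstring_details='Articulators default to tt_x and tt_y.')
#     extract_utterances_data(audio_forms=['mfcc'], n_coeff=13)
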
def build_extractor(corpus, initial_sampling_rate):
    """Define and return a function to extract features from an utterance.

    corpus                : name of the corpus from which to import features
    initial_sampling_rate : initial sampling rate of the EMA data, in Hz (int)
    """
    # Load the output path and dependency data loading functions.
    new_folder = CONSTANTS['%s_processed_folder' % corpus]
    load_ema, load_phone_labels, load_voicing, load_wav = import_from_string(
        module='ac2art.corpora.%s.raw._loaders' % corpus,
        elements=['load_ema', 'load_phone_labels', 'load_voicing', 'load_wav'])

    def get_boundaries(utterance, sampling_rate):
        """Return frame indexes to use so as to trim edge silences."""
        nonlocal load_phone_labels
        # Load phone labels and gather edge silences' timecodes.
        labels = load_phone_labels(utterance)
        start_time = labels[0][0] if labels[0][1] == '#' else 0
        end_time = labels[-2][0] if labels[-1][1] == '#' else labels[-1][0]
        # Compute and return associated frame indexes.
        start_frame = int(np.floor(start_time * sampling_rate))
        end_frame = int(np.ceil(end_time * sampling_rate))
        return start_frame, end_frame

    def extract_ema(utterance, sampling_rate, articulators):
        """Extract and return the EMA data associated with an utterance."""
        nonlocal initial_sampling_rate, load_ema, new_folder
        # Load EMA data and interpolate NaN values using cubic splines.
        ema, _ = load_ema(utterance, articulators)
        ema = np.concatenate([
            interpolate_missing_values(data_column).reshape(-1, 1)
            for data_column in np.transpose(ema)
        ], axis=1)
        # Optionally resample the EMA data.
        if sampling_rate != initial_sampling_rate:
            ratio = sampling_rate / initial_sampling_rate
            ema = scipy.signal.resample(ema, num=int(len(ema) * ratio))
        # Return the EMA data.
        return ema

    def extract_audio(
            utterance, audio_forms, n_coeff, sampling_rate, frames_time):
        """Generate and return speech features for an utterance."""
        # Wrapped function; pylint: disable=too-many-arguments
        nonlocal corpus, load_wav, new_folder
        hop_time = (1000 / sampling_rate)
        wav = load_wav(utterance, frames_time, hop_time)
        return {
            name: wav.get(name.strip('_'), n_feat, static_only=False)
            for name, n_feat in zip(audio_forms, n_coeff)
        }

    def extract_data(
            utterance, audio_forms, n_coeff, abkhazia_mfcc, articulators,
            sampling_rate, frames_time):
        """Extract acoustic and articulatory data of a given utterance."""
        # Wrapped function; pylint: disable=too-many-arguments
        nonlocal load_wav, new_folder
        nonlocal extract_audio, extract_ema, get_boundaries
        # Generate or load all kinds of features for the utterance.
        data = extract_audio(
            utterance, audio_forms, n_coeff, sampling_rate, frames_time)
        data['ema'] = extract_ema(utterance, sampling_rate, articulators)
        data['voicing'] = load_voicing(utterance, sampling_rate)
        if abkhazia_mfcc:
            path = os.path.join(new_folder, 'mfcc', utterance + '.npy')
            data['mfcc'] = np.load(path)
        # Fit the edge silences' trimming values.
        start_frame, end_frame = get_boundaries(utterance, sampling_rate)
        original_end = end_frame
        for name, array in data.items():
            length = len(array)
            if length < start_frame:
                raise ValueError(
                    "Utterance '%s': '%s' features are shorter than the "
                    "expected start trimming zone." % (utterance, name))
            if length < original_end:
                print(
                    "Utterance '%s': '%s' features are shorter than expected "
                    "(%s vs %s).\nAll features will be trimmed to fit."
                    % (utterance, name, length, original_end))
            if length < end_frame:
                end_frame = length
        # Trim and save all features sets to disk.
        for name, array in data.items():
            path = os.path.join(
                new_folder, name, utterance + '_' + name + '.npy')
            np.save(path, array[start_frame:end_frame])

    # Return the full data extraction function.
    return extract_data
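
# Usage sketch (hedged; `extract_data` is normally called through the
# wrapper built by `build_features_extraction_functions`, with its
# arguments pre-validated by the arguments checker):
#
#     extract_data = build_extractor('mngu0', initial_sampling_rate=200)
#     extract_data(
#         'utt_0001', audio_forms=['lsf'], n_coeff=[13],
#         abkhazia_mfcc=False, articulators=['tt_x', 'tt_y'],
#         sampling_rate=100, frames_time=25)
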
def build_file_loaders(corpus):
    """Define and return functions to load single-file data or parameters."""
    # Load dependency constants and function.
    data_folder = CONSTANTS['%s_processed_folder' % corpus]
    get_utterances_list = import_from_string(
        'ac2art.corpora.%s.raw._loaders' % corpus, 'get_utterances_list')

    # Define the four loading functions.

    def get_norm_parameters(file_type, speaker=None):
        """Return normalization parameters for a type of {0} features.

        file_type : type of features whose parameters to return (str)
        speaker   : optional speaker whose parameters to return (str)
                    (otherwise, corpus-wide parameters are returned)
        """
        nonlocal data_folder
        path = _get_normfile_path(data_folder, file_type, speaker)
        return np.load(path).tolist()

    def get_utterances(set_name=None):
        """Get the list of utterances from a given set.

        set_name : name of the set, e.g. 'train', 'validation' or 'test'
        """
        nonlocal data_folder, get_utterances_list
        if set_name is None:
            return get_utterances_list()
        path = os.path.join(data_folder, 'filesets', set_name + '.txt')
        with open(path) as file:
            utterances = [row.strip('\n') for row in file]
        return utterances

    def load_acoustic(
            name, audio_type='mfcc_stds', context_window=0,
            zero_padding=True):
        """Load the acoustic data associated with an utterance from {0}.

        name           : name of the utterance whose data to load (str)
        audio_type     : name of the audio features to use, including
                         normalization indications (str, default 'mfcc_stds')
        context_window : half-size of the context window of frames to return
                         (default 0, returning single audio frames)
        zero_padding   : whether to zero-pad the data when building context
                         frames (bool, default True)
        """
        nonlocal data_folder
        audio_type, norm_type = (audio_type + '_').split('_', 1)
        folder = (
            audio_type + '_norm_' + norm_type.strip('_')
            if norm_type else audio_type
        )
        path = os.path.join(data_folder, folder, name + '_%s.npy' % audio_type)
        acoustic = np.load(path)
        if context_window:
            acoustic = build_context_windows(
                acoustic, context_window, zero_padding)
        return acoustic

    def load_ema(name, norm_type='', use_dynamic=True, articulators=None):
        """Load the articulatory data associated with an utterance from {0}.

        name         : name of the utterance whose data to load (str)
        norm_type    : optional type of normalization to use (str)
        use_dynamic  : whether to return dynamic features (bool, default True)
        articulators : optional list of articulators to load
        """
        nonlocal corpus, data_folder, get_norm_parameters
        # Load the EMA data with proper normalization.
        ema_folder = (
            'ema' if norm_type in ('', 'mean', 'mean_byspeaker')
            else 'ema_norm_' + norm_type
        )
        ema = np.load(os.path.join(data_folder, ema_folder, name + '_ema.npy'))
        if norm_type.startswith('mean'):
            speaker = None if norm_type == 'mean' else name.split('_', 1)[0]
            ema -= get_norm_parameters('ema', speaker)['global_means']
        # Optionally select articulatory data to keep.
        add_voicing = True
        if isinstance(articulators, list):
            if 'voicing' in articulators:
                articulators = [e for e in articulators if e != 'voicing']
            else:
                add_voicing = False
            articulators_list = load_articulators_list(corpus, norm_type)
            invalid = [
                name for name in articulators
                if name not in articulators_list
            ]
            if invalid:
                raise KeyError(
                    'Invalid articulator(s): %s.\nValid articulators are %s.'
                    % (invalid, articulators_list))
            cols_index = [articulators_list.index(key) for key in articulators]
            ema = ema[:, cols_index]
        # Optionally add dynamic features.
        if use_dynamic:
            ema = add_dynamic_features(ema)
        # Optionally add binary voicing data.
        if add_voicing:
            voicing = np.load(
                os.path.join(data_folder, 'voicing', name + '_voicing.npy'))
            if use_dynamic:
                n_static = ema.shape[1] // 3
                ema = np.concatenate(
                    [ema[:, :n_static], voicing, ema[:, n_static:]], axis=1)
            else:
                ema = np.concatenate([ema, voicing], axis=1)
        # Return the articulatory data.
        return ema

    # Adjust the functions' docstrings and return them.
    functions = (get_norm_parameters, get_utterances, load_acoustic, load_ema)
    for function in functions:
        function.__doc__ = function.__doc__.format(corpus)
    return functions
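
# Usage sketch (hedged; utterance and feature names are placeholders):
#
#     (get_norm_parameters, get_utterances,
#      load_acoustic, load_ema) = build_file_loaders('mngu0')
#     acoustic = load_acoustic('utt_0001', context_window=5)
#     ema = load_ema('utt_0001', norm_type='stds', use_dynamic=True)
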
def split_corpus_prototype(
        pct_train, limit, seed, corpus, lowest_limit, same_speaker_data):
    """Split the {0} corpus, ensuring good triphones coverage of the sets.

    pct_train : percentage of observations used as training data; the
                rest will be divided equally between the validation and
                test sets (float between 0 and 1, default .7)
    limit     : minimum number of utterances a triphone must appear in
                so as to be taken into account (int, default {1})
    seed      : optional random seed to use

    Produce three lists of utterances, composing train, validation and
    test filesets. The filesets are built so that each triphone present
    in at least `limit` utterances appears at least once in each fileset.
    The filesets' lengths will also be made to match the `pct_train`
    argument.

    To achieve this, the split is conducted in two steps.
    * First, utterances are iteratively drawn in random order, and added
      to the fileset to which they add the most not-yet-covered triphones.
      This mechanically results in three filesets correctly covering the
      set of triphones (if not, the algorithm is restarted).
    * Then, utterances are randomly removed from the fileset(s) which
      prove too large compared to the desired split, under the condition
      that their removal does not break the triphones-coverage property.
      These utterances are then randomly re-assigned to the filesets
      which are too small.

    Note: due to the structure of the {0} utterances, using a `limit`
    parameter under {1} will generally fail.
    {2}
    The produced filesets are stored to the filesets/ subfolder of the
    processed {0} folder, in txt files named 'train', 'validation'
    and 'test'.
    """
    # Check arguments' validity.
    check_type_validity(pct_train, float, 'pct_train')
    check_type_validity(limit, int, 'limit')
    if not 0 < pct_train < 1:
        raise ValueError('Invalid `pct_train` value: %s.' % pct_train)
    if limit < 3:
        raise ValueError('Minimum `limit` value is 3.')
    elif limit < lowest_limit:
        print('Warning: using such a low `limit` value is likely to fail.')
    # Build the filesets.
    np.random.seed(seed)
    indexer = build_triphones_indexer(limit, corpus, same_speaker_data)
    filesets = build_initial_split(indexer)
    filesets = adjust_filesets(filesets, pct_train, indexer)
    # In case of identical speaker data, generalize the split to all speakers.
    if same_speaker_data:
        get_utterances_list, speakers = import_from_string(
            module='ac2art.corpora.%s.raw._loaders' % corpus,
            elements=['get_utterances_list', 'SPEAKERS'])
        utterances = {
            speaker: get_utterances_list(speaker) for speaker in speakers
        }
        filesets = [
            [utterances[speaker][i] for speaker in speakers for i in fileset]
            for fileset in filesets
        ]
    # Write the produced filesets to txt files.
    filesets_dict = dict(zip(('train', 'validation', 'test'), filesets))
    store_filesets(filesets_dict, corpus)
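
# Usage sketch (hedged guess at intended use: corpus definitions
# presumably bind the corpus-specific arguments of this prototype,
# e.g. through `functools.partial`; all values are illustrative):
#
#     split_corpus = functools.partial(
#         split_corpus_prototype, corpus='mngu0',
#         lowest_limit=9, same_speaker_data=False)
#     split_corpus(pct_train=.7, limit=9, seed=42)
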
def build_normalization_functions(corpus):
    """Define and return corpus-specific data normalization functions.

    Return two functions, in the following order:
      - compute_moments
      - normalize_files
    """
    # Gather dataset-specific dependencies.
    main_folder = CONSTANTS['%s_processed_folder' % corpus]
    get_utterances_list, speakers = import_from_string(
        'ac2art.corpora.%s.raw._loaders' % corpus,
        ['get_utterances_list', 'SPEAKERS'])

    # Wrap the normalization parameters computing function.
    def compute_moments(file_type, by_speaker=False, store=True):
        """Compute files moments."""
        nonlocal speakers
        # Optionally compute speaker-wise normalization parameters.
        if by_speaker:
            return {
                speaker: _compute_moments(
                    file_type, speaker, store, main_folder,
                    get_utterances_list)
                for speaker in speakers
            }
        # Otherwise, compute corpus-wide parameters.
        return _compute_moments(
            file_type, None, store, main_folder, get_utterances_list)

    # Wrap the files normalization function.
    def normalize_files(file_type, norm_type, scope='corpus'):
        """Normalize pre-extracted {0} data of a given type.

        Normalization includes de-meaning and division by either
        standard-deviation or the difference between the extremum points
        (distribution spread). Those parameters may either be computed
        file-wise, speaker-wise or corpus-wide.

        file_type : one of {{'ema', 'energy', 'lpc', 'lsf', 'mfcc'}}
        norm_type : normalization divisor to use ('spread' or 'stds')
        scope     : scope of the normalization parameters to use
                    ('corpus' for corpus-wide (default), 'speaker' for
                    speaker-wise and 'file' for file-wise)

        Normalized utterances are stored as .npy files in a
        properly-named folder.
        """
        nonlocal compute_moments, get_utterances_list, main_folder, speakers
        if scope == 'corpus':
            _corpus_wide_normalize(
                file_type, norm_type, None, main_folder,
                get_utterances_list, compute_moments)
        elif scope == 'speaker':
            for speaker in speakers:
                _corpus_wide_normalize(
                    file_type, norm_type, speaker, main_folder,
                    get_utterances_list, compute_moments)
        elif scope == 'file':
            _file_wise_normalize(
                file_type, norm_type, main_folder, get_utterances_list)
        else:
            raise ValueError(
                "'scope' should be one of {'corpus', 'speaker', 'file'}.")

    # Adjust the functions' docstrings and return them.
    compute_moments.__doc__ = _compute_moments.__doc__.format(corpus)
    normalize_files.__doc__ = normalize_files.__doc__.format(corpus)
    return compute_moments, normalize_files
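
# Usage sketch (hedged; the file type and normalization choices are
# illustrative):
#
#     compute_moments, normalize_files = (
#         build_normalization_functions('mngu0'))
#     compute_moments('ema', by_speaker=True)
#     normalize_files('ema', norm_type='stds', scope='corpus')
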
def build_h5features_extractor(corpus):
    """Define and return a function extracting features to h5 files.

    Return a single function:
      - extract_h5_features
    """
    # Load dependency path and functions.
    abx_folder = os.path.join(
        CONSTANTS['%s_processed_folder' % corpus], 'abx')
    load_acoustic, load_ema, get_utterances = import_from_string(
        'ac2art.corpora.%s.load._load' % corpus,
        ['load_acoustic', 'load_ema', 'get_utterances'])

    # Define features extraction functions.

    def _setup_features_loader(
            audio_features, ema_features, inverter, dynamic_ema,
            articulators):
        """Build a function to load features associated with an utterance.

        See `extract_h5_features` documentation for arguments.
        """
        nonlocal load_acoustic, load_ema
        # Check that provided arguments make sense.
        if audio_features is None and ema_features is None:
            raise RuntimeError('No features were set to be included.')
        if inverter is not None:
            check_type_validity(inverter, (NeuralNetwork, None), 'inverter')
            if audio_features is None:
                raise RuntimeError(
                    'No acoustic features specified to feed the inverter.')
            elif ema_features is not None:
                raise RuntimeError(
                    'Both ema features and an inverter were specified.')
        # Build the acoustic features loading function.
        if audio_features is not None:
            window = (
                0 if inverter is None or inverter.input_shape[-1] % 11 else 5)
            load_audio = functools.partial(
                load_acoustic, audio_type=audio_features,
                context_window=window)
            # Optionally build and return an inverter-based features loader.
            if inverter is not None:
                def invert_features(utterance):
                    """Return the features inverted from an utterance."""
                    pred = inverter.predict(load_audio(utterance))
                    return pred
                return invert_features
            if ema_features is None:
                return load_audio
        # Build the articulatory features loading function.
        if ema_features is not None:
            load_articulatory = functools.partial(
                load_ema, norm_type=ema_features,
                use_dynamic=dynamic_ema, articulators=articulators)
            if audio_features is None:
                return load_articulatory

        # When appropriate, build a global features loading function.
        def load_features(utterance):
            """Load the features associated with an utterance."""
            return np.concatenate(
                [load_audio(utterance), load_articulatory(utterance)],
                axis=1)
        return load_features

    def extract_h5_features(
            audio_features=None, ema_features=None, inverter=None,
            output_name='%s_features' % corpus, articulators=None,
            dynamic_ema=True, sampling_rate=100):
        """Build an h5 file recording audio features associated with {0} data.

        audio_features : optional name of audio features to use,
                         including normalization indications
        ema_features   : optional name of ema features' normalization to use
                         (use '' for raw data and None for no EMA data)
        inverter       : optional acoustic-articulatory inverter whose
                         predictions to use, based on the audio features
        output_name    : base name of the output file
                         (default '{0}_features')
        articulators   : optional list of articulators to keep among EMA data
        dynamic_ema    : whether to include dynamic articulatory features
                         (bool, default True)
        sampling_rate  : sampling rate of the frames, in Hz
                         (int, default 100)
        """
        # Arguments serve modularity; pylint: disable=too-many-arguments
        nonlocal abx_folder, get_utterances, _setup_features_loader
        # Build the abx folder, if necessary.
        if not os.path.isdir(abx_folder):
            os.makedirs(abx_folder)
        # Check that the destination file does not exist.
        output_file = os.path.join(abx_folder, '%s.features' % output_name)
        if os.path.isfile(output_file):
            raise FileExistsError("File '%s' already exists." % output_file)
        # Set up the features loading function.
        load_features = _setup_features_loader(
            audio_features, ema_features, inverter, dynamic_ema,
            articulators)
        # Load the list of utterances and process them iteratively.
        utterances = get_utterances()
        with h5f.Writer(output_file) as writer:
            for i in range(0, len(utterances), 100):
                # Load or compute utterances list, features and time labels.
                items = utterances[i:i + 100]
                features = [load_features(item) for item in items]
                labels = [
                    np.arange(len(data)) / sampling_rate for data in features
                ]
                # Write the currently processed utterances' data to h5.
                writer.write(
                    h5f.Data(items, labels, features, check=True),
                    groupname='features', append=True)

    # Adjust the features extraction function's docstring and return it.
    extract_h5_features.__doc__ = extract_h5_features.__doc__.format(corpus)
    return extract_h5_features
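
# Usage sketch (hedged; 'mfcc_stds' is a plausible audio feature name
# under this module's conventions, not one guaranteed by the source):
#
#     extract_h5_features = build_h5features_extractor('mngu0')
#     extract_h5_features(
#         audio_features='mfcc_stds', output_name='mngu0_mfcc')
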
def build_abxpy_callers(corpus):
    """Define and return corpus-specific functions to run ABXpy tasks.

    Return four functions, in that order:
      - abx_from_features
      - make_abx_task
      - make_itemfile
      - load_abx_scores
    """
    # pylint: disable=too-many-statements
    # Load dependency path and functions.
    abx_folder = os.path.join(
        CONSTANTS['%s_processed_folder' % corpus], 'abx')
    get_utterances = import_from_string(
        'ac2art.corpora.%s.load._load' % corpus, 'get_utterances')
    load_phone_labels = import_from_string(
        'ac2art.corpora.%s.raw._loaders' % corpus, 'load_phone_labels')

    # Define the functions.

    def _phones_to_itemfile(utterance, symbols):
        """Build a dict of item file rows for a given utterance."""
        nonlocal load_phone_labels
        phones = load_phone_labels(utterance)
        times = [round(time - phones[0][0], 3) for time, _ in phones[:-1]]
        phones = [symbols[phone] for _, phone in phones]
        return {
            '#file': [utterance] * (len(times) - 1),
            'onset': times[:-1],
            'offset': times[1:],
            '#phone': phones[1:-1],
            'context': [
                phones[i - 1] + '_' + phones[i + 1]
                for i in range(1, len(times))
            ],
            'speaker': utterance.split('_')[0]
        }

    def get_task_name(fileset, limit_phones):
        """Return the base name of an ABX task file based on parameters."""
        nonlocal corpus
        fileset = '' if fileset is None else fileset + '_'
        reduced = 'reduced_' * limit_phones
        return corpus + '_' + fileset + reduced

    def make_itemfile(fileset=None, limit_phones=False):
        """Build a .item file for ABXpy recording {0} phone labels.

        fileset      : optional set name whose utterances to use (str)
        limit_phones : whether to aggregate some phonemes, using the
                       'common_reduced' column of the symbols file
                       as mapping (bool, default False)
        """
        nonlocal abx_folder, corpus, get_utterances, _phones_to_itemfile
        print('Creating item file...')
        # Establish the item file's location.
        output_file = get_task_name(fileset, limit_phones) + 'phones.item'
        output_file = os.path.join(abx_folder, output_file)
        # Write the item file's header.
        columns = ['#file', 'onset', 'offset', '#phone', 'context', 'speaker']
        with open(output_file, mode='w') as itemfile:
            itemfile.write(' '.join(columns) + '\n')
        # Load the corpus-specific to cross-corpus phone symbols mapping dict.
        # Note: non-ipa cross-corpus symbols are used because ABXpy
        # (python 2) does not support non-ascii characters.
        symbols = pd.read_csv(
            CONSTANTS['symbols_file'], index_col=corpus
        )['common' + '_reduced' * limit_phones].to_dict()
        # Iteratively add utterances' phone labels to the item file.
        for utterance in get_utterances(fileset):
            items = pd.DataFrame(_phones_to_itemfile(utterance, symbols))
            items[columns].to_csv(
                output_file, index=False, header=False,
                sep=' ', mode='a', encoding='utf-8')
        print('Done creating %s file.' % output_file)

    def make_abx_task(fileset=None, byspeaker=True, limit_phones=False):
        """Build a .abx ABXpy task file associated with {0} phones.

        fileset      : optional set name whose utterances to use (str)
        byspeaker    : whether to discriminate pairs from the same
                       speaker only (bool, default True)
        limit_phones : whether to aggregate some phonemes, using the
                       'common_reduced' column of the symbols file
                       as mapping (bool, default False)
        """
        nonlocal abx_folder, corpus, make_itemfile
        print('Creating task file...')
        # Build the item file if necessary.
        task_name = get_task_name(fileset, limit_phones)
        item_file = os.path.join(abx_folder, task_name + 'phones.item')
        if not os.path.isfile(item_file):
            make_itemfile(fileset, limit_phones)
        else:
            print('Using found %s file.' % item_file)
        # Establish the task file's path and the ABXpy task's 'by' argument.
        output_file = os.path.join(
            abx_folder, task_name + ('byspk_' * byspeaker) + 'task.abx')
        within = 'context speaker' if byspeaker else 'context'
        # Run the ABXpy task module.
        abxpy_task(item_file, output_file, on='phone', by=within)
        print('Done creating %s file.' % output_file)

    def abx_from_features(
            features, fileset=None, byspeaker=True,
            limit_phones=False, n_jobs=1):
        """Run the ABXpy pipeline on a set of pre-extracted {0} features.

        features     : name of a h5 file of {0} features created with
                       the `extract_h5_features` function (str)
        fileset      : optional name of a fileset whose utterances'
                       features to use (str)
        byspeaker    : whether to discriminate pairs from the same
                       speaker only (bool, default True)
        limit_phones : whether to aggregate some phonemes, using the
                       'ipa_reduced' column of the {0} symbols file
                       as mapping (bool, default False)
        n_jobs       : number of CPU cores to use (positive int, default 1)
        """
        nonlocal abx_folder, corpus, make_abx_task
        check_type_validity(features, str, 'features')
        check_type_validity(fileset, (str, type(None)), 'fileset')
        check_positive_int(n_jobs, 'n_jobs')
        # Declare the path to the task file.
        task_name = get_task_name(fileset, limit_phones)
        task_name += 'byspk_' * byspeaker
        task_file = os.path.join(abx_folder, task_name + 'task.abx')
        # Declare paths to the input features and output scores files.
        features_file = os.path.join(abx_folder, features + '.features')
        scores_file = features + '_' + task_name.split('_', 1)[1] + 'abx.csv'
        scores_file = os.path.join(abx_folder, scores_file)
        # Check that the features file exists.
        if not os.path.exists(features_file):
            raise FileNotFoundError("No such file: '%s'." % features_file)
        # Build the ABX task file if necessary.
        if not os.path.isfile(task_file):
            make_abx_task(fileset, byspeaker, limit_phones)
        else:
            print('Using found %s file.' % task_file)
        # Run the ABXpy pipeline.
        abxpy_pipeline(features_file, task_file, scores_file, n_jobs)
        # Replace phone symbols with IPA ones in the scores file.
        add_ipa_symbols(scores_file)

    def load_abx_scores(filename):
        """Load, aggregate and return some pre-computed abx scores."""
        nonlocal abx_folder, corpus
        # Load the ABX scores.
        path = os.path.join(abx_folder, filename + '_abx.csv')
        data = pd.read_csv(path)
        # Collapse the scores (i.e. forget about contexts and speakers).
        data['score'] *= data['n']
        data['phones'] = data.apply(
            lambda row: '_'.join(sorted([row['phone_1'], row['phone_2']])),
            axis=1)
        scores = data.groupby('phones')[['score', 'n']].sum()
        scores['score'] /= scores['n']
        # Return the properly-formatted scores.
        return scores

    # Adjust the functions' docstrings and return them.
    functions = (
        abx_from_features, make_abx_task, make_itemfile, load_abx_scores)
    for function in functions:
        function.__doc__ = function.__doc__.format(corpus)
    return functions
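
# Usage sketch (hedged; the features file name is a placeholder and must
# match an existing '<name>.features' file in the corpus's abx folder):
#
#     (abx_from_features, make_abx_task,
#      make_itemfile, load_abx_scores) = build_abxpy_callers('mngu0')
#     abx_from_features('mngu0_mfcc', fileset='test', n_jobs=4)
#     scores = load_abx_scores('mngu0_mfcc_test_byspk')
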