def read_corpus_file(words_df, file, audio_basedir, include_wav, sample_rate):
    """
    Build the per-word feature table for a single audio file of the corpus.

    Args:
        words_df : Data frame holding the rows that belong to this audio
            file. The code reads its ``start`` and ``end`` columns and
            drops its ``FBSFs`` column before recomputing it.
        file (str): Name of the audio file, relative to ``audio_basedir``.
        audio_basedir (str) : Directory that contains the audio files.
        include_wav: When truthy, the per-word signals are stored in an
            extra ``wav`` column.
        sample_rate (int) : Sample rate.

    Returns:
        A copy of ``words_df`` with ``FBSFs`` recomputed and the columns
        ``chunk_index`` and ``chunk_duration`` (plus ``wav`` on request)
        added. If any step raises, the error is passed to
        ``save_as_exception`` and ``None`` is returned.
    """
    try:
        wav_path = os.path.join(audio_basedir, file)
        print("Reading the file: ", wav_path)
        samples = read_wav_file(wav_path, sample_rate)

        # Cut the recording into one signal per word, then look for
        # boundaries inside every word (np.argmin, smoothing 800,
        # window 1000) and split each word at those boundaries.
        signals = get_sig_of_words(samples, words_df.start.values,
                                   words_df.end.values, sample_rate)
        boundaries = get_boundary_of_words(signals, np.argmin, 800, 1000)
        parts = split_signal(signals, boundaries)

        # drop() returns a new frame, so the caller's words_df is untouched.
        table = words_df.drop(columns=['FBSFs'])
        table["FBSFs"] = get_summary_freq_band(parts, sample_rate)
        table["chunk_index"] = boundaries
        # Sample indices divided by the sample rate give seconds.
        table["chunk_duration"] = table["chunk_index"].apply(
            lambda idx: idx / sample_rate)
        if include_wav:
            table["wav"] = signals
        return table
    except Exception as e:
        save_as_exception(audio_basedir, file, e)
        return None
def read_corpus_files(rds_df, audio_basedir, include_wav, sample_rate):
    """
    Process every audio file referenced by the corpus table.

    Args:
        rds_df : Data frame of all words; its ``File`` column names the
            audio file each row belongs to.
        audio_basedir (str) : Directory that contains the audio files.
        include_wav: Forwarded to ``read_corpus_file``.
        sample_rate (int) : Sample rate.

    Returns:
        A list with one entry per distinct ``File`` value that exists on
        disk, in order of first appearance. Each entry is whatever
        ``read_corpus_file`` returned for that file. Files that are
        missing on disk are skipped without any message.
    """
    collected = []
    for file in rds_df.File.unique().tolist():
        try:
            if not Path(os.path.join(audio_basedir, file)).exists():
                continue
            rows_of_file = rds_df.loc[rds_df['File'] == file]
            collected.append(
                read_corpus_file(rows_of_file, file, audio_basedir,
                                 include_wav, sample_rate))
        except Exception as e:
            # Record the failure and carry on with the remaining files.
            save_as_exception(audio_basedir, file, e)
            print(e)
    return collected
def get_mel_spec(sig, sample_rate):
    """
    Compute a quantised log Mel-filter bank spectrum of an audio signal.

    The log filter-bank energies (21 filters, 5 ms window, 5 ms step,
    pre-emphasis 0.97) are transposed and then rescaled with the global
    minimum and maximum so that every value becomes a whole number
    between 0 and 5 (``ceil((x - min) * 5 / (max - min))``).

    Args:
        sig (array like) : Signal data. Must be real.
        sample_rate (int) : Sample rate.

    Return:
        The rescaled, transposed spectrum, or ``None`` when all energies
        are equal (nothing to scale) or when any step raises; in the
        latter case the error is passed to ``save_as_exception``.
    """
    try:
        energies = python_speech_features.logfbank(
            sig, samplerate=sample_rate, winlen=0.005, winstep=0.005,
            nfilt=21, preemph=0.97)
        bands = energies.T
        lowest = np.amin(bands)
        highest = np.amax(bands)
        if lowest == highest:
            # A flat spectrum would lead to a division by zero below.
            return None
        scale = 5 / np.abs(lowest - highest)
        return np.ceil((bands - lowest) * scale)
    except Exception as e:
        save_as_exception("Root", "Mel Spectrum", e)
        return None
def rolling_window(function, word_env, window):
    """
    Slide a window over an envelope and collect the positions at which
    ``function`` points at the window's reference sample.

    The window covers ``window // 2`` samples to the left of the current
    position and the remaining ``window - window // 2`` samples starting
    at it. A position is kept when ``function`` applied to the window
    returns ``window // 2``, i.e. the index of the current position
    inside the window. After a hit the scan jumps ahead by the
    right-hand width, otherwise it advances by a single sample.

    Args:
        function : Callable returning an index into the window,
            for example np.argmin.
        word_env : An array of enveloped signal for one word.
        window (int) : Window length in samples (for example 1000).

    Returns:
        A list of the matching positions. If an exception occurs while
        scanning, it is passed to ``save_as_exception`` and the positions
        found so far are returned.
    """
    left = window // 2
    right = window - left
    hits = []
    centre = left
    try:
        while centre + right <= len(word_env):
            segment = word_env[centre - left:centre + right]
            if function(segment) != left:
                centre += 1
                continue
            hits.append(centre)
            centre += right
    except Exception as e:
        save_as_exception("Root", "rolling_window", e)
    return hits
def read_pkl_file(path):
    """
    Load a pickled object with pandas.

    Args:
        path: The path of the file, Example: './data/words.pkl'

    Returns:
        The object stored in the file (a pandas.DataFrame in the intended
        use). If loading raises, the exception type is printed, the error
        is passed to ``save_as_exception`` and ``None`` is returned.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    try:
        loaded = pd.read_pickle(path)
    except Exception as e:
        print(type(e))
        save_as_exception("root", path, e)
        return None
    return loaded
def read_nc_file(path):
    """
    Open a nc file as a data array with the xarray package.

    Args:
        path: The path of the file, Example: './data/words.nc'

    Returns:
        The result of ``xr.open_dataarray(path)``. If opening raises, the
        exception type is printed, the error is passed to
        ``save_as_exception`` and ``None`` is returned.
    """
    try:
        data_array = xr.open_dataarray(path)
    except Exception as e:
        print(type(e))
        save_as_exception("root", path, e)
        return None
    return data_array
def read_rds_file(path):
    """
    Read a rds file with the pyreadr package. It contains all words in
    the corpus.

    Args:
        path: The path of the file, Example: './data/words.rds'

    Returns:
        The entry stored under the key ``None`` in the result of
        ``pyreadr.read_r`` (intended to be a pandas.DataFrame with columns
        wordtoken, start, end, File, Prev, FBSFs, triphones). If reading
        or the lookup raises, the exception type is printed, the error is
        passed to ``save_as_exception`` and ``None`` is returned.
    """
    try:
        return pyreadr.read_r(path)[None]
    except Exception as e:
        print(type(e))
        save_as_exception(path, "words.rds", e)
        return None
def get_boundary(word_sig, function, smooth, window):
    """
    Locate boundaries inside one word from its amplitude envelope.

    The signal is turned into an envelope with ``envelope`` and that
    envelope is scanned with ``rolling_window``.

    Args:
        word_sig : An array of digital signal for one word.
        function : Callable applied to each window of the envelope,
            for example np.argmin.
        smooth : Smoothing degree handed to ``envelope`` (e.g. 800).
        window (int) : Window length in samples (for example 1000).

    Returns:
        A numpy array with the boundary indices. If an exception is
        raised, it is passed to ``save_as_exception`` and ``None`` is
        returned.
    """
    try:
        smoothed = envelope(word_sig, smooth)
        return np.array(rolling_window(function, smoothed, window))
    except Exception as e:
        save_as_exception("Root", "get boundary", e)
        return None
def envelope(sig, smooth):
    """
    Compute the amplitude envelope of a signal, optionally smoothed.

    The envelope is the magnitude of the analytic signal obtained with
    the Hilbert transform. When ``smooth`` lies strictly between 0 and
    the envelope length, the envelope is additionally averaged with a
    rectangular window of ``smooth`` samples (output length unchanged).

    Args:
        sig : Signal data. Must be real.
        smooth : Width of the averaging window in samples; values outside
            the open interval (0, len(sig)) disable smoothing.

    Returns:
        The (possibly smoothed) amplitude envelope. If an exception is
        raised, it is passed to ``save_as_exception`` and ``None`` is
        returned.
    """
    try:
        amplitude = np.absolute(signal.hilbert(sig))
        if not (0 < smooth < len(amplitude)):
            return amplitude
        # Normalised boxcar kernel == moving average over `smooth` samples.
        kernel = signal.windows.boxcar(smooth) / smooth
        return np.convolve(amplitude, kernel, mode='same')
    except Exception as e:
        save_as_exception("Root", "Envelope", e)
        return None
def get_summary_freq_band(words_boundaries_parts, sample_rate):
    """
    Build the frequency band summary features (FBSFs) of every word.

    For each part of a word the spectrum from ``get_mel_spec`` is
    summarised per frequency band into one cue consisting of the band
    number, the first intensity value, the median of all values in the
    band, the minimum and maximum intensity, the last intensity value and
    the part (chunk) index. Band and part numbers start at 1. A part for
    which no spectrum is available contributes the cue ``tooShort``
    instead (and is reported through ``save_as_exception``).

    A median that is a whole number is written without a fractional part
    (``3`` rather than ``3.0``); any other median is written as is
    (e.g. ``0.5``).

    Args:
        words_boundaries_parts : A list of all words, each given as the
            sequence of parts it was split into.
        sample_rate (int) : Sample rate.

    Returns:
        A list of strings, one per word, each joining the word's cues
        with ``_``. A word whose processing raises is reported through
        ``save_as_exception`` and contributes no entry, so the result can
        be shorter than the input.
    """
    fbsf = []
    for word in words_boundaries_parts:
        try:
            band_cues = []
            for index, part in enumerate(word, 1):
                spec = get_mel_spec(part, sample_rate)
                if spec is None:
                    band_cues.append("tooShort")
                    save_as_exception("Root", "get summary frequency bands",
                                      "too short happened")
                    continue
                for ii, band in enumerate(spec, 1):
                    median = np.median(band)
                    # Drop the ".0" of whole-number medians only. The
                    # previous substring test ('0' in str(median)) also
                    # matched values such as 0.5 and truncated them to 0.
                    if float(median).is_integer():
                        median = int(median)
                    band_cues.append(
                        'b{}start{}median{}min{}max{}end{}part{}'.format(
                            ii, int(band[0]), median, int(band.min()),
                            int(band.max()), int(band[-1]), index))
            fbsf.append('_'.join(band_cues))
        except Exception as e:
            save_as_exception("Root", "get summary frequency bands", e)
    return fbsf
def split_signal(word_signals, word_boundaries):
    """
    Cut every word signal into parts at its boundary indices.

    Signals and boundaries are paired position by position. A word with
    at least one boundary is split with ``np.split``; a word without
    boundaries is kept as a single part.

    Args:
        word_signals : Signals of the words.
        word_boundaries : For each word, the indices at which to split.

    Returns:
        A list with one list of parts per word. If splitting raises, the
        error is passed to ``save_as_exception`` and ``None`` is returned.
    """
    pairs = zip(word_signals, word_boundaries)
    try:
        return [
            np.split(samples, cuts) if len(cuts) > 0 else [samples]
            for samples, cuts in pairs
        ]
    except Exception as e:
        save_as_exception("Root", "Splitting", e)
        return None