def _load_original_matrix(self):
    """Load the original mgc matrix and the phone start frames of the test file.

    The raw ``float32`` values of ``MGC_DIR + <filename> + '.mgc'`` are read
    and reshaped to ``MGCORD + 1`` columns. The label file
    ``LABEL_DIR + <filename> + '.lab'`` is then used to convert the start time
    of every phone into a row index of that matrix.

    :returns: tuple ``(mgc_matrix, phone_starts)`` - the original mgc matrix
        and the list of row indices at which the phones start
    """
    mgc_path = MGC_DIR + self._filename + '.mgc'
    original = np.fromfile(mgc_path, dtype=np.float32).reshape(-1, MGCORD + 1)

    phone_label = Label(LABEL_DIR + self._filename + '.lab')
    # Number of matrix rows per unit of label time.
    rows_per_time_unit = original.shape[0] / phone_label.last_phone_end

    start_rows = []
    for phone in phone_label.cur_phones_additions():
        # phone[1] is the start time of the phone as reported by the label.
        start_rows.append(int(round(phone[1] * rows_per_time_unit)))
    return original, start_rows
def plot_phones(data_dir, max_occurrences=10):
    """Create plots for the phones found in the label files of a directory.

    For every ``*.mgc`` file in ``<data_dir>/mgc/`` the text file
    ``<data_dir>/txt/<name>.txt`` and the label file
    ``<data_dir>/labels/full/<name>.lab`` with the same base name are loaded.
    Every phone of the label is plotted with ``mark_phone`` until
    ``max_occurrences`` plots exist for that phone. The plots are written to
    ``OUT_DIR + <phone> + '/' + <phone>_NNN.png``; missing output directories
    are created.

    :params data_dir: the directory to search for label, mgc and text files
    :params max_occurrences: number of plots to make per single phone
    """
    mgc_dir = data_dir + '/mgc/'
    txt_dir = data_dir + '/txt/'
    label_dir = data_dir + '/labels/full/'

    # Number of plots already written, per phone.
    phones = Counter()
    # Only *.mgc files are considered; sorting keeps the processing order
    # (and therefore which occurrences get plotted) deterministic.
    for mgc_path in sorted(glob(mgc_dir + '*.mgc')):
        # Strip directory and extension only, so names that merely contain
        # the substring "mgc" are not mangled.
        stem = os.path.splitext(os.path.basename(mgc_path))[0]

        with open(txt_dir + stem + '.txt', 'r') as txt_file:
            # Only the first line of the text file is used for the plot.
            txt = txt_file.readline()
        label = Label(label_dir + stem + '.lab')
        mgc = np.fromfile(mgc_path, dtype=np.float32).reshape(-1, MGCORD + 1)

        for p in label.cur_phones_additions():
            phone = p[0]
            if phones[phone] >= max_occurrences:
                continue
            phones[phone] += 1

            phone_dir = OUT_DIR + phone
            os.makedirs(phone_dir, exist_ok=True)
            out_filename = phone_dir + '/' + phone + '_{:03d}.png'.format(
                phones[phone])

            # NOTE(review): p[3] is presumably the full-context label text of
            # the phone; the token before the first '@' (last space-separated
            # part) is handed to mark_phone - confirm against mark_phone.
            mark_phone(mgc, txt, p[3].split('@')[0].split(' ')[-1], p[1],
                       p[2], out_filename)
            print('Plot saved as {:s}'.format(out_filename))
class BFCR:
    """Basis function coefficient representation of a feature.

    A feature matrix (rows are frames, columns are components) is cut at the
    phone borders given by a label. Every component of every phone is then
    approximated by a Legendre series, so a phone is described by a fixed
    number of coefficients per component regardless of its duration.

    Besides encoding and decoding, the class can plot a component of a
    feature against its reconstruction, blend over the phone borders of a
    reconstructed matrix and be saved to / restored from a pickle file.
    """

    def __init__(self, label_file=None):
        """Initialise an instance.

        A label file does not have to be given at creation time, it can also
        be set later with :meth:`load_label`.

        :params label_file: the label on which the BFCR instance is based
        """
        # All three dicts are keyed by feature name.
        self._encoded_features = {}  # coefficient tensors
        self._len_phones = {}  # list of (begin_index, end_index) per phone
        self._original_matrix = {}  # matrices passed to encode_feature
        if label_file:
            self.label = Label(label_file)
            self.label_file = label_file
        else:
            self.label = None
            self.label_file = None

    def load_label(self, filename):
        """Load a label if none is loaded yet.

        :params filename: filename of the label
        :raises Exception: if a label is already loaded
        """
        if self.label is not None:
            raise Exception('Label is already loaded')
        self.label = Label(filename)
        self.label_file = filename

    def encode_feature(self, feature_matrix, feature_name,
                       num_bases=NUM_BASES):
        """Encode a given feature.

        The matrix is split at the phone borders of the loaded label and each
        component of each phone is fitted with a Legendre series of
        ``num_bases`` coefficients (degree ``num_bases - 1``) over the
        interval [-1, 1]. The result is stored under ``feature_name`` as a
        ``float32`` tensor of shape ``(num_phones, num_components,
        num_bases)``, together with the original matrix and the frame range
        of every phone.

        :params feature_matrix: matrix to encode
        :params feature_name: name of the encoded feature
        :params num_bases: number of basis functions to encode
        :raises Exception: if no label is loaded
        """
        if self.label is None:
            raise Exception('No label file was loaded')

        num_components = feature_matrix.shape[1]
        # Number of matrix rows per unit of label time.
        step_size = feature_matrix.shape[0] / self.label.last_phone_end
        tensor = np.empty(
            (self.label.num_phones, num_components, num_bases),
            dtype=np.float32)

        phone_spans = []
        for i, phone in enumerate(self.label.cur_phones_additions()):
            # phone[1] / phone[2] are the begin / end time of the phone.
            phone_begin_index = int(round(phone[1] * step_size))
            phone_end_index = int(round(phone[2] * step_size))
            phone_spans.append((phone_begin_index, phone_end_index))

            segment = feature_matrix[phone_begin_index:phone_end_index]
            # The sample positions are the same for all components of a phone.
            x_values = np.linspace(-1, 1, len(segment))
            for component in range(num_components):
                tensor[i, component, :] = np.polynomial.legendre.legfit(
                    x_values, segment[:, component], num_bases - 1)

        # Store everything only after the fit succeeded, so a failing encode
        # never leaves the three dicts out of sync with each other.
        self._len_phones[feature_name] = phone_spans
        self._original_matrix[feature_name] = feature_matrix
        self._encoded_features[feature_name] = tensor

    def decode_feature(self, feature_name, blending_time=None):
        """Decode a given feature.

        Recomposes the basis function coefficients into a regular feature
        matrix by evaluating the Legendre series of every phone and component
        on as many points as the phone has frames. If ``blending_time`` is
        set, the result is additionally blended over the phone borders for
        the given amount of milliseconds in both directions. By default no
        blending is done.

        :params feature_name: name of the feature to decode
        :params blending_time: time to blend over the phone borders
        :returns: the recomposed matrix for the given feature
        :raises Exception: if the feature is not encoded
        """
        self._check_feature(feature_name)
        phone_spans = self._len_phones[feature_name]
        encoded = self._encoded_features[feature_name]

        # The largest (begin, end) tuple belongs to the last phone; its
        # largest entry is the number of frames of the whole utterance.
        utterance_length = max(max(phone_spans))
        reconstructed_matrix = np.zeros((utterance_length, encoded.shape[1]),
                                        dtype=np.float32)

        for phone, (cur_phone_start, cur_phone_end) in enumerate(phone_spans):
            resample_size = len(range(cur_phone_start, cur_phone_end))
            x_values = np.linspace(-1, 1, resample_size)
            coeff = encoded[phone]  # shape: (components, bases)
            signal_snippet = np.zeros((len(x_values), coeff.shape[0]),
                                      dtype=np.float32)
            for i in range(coeff.shape[0]):
                signal_snippet[:, i] = np.polynomial.legendre.legval(
                    x_values, coeff[i, :])
            reconstructed_matrix[
                cur_phone_start:cur_phone_end] = signal_snippet

        if blending_time:
            reconstructed_matrix = self._blend_borders(
                feature_name, reconstructed_matrix, blending_time)
        return reconstructed_matrix

    def save_to_file(self, filename):
        """Save the BFCR instance into a binary (pickle) file.

        :params filename: name of the file where to save the BFCR instance
        """
        with open(filename, 'wb') as f:
            pickle.dump(self, f)

    def read_from_file(self, filename):
        """Load a previously saved BFCR instance into this instance.

        The label, the label file name and all per-feature data of the stored
        instance replace the ones of this instance.

        :params filename: name of the file to load the BFCR instance from
        """
        # SECURITY: unpickling can execute arbitrary code. Only load files
        # that were written by save_to_file() and come from a trusted source.
        with open(filename, 'rb') as f:
            restored = pickle.load(f)
        self.label = restored.label
        self._encoded_features = restored._encoded_features
        self._len_phones = restored._len_phones
        self._original_matrix = restored._original_matrix
        self.label_file = restored.label_file

    def plot_component(self, feature_name, filename, component_num=0):
        """Plot a component of a given feature.

        Creates a plot with one component of the original matrix and the same
        component of the reconstructed (decoded, unblended) matrix. By default
        the first component of the matrix is plotted. The directory of
        ``filename`` is created if it does not exist.

        :params feature_name: name of the feature to plot
        :params filename: name of the file where the plot gets saved
        :params component_num: number of the component to plot
        """
        path = os.path.dirname(filename)
        # A bare file name has no directory part; os.makedirs('') would raise.
        if path:
            os.makedirs(path, exist_ok=True)

        original = self._original_matrix[feature_name]
        xmax = original.shape[0]
        f, ax = plt.subplots(1, 1, figsize=(18, 6))
        x = np.linspace(0, xmax, xmax)
        ax.plot(x, original[:, component_num], label='Original')
        ax.plot(x,
                self.decode_feature(feature_name)[:, component_num],
                '.',
                label='Reconstructed')
        ax.set_xlim(xmin=0, xmax=xmax)
        ax.legend()
        f.savefig(filename)
        # Close exactly the figure created above.
        plt.close(f)

    def original_matrix(self, feature_name):
        """Getter for the original matrix.

        :params feature_name: the feature of which to get the original matrix
        :returns: the original matrix of the given feature
        """
        return self._original_matrix[feature_name]

    def phone_coefficients(self, feature_name):
        """Get the basis function coefficients of all phones as a 2-D matrix.

        :params feature_name: name of the encoded feature
        :returns: the encoded feature with one row per phone; all
            coefficients of a phone are flattened into that row
        :raises Exception: if the feature is not encoded
        """
        self._check_feature(feature_name)
        encoded = self._encoded_features[feature_name]
        num_phones = encoded.shape[0]
        # Derive the row width from the stored tensor instead of assuming
        # (MGCORD + 1) * NUM_BASES, so features encoded with another number
        # of components or basis functions are supported as well.
        row_width = int(np.prod(encoded.shape[1:]))
        return np.reshape(encoded, (num_phones, row_width))

    @property
    def encoded_features(self):
        """Get all encoded features."""
        return self._encoded_features

    @encoded_features.setter
    def encoded_features(self, value):
        """Set encoded features from a dict of feature name -> coefficients.

        The given entries are added to (or replace) the stored encoded
        features. The frame range of every phone is derived from the loaded
        label for each assigned feature.

        :params value: dict mapping feature names to encoded features
        :raises Exception: if no label is loaded
        :raises TypeError: if value is not a dict
        """
        if not self.label:
            raise Exception(
                'No label file was loaded, labels are needed for assigning phone lenght'
            )
        if not isinstance(value, dict):
            raise TypeError('encoded_features has to be assigned to a dict')

        # NOTE(review): unlike encode_feature, the frame rate is taken from
        # the configuration constants here, presumably because no original
        # matrix is available - confirm both agree for the data in use.
        step_size = SAMPFREQ / FRAMESHIFT
        phone_spans = [(int(round(phone[1] * step_size)),
                        int(round(phone[2] * step_size)))
                       for phone in self.label.cur_phones_additions()]
        for name, encoded in value.items():
            self._encoded_features[name] = encoded
            # Every feature gets its own list object.
            self._len_phones[name] = list(phone_spans)

    def _blend_borders(self, feature_name, matrix, blending_time=25):
        """Blend over the borders of one phone to the next.

        Around every inner phone border the rows of ``matrix`` are replaced by
        a linear interpolation between the row ``blending_time`` before the
        border and the row ``blending_time`` after it. The end of the first
        and of the last phone are never blended, and a border is skipped when
        the blending window would reach beyond the neighbouring borders. The
        default value of 25ms was found to give the best results.

        The matrix is modified in place and returned.

        :params feature_name: name of the feature matrix to do the blending
        :params matrix: matrix to blend
        :params blending_time: time to blend over each border in milliseconds
        :returns: the given matrix with blended phone borders
        """
        # Milliseconds -> the time unit of the label borders.
        blending_time = 1 / 1000 * blending_time
        phone_borders = [
            phone[2] for phone in self.label.cur_phones_additions()
        ]
        last_time = phone_borders[-1]
        last_index = self._len_phones[feature_name][-1][1]
        # Duration of a single matrix row.
        step = last_time / last_index

        for i in range(1, len(phone_borders) - 1):
            start = phone_borders[i] - blending_time
            end = phone_borders[i] + blending_time
            if start < phone_borders[i - 1] or end > phone_borders[i + 1]:
                continue

            blend_index_start = int(round(start / step))
            blend_index_end = int(round(end / step)) - 1
            num_rows = blend_index_end - blend_index_start
            # Very short blending times can yield an empty (or negative)
            # window, for which np.linspace would raise.
            if num_rows <= 0:
                continue

            # Copy the anchor rows so they are independent of the rows that
            # are overwritten below.
            blend_start_values = matrix[blend_index_start, :].copy()
            blend_end_values = matrix[blend_index_end, :].copy()
            blend_factors = np.linspace(1, 0, num_rows)[:, np.newaxis]
            matrix[blend_index_start:blend_index_end, :] = (
                blend_factors * blend_start_values +
                (1 - blend_factors) * blend_end_values)
        return matrix

    def _check_feature(self, feature_name):
        """Check if a feature is encoded, raise an exception if not.

        :params feature_name: name of the feature to check
        :raises Exception: if the requested feature is not encoded
        """
        if (feature_name not in self._len_phones
                or feature_name not in self._encoded_features):
            raise Exception(
                'Feature "{:s}" is not encoded'.format(feature_name))