예제 #1
0
    def _load_original_matrix(self):
        """Read the mgc matrix of the test file and locate its phone starts.

        The raw float32 content of the *.mgc file is reshaped into rows of
        ``MGCORD + 1`` values. The matching *.lab file is then used to map
        the second field of every phone entry (presumably its start time --
        confirm against ``Label.cur_phones_additions``) onto a row index of
        that matrix.

        :returns: the original mgc matrix
        :returns: starting row indices of the phones
        """
        base_name = self._filename

        raw_values = np.fromfile(MGC_DIR + base_name + '.mgc',
                                 dtype=np.float32)
        mgc_matrix = raw_values.reshape(-1, MGCORD + 1)

        label = Label(LABEL_DIR + base_name + '.lab')

        # Matrix rows per unit of label time; used to turn the label values
        # into row indices.
        rows_per_time_unit = mgc_matrix.shape[0] / label.last_phone_end

        phone_starts = []
        for phone in label.cur_phones_additions():
            phone_starts.append(int(round(phone[1] * rows_per_time_unit)))

        return mgc_matrix, phone_starts
예제 #2
0
def plot_phones(data_dir, max_occurrences=10):
    """Creates plots for the phones found in the label files.

    For every ``<name>.mgc`` file in ``<data_dir>/mgc/`` the matching
    ``<data_dir>/txt/<name>.txt`` and ``<data_dir>/labels/full/<name>.lab``
    files are loaded. Every phone entry of the label is handed to
    ``mark_phone`` until ``max_occurrences`` plots exist for that phone.
    The plots are written to ``OUT_DIR/<phone>/<phone>_NNN.png``; missing
    output directories are created on demand.

    Only files ending in ``.mgc`` are considered, and the utterance name is
    the file name without that extension, so names that contain the text
    "mgc" themselves are handled correctly.

    :params data_dir: the directory to search for label, mgc and text files
    :params max_occurrences: number of plots to make per single phone
    """
    mgc_dir = data_dir + '/mgc/'
    txt_dir = data_dir + '/txt/'
    label_dir = data_dir + '/labels/full/'

    # Sort the full paths (they share prefix and suffix) so the files are
    # processed in a stable order, then strip directory and extension.
    utterances = [
        os.path.splitext(os.path.basename(path))[0]
        for path in sorted(glob(mgc_dir + '*.mgc'))
    ]

    phones = Counter()
    for name in utterances:
        # Only the first line of the text file is needed, so the file does
        # not have to stay open while plotting.
        with open(txt_dir + name + '.txt', 'r') as txt_file:
            txt = txt_file.readline()

        label = Label(label_dir + name + '.lab')
        mgc = np.fromfile(mgc_dir + name + '.mgc',
                          dtype=np.float32).reshape(-1, MGCORD + 1)

        for p in label.cur_phones_additions():
            # p[0] is used as the phone name; p[1] and p[2] are passed on as
            # the phone's extent and p[3] carries the context string
            # (presumably start time, end time and full-context label --
            # confirm against Label.cur_phones_additions).
            phone = p[0]
            if phones[phone] >= max_occurrences:
                continue
            phones[phone] += 1

            phone_dir = OUT_DIR + phone
            os.makedirs(phone_dir, exist_ok=True)

            out_filename = phone_dir + '/' + phone + '_{:03d}.png'.format(
                phones[phone])
            mark_phone(mgc, txt, p[3].split('@')[0].split(' ')[-1], p[1],
                       p[2], out_filename)
            print('Plot saved as {:s}'.format(out_filename))
예제 #3
0
class BFCR:
    """Basis function coefficient representation of features.

    An instance decomposes a feature matrix, phone by phone, into
    coefficients of Legendre polynomials and can recompose the matrix from
    those coefficients. Additional functionality is e.g. plotting a single
    component or blending across the phone borders of a recomposed matrix.

    The phone segmentation is taken from a ``Label`` instance. Encoded
    tensors, the phone spans (begin/end row indices) and the original
    matrices are stored per feature name.
    """

    def __init__(self, label_file=None):
        """Initialises an instance.

        Note that a label file doesn't necessarily have to be given at
        creation of an instance, it can also be set later with the method
        load_label().

        :params label_file: the label on which the BFCR instance is based
        """
        self._encoded_features = {}
        self._len_phones = {}
        self._original_matrix = {}
        if label_file:
            self.label = Label(label_file)
            self.label_file = label_file
        else:
            self.label = None
            self.label_file = None

    def load_label(self, filename):
        """Loads a label if not already loaded.

        :params filename: filename of the label
        :raises Exception: if a label is already loaded
        """
        if self.label is not None:
            raise Exception('Label is already loaded')

        self.label = Label(filename)
        self.label_file = filename

    def encode_feature(self,
                       feature_matrix,
                       feature_name,
                       num_bases=NUM_BASES):
        """Encodes a given feature.

        This method decomposes every column of the given matrix, separately
        for each phone, into coefficients of Legendre basis functions. The
        result is stored as a tensor of shape
        ``(num_phones, num_components, num_bases)`` under ``feature_name``,
        together with the phone spans and the original matrix.

        :params feature_matrix: matrix to encode (rows are frames, columns
            are components)
        :params feature_name: name of the encoded feature
        :params num_bases: number of basis functions to encode
        :raises Exception: if no label is loaded
        """
        if self.label is None:
            raise Exception('No label file was loaded')

        num_components = feature_matrix.shape[1]
        # Matrix rows per unit of label time.
        step_size = feature_matrix.shape[0] / self.label.last_phone_end
        tensor = np.empty((self.label.num_phones, num_components, num_bases),
                          dtype=np.float32)
        phone_spans = []
        self._len_phones[feature_name] = phone_spans
        self._original_matrix[feature_name] = feature_matrix

        for i, phone in enumerate(self.label.cur_phones_additions()):
            # phone[1] / phone[2] are used as begin and end of the phone in
            # label time and converted to row indices.
            phone_begin_index = int(round(phone[1] * step_size))
            phone_end_index = int(round(phone[2] * step_size))
            phone_spans.append((phone_begin_index, phone_end_index))

            # All components of one phone share the same support on [-1, 1].
            num_frames = len(
                feature_matrix[phone_begin_index:phone_end_index])
            x_values = np.linspace(-1, 1, num_frames)

            for component in range(num_components):
                signal_snippet = feature_matrix[
                    phone_begin_index:phone_end_index, component]
                # Degree num_bases - 1 yields num_bases coefficients.
                tensor[i, component, :] = np.polynomial.legendre.legfit(
                    x_values, signal_snippet, num_bases - 1)

        self._encoded_features[feature_name] = tensor

    def decode_feature(self, feature_name, blending_time=None):
        """Decodes a given feature.

        This method recomposes the matrix of basis function coefficients
        into a regular feature matrix. If blending_time is set, the returned
        matrix is blended over each phone border for the given amount of
        milliseconds in both directions. By default no blending is done.

        :params feature_name: name of the feature to decode
        :params blending_time: time to blend over the phone borders
        :returns: the recomposed matrix for the given feature
        :raises Exception: if the requested feature is not encoded
        """
        self._check_feature(feature_name)

        phone_spans = self._len_phones[feature_name]
        encoded = self._encoded_features[feature_name]

        # The matrix has to reach up to the largest phone end index.
        utterance_length = max((end for _, end in phone_spans), default=0)
        reconstructed_matrix = np.zeros((utterance_length, encoded.shape[1]),
                                        dtype=np.float32)

        for phone, (cur_phone_start, cur_phone_end) in enumerate(phone_spans):
            resample_size = max(cur_phone_end - cur_phone_start, 0)
            x_values = np.linspace(-1, 1, resample_size)
            coeff = encoded[phone]
            signal_snippet = np.zeros((resample_size, coeff.shape[0]),
                                      dtype=np.float32)

            for i in range(coeff.shape[0]):
                signal_snippet[:, i] = np.polynomial.legendre.legval(
                    x_values, coeff[i, :])

            reconstructed_matrix[
                cur_phone_start:cur_phone_end] = signal_snippet

        if blending_time:
            reconstructed_matrix = self._blend_borders(feature_name,
                                                       reconstructed_matrix,
                                                       blending_time)

        return reconstructed_matrix

    def save_to_file(self, filename):
        """Saves the bfcr instance into a binary (pickle) file.

        :params filename: name of the file where to save the BFCR instance
        """
        with open(filename, 'wb') as f:
            pickle.dump(self, f)

    def read_from_file(self, filename):
        """Loads a previously saved bfcr instance into this instance.

        The label, label file name, encoded features, phone spans and
        original matrices of this instance are replaced by the stored ones.

        :params filename: name of the file to load the bfcr instance
        """
        # NOTE(security): unpickling can execute arbitrary code. Only load
        # files that were written by save_to_file() from a trusted source.
        with open(filename, 'rb') as f:
            restored = pickle.load(f)

        self.label = restored.label
        self._encoded_features = restored._encoded_features
        self._len_phones = restored._len_phones
        self._original_matrix = restored._original_matrix
        self.label_file = restored.label_file

    def plot_component(self, feature_name, filename, component_num=0):
        """Plots a component of a given feature.

        This method creates a plot of one component of the original matrix
        and of the matrix reconstructed from the encoded feature. By default
        it plots only the first component of the matrix. The directory of
        ``filename`` is created if it does not exist yet.

        :params feature_name: name of the feature to plot
        :params filename: name of the file where the plot gets saved
        :params component_num: number of the component to plot
        """
        # dirname() is empty for a bare file name; makedirs('') would fail.
        path = os.path.dirname(filename)
        if path:
            os.makedirs(path, exist_ok=True)

        original = self._original_matrix[feature_name]
        xmax = original.shape[0]
        fig, ax = plt.subplots(1, 1, figsize=(18, 6))
        x = np.linspace(0, xmax, xmax)
        ax.plot(x, original[:, component_num], label='Original')
        ax.plot(x,
                self.decode_feature(feature_name)[:, component_num],
                '.',
                label='Reconstructed')
        ax.set_xlim(0, xmax)
        ax.legend()
        fig.savefig(filename)
        # Close exactly the figure created here, not whichever is current.
        plt.close(fig)

    def original_matrix(self, feature_name):
        """Getter for the original matrix.

        :params feature_name: the feature of which to get the original matrix
        :returns: the original matrix of the given feature
        """
        return self._original_matrix[feature_name]

    def phone_coefficients(self, feature_name):
        """Gets the basis function coefficients for all the phones.

        The encoded tensor is flattened to one row per phone. The row length
        follows from the stored tensor, so features encoded with any number
        of components or basis functions are supported.

        :params feature_name: name of the encoded feature
        :returns: matrix with one row of coefficients per phone
        :raises Exception: if the requested feature is not encoded
        """
        self._check_feature(feature_name)
        encoded = self._encoded_features[feature_name]
        return np.reshape(encoded, (encoded.shape[0], -1))

    @property
    def encoded_features(self):
        """Get all encoded features."""
        return self._encoded_features

    @encoded_features.setter
    def encoded_features(self, value):
        """Sets encoded features from a dict of feature name to tensor.

        The given entries are added to (or replace) the stored encoded
        features. The phone spans of every assigned feature are derived from
        the label using ``SAMPFREQ / FRAMESHIFT`` as the number of frames
        per unit of label time.

        :params value: dict mapping feature names to encoded tensors
        :raises Exception: if no label is loaded
        :raises TypeError: if value is not a dict
        """
        if self.label is None:
            raise Exception(
                'No label file was loaded, labels are needed for assigning phone lenght'
            )

        if not isinstance(value, dict):
            raise TypeError('encoded_features has to be assigned to a dict')

        # The spans depend only on the label, so compute them once.
        step_size = SAMPFREQ / FRAMESHIFT
        phone_spans = [(int(round(phone[1] * step_size)),
                        int(round(phone[2] * step_size)))
                       for phone in self.label.cur_phones_additions()]

        for name, tensor in value.items():
            self._encoded_features[name] = tensor
            # Every feature gets its own list so they can change separately.
            self._len_phones[name] = list(phone_spans)

    def _blend_borders(self, feature_name, matrix, blending_time=25):
        """Blends over the borders of one phone to the next.

        This helper method blends linearly from the values shortly before a
        phone border to the values shortly after it. Only interior borders
        are blended, and only if the blending window fits between the
        neighbouring borders. The default value of 25ms was found to give
        the best results. The given matrix is modified in place.

        :params feature_name: name of the feature matrix to do the blending
        :params matrix: matrix to blend
        :params blending_time: time to blend over each border in milliseconds
        :returns: the given matrix with blended phone borders
        """
        # Convert milliseconds to the unit of the label times (presumably
        # seconds -- confirm against Label).
        blending_time = 1 / 1000 * blending_time
        phone_borders = [
            phone[2] for phone in self.label.cur_phones_additions()
        ]

        # With fewer than three borders there is no interior border.
        if len(phone_borders) < 3:
            return matrix

        last_time = phone_borders[-1]
        last_index = self._len_phones[feature_name][-1][1]
        # Label time covered by one matrix row.
        step = last_time / last_index

        for i in range(1, len(phone_borders) - 1):
            start = phone_borders[i] - blending_time
            end = phone_borders[i] + blending_time

            # Skip borders whose window would reach into another phone.
            if start < phone_borders[i - 1] or end > phone_borders[i + 1]:
                continue

            blend_index_start = int(round(start / step))
            blend_index_end = int(round(end / step)) - 1

            blend_start_values = matrix[blend_index_start, :]
            blend_end_values = matrix[blend_index_end, :]
            blend_factors = np.linspace(1, 0,
                                        blend_index_end - blend_index_start)

            for offset, blend_factor in enumerate(blend_factors):
                matrix[blend_index_start + offset, :] = (
                    blend_factor * blend_start_values +
                    (1 - blend_factor) * blend_end_values)

        return matrix

    def _check_feature(self, feature_name):
        """Checks if a feature is encoded, raises an exception if not.

        :params feature_name: name of the feature to check
        :raises Exception: if the requested feature is not encoded
        """
        if (feature_name not in self._len_phones
                or feature_name not in self._encoded_features):
            raise Exception(
                'Feature "{:s}" is not encoded'.format(feature_name))