Ejemplo n.º 1
0
    def extract_dur_features(self, orig_file, output_file):
        io_funcs = BinaryIOCollection()
        totalMat = io_funcs.file2matrix(orig_file)
        self.label_dimension = totalMat.shape[1] - 5  # collum num
        durMat = totalMat[:, -5:]

        io_funcs.array_to_binary_file(durMat, output_file)
Ejemplo n.º 2
0
    def extract_label_features(self, orig_file, output_file):
        io_funcs = BinaryIOCollection()
        totalMat = io_funcs.file2matrix(orig_file)
        self.label_dimension = totalMat.shape[1] - 5  # collum num
        labelMat = totalMat[:, :-5]
        #print orig_file, totalMat.shape, labelMat.shape

        io_funcs.array_to_binary_file(labelMat, output_file)
Ejemplo n.º 3
0
    def compose_predict_label(self, orig_label_file, gen_label_file,
                              predict_duration_file):
        io_funcs = BinaryIOCollection()
        origMat = io_funcs.file2matrix(orig_label_file)

        state_number = 5
        duration, in_frame_number = io_funcs.load_binary_file_frame(
            predict_duration_file, state_number)
        assert origMat.shape[0] == in_frame_number
        origMat[:, -5:] = duration
        origMat = origMat.astype(int)
        io_funcs.matrix2file(origMat, gen_label_file)
Ejemplo n.º 4
0
    def extract_acousitc_label_features(self, orig_file, output_file):
        io_funcs = BinaryIOCollection()
        totalMat = io_funcs.file2matrix(orig_file, numpy.int)
        labelMat = totalMat[:, :-5]
        durMat = totalMat[:, -5:]

        label_len = totalMat.shape[1] - 5

        self.label_dimension = label_len + 9

        phone_number = labelMat.shape[0]

        label_feature_matrix = numpy.empty((100000, self.label_dimension))

        state_number = 5
        label_feature_index = 0

        for phone_index in xrange(phone_number):
            label_vector = labelMat[phone_index, :]
            state_vector = durMat[phone_index, :]
            phone_duration = 0
            state_duration_bases = numpy.zeros((5, ), dtype=numpy.int)
            for state_index in xrange(state_number):
                state_duration_bases[state_index] = phone_duration
                phone_duration = phone_duration + state_vector[state_index]

            for state_index in xrange(state_number):
                frame_number = state_vector[state_index]
                current_block_binary_array = numpy.zeros(
                    (frame_number, self.label_dimension))
                state_duration_base = state_duration_bases[state_index]
                state_index_backward = state_number - state_index
                for i in xrange(frame_number):
                    current_block_binary_array[i, 0:label_len] = label_vector

                    current_block_binary_array[
                        i, label_len] = float(i + 1) / float(
                            frame_number)  ## fraction through state (forwards)
                    current_block_binary_array[
                        i, label_len + 1] = float(frame_number - i) / float(
                            frame_number
                        )  ## fraction through state (backwards)
                    current_block_binary_array[i, label_len + 2] = float(
                        frame_number)  ## length of state in frames
                    current_block_binary_array[i, label_len + 3] = float(
                        state_index)  ## state index (counting forwards)
                    current_block_binary_array[i, label_len + 4] = float(
                        state_index_backward
                    )  ## state index (counting backwards)

                    current_block_binary_array[i, label_len + 5] = float(
                        phone_duration)  ## length of phone in frames
                    current_block_binary_array[
                        i, label_len + 6] = float(frame_number) / float(
                            phone_duration
                        )  ## fraction of the phone made up by current state
                    current_block_binary_array[i, label_len + 7] = float(
                        phone_duration - i - state_duration_base) / float(
                            phone_duration
                        )  ## fraction through phone (forwards)
                    current_block_binary_array[
                        i, label_len +
                        8] = float(state_duration_base + i + 1) / float(
                            phone_duration
                        )  ## fraction through phone (backwards)

                label_feature_matrix[
                    label_feature_index:label_feature_index +
                    frame_number, ] = current_block_binary_array
                label_feature_index = label_feature_index + frame_number

        label_feature_matrix = label_feature_matrix[0:label_feature_index, ]

        #print label_feature_matrix.shape

        io_funcs.array_to_binary_file(label_feature_matrix, output_file)