Exemple #1
0
    def run_vad(self):
        """
        Run voice activity detection on the stored MFCCs
        and keep the results on this object.

        The first MFCC row (``self.__mfcc[0]``) is handed to a
        :class:`VAD` object. The mask it returns is stored,
        together with the indices at which the mask is true
        and the speech and nonspeech intervals derived from it.
        Each interval is a ``(first, last)`` pair holding the first
        and the last frame index of a contiguous run.
        """
        def _intervals(indices):
            """
            Turn frame indices into ``(first, last)`` pairs,
            one pair for each run of consecutive indices.

            :param indices: the frame indices
            :type  indices: :class:`numpy.ndarray` (1D)
            :rtype: list of tuples
            """
            if len(indices) == 0:
                return []
            # a new run starts right after every pair of neighbours
            # whose difference is not exactly one
            breaks = numpy.nonzero(numpy.diff(indices) != 1)[0] + 1
            return [
                (chunk[0], chunk[-1])
                for chunk in numpy.split(indices, breaks)
            ]

        self.log(u"Creating VAD object")
        detector = VAD(rconf=self.rconf, logger=self.logger)

        self.log(u"Running VAD...")
        self.__mfcc_mask = detector.run_vad(self.__mfcc[0])
        self.__mfcc_mask_map = numpy.nonzero(self.__mfcc_mask)[0]
        self.log(u"Running VAD... done")

        self.log(u"Storing speech and nonspeech intervals...")
        # the indices of the speech frames are already available in the map
        self.__speech_intervals = _intervals(self.__mfcc_mask_map)
        # the indices of the nonspeech frames must be computed from the inverted mask
        self.__nonspeech_intervals = _intervals(
            numpy.nonzero(~self.__mfcc_mask)[0]
        )
        self.log(u"Storing speech and nonspeech intervals... done")
Exemple #2
0
    def run_vad(self):
        """
        Detect speech and nonspeech frames and remember the outcome.

        A :class:`VAD` object is run on ``self.__mfcc[0]``;
        the mask it returns is stored internally, along with
        the indices where the mask is true and the lists of
        speech and nonspeech intervals, each interval being
        the ``(first, last)`` frame indices of a contiguous run.
        """
        def _group_consecutive(values):
            """
            Split ``values`` into a list of arrays,
            each one holding a run of consecutive indices.

            :param values: the data array
            :type  values: :class:`numpy.ndarray` (1D)
            :rtype: list of :class:`numpy.ndarray` (1D)
            """
            if len(values) < 1:
                return []
            gaps = numpy.diff(values) != 1
            # position i in gaps compares elements i and i+1,
            # hence the cut falls at i+1
            cut_points = numpy.where(gaps)[0] + 1
            return numpy.split(values, cut_points)

        def _endpoints(groups):
            """
            Return the ``(first, last)`` elements of each group.

            :param groups: the runs, as returned by ``_group_consecutive``
            :type  groups: list of :class:`numpy.ndarray` (1D)
            :rtype: list of tuples
            """
            return [(group[0], group[-1]) for group in groups]

        self.log(u"Creating VAD object")
        detector = VAD(rconf=self.rconf, logger=self.logger)

        self.log(u"Running VAD...")
        self.__mfcc_mask = detector.run_vad(self.__mfcc[0])
        self.__mfcc_mask_map = numpy.where(self.__mfcc_mask)[0]
        self.log(u"Running VAD... done")

        self.log(u"Storing speech and nonspeech intervals...")
        # speech: reuse the indices stored in the mask map
        speech_groups = _group_consecutive(self.__mfcc_mask_map)
        self.__speech_intervals = _endpoints(speech_groups)
        # nonspeech: the indices come from the negated mask
        nonspeech_indices = numpy.where(~self.__mfcc_mask)[0]
        nonspeech_groups = _group_consecutive(nonspeech_indices)
        self.__nonspeech_intervals = _endpoints(nonspeech_groups)
        self.log(u"Storing speech and nonspeech intervals... done")
Exemple #3
0
    def run_vad(
        self,
        log_energy_threshold=None,
        min_nonspeech_length=None,
        extend_before=None,
        extend_after=None
    ):
        """
        Run voice activity detection on the stored MFCCs
        and keep the results on this object.

        The first MFCC row (``self.__mfcc[0]``) is passed as the wave energy
        to a :class:`VAD` object, together with the four parameters below.
        The mask it returns is stored, along with the indices at which
        the mask is true and the speech and nonspeech intervals,
        each one being the ``(first, last)`` frame indices of a contiguous run.

        Any of the four parameters can be ``None``:
        the corresponding RuntimeConfiguration value
        is applied in that case.

        :param float log_energy_threshold: the minimum log energy threshold to consider a frame as speech
        :param int min_nonspeech_length: the minimum length, in frames, of a nonspeech interval
        :param int extend_before: extend each speech interval by this number of frames to the left (before)
        :param int extend_after: extend each speech interval by this number of frames to the right (after)
        """
        def _intervals(indices):
            """
            Turn frame indices into ``(first, last)`` pairs,
            one pair for each run of consecutive indices.

            :param indices: the frame indices
            :type  indices: :class:`numpy.ndarray` (1D)
            :rtype: list of tuples
            """
            if len(indices) == 0:
                return []
            # a new run starts right after every pair of neighbours
            # whose difference is not exactly one
            breaks = numpy.nonzero(numpy.diff(indices) != 1)[0] + 1
            return [
                (chunk[0], chunk[-1])
                for chunk in numpy.split(indices, breaks)
            ]

        self.log(u"Creating VAD object")
        detector = VAD(rconf=self.rconf, logger=self.logger)

        self.log(u"Running VAD...")
        self.__mfcc_mask = detector.run_vad(
            wave_energy=self.__mfcc[0],
            log_energy_threshold=log_energy_threshold,
            min_nonspeech_length=min_nonspeech_length,
            extend_before=extend_before,
            extend_after=extend_after
        )
        self.__mfcc_mask_map = numpy.nonzero(self.__mfcc_mask)[0]
        self.log(u"Running VAD... done")

        self.log(u"Storing speech and nonspeech intervals...")
        # the indices of the speech frames are already available in the map
        self.__speech_intervals = _intervals(self.__mfcc_mask_map)
        # the indices of the nonspeech frames must be computed from the inverted mask
        self.__nonspeech_intervals = _intervals(
            numpy.nonzero(~self.__mfcc_mask)[0]
        )
        self.log(u"Storing speech and nonspeech intervals... done")
Exemple #4
0
    def run_vad(self,
                log_energy_threshold=None,
                min_nonspeech_length=None,
                extend_before=None,
                extend_after=None):
        """
        Detect speech and nonspeech frames and remember the outcome.

        A :class:`VAD` object is run with ``self.__mfcc[0]`` as the wave
        energy and with the four parameters below. The mask it returns
        is stored internally, along with the indices where the mask is
        true and the lists of speech and nonspeech intervals, each
        interval being the ``(first, last)`` frame indices of a
        contiguous run.

        Each of the four parameters may be left to ``None``,
        in which case the corresponding RuntimeConfiguration value
        is applied.

        :param float log_energy_threshold: the minimum log energy threshold to consider a frame as speech
        :param int min_nonspeech_length: the minimum length, in frames, of a nonspeech interval
        :param int extend_before: extend each speech interval by this number of frames to the left (before)
        :param int extend_after: extend each speech interval by this number of frames to the right (after)
        """
        def _group_consecutive(values):
            """
            Split ``values`` into a list of arrays,
            each one holding a run of consecutive indices.

            :param values: the data array
            :type  values: :class:`numpy.ndarray` (1D)
            :rtype: list of :class:`numpy.ndarray` (1D)
            """
            if len(values) < 1:
                return []
            gaps = numpy.diff(values) != 1
            # position i in gaps compares elements i and i+1,
            # hence the cut falls at i+1
            cut_points = numpy.where(gaps)[0] + 1
            return numpy.split(values, cut_points)

        def _endpoints(groups):
            """
            Return the ``(first, last)`` elements of each group.

            :param groups: the runs, as returned by ``_group_consecutive``
            :type  groups: list of :class:`numpy.ndarray` (1D)
            :rtype: list of tuples
            """
            return [(group[0], group[-1]) for group in groups]

        self.log(u"Creating VAD object")
        detector = VAD(rconf=self.rconf, logger=self.logger)

        self.log(u"Running VAD...")
        self.__mfcc_mask = detector.run_vad(
            wave_energy=self.__mfcc[0],
            log_energy_threshold=log_energy_threshold,
            min_nonspeech_length=min_nonspeech_length,
            extend_before=extend_before,
            extend_after=extend_after)
        self.__mfcc_mask_map = numpy.where(self.__mfcc_mask)[0]
        self.log(u"Running VAD... done")

        self.log(u"Storing speech and nonspeech intervals...")
        # speech: reuse the indices stored in the mask map
        speech_groups = _group_consecutive(self.__mfcc_mask_map)
        self.__speech_intervals = _endpoints(speech_groups)
        # nonspeech: the indices come from the negated mask
        nonspeech_indices = numpy.where(~self.__mfcc_mask)[0]
        nonspeech_groups = _group_consecutive(nonspeech_indices)
        self.__nonspeech_intervals = _endpoints(nonspeech_groups)
        self.log(u"Storing speech and nonspeech intervals... done")