예제 #1
0
    def _append_trivial_tree(self, text_file, sync_root):
        """
        Attach a trivial tree to ``sync_root``: one HEAD,
        one sync map fragment per element of ``text_file``,
        and one TAIL.

        Meant for the two degenerate cases: ``text_file`` holds
        exactly one element, or ``sync_root.value`` is a zero-length
        interval (``sync_root.value.begin == sync_root.value.end``).

        :param text_file: the text file whose elements become fragments
        :param sync_root: the node providing the ``value`` interval and
                          receiving the resulting fragment list
        """
        span = sync_root.value
        fragment_count = len(text_file)
        # in both branches the list holds 3 + fragment_count time values
        if fragment_count != 1:
            # per the docstring, this branch is reached when
            # span.begin == span.end, so every time value is span.begin
            boundaries = [span.begin] * (3 + fragment_count)
        else:
            # single element: two values at the begin, two at the end
            boundaries = [span.begin, span.begin, span.end, span.end]
        builder = AdjustBoundaryAlgorithm(rconf=self.rconf, logger=self.logger)
        builder.intervals_to_fragment_list(
            text_file=text_file,
            time_values=boundaries
        )
        builder.append_fragment_list_to_sync_root(sync_root=sync_root)
예제 #2
0
    def _adjust_boundaries(self, boundary_indices, text_file, real_wave_mfcc, sync_root, force_aba_auto=False, leaf_level=False):
        """
        Adjust boundaries as requested by the user and append
        the resulting fragment list to ``sync_root``.

        Nothing is returned by this method: the outcome is attached
        to ``sync_root`` through
        ``AdjustBoundaryAlgorithm.append_fragment_list_to_sync_root``.

        :param boundary_indices: the boundary indices in the all_mfcc of
                                 ``real_wave_mfcc``, starting with the
                                 (HEAD, first fragment) boundary and ending
                                 with the (last fragment, TAIL) boundary
        :param text_file: the text file, forwarded to the algorithm
        :param real_wave_mfcc: the MFCC object of the real wave,
                               forwarded to the algorithm
        :param sync_root: the node receiving the fragment list
        :param force_aba_auto: if true, override the configured algorithm
                               with ``AdjustBoundaryAlgorithm.AUTO``
        :param leaf_level: forwarded as ``allow_arbitrary_shift``
        """
        settings = self.task.configuration.aba_parameters()
        if force_aba_auto:
            self.log(u"Forced running algorithm: 'auto'")
            # only the algorithm entry is replaced: the other aba settings
            # (nonspeech and nozero) remain as specified by the user
            settings["algorithm"] = (AdjustBoundaryAlgorithm.AUTO, [])
        self.log([u"ABA parameters: %s", settings])
        adjuster = AdjustBoundaryAlgorithm(rconf=self.rconf, logger=self.logger)
        adjust_arguments = {
            "aba_parameters": settings,
            "real_wave_mfcc": real_wave_mfcc,
            "boundary_indices": boundary_indices,
            "text_file": text_file,
            "allow_arbitrary_shift": leaf_level,
        }
        adjuster.adjust(**adjust_arguments)
        adjuster.append_fragment_list_to_sync_root(sync_root=sync_root)
예제 #3
0
    def _adjust_boundaries(self, text_map, real_wave_full_mfcc,
                           real_wave_length):
        """
        Adjust the boundaries between consecutive fragments.

        Return a pair:

        1. a success bool flag
        2. the computed interval map, that is,
           a list of triples ``[start_time, end_time, fragment_id]``

        """
        self._log("Adjusting boundaries")
        algo = self.task.configuration.adjust_boundary_algorithm
        value = None
        if algo is None:
            self._log("No adjust boundary algorithm specified: returning")
            return (True, text_map)
        elif algo == AdjustBoundaryAlgorithm.AUTO:
            self._log("Requested adjust boundary algorithm AUTO: returning")
            return (True, text_map)
        elif algo == AdjustBoundaryAlgorithm.AFTERCURRENT:
            value = self.task.configuration.adjust_boundary_aftercurrent_value
        elif algo == AdjustBoundaryAlgorithm.BEFORENEXT:
            value = self.task.configuration.adjust_boundary_beforenext_value
        elif algo == AdjustBoundaryAlgorithm.OFFSET:
            value = self.task.configuration.adjust_boundary_offset_value
        elif algo == AdjustBoundaryAlgorithm.PERCENT:
            value = self.task.configuration.adjust_boundary_percent_value
        elif algo == AdjustBoundaryAlgorithm.RATE:
            value = self.task.configuration.adjust_boundary_rate_value
        elif algo == AdjustBoundaryAlgorithm.RATEAGGRESSIVE:
            value = self.task.configuration.adjust_boundary_rate_value
        self._log(["Requested algo %s and value %s", algo, value])

        try:
            self._log("Running VAD...")
            vad = VAD(logger=self.logger)
            vad.wave_mfcc = real_wave_full_mfcc
            vad.wave_len = real_wave_length
            vad.compute_vad()
            self._log("Running VAD... done")
        except Exception as e:
            self._log("Adjusting boundaries: failed")
            self._log(["Message: %s", str(e)])
            return (False, None)

        self._log("Creating AdjustBoundaryAlgorithm object")
        adjust_boundary = AdjustBoundaryAlgorithm(algorithm=algo,
                                                  text_map=text_map,
                                                  speech=vad.speech,
                                                  nonspeech=vad.nonspeech,
                                                  value=value,
                                                  logger=self.logger)
        self._log("Adjusting boundaries...")
        adjusted_map = adjust_boundary.adjust()
        self._log("Adjusting boundaries... done")
        self._log("Adjusting boundaries: succeeded")
        return (True, adjusted_map)
 def run_aba(self, algorithm, value, expected):
     """
     Run ``AdjustBoundaryAlgorithm`` on the fixture maps and check
     the outcome.

     The adjusted map is compared against ``self.TEXT_MAP`` with
     ``self.maps_are_equal``, and the comparison result must
     equal ``expected``.

     :param algorithm: the algorithm to be run
     :param value: the value passed to the algorithm
     :param expected: the expected result of ``self.maps_are_equal``
     """
     adjuster = AdjustBoundaryAlgorithm(
         algorithm=algorithm,
         text_map=self.TEXT_MAP,
         speech=self.SPEECH,
         nonspeech=self.NONSPEECH,
         value=value
     )
     result_map = adjuster.adjust()
     comparison = self.maps_are_equal(result_map, self.TEXT_MAP)
     self.assertEqual(comparison, expected)
예제 #5
0
    def _adjust_boundaries(self,
                           real_wave_mfcc,
                           text_file,
                           boundary_indices,
                           adjust_boundaries=True):
        """
        Adjust boundaries as requested by the user.

        Return the computed time map, that is,
        a list of pairs ``[start_time, end_time]``,
        of length equal to number of fragments + 2,
        where the two extra elements are for
        the HEAD (first) and TAIL (last).

        :param real_wave_mfcc: the MFCC object of the real wave,
                               forwarded to the algorithm
        :param text_file: the text file, forwarded to the algorithm
        :param boundary_indices: the boundary indices in the all_mfcc of
                                 ``real_wave_mfcc``, starting with the
                                 (HEAD, first fragment) boundary and ending
                                 with the (last fragment, TAIL) boundary
        :param adjust_boundaries: if false, ignore the task configuration
                                  and run ``AdjustBoundaryAlgorithm.AUTO``
                                  with no parameters
        :return: the value of ``AdjustBoundaryAlgorithm.to_time_map()``
        """
        if not adjust_boundaries:
            self.log(u"Forced running algorithm: 'auto'")
            algorithm, parameters = AdjustBoundaryAlgorithm.AUTO, None
        else:
            algorithm, parameters = self.task.configuration.aba_parameters()
            self.log([u"Running algorithm: '%s'", algorithm])
        adjuster = AdjustBoundaryAlgorithm(
            algorithm=algorithm,
            parameters=parameters,
            real_wave_mfcc=real_wave_mfcc,
            boundary_indices=boundary_indices,
            text_file=text_file,
            rconf=self.rconf,
            logger=self.logger
        )
        return adjuster.to_time_map()
예제 #6
0
    def _append_trivial_tree(self, text_file, sync_root):
        """
        Build and append the trivial tree for ``sync_root``:
        a HEAD, one sync map fragment for each element
        of ``text_file``, and a TAIL.

        The callers use this function when ``text_file`` has
        only one element, or when ``sync_root.value`` has zero length
        (i.e., ``sync_root.value.begin == sync_root.value.end``).

        :param text_file: the text file whose elements become fragments
        :param sync_root: the node providing the ``value`` interval and
                          receiving the resulting fragment list
        """
        bounds = sync_root.value
        # one element: begin, begin, end, end;
        # otherwise begin == end is expected (see docstring),
        # hence begin is repeated for all the 3 + len(text_file) values
        time_values = (
            [bounds.begin, bounds.begin, bounds.end, bounds.end]
            if len(text_file) == 1
            else [bounds.begin] * (3 + len(text_file))
        )
        algorithm = AdjustBoundaryAlgorithm(rconf=self.rconf, logger=self.logger)
        algorithm.intervals_to_fragment_list(text_file=text_file, time_values=time_values)
        algorithm.append_fragment_list_to_sync_root(sync_root=sync_root)
예제 #7
0
    def _adjust_boundaries(self,
                           boundary_indices,
                           text_file,
                           real_wave_mfcc,
                           sync_root,
                           force_aba_auto=False,
                           leaf_level=False):
        """
        Run the boundary adjustment requested by the user,
        then append the computed fragment list to ``sync_root``.

        No value is returned: the result is stored into ``sync_root``
        by ``AdjustBoundaryAlgorithm.append_fragment_list_to_sync_root``.

        :param boundary_indices: the boundary indices in the all_mfcc of
                                 ``real_wave_mfcc``, starting with the
                                 (HEAD, first fragment) boundary and ending
                                 with the (last fragment, TAIL) boundary
        :param text_file: the text file, forwarded to the algorithm
        :param real_wave_mfcc: the MFCC object of the real wave,
                               forwarded to the algorithm
        :param sync_root: the node receiving the fragment list
        :param force_aba_auto: if true, replace the configured algorithm
                               with ``AdjustBoundaryAlgorithm.AUTO``
        :param leaf_level: passed to the algorithm
                           as ``allow_arbitrary_shift``
        """
        parameters = self.task.configuration.aba_parameters()
        if force_aba_auto:
            self.log(u"Forced running algorithm: 'auto'")
            # the nonspeech and nozero settings are left untouched,
            # only the algorithm choice is overridden
            forced_algorithm = (AdjustBoundaryAlgorithm.AUTO, [])
            parameters["algorithm"] = forced_algorithm
        self.log([u"ABA parameters: %s", parameters])
        algorithm = AdjustBoundaryAlgorithm(
            rconf=self.rconf,
            logger=self.logger
        )
        algorithm.adjust(
            aba_parameters=parameters,
            real_wave_mfcc=real_wave_mfcc,
            boundary_indices=boundary_indices,
            text_file=text_file,
            allow_arbitrary_shift=leaf_level
        )
        algorithm.append_fragment_list_to_sync_root(sync_root=sync_root)
예제 #8
0
    def _adjust_boundaries(
            self,
            text_map,
            real_wave_full_mfcc,
            real_wave_length
        ):
        """
        Adjust the boundaries between consecutive fragments.

        Return a pair:

        1. a success bool flag
        2. the computed interval map, that is,
           a list of triples ``[start_time, end_time, fragment_id]``

        """
        self._log("Adjusting boundaries")
        algo = self.task.configuration.adjust_boundary_algorithm
        value = None
        if algo is None:
            self._log("No adjust boundary algorithm specified: returning")
            return (True, text_map)
        elif algo == AdjustBoundaryAlgorithm.AUTO:
            self._log("Requested adjust boundary algorithm AUTO: returning")
            return (True, text_map)
        elif algo == AdjustBoundaryAlgorithm.AFTERCURRENT:
            value = self.task.configuration.adjust_boundary_aftercurrent_value
        elif algo == AdjustBoundaryAlgorithm.BEFORENEXT:
            value = self.task.configuration.adjust_boundary_beforenext_value
        elif algo == AdjustBoundaryAlgorithm.OFFSET:
            value = self.task.configuration.adjust_boundary_offset_value
        elif algo == AdjustBoundaryAlgorithm.PERCENT:
            value = self.task.configuration.adjust_boundary_percent_value
        elif algo == AdjustBoundaryAlgorithm.RATE:
            value = self.task.configuration.adjust_boundary_rate_value
        elif algo == AdjustBoundaryAlgorithm.RATEAGGRESSIVE:
            value = self.task.configuration.adjust_boundary_rate_value
        self._log(["Requested algo %s and value %s", algo, value])

        try:
            self._log("Running VAD...")
            vad = VAD(logger=self.logger)
            vad.wave_mfcc = real_wave_full_mfcc
            vad.wave_len = real_wave_length
            vad.compute_vad()
            self._log("Running VAD... done")
        except Exception as e:
            self._log("Adjusting boundaries: failed")
            self._log(["Message: %s", str(e)])
            return (False, None)

        self._log("Creating AdjustBoundaryAlgorithm object")
        adjust_boundary = AdjustBoundaryAlgorithm(
            algorithm=algo,
            text_map=text_map,
            speech=vad.speech,
            nonspeech=vad.nonspeech,
            value=value,
            logger=self.logger
        )
        self._log("Adjusting boundaries...")
        adjusted_map = adjust_boundary.adjust()
        self._log("Adjusting boundaries... done")
        self._log("Adjusting boundaries: succeeded")
        return (True, adjusted_map)