class TestSD(unittest.TestCase):
    """
    Tests for the ``SD`` head/tail detector, run against a mono 16 kHz
    WAVE fixture and the plain-text sonnet fixture.
    """

    AUDIO_FILE = gf.absolute_path("res/audioformats/mono.16000.wav", __file__)
    TEXT_FILE = gf.absolute_path("res/inputtext/sonnet_plain.txt", __file__)

    def load(self):
        """
        Build an ``SD`` object from the audio fixture and the
        English plain-text fixture.
        """
        mfcc = AudioFileMFCC(self.AUDIO_FILE)
        text = TextFile(self.TEXT_FILE, file_format=TextFileFormat.PLAIN)
        text.set_language(Language.ENG)
        return SD(mfcc, text)

    def test_create_sd(self):
        # construction alone must not raise
        self.load()

    def test_detect_interval(self):
        interval = self.load().detect_interval()
        begin, end = interval
        self.assertNotEqual(begin, 0.0)
        self.assertNotEqual(end, 0.0)

    def test_detect_head(self):
        detected = self.load().detect_head()
        self.assertNotEqual(detected, 0.0)

    def test_detect_head_min_max(self):
        detected = self.load().detect_head(
            min_head_length=2.0,
            max_head_length=10.0
        )
        self.assertNotEqual(detected, 0.0)
        # the result must stay within the requested bounds
        self.assertGreaterEqual(detected, 2.0)
        self.assertLessEqual(detected, 10.0)

    def test_detect_tail(self):
        detected = self.load().detect_tail()
        self.assertNotEqual(detected, 0.0)

    def test_detect_tail_min_max(self):
        detected = self.load().detect_tail(
            min_tail_length=2.0,
            max_tail_length=10.0
        )
        self.assertNotEqual(detected, 0.0)
        # the result must stay within the requested bounds
        self.assertGreaterEqual(detected, 2.0)
        self.assertLessEqual(detected, 10.0)

    def test_detect_bad(self):
        # each length argument must reject a non-numeric value (TypeError)
        # and a negative value (ValueError), in this order
        argument_names = (
            "min_head_length",
            "max_head_length",
            "min_tail_length",
            "max_tail_length",
        )
        bad_values = (
            ("foo", TypeError),
            (-10.0, ValueError),
        )
        sd = self.load()
        for argument_name in argument_names:
            for bad_value, expected_exception in bad_values:
                with self.assertRaises(expected_exception):
                    begin, end = sd.detect_interval(**{argument_name: bad_value})
def execute(self, config_string, audio_path, text_path):
    """
    Execute a single task and check that its sync map
    can be written to a temporary file.

    The task is built from ``config_string``; the audio and text paths
    are resolved relative to this source file. The test passes when
    the sync map is written to the temporary path and is not empty.

    :param string config_string: the task configuration string
    :param string audio_path: the path of the audio file, relative to this file
    :param string text_path: the path of the text file, relative to this file
    """
    handler, tmp_path = gf.tmp_file()
    try:
        task = Task(config_string)
        task.audio_file_path_absolute = gf.absolute_path(audio_path, __file__)
        task.text_file_path_absolute = gf.absolute_path(text_path, __file__)
        executor = ExecuteTask(task)
        executor.execute()
        task.sync_map_file_path_absolute = tmp_path
        result_path = task.output_sync_map_file()
        self.assertIsNotNone(result_path)
        self.assertEqual(result_path, tmp_path)
        self.assertGreater(len(gf.read_file_bytes(result_path)), 0)
    finally:
        # always remove the temporary file, even when the task
        # or one of the assertions above fails
        gf.delete_file(handler, tmp_path)
def check_ffmpeg(cls):
    """
    Check whether ``ffmpeg`` can be called.

    A bundled MP3 file is converted to a temporary WAVE file;
    the check succeeds when the converter returns a truthy result.

    Return ``True`` on failure and ``False`` on success.

    :rtype: bool
    """
    try:
        from aeneas.ffmpegwrapper import FFMPEGWrapper
        input_file_path = gf.absolute_path(u"tools/res/audio.mp3", __file__)
        handler, output_file_path = gf.tmp_file(suffix=u".wav")
        try:
            converter = FFMPEGWrapper()
            result = converter.convert(input_file_path, output_file_path)
        finally:
            # remove the temporary file even if the conversion raises
            gf.delete_file(handler, output_file_path)
        if result:
            gf.print_success(u"ffmpeg OK")
            return False
    except Exception:
        # any ordinary error means the check failed and is reported below;
        # KeyboardInterrupt and SystemExit are deliberately not swallowed
        pass
    gf.print_error(u"ffmpeg ERROR")
    gf.print_info(u" Please make sure you have ffmpeg installed correctly")
    gf.print_info(u" and that its path is in your PATH environment variable")
    return True
class PlotElement(Loggable):
    """
    A generic element of a Plot.

    :param rconf: a runtime configuration
    :type  rconf: :class:`~aeneas.runtimeconfiguration.RuntimeConfiguration`
    :param logger: the logger object
    :type  logger: :class:`~aeneas.logger.Logger`
    """

    FONT_PATH = gf.absolute_path("res/LiberationMono-Regular.ttf", __file__)
    """ Path of the font to be used for drawing """

    TICK_WIDTH = 2
    """ A tick will be drawn with (1 + 2 times this value) pixels """

    TEXT_MARGIN = 2
    """ Margin between text and anchor point, in pixels """

    TAG = u"PlotElement"

    def __init__(self, label=None, rconf=None, logger=None):
        super(PlotElement, self).__init__(rconf=rconf, logger=logger)
        self.label = label

    @property
    def height(self):
        """
        The height of this element, in modules.

        :rtype: int
        """
        return 0

    @property
    def width(self):
        """
        The width of this element, in modules.

        :rtype: int
        """
        return 0

    def text_bounding_box(self, size_pt, text):
        """
        Return the bounding box of the given text
        at the given font size.

        Only the font sizes 12 and 18 are supported: the box is computed
        from fixed per-character metrics, not by measuring the font.

        :param int size_pt: the font size in points (12 or 18)
        :param string text: the text
        :rtype: tuple (width, height)
        :raises ValueError: if ``size_pt`` is neither 12 nor 18
        """
        if size_pt == 12:
            mult = {"h": 9, "w_digit": 5, "w_space": 2}
        elif size_pt == 18:
            mult = {"h": 14, "w_digit": 9, "w_space": 2}
        else:
            # previously this fell through with ``mult`` unbound
            # and failed later with an UnboundLocalError
            raise ValueError(
                u"Unsupported font size '%s': only 12 and 18 are supported" % (size_pt,)
            )
        num_chars = len(text)
        # each character takes "w_digit" pixels, consecutive characters are
        # separated by "w_space" pixels, plus one extra pixel;
        # NOTE for an empty text this formula yields a width of -1
        box_width = num_chars * mult["w_digit"] + (num_chars - 1) * mult["w_space"] + 1
        return (box_width, mult["h"])
def _synthesize_single_helper(self, text, voice_code, output_file_path=None):
    """
    This is a helper function to synthesize a single text fragment
    via a Python call.

    The caller can choose whether the output file should be written
    to disk or not: the file is saved only when ``output_file_path``
    is not ``None``.

    :param string text: the text to be synthesized
    :param voice_code: the voice code (ignored in this example)
    :param string output_file_path: the path of the output file to write, or ``None``
    :rtype: tuple (duration, sample_rate, encoding, data)
    """
    #
    # NOTE in this example, we assume that the Speect voice data files
    #      are located in the same directory of this .py source file
    #      and that the voice JSON file is called "voice.json"
    #
    # NOTE the voice_code value is ignored in this example,
    #      but in general one might select a voice file to load,
    #      depending on voice_code
    #
    voice_json_path = gf.safe_str(gf.absolute_path("voice.json", __file__))
    voice = speect.SVoice(voice_json_path)
    utt = voice.synth(text)
    audio = utt.features["audio"]
    if output_file_path is not None:
        audio.save_riff(gf.safe_str(output_file_path))

    # get length and data using speect Python API
    waveform = audio.get_audio_waveform()
    audio_sample_rate = int(waveform["samplerate"])
    # NOTE(review): the operands look like integers; under Python 2 this
    # division truncates unless true division is enabled for this module
    audio_length = TimeValue(audio.num_samples() / audio_sample_rate)
    audio_format = "pcm16"
    # numpy.frombuffer replaces the deprecated binary mode of numpy.fromstring;
    # it assumes waveform["samples"] exposes the buffer protocol
    # (e.g. a byte string) -- TODO confirm against the speect API.
    # astype() copies the data, then the 16-bit samples are scaled by 1/32768
    audio_samples = numpy.frombuffer(
        waveform["samples"],
        dtype=numpy.int16
    ).astype("float64") / 32768

    # return data
    return (audio_length, audio_sample_rate, audio_format, audio_samples)
def load(self, path, rp=False, rs=False):
    """
    Create an ``AudioFile`` for the given path, relative to this file.

    :param string path: the path of the audio file, relative to this file
    :param bool rp: if true, read the audio properties
    :param bool rs: if true, read the audio samples from file
    """
    audio_file = AudioFile(gf.absolute_path(path, __file__))
    optional_steps = (
        (rp, audio_file.read_properties),
        (rs, audio_file.read_samples_from_file),
    )
    for requested, step in optional_steps:
        if requested:
            step()
    return audio_file
def test_wizard_analyze_valid(self):
    # analyzing the first fixture container with the wizard configuration
    # must produce a job with the expected number of tasks
    fixture = self.FILES[0]
    container = Container(gf.absolute_path(fixture["path"], __file__))
    job = AnalyzeContainer(container).analyze(config_string=self.CONFIG_STRING)
    self.assertIsNotNone(job)
    self.assertEqual(len(job), fixture["length"])
def output_html_for_tuning(
        self,
        audio_file_path,
        output_file_path,
        parameters=None
):
    """
    Output an HTML file for fine tuning the sync map manually.

    The HTML template located at ``FINETUNEAS_PATH`` (relative to this
    source file) is read, its placeholders are replaced with the
    audio file path, the JSON string of this sync map and, if given
    in ``parameters``, the output format and the SMIL references;
    the result is then written to ``output_file_path``.

    :param string audio_file_path: the path to the associated audio file
    :param string output_file_path: the path to the output file to write
    :param dict parameters: additional parameters

    .. versionadded:: 1.3.1
    """
    if not gf.file_can_be_written(output_file_path):
        # NOTE(review): presumably log_exc raises the given OSError -- confirm
        self.log_exc(u"Cannot output HTML file '%s'. Wrong permissions?" % (output_file_path), None, True, OSError)
    if parameters is None:
        parameters = {}
    # the audio file is referenced from the HTML page through a file:// URL,
    # hence its path is made absolute first
    audio_file_path_absolute = gf.fix_slash(os.path.abspath(audio_file_path))
    template_path_absolute = gf.absolute_path(self.FINETUNEAS_PATH, __file__)
    with io.open(template_path_absolute, "r", encoding="utf-8") as file_obj:
        template = file_obj.read()
    # fixed replacements, each one a (placeholder, replacement) pair
    for repl in self.FINETUNEAS_REPLACEMENTS:
        template = template.replace(repl[0], repl[1])
    template = template.replace(
        self.FINETUNEAS_REPLACE_AUDIOFILEPATH,
        u"audioFilePath = \"file://%s\";" % audio_file_path_absolute
    )
    template = template.replace(
        self.FINETUNEAS_REPLACE_FRAGMENTS,
        u"fragments = (%s).fragments;" % self.json_string
    )
    if gc.PPN_TASK_OS_FILE_FORMAT in parameters:
        output_format = parameters[gc.PPN_TASK_OS_FILE_FORMAT]
        # formats outside the allowed list leave the template default untouched
        if output_format in self.FINETUNEAS_ALLOWED_FORMATS:
            template = template.replace(
                self.FINETUNEAS_REPLACE_OUTPUT_FORMAT,
                u"outputFormat = \"%s\";" % output_format
            )
            # NOTE(review): the SMIL branch is assumed to be nested inside
            # the allowed-format check -- confirm against the original layout
            if output_format == "smil":
                # SMIL output additionally needs the audio and page references
                for key, placeholder, replacement in [
                        (
                            gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF,
                            self.FINETUNEAS_REPLACE_SMIL_AUDIOREF,
                            "audioref = \"%s\";"
                        ),
                        (
                            gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF,
                            self.FINETUNEAS_REPLACE_SMIL_PAGEREF,
                            "pageref = \"%s\";"
                        ),
                ]:
                    if key in parameters:
                        template = template.replace(
                            placeholder,
                            replacement % parameters[key]
                        )
    with io.open(output_file_path, "w", encoding="utf-8") as file_obj:
        file_obj.write(template)
class TestSynthesizer(unittest.TestCase):
    """
    Tests for ``Synthesizer``: argument validation and synthesis of the
    sonnet fixtures with every combination of the C extension and
    ``cew`` subprocess settings.
    """

    PATH_NOT_WRITEABLE = gf.absolute_path("x/y/z/not_writeable.wav", __file__)

    def perform(self, path, expected, expected2=None, logger=None, quit_after=None, backwards=False):
        """
        Synthesize the text file at ``path`` (relative to this file)
        and check the result, once for each combination of the
        ``C_EXTENSIONS`` and ``CEW_SUBPROCESS_ENABLED`` settings.

        :param string path: the path of the plain text file, relative to this file
        :param int expected: the expected length of the first element of the result
        :param expected2: if not ``None``, the expected value of the second
                          element of the result (compared with ``places=0``)
        :param logger: the logger object passed to the synthesizer
        :param quit_after: passed through to ``Synthesizer.synthesize``
        :param bool backwards: passed through to ``Synthesizer.synthesize``
        """
        def inner(c_ext, cew_subprocess):
            handler, output_file_path = gf.tmp_file(suffix=".wav")
            try:
                tfl = TextFile(gf.absolute_path(path, __file__), TextFileFormat.PLAIN)
                tfl.set_language(Language.ENG)
                synth = Synthesizer(logger=logger)
                synth.rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
                synth.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = cew_subprocess
                result = synth.synthesize(tfl, output_file_path, quit_after=quit_after, backwards=backwards)
            finally:
                # remove the temporary wave file even if the synthesis raises
                gf.delete_file(handler, output_file_path)
            self.assertEqual(len(result[0]), expected)
            if expected2 is not None:
                self.assertAlmostEqual(result[1], expected2, places=0)

        for c_ext in [True, False]:
            for cew_subprocess in [True, False]:
                inner(c_ext, cew_subprocess)

    def test_clear_cache(self):
        synth = Synthesizer()
        synth.clear_cache()

    def test_synthesize_none(self):
        synth = Synthesizer()
        with self.assertRaises(TypeError):
            synth.synthesize(None, self.PATH_NOT_WRITEABLE)

    def test_synthesize_invalid_text_file(self):
        synth = Synthesizer()
        with self.assertRaises(TypeError):
            synth.synthesize("foo", self.PATH_NOT_WRITEABLE)

    def test_synthesize_path_not_writeable(self):
        tfl = TextFile()
        synth = Synthesizer()
        with self.assertRaises(OSError):
            synth.synthesize(tfl, self.PATH_NOT_WRITEABLE)

    def test_synthesize(self):
        self.perform("res/inputtext/sonnet_plain.txt", 15)

    def test_synthesize_logger(self):
        logger = Logger()
        self.perform("res/inputtext/sonnet_plain.txt", 15, logger=logger)

    def test_synthesize_unicode(self):
        self.perform("res/inputtext/sonnet_plain_utf8.txt", 15)

    def test_synthesize_quit_after(self):
        self.perform("res/inputtext/sonnet_plain.txt", 6, TimeValue("12.000"), quit_after=TimeValue("10.000"))

    def test_synthesize_backwards(self):
        self.perform("res/inputtext/sonnet_plain.txt", 15, backwards=True)

    def test_synthesize_quit_after_backwards(self):
        self.perform("res/inputtext/sonnet_plain.txt", 4, TimeValue("10.000"), quit_after=TimeValue("10.000"), backwards=True)
def container(self, path, expected):
    """
    Validate the container at ``path`` (relative to this file) and
    check the outcome: a passing container must report no errors,
    a failing one at least one error.

    :param string path: the path of the container, relative to this file
    :param bool expected: whether the validation is expected to pass
    """
    outcome = Validator().check_container(gf.absolute_path(path, __file__))
    self.assertEqual(outcome.passed, expected)
    error_count = len(outcome.errors)
    if not expected:
        self.assertGreater(error_count, 0)
    else:
        self.assertEqual(error_count, 0)
def test_exec_is_text_file_transliterate_map(self):
    # the transliteration map is passed by absolute path
    # inside the configuration string
    map_path = gf.absolute_path("res/transliteration/transliteration.map", __file__)
    config_string = "task_language=eng|is_text_type=subtitles|os_task_file_format=srt|is_text_file_transliterate_map=%s" % map_path
    arguments = [
        ("in", "../tools/res/audio.mp3"),
        ("in", "../tools/res/subtitles.txt"),
        ("", config_string),
        ("out", "sonnet.srt"),
    ]
    self.execute(arguments, 0)
def test_load_audio_file(self):
    # build the MFCC object from an AudioFile whose samples are already read
    wave_file = AudioFile(gf.absolute_path(self.AUDIO_FILE_WAVE, __file__))
    wave_file.read_samples_from_file()
    mfcc_file = AudioFileMFCC(audio_file=wave_file)
    self.assertIsNotNone(mfcc_file.all_mfcc)
    # original note on the expected length: 53.266
    expected_length = TimeValue("53.3")
    self.assertAlmostEqual(mfcc_file.audio_length, expected_length, places=1)
def test_set_audio_file_path_absolute(self):
    # setting the absolute audio path must populate task.audio_file
    task = Task()
    mp3_path = gf.absolute_path("res/container/job/assets/p001.mp3", __file__)
    task.audio_file_path_absolute = mp3_path
    self.assertIsNotNone(task.audio_file)
    expected_size = 426735
    expected_length = TimeValue("53.3")
    self.assertEqual(task.audio_file.file_size, expected_size)
    self.assertAlmostEqual(task.audio_file.audio_length, expected_length, places=1)
def load(self, input_file_path=PLAIN_FILE_PATH, fmt=TextFileFormat.PLAIN, expected_length=15, parameters=None):
    """
    Load a text file and check its number of fragments.

    :param string input_file_path: the path of the text file, relative to this file
    :param fmt: the format of the text file
    :param int expected_length: the expected length of the loaded text file
    :param dict parameters: additional parameters passed to ``TextFile``
    :rtype: the loaded ``TextFile``
    """
    absolute_path = gf.absolute_path(input_file_path, __file__)
    text_file = TextFile(absolute_path, fmt, parameters)
    self.assertEqual(len(text_file), expected_length)
    return text_file
def load(self, path):
    """
    Create an ``AudioFileMFCC`` from the audio file at ``path``
    (relative to this file) and check its initial state:
    MFCCs computed, not reversed, no head or tail set.

    :param string path: the path of the audio file, relative to this file
    :rtype: the loaded ``AudioFileMFCC``
    """
    mfcc_file = AudioFileMFCC(gf.absolute_path(path, __file__))
    self.assertIsNotNone(mfcc_file.all_mfcc)
    self.assertFalse(mfcc_file.is_reversed)
    self.assertNotEqual(mfcc_file.all_length, 0)
    # head and tail are zero on a freshly loaded file...
    for attribute in ("head_length", "tail_length"):
        self.assertEqual(getattr(mfcc_file, attribute), 0)
    # ...while the middle and the audio length are not
    for attribute in ("middle_length", "audio_length"):
        self.assertNotEqual(getattr(mfcc_file, attribute), 0)
    return mfcc_file
def output_html_for_tuning(self, audio_file_path, output_file_path, parameters=None):
    """
    Output an HTML file for fine tuning the sync map manually.

    The HTML template located at ``FINETUNEAS_PATH`` (relative to this
    source file) is read, its placeholders are replaced with the
    audio file path, the JSON string of this sync map and, if given
    in ``parameters``, the output format and the SMIL references;
    the result is then written to ``output_file_path``.

    :param string audio_file_path: the path to the associated audio file
    :param string output_file_path: the path to the output file to write
    :param dict parameters: additional parameters

    .. versionadded:: 1.3.1
    """
    if not gf.file_can_be_written(output_file_path):
        # NOTE(review): presumably log_exc raises the given OSError -- confirm
        self.log_exc(
            u"Cannot output HTML file '%s'. Wrong permissions?" % (output_file_path), None, True, OSError)
    if parameters is None:
        parameters = {}
    # the audio file is referenced from the HTML page through a file:// URL,
    # hence its path is made absolute first
    audio_file_path_absolute = gf.fix_slash(
        os.path.abspath(audio_file_path))
    template_path_absolute = gf.absolute_path(self.FINETUNEAS_PATH, __file__)
    with io.open(template_path_absolute, "r", encoding="utf-8") as file_obj:
        template = file_obj.read()
    # fixed replacements, each one a (placeholder, replacement) pair
    for repl in self.FINETUNEAS_REPLACEMENTS:
        template = template.replace(repl[0], repl[1])
    template = template.replace(
        self.FINETUNEAS_REPLACE_AUDIOFILEPATH,
        u"audioFilePath = \"file://%s\";" % audio_file_path_absolute)
    template = template.replace(
        self.FINETUNEAS_REPLACE_FRAGMENTS,
        u"fragments = (%s).fragments;" % self.json_string)
    if gc.PPN_TASK_OS_FILE_FORMAT in parameters:
        output_format = parameters[gc.PPN_TASK_OS_FILE_FORMAT]
        # formats outside the allowed list leave the template default untouched
        if output_format in self.FINETUNEAS_ALLOWED_FORMATS:
            template = template.replace(
                self.FINETUNEAS_REPLACE_OUTPUT_FORMAT,
                u"outputFormat = \"%s\";" % output_format)
            # NOTE(review): the SMIL branch is assumed to be nested inside
            # the allowed-format check -- confirm against the original layout
            if output_format == "smil":
                # SMIL output additionally needs the audio and page references
                for key, placeholder, replacement in [
                        (gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF,
                         self.FINETUNEAS_REPLACE_SMIL_AUDIOREF,
                         "audioref = \"%s\";"),
                        (gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF,
                         self.FINETUNEAS_REPLACE_SMIL_PAGEREF,
                         "pageref = \"%s\";"),
                ]:
                    if key in parameters:
                        template = template.replace(
                            placeholder, replacement % parameters[key])
    with io.open(output_file_path, "w", encoding="utf-8") as file_obj:
        file_obj.write(template)
def execute(self, path):
    """
    Load a job from the container at ``path`` (relative to this file),
    execute it, and check that the output container is written
    inside a temporary directory.

    :param string path: the path of the input container, relative to this file
    """
    input_path = gf.absolute_path(path, __file__)
    output_path = gf.tmp_directory()
    try:
        executor = ExecuteJob(job=None)
        executor.load_job_from_container(input_path)
        self.assertIsNotNone(executor.job)
        executor.execute()
        result_path = executor.write_output_container(output_path)
        self.assertIsNotNone(result_path)
        self.assertTrue(gf.file_exists(result_path))
        executor.clean()
    finally:
        # always remove the temporary output directory, even when
        # the job or one of the assertions above fails
        gf.delete_directory(output_path)
def test_absolute_path(self):
    # a relative anchor file is resolved against the directory
    # of the running script, an absolute one is used as it is
    base = os.path.dirname(os.path.realpath(sys.argv[0]))
    resources = ("res", "res/foo", "res/bar.baz")
    cases = [
        (resource, "aeneas/tools/somefile.py", os.path.join(base, "aeneas/tools/" + resource))
        for resource in resources
    ]
    cases += [
        (resource, "/aeneas/tools/somefile.py", "/aeneas/tools/" + resource)
        for resource in resources
    ]
    for relative_path, anchor_file, expected in cases:
        self.assertEqual(gf.absolute_path(relative_path, anchor_file), expected)
class TestCDTW(unittest.TestCase):
    """
    Tests for the ``aeneas.cdtw.cdtw`` extension, run on two MFCC
    matrices stored as text fixtures.
    """

    MFCC1 = gf.absolute_path("res/cdtw/mfcc1_12_1332", __file__)
    MFCC2 = gf.absolute_path("res/cdtw/mfcc2_12_868", __file__)

    def test_compute_path(self):
        # only the import is guarded: when the extension is missing the test
        # is reported as skipped instead of silently passing, and an
        # ImportError raised later is no longer hidden
        try:
            import aeneas.cdtw.cdtw
        except ImportError:
            self.skipTest("aeneas.cdtw.cdtw is not available")
        mfcc1 = numpy.loadtxt(self.MFCC1)
        mfcc2 = numpy.loadtxt(self.MFCC2)
        # only the second dimension of each matrix is needed
        _, n = mfcc1.shape
        _, m = mfcc2.shape
        # delta is capped at the second dimension of mfcc2
        delta = min(3000, m)
        best_path = aeneas.cdtw.cdtw.compute_best_path(mfcc1, mfcc2, delta)
        self.assertEqual(len(best_path), 1418)
        # the path must connect the two opposite corners
        self.assertEqual(best_path[0], (0, 0))
        self.assertEqual(best_path[-1], (n - 1, m - 1))
def inner(c_ext, cew_subprocess):
    """
    Synthesize the text file to a temporary wave file with the given
    runtime settings and check the result.

    ``path``, ``logger``, ``quit_after``, ``backwards``, ``expected``,
    ``expected2`` and ``self`` are taken from the enclosing scope.

    :param bool c_ext: the value for the ``C_EXTENSIONS`` setting
    :param bool cew_subprocess: the value for the ``CEW_SUBPROCESS_ENABLED`` setting
    """
    handler, output_file_path = gf.tmp_file(suffix=".wav")
    try:
        tfl = TextFile(gf.absolute_path(path, __file__), TextFileFormat.PLAIN)
        tfl.set_language(Language.ENG)
        synth = Synthesizer(logger=logger)
        synth.rconf[RuntimeConfiguration.C_EXTENSIONS] = c_ext
        synth.rconf[RuntimeConfiguration.CEW_SUBPROCESS_ENABLED] = cew_subprocess
        result = synth.synthesize(tfl, output_file_path, quit_after=quit_after, backwards=backwards)
    finally:
        # remove the temporary wave file even if the synthesis raises
        gf.delete_file(handler, output_file_path)
    self.assertEqual(len(result[0]), expected)
    if expected2 is not None:
        self.assertAlmostEqual(result[1], expected2, places=0)
def read(self, fmt, multiline=False, utf8=False, parameters=PARAMETERS):
    """
    Read the sonnet sync map fixture in the given format.

    The fixture file name depends on the ``multiline`` and ``utf8``
    flags; ``fmt`` is used both as the format and as the file extension.

    :param fmt: the sync map format
    :param bool multiline: select the multiline variant of the fixture
    :param bool utf8: select the UTF-8 variant of the fixture
    :param dict parameters: additional parameters passed to ``SyncMap.read``
    :rtype: the ``SyncMap`` that was read
    """
    # file name stems, keyed by (multiline, utf8)
    stems = {
        (True, True): "res/syncmaps/sonnet001_mu.",
        (True, False): "res/syncmaps/sonnet001_m.",
        (False, True): "res/syncmaps/sonnet001_u.",
        (False, False): "res/syncmaps/sonnet001.",
    }
    sync_map = SyncMap()
    stem = stems[(bool(multiline), bool(utf8))]
    sync_map.read(fmt, gf.absolute_path(stem + fmt, __file__), parameters=parameters)
    return sync_map
def execute(self, parameters, expected_exit_code):
    """
    Run ``ExecuteJobCLI`` with the given arguments and check its exit code.

    Each parameter is a ``(type, value)`` pair: ``"in"`` values are
    resolved relative to this file, ``"out"`` values are placed inside
    a temporary directory, and any other value is passed unchanged.

    :param list parameters: the list of ``(type, value)`` pairs
    :param int expected_exit_code: the expected exit code
    """
    output_path = gf.tmp_directory()

    def resolve(p_type, p_value):
        # map a (type, value) pair to the actual command line argument
        if p_type == "in":
            return gf.absolute_path(p_value, __file__)
        if p_type == "out":
            return os.path.join(output_path, p_value)
        return p_value

    # the first argument stands for the program name
    arguments = ["placeholder"]
    arguments.extend(resolve(p_type, p_value) for p_type, p_value in parameters)
    exit_code = ExecuteJobCLI(use_sys=False).run(arguments=arguments)
    gf.delete_directory(output_path)
    self.assertEqual(exit_code, expected_exit_code)
def set_text_file(self, path, fmt, expected, id_regex=None, class_regex=None, id_sort=None):
    """
    Create an English task with the given text format, set its text file
    and check the number of fragments that are read.

    :param string path: the path of the text file, relative to this file
    :param fmt: the text file format
    :param int expected: the expected number of text fragments
    :param id_regex: if not ``None``, the regex for the ``id`` attribute
    :param class_regex: if not ``None``, the regex for the ``class`` attribute
    :param id_sort: if not ``None``, the sorting algorithm for the ``id`` values
    """
    task = Task()
    task.configuration = TaskConfiguration()
    task.configuration["language"] = Language.ENG
    task.configuration["i_t_format"] = fmt
    # optional settings, applied only when a value is given
    optional_settings = (
        ("i_t_unparsed_class_regex", class_regex),
        ("i_t_unparsed_id_regex", id_regex),
        ("i_t_unparsed_id_sort", id_sort),
    )
    for key, value in optional_settings:
        if value is not None:
            task.configuration[key] = value
    task.text_file_path_absolute = gf.absolute_path(path, __file__)
    self.assertIsNotNone(task.text_file)
    self.assertEqual(len(task.text_file), expected)
class TestCMFCC(unittest.TestCase):
    """
    Tests for the ``aeneas.cmfcc.cmfcc`` extension, run on a
    mono 16 kHz WAVE fixture.
    """

    AUDIO = gf.absolute_path("res/audioformats/mono.16000.wav", __file__)

    def test_compute_mfcc(self):
        # only the import is guarded: when the extension is missing the test
        # is reported as skipped instead of silently passing, and an
        # ImportError raised later is no longer hidden
        try:
            import aeneas.cmfcc.cmfcc
        except ImportError:
            self.skipTest("aeneas.cmfcc.cmfcc is not available")
        audio_file = AudioFile(self.AUDIO)
        audio_file.read_samples_from_file()
        # the first element of the returned value is transposed
        # before its shape is checked
        mfcc_c = (aeneas.cmfcc.cmfcc.compute_from_data(
            audio_file.audio_samples,
            audio_file.audio_sample_rate,
            40,
            13,
            512,
            133.3333,
            6855.4976,
            0.97,
            0.025,
            0.010
        )[0]).transpose()
        self.assertEqual(mfcc_c.shape[0], 13)
        self.assertGreater(mfcc_c.shape[1], 0)
def convert(self, input_file_path, ofp=None, runtime_configuration=None):
    """
    Convert the given audio file with ``FFMPEGWrapper`` and check that
    the returned path equals the requested output path.

    When ``ofp`` is ``None`` the output goes to ``audio.wav`` inside
    a temporary directory; otherwise it goes to ``ofp``.
    In both cases the output is removed afterwards, and any exception
    raised by the conversion or by the assertion propagates unchanged.

    :param string input_file_path: the path of the input file, relative to this file
    :param string ofp: the path of the output file, or ``None``
    :param runtime_configuration: the runtime configuration passed to the wrapper
    """
    output_path = None
    if ofp is None:
        output_path = gf.tmp_directory()
        output_file_path = os.path.join(output_path, "audio.wav")
    else:
        output_file_path = ofp
    try:
        converter = FFMPEGWrapper(rconf=runtime_configuration)
        result = converter.convert(
            gf.absolute_path(input_file_path, __file__),
            output_file_path
        )
        self.assertEqual(result, output_file_path)
    finally:
        # clean up on every exit path; previously the success path referenced
        # an unbound ``output_path`` when ``ofp`` was given, and the cleanup
        # was skipped for any exception other than OSError
        if output_path is not None:
            gf.delete_directory(output_path)
        else:
            # NOTE(review): presumably delete_file tolerates a missing file,
            # since it was already called here after a failed conversion
            gf.delete_file(None, ofp)
def convert(self, input_file_path, ofp=None, runtime_configuration=None):
    """
    Convert the given audio file with ``FFMPEGWrapper`` and check that
    the returned path equals the requested output path.

    When ``ofp`` is ``None`` the output goes to ``audio.wav`` inside
    a temporary directory; otherwise it goes to ``ofp``.
    In both cases the output is removed afterwards, and any exception
    raised by the conversion or by the assertion propagates unchanged.

    :param string input_file_path: the path of the input file, relative to this file
    :param string ofp: the path of the output file, or ``None``
    :param runtime_configuration: the runtime configuration passed to the wrapper
    """
    output_path = None
    if ofp is None:
        output_path = gf.tmp_directory()
        output_file_path = os.path.join(output_path, "audio.wav")
    else:
        output_file_path = ofp
    try:
        converter = FFMPEGWrapper(rconf=runtime_configuration)
        result = converter.convert(
            gf.absolute_path(input_file_path, __file__),
            output_file_path
        )
        self.assertEqual(result, output_file_path)
    finally:
        # clean up on every exit path; previously the success path referenced
        # an unbound ``output_path`` when ``ofp`` was given, and the cleanup
        # was skipped for any exception other than OSError
        if output_path is not None:
            gf.delete_directory(output_path)
        else:
            # NOTE(review): presumably delete_file tolerates a missing file,
            # since it was already called here after a failed conversion
            gf.delete_file(None, ofp)
def check_ffprobe(cls):
    """
    Check whether ``ffprobe`` can be called.

    The properties of a bundled MP3 file are read;
    the check succeeds when this does not raise.

    Return ``True`` on failure and ``False`` on success.

    :rtype: bool
    """
    try:
        from aeneas.ffprobewrapper import FFPROBEWrapper
        file_path = gf.absolute_path(u"tools/res/audio.mp3", __file__)
        prober = FFPROBEWrapper()
        # the returned properties are not needed, only the absence of errors
        prober.read_properties(file_path)
        gf.print_success(u"ffprobe OK")
        return False
    except Exception:
        # any ordinary error means the check failed and is reported below;
        # KeyboardInterrupt and SystemExit are deliberately not swallowed
        pass
    gf.print_error(u"ffprobe ERROR")
    gf.print_info(u" Please make sure you have ffprobe installed correctly")
    gf.print_info(u" (usually it is provided by the ffmpeg installer)")
    gf.print_info(u" and that its path is in your PATH environment variable")
    return True
def _synthesize_single_helper(self, text, voice_code, output_file_path=None):
    """
    This is a helper function to synthesize a single text fragment
    via a Python call.

    The caller can choose whether the output file should be written
    to disk or not: the file is saved only when ``output_file_path``
    is not ``None``.

    :param string text: the text to be synthesized
    :param voice_code: the voice code (ignored in this example)
    :param string output_file_path: the path of the output file to write, or ``None``
    :rtype: tuple (duration, sample_rate, encoding, data)
    """
    #
    # NOTE in this example, we assume that the Speect voice data files
    #      are located in the same directory of this .py source file
    #      and that the voice JSON file is called "voice.json"
    #
    # NOTE the voice_code value is ignored in this example,
    #      but in general one might select a voice file to load,
    #      depending on voice_code
    #
    voice_json_path = gf.safe_str(gf.absolute_path("voice.json", __file__))
    voice = speect.SVoice(voice_json_path)
    utt = voice.synth(text)
    audio = utt.features["audio"]
    if output_file_path is not None:
        audio.save_riff(gf.safe_str(output_file_path))

    # get length and data using speect Python API
    waveform = audio.get_audio_waveform()
    audio_sample_rate = int(waveform["samplerate"])
    # NOTE(review): the operands look like integers; under Python 2 this
    # division truncates unless true division is enabled for this module
    audio_length = TimeValue(audio.num_samples() / audio_sample_rate)
    audio_format = "pcm16"
    # numpy.frombuffer replaces the deprecated binary mode of numpy.fromstring;
    # it assumes waveform["samples"] exposes the buffer protocol
    # (e.g. a byte string) -- TODO confirm against the speect API.
    # astype() copies the data, then the 16-bit samples are scaled by 1/32768
    audio_samples = numpy.frombuffer(
        waveform["samples"],
        dtype=numpy.int16
    ).astype("float64") / 32768

    # return data
    return (audio_length, audio_sample_rate, audio_format, audio_samples)
def check_ffmpeg(cls):
    """
    Check whether ``ffmpeg`` can be called.

    A bundled MP3 file is converted to a temporary WAVE file;
    the check succeeds when the converter returns a truthy result.

    Return ``True`` on failure and ``False`` on success.

    :rtype: bool
    """
    try:
        from aeneas.ffmpegwrapper import FFMPEGWrapper
        input_file_path = gf.absolute_path(u"tools/res/audio.mp3", __file__)
        handler, output_file_path = gf.tmp_file(suffix=u".wav")
        try:
            converter = FFMPEGWrapper()
            result = converter.convert(input_file_path, output_file_path)
        finally:
            # remove the temporary file even if the conversion raises
            gf.delete_file(handler, output_file_path)
        if result:
            gf.print_success(u"ffmpeg OK")
            return False
    except Exception:
        # any ordinary error means the check failed and is reported below;
        # KeyboardInterrupt and SystemExit are deliberately not swallowed
        pass
    gf.print_error(u"ffmpeg ERROR")
    gf.print_info(u" Please make sure you have ffmpeg installed correctly")
    gf.print_info(u" and that its path is in your PATH environment variable")
    return True
def test_set_audio_file_path_absolute(self):
    # setting the absolute audio path must populate task.audio_file
    task = Task()
    mp3_path = gf.absolute_path("res/container/job/assets/p001.mp3", __file__)
    task.audio_file_path_absolute = mp3_path
    self.assertIsNotNone(task.audio_file)
    expected_size = 426735
    expected_length = TimeValue("53.3")
    self.assertEqual(task.audio_file.file_size, expected_size)
    self.assertAlmostEqual(task.audio_file.audio_length, expected_length, places=1)
def test_set_audio_file_path_absolute_error(self):
    # assigning the path of a file that does not exist must raise OSError
    missing_name = "not_existing.mp3"
    task = Task()
    with self.assertRaises(OSError):
        missing_path = gf.absolute_path(missing_name, __file__)
        task.audio_file_path_absolute = missing_path
def file_encoding(self, path, expected):
    """
    Check the encoding of the file at ``path`` (relative to this file)
    and compare the outcome with the expected one.

    :param string path: the path of the file, relative to this file
    :param bool expected: whether the check is expected to pass
    """
    checker = Validator()
    absolute_path = gf.absolute_path(path, __file__)
    outcome = checker.check_file_encoding(absolute_path)
    self.assertEqual(outcome.passed, expected)
def load(self, input_file_path):
    """
    Read the properties of the given audio file with ``FFPROBEWrapper``.

    :param string input_file_path: the path of the audio file, relative to this file
    :rtype: the value returned by ``FFPROBEWrapper.read_properties``
    """
    wrapper = FFPROBEWrapper()
    absolute_path = gf.absolute_path(input_file_path, __file__)
    properties = wrapper.read_properties(absolute_path)
    return properties
class TestDTWAligner(unittest.TestCase):
    """
    Tests for ``DTWAligner``: which attributes are set by each constructor
    argument, and that computing the accumulated cost matrix or the path
    raises ``DTWAlignerNotInitialized`` unless both waves are available.
    """

    AUDIO_FILE = gf.absolute_path("res/audioformats/mono.16000.wav", __file__)
    # MFCC matrices loaded once, when the class is defined
    NUMPY_ARRAY_1 = numpy.loadtxt(
        gf.absolute_path("res/cdtw/mfcc1_12_1332", __file__))
    NUMPY_ARRAY_2 = numpy.loadtxt(
        gf.absolute_path("res/cdtw/mfcc2_12_868", __file__))

    def test_create_aligner(self):
        aligner = DTWAligner()
        self.assertIsNone(aligner.real_wave_mfcc)
        self.assertIsNone(aligner.synt_wave_mfcc)
        self.assertIsNone(aligner.real_wave_path)
        self.assertIsNone(aligner.synt_wave_path)

    def test_set_real_wave_path(self):
        # giving a path also makes the corresponding MFCC available
        aligner = DTWAligner(real_wave_path=self.AUDIO_FILE)
        self.assertIsNotNone(aligner.real_wave_mfcc)
        self.assertIsNone(aligner.synt_wave_mfcc)
        self.assertIsNotNone(aligner.real_wave_path)
        self.assertIsNone(aligner.synt_wave_path)

    def test_set_synt_wave_path(self):
        # giving a path also makes the corresponding MFCC available
        aligner = DTWAligner(synt_wave_path=self.AUDIO_FILE)
        self.assertIsNone(aligner.real_wave_mfcc)
        self.assertIsNotNone(aligner.synt_wave_path)
        self.assertIsNone(aligner.real_wave_path)
        self.assertIsNotNone(aligner.synt_wave_mfcc)

    def test_set_real_wave_mfcc(self):
        # giving an MFCC object leaves both paths unset
        af = AudioFileMFCC(self.AUDIO_FILE)
        aligner = DTWAligner(real_wave_mfcc=af)
        self.assertIsNotNone(aligner.real_wave_mfcc)
        self.assertIsNone(aligner.synt_wave_mfcc)
        self.assertIsNone(aligner.real_wave_path)
        self.assertIsNone(aligner.synt_wave_path)

    def test_set_synt_wave_mfcc(self):
        # giving an MFCC object leaves both paths unset
        af = AudioFileMFCC(self.AUDIO_FILE)
        aligner = DTWAligner(synt_wave_mfcc=af)
        self.assertIsNone(aligner.real_wave_mfcc)
        self.assertIsNotNone(aligner.synt_wave_mfcc)
        self.assertIsNone(aligner.real_wave_path)
        self.assertIsNone(aligner.synt_wave_path)

    def test_compute_acm_none(self):
        aligner = DTWAligner()
        with self.assertRaises(DTWAlignerNotInitialized):
            aligner.compute_accumulated_cost_matrix()

    def test_compute_acm_real_path(self):
        aligner = DTWAligner(real_wave_path=self.AUDIO_FILE)
        with self.assertRaises(DTWAlignerNotInitialized):
            aligner.compute_accumulated_cost_matrix()

    def test_compute_acm_synt_path(self):
        aligner = DTWAligner(synt_wave_path=self.AUDIO_FILE)
        with self.assertRaises(DTWAlignerNotInitialized):
            aligner.compute_accumulated_cost_matrix()

    def test_compute_acm_real_mfcc(self):
        af = AudioFileMFCC(self.AUDIO_FILE)
        aligner = DTWAligner(real_wave_mfcc=af)
        with self.assertRaises(DTWAlignerNotInitialized):
            aligner.compute_accumulated_cost_matrix()

    def test_compute_acm_synt_mfcc(self):
        af = AudioFileMFCC(self.AUDIO_FILE)
        aligner = DTWAligner(synt_wave_mfcc=af)
        with self.assertRaises(DTWAlignerNotInitialized):
            aligner.compute_accumulated_cost_matrix()

    def test_compute_path_none(self):
        # this test used to call compute_accumulated_cost_matrix(),
        # duplicating test_compute_acm_none and leaving compute_path()
        # on an uninitialized aligner untested
        aligner = DTWAligner()
        with self.assertRaises(DTWAlignerNotInitialized):
            aligner.compute_path()

    def test_compute_path_real_path(self):
        aligner = DTWAligner(real_wave_path=self.AUDIO_FILE)
        with self.assertRaises(DTWAlignerNotInitialized):
            aligner.compute_path()

    def test_compute_path_synt_path(self):
        aligner = DTWAligner(synt_wave_path=self.AUDIO_FILE)
        with self.assertRaises(DTWAlignerNotInitialized):
            aligner.compute_path()

    def test_compute_path_real_mfcc(self):
        af = AudioFileMFCC(self.AUDIO_FILE)
        aligner = DTWAligner(real_wave_mfcc=af)
        with self.assertRaises(DTWAlignerNotInitialized):
            aligner.compute_path()

    def test_compute_path_synt_mfcc(self):
        af = AudioFileMFCC(self.AUDIO_FILE)
        aligner = DTWAligner(synt_wave_mfcc=af)
        with self.assertRaises(DTWAlignerNotInitialized):
            aligner.compute_path()

    def test_compute_acm(self):
        # NOTE this takes too long, run as part of the long_ tests
        pass

    def test_compute_path(self):
        # NOTE this takes too long, run as part of the long_ tests
        pass
class TestTextFile(unittest.TestCase):
    """Tests for TextFragment, TextFile reading/slicing, and the text filters."""

    NOT_EXISTING_PATH = gf.absolute_path("not_existing.txt", __file__)
    NOT_WRITEABLE_PATH = gf.absolute_path("x/y/z/not_writeable.txt", __file__)
    EMPTY_FILE_PATH = "res/inputtext/empty.txt"
    BLANK_FILE_PATH = "res/inputtext/blank.txt"
    PLAIN_FILE_PATH = "res/inputtext/sonnet_plain.txt"
    PARSED_FILE_PATH = "res/inputtext/sonnet_parsed.txt"
    MPLAIN_FILE_PATH = "res/inputtext/sonnet_mplain.txt"
    MUNPARSED_FILE_PATH = "res/inputtext/sonnet_munparsed.xhtml"
    # Subtitle fixtures shared by the test_read_subtitles* tests.
    SUBTITLES_FILE_PATHS = [
        "res/inputtext/sonnet_subtitles_with_end_newline.txt",
        "res/inputtext/sonnet_subtitles_no_end_newline.txt",
        "res/inputtext/sonnet_subtitles_multiple_blank.txt",
        "res/inputtext/sonnet_subtitles_multiple_rows.txt",
    ]
    UNPARSED_PARAMETERS = {
        gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX: "p[0-9]+",
        gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX: "p[0-9]+s[0-9]+",
        gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX: "p[0-9]+s[0-9]+w[0-9]+",
        gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX: "f[0-9]+",
        gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX: "ra",
        gc.PPN_TASK_IS_TEXT_UNPARSED_ID_SORT: IDSortingAlgorithm.UNSORTED,
    }
    ID_REGEX_PARAMETERS = {gc.PPN_TASK_OS_FILE_ID_REGEX: u"word%06d"}
    ID_REGEX_PARAMETERS_BAD = {gc.PPN_TASK_OS_FILE_ID_REGEX: u"word"}
    TRANSLITERATION_MAP_FILE_PATH = gf.absolute_path(
        "res/transliteration/transliteration.map", __file__)

    def load(self, input_file_path=PLAIN_FILE_PATH, fmt=TextFileFormat.PLAIN, expected_length=15, parameters=None):
        """Load a TextFile from a path relative to this file and check its length.

        :param input_file_path: path of the input text file, relative to this file
        :param fmt: the text file format passed to TextFile
        :param expected_length: the expected number of fragments
        :param parameters: optional parameters dictionary passed to TextFile
        :returns: the loaded TextFile
        """
        tfl = TextFile(gf.absolute_path(input_file_path, __file__), fmt, parameters)
        self.assertEqual(len(tfl), expected_length)
        return tfl

    def load_and_sort_id(self, input_file_path, id_regex, id_sort, expected):
        """Load an UNPARSED file of 5 fragments and check the fragment identifiers, in order."""
        parameters = {
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX: id_regex,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_SORT: id_sort,
        }
        tfl = self.load(input_file_path, TextFileFormat.UNPARSED, 5, parameters)
        for index, identifier in enumerate(expected):
            self.assertEqual(tfl.fragments[index].identifier, identifier)

    def load_and_slice(self, expected, start=None, end=None):
        """Load the default file, slice it, check the slice length, and return the slice."""
        tfl = self.load()
        sli = tfl.get_slice(start, end)
        self.assertEqual(len(sli), expected)
        return sli

    def filter_ignore_regex(self, regex, string_in, expected_out):
        """Apply a TextFilterIgnoreRegex built from ``regex`` and compare its output."""
        fil = TextFilterIgnoreRegex(regex)
        string_out = fil.apply_filter(string_in)
        self.assertEqual(string_out, expected_out)

    def filter_transliterate(self, string_in, expected_out, map_file_path=TRANSLITERATION_MAP_FILE_PATH):
        """Apply a TextFilterTransliterate built from ``map_file_path`` and compare its output."""
        fil = TextFilterTransliterate(map_file_path=map_file_path)
        string_out = fil.apply_filter(string_in)
        self.assertEqual(string_out, expected_out)

    def test_tf_identifier_str(self):
        with self.assertRaises(TypeError):
            TextFragment(identifier=b"foo")

    def test_tf_identifier_unicode(self):
        tf = TextFragment(identifier=u"foo")
        self.assertEqual(len(tf), 0)

    def test_tf_lines_invalid(self):
        with self.assertRaises(TypeError):
            TextFragment(lines="foo")

    def test_tf_lines_invalid_none(self):
        with self.assertRaises(TypeError):
            TextFragment(lines=[None])

    def test_tf_lines_invalid_none_mixed(self):
        with self.assertRaises(TypeError):
            TextFragment(lines=[u"foo", None, u"bar"])

    def test_tf_lines_invalid_str(self):
        with self.assertRaises(TypeError):
            TextFragment(lines=[b"foo"])

    def test_tf_lines_invalid_str_mixed(self):
        with self.assertRaises(TypeError):
            TextFragment(lines=[u"foo", b"bar", u"baz"])

    def test_tf_lines_unicode(self):
        tf = TextFragment(lines=[u"foo"])
        self.assertEqual(len(tf), 1)

    def test_tf_lines_unicode_multiple(self):
        tf = TextFragment(lines=[u"foo", u"bar", u"baz"])
        self.assertEqual(len(tf), 3)

    def test_tf_lines_unicode_empty_string(self):
        tf = TextFragment(lines=[u""])
        self.assertEqual(len(tf), 1)

    def test_tf_lines_unicode_empty_string_multiple(self):
        tf = TextFragment(lines=[u"", u"", u""])
        self.assertEqual(len(tf), 3)

    def test_constructor(self):
        tfl = TextFile()
        self.assertEqual(len(tfl), 0)

    def test_file_path_not_existing(self):
        with self.assertRaises(OSError):
            TextFile(file_path=self.NOT_EXISTING_PATH)

    def test_invalid_format(self):
        with self.assertRaises(ValueError):
            TextFile(file_format="foo")

    def test_invalid_parameters(self):
        with self.assertRaises(TypeError):
            TextFile(parameters=["foo"])

    def test_empty_fragments(self):
        tfl = TextFile()
        self.assertEqual(len(tfl), 0)

    def test_invalid_add_fragment(self):
        tfl = TextFile()
        with self.assertRaises(TypeError):
            tfl.add_fragment("foo")

    def test_read_empty(self):
        for fmt in TextFileFormat.ALLOWED_VALUES:
            self.load(self.EMPTY_FILE_PATH, fmt, 0, self.UNPARSED_PARAMETERS)

    def test_read_blank(self):
        for fmt in TextFileFormat.ALLOWED_VALUES:
            # Only the PLAIN format is expected to yield fragments (5) for the blank file.
            expected = 5 if fmt == TextFileFormat.PLAIN else 0
            self.load(self.BLANK_FILE_PATH, fmt, expected, self.UNPARSED_PARAMETERS)

    def test_read_subtitles(self):
        for path in self.SUBTITLES_FILE_PATHS:
            self.load(path, TextFileFormat.SUBTITLES, 15)

    def test_read_subtitles_id_regex(self):
        for path in self.SUBTITLES_FILE_PATHS:
            self.load(path, TextFileFormat.SUBTITLES, 15, self.ID_REGEX_PARAMETERS)

    def test_read_subtitles_id_regex_bad(self):
        for path in self.SUBTITLES_FILE_PATHS:
            # Keep assertRaises inside the loop: wrapping the whole loop would
            # stop at the first raised exception and leave the other paths untested.
            with self.assertRaises(ValueError):
                self.load(path, TextFileFormat.SUBTITLES, 15, self.ID_REGEX_PARAMETERS_BAD)

    def test_read_mplain(self):
        self.load(self.MPLAIN_FILE_PATH, TextFileFormat.MPLAIN, 5)

    def test_read_mplain_variations(self):
        for path in [
                "res/inputtext/sonnet_mplain_with_end_newline.txt",
                "res/inputtext/sonnet_mplain_no_end_newline.txt",
                "res/inputtext/sonnet_mplain_multiple_blank.txt"
        ]:
            self.load(path, TextFileFormat.MPLAIN, 5)

    def test_read_munparsed(self):
        tfl = self.load(self.MUNPARSED_FILE_PATH, TextFileFormat.MUNPARSED, 5, self.UNPARSED_PARAMETERS)
        self.assertEqual(len(tfl.fragments_tree.vleaves), 107)

    def test_read_munparsed_diff_id(self):
        parameters = {
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX: "p[0-9]+",
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX: "s[0-9]+",
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX: "w[0-9]+",
        }
        tfl = self.load("res/inputtext/sonnet_munparsed_diff_id.xhtml", TextFileFormat.MUNPARSED, 5, parameters)
        self.assertEqual(len(tfl.fragments_tree.vleaves), 107)

    def test_read_munparsed_bad_param_l1(self):
        parameters = {
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX: "k[0-9]+",
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX: "s[0-9]+",
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX: "w[0-9]+",
        }
        self.load("res/inputtext/sonnet_munparsed_diff_id.xhtml", TextFileFormat.MUNPARSED, 0, parameters)

    def test_read_munparsed_bad_param_l2(self):
        parameters = {
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX: "p[0-9]+",
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX: "k[0-9]+",
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX: "w[0-9]+",
        }
        self.load("res/inputtext/sonnet_munparsed_diff_id.xhtml", TextFileFormat.MUNPARSED, 0, parameters)

    def test_read_munparsed_bad_param_l3(self):
        parameters = {
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L1_ID_REGEX: "p[0-9]+",
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L2_ID_REGEX: "s[0-9]+",
            gc.PPN_TASK_IS_TEXT_MUNPARSED_L3_ID_REGEX: "k[0-9]+",
        }
        self.load("res/inputtext/sonnet_munparsed_diff_id.xhtml", TextFileFormat.MUNPARSED, 0, parameters)

    def test_read_plain(self):
        self.load(self.PLAIN_FILE_PATH, TextFileFormat.PLAIN, 15)

    def test_read_plain_id_regex(self):
        self.load(self.PLAIN_FILE_PATH, TextFileFormat.PLAIN, 15, self.ID_REGEX_PARAMETERS)

    def test_read_plain_id_regex_bad(self):
        with self.assertRaises(ValueError):
            self.load(self.PLAIN_FILE_PATH, TextFileFormat.PLAIN, 15, self.ID_REGEX_PARAMETERS_BAD)

    def test_read_plain_utf8(self):
        self.load("res/inputtext/sonnet_plain_utf8.txt", TextFileFormat.PLAIN, 15)

    def test_read_plain_utf8_id_regex(self):
        self.load("res/inputtext/sonnet_plain_utf8.txt", TextFileFormat.PLAIN, 15, self.ID_REGEX_PARAMETERS)

    def test_read_plain_utf8_id_regex_bad(self):
        with self.assertRaises(ValueError):
            self.load("res/inputtext/sonnet_plain_utf8.txt", TextFileFormat.PLAIN, 15, self.ID_REGEX_PARAMETERS_BAD)

    def test_read_parsed(self):
        self.load(self.PARSED_FILE_PATH, TextFileFormat.PARSED, 15)

    def test_read_parsed_bad(self):
        for path in [
                "res/inputtext/badly_parsed_1.txt",
                "res/inputtext/badly_parsed_2.txt",
                "res/inputtext/badly_parsed_3.txt"
        ]:
            self.load(path, TextFileFormat.PARSED, 0)

    def test_read_unparsed(self):
        for case in [
                {
                    "path": "res/inputtext/sonnet_unparsed_soup_1.txt",
                    "parameters": {
                        gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX: "f[0-9]*"
                    }
                },
                {
                    "path": "res/inputtext/sonnet_unparsed_soup_2.txt",
                    "parameters": {
                        gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX: "f[0-9]*",
                        gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX: "ra"
                    }
                },
                {
                    "path": "res/inputtext/sonnet_unparsed_soup_3.txt",
                    "parameters": {
                        gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX: "ra"
                    }
                },
                {
                    "path": "res/inputtext/sonnet_unparsed.xhtml",
                    "parameters": {
                        gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX: "f[0-9]*"
                    }
                },
        ]:
            self.load(case["path"], TextFileFormat.UNPARSED, 15, case["parameters"])

    def test_read_unparsed_unsorted(self):
        self.load_and_sort_id("res/inputtext/sonnet_unparsed_order_1.txt", "f[0-9]*", IDSortingAlgorithm.UNSORTED, [u"f001", u"f003", u"f005", u"f004", u"f002"])

    def test_read_unparsed_numeric(self):
        self.load_and_sort_id("res/inputtext/sonnet_unparsed_order_2.txt", "f[0-9]*", IDSortingAlgorithm.NUMERIC, [u"f001", u"f2", u"f003", u"f4", u"f050"])

    def test_read_unparsed_numeric_2(self):
        self.load_and_sort_id("res/inputtext/sonnet_unparsed_order_3.txt", "f[0-9]*", IDSortingAlgorithm.NUMERIC, [u"f001", u"f2", u"f003", u"f4", u"f050"])

    def test_read_unparsed_lexicographic(self):
        self.load_and_sort_id("res/inputtext/sonnet_unparsed_order_4.txt", "[a-z][0-9]*", IDSortingAlgorithm.LEXICOGRAPHIC, [u"a005", u"b002", u"c004", u"d001", u"e003"])

    def test_read_unparsed_numeric_3(self):
        self.load_and_sort_id("res/inputtext/sonnet_unparsed_order_5.txt", "[a-z][0-9]*", IDSortingAlgorithm.NUMERIC, [u"d001", u"b002", u"e003", u"c004", u"a005"])

    def test_set_language(self):
        tfl = self.load()
        tfl.set_language(Language.ENG)
        for fragment in tfl.fragments:
            self.assertEqual(fragment.language, Language.ENG)
        tfl.set_language(Language.ITA)
        for fragment in tfl.fragments:
            self.assertEqual(fragment.language, Language.ITA)

    def test_set_language_on_empty(self):
        tfl = TextFile()
        self.assertEqual(len(tfl), 0)
        tfl.set_language(Language.ENG)
        self.assertEqual(len(tfl), 0)
        self.assertEqual(tfl.chars, 0)

    def test_read_from_list(self):
        tfl = TextFile()
        text_list = [
            u"fragment 1",
            u"fragment 2",
            u"fragment 3",
            u"fragment 4",
            u"fragment 5"
        ]
        tfl.read_from_list(text_list)
        self.assertEqual(len(tfl), 5)
        self.assertEqual(tfl.chars, 50)

    def test_read_from_list_with_ids(self):
        tfl = TextFile()
        text_list = [
            (u"a1", u"fragment 1"),
            (u"b2", u"fragment 2"),
            (u"c3", u"fragment 3"),
            (u"d4", u"fragment 4"),
            (u"e5", u"fragment 5")
        ]
        tfl.read_from_list_with_ids(text_list)
        self.assertEqual(len(tfl), 5)
        self.assertEqual(tfl.chars, 50)

    def test_add_fragment(self):
        tfl = TextFile()
        self.assertEqual(len(tfl), 0)
        tfl.add_fragment(TextFragment(u"a1", Language.ENG, [u"fragment 1"]))
        self.assertEqual(len(tfl), 1)
        self.assertEqual(tfl.chars, 10)

    def test_add_fragment_multiple(self):
        tfl = TextFile()
        self.assertEqual(len(tfl), 0)
        tfl.add_fragment(TextFragment(u"a1", Language.ENG, [u"fragment 1"]))
        self.assertEqual(len(tfl), 1)
        tfl.add_fragment(TextFragment(u"a2", Language.ENG, [u"fragment 2"]))
        self.assertEqual(len(tfl), 2)
        tfl.add_fragment(TextFragment(u"a3", Language.ENG, [u"fragment 3"]))
        self.assertEqual(len(tfl), 3)
        self.assertEqual(tfl.chars, 30)

    def test_get_subtree_bad(self):
        tfl = self.load()
        with self.assertRaises(TypeError):
            tfl.get_subtree("abc")
        with self.assertRaises(TypeError):
            tfl.get_subtree(None)
        with self.assertRaises(TypeError):
            tfl.get_subtree(tfl.fragments[0])

    def test_get_subtree(self):
        tfl = self.load(input_file_path=self.MPLAIN_FILE_PATH, fmt=TextFileFormat.MPLAIN, expected_length=5)
        children = tfl.fragments_tree.children
        self.assertEqual(len(children), 5)
        sub = tfl.get_subtree(children[0])
        self.assertEqual(len(sub), 1)
        sub = tfl.get_subtree(children[1])
        self.assertEqual(len(sub), 4)
        sub = tfl.get_subtree(children[2])
        self.assertEqual(len(sub), 4)
        sub = tfl.get_subtree(children[3])
        self.assertEqual(len(sub), 4)
        sub = tfl.get_subtree(children[4])
        self.assertEqual(len(sub), 2)

    def test_children_not_empty(self):
        tfl = self.load(input_file_path=self.MPLAIN_FILE_PATH, fmt=TextFileFormat.MPLAIN, expected_length=5)
        children = tfl.children_not_empty
        self.assertEqual(len(children), 5)

    def test_get_slice_no_args(self):
        tfl = self.load()
        sli = tfl.get_slice()
        self.assertEqual(len(sli), 15)
        self.assertEqual(sli.chars, 597)

    def test_get_slice_only_start(self):
        sli = self.load_and_slice(10, 5)
        self.assertEqual(sli.chars, 433)

    def test_get_slice_start_and_end(self):
        sli = self.load_and_slice(5, 5, 10)
        self.assertEqual(sli.chars, 226)

    def test_get_slice_start_greater_than_length(self):
        sli = self.load_and_slice(1, 100)
        self.assertEqual(sli.chars, 46)

    def test_get_slice_start_less_than_zero(self):
        sli = self.load_and_slice(15, -1)
        self.assertEqual(sli.chars, 597)

    def test_get_slice_end_greater_then_length(self):
        sli = self.load_and_slice(15, 0, 100)
        self.assertEqual(sli.chars, 597)

    def test_get_slice_end_less_than_zero(self):
        sli = self.load_and_slice(1, 0, -1)
        self.assertEqual(sli.chars, 1)

    def test_get_slice_end_less_than_start(self):
        sli = self.load_and_slice(1, 10, 5)
        self.assertEqual(sli.chars, 36)

    def test_filter_identity(self):
        fil = TextFilter()
        string_in = [u"abc"]
        string_out = fil.apply_filter(string_in)
        expected_out = string_in
        self.assertEqual(string_out, expected_out)

    def test_filter_ignore_regex_error(self):
        with self.assertRaises(ValueError):
            self.filter_ignore_regex("word[abc", [u"abc"], [u"abc"])

    def test_filter_ignore_regex_replace_empty(self):
        self.filter_ignore_regex("word", [u""], [u""])

    def test_filter_ignore_regex_replace_none(self):
        self.filter_ignore_regex("word", [None], [None])

    def test_filter_ignore_regex_no_match(self):
        self.filter_ignore_regex("word", [u"abc"], [u"abc"])

    def test_filter_ignore_regex_one_match(self):
        self.filter_ignore_regex("word", [u"abc word abc"], [u"abc abc"])

    def test_filter_ignore_regex_many_matches(self):
        self.filter_ignore_regex("word", [u"abc word word abc word abc"], [u"abc abc abc"])

    def test_filter_ignore_regex_strip(self):
        self.filter_ignore_regex("word", [u"word abc word"], [u"abc"])

    # The three patterns below are raw strings so that the backslashes reach
    # the regex engine without relying on invalid string escape sequences.
    def test_filter_ignore_regex_parenthesis(self):
        self.filter_ignore_regex(r"\(.*?\)", [u"(CHAR) bla bla bla"], [u"bla bla bla"])

    def test_filter_ignore_regex_brackets(self):
        self.filter_ignore_regex(r"\[.*?\]", [u"[CHAR] bla bla bla"], [u"bla bla bla"])

    def test_filter_ignore_regex_braces(self):
        self.filter_ignore_regex(r"\{.*?\}", [u"{CHAR} bla bla bla"], [u"bla bla bla"])

    def test_filter_ignore_regex_entire_match(self):
        self.filter_ignore_regex("word", [u"word"], [u""])

    def test_filter_transliterate_identity(self):
        self.filter_transliterate([u"worm"], [u"worm"])

    def test_filter_transliterate_error(self):
        with self.assertRaises(OSError):
            self.filter_transliterate([u"worm"], [u"worm"], self.NOT_EXISTING_PATH)

    def test_filter_transliterate_replace_empty(self):
        self.filter_transliterate([u""], [u""])

    def test_filter_transliterate_replace_none(self):
        self.filter_transliterate([None], [None])

    def test_filter_transliterate_replace_single(self):
        self.filter_transliterate([u"warm"], [u"wArm"])

    def test_filter_transliterate_replace_range(self):
        self.filter_transliterate([u"pill"], [u"pull"])

    def test_filter_transliterate_replace_single_unicode(self):
        self.filter_transliterate([u"wàrm"], [u"warm"])

    def test_filter_transliterate_replace_range_unicode(self):
        self.filter_transliterate([u"wàrèm"], [u"warem"])

    def test_filter_transliterate_replace_codepoint(self):
        self.filter_transliterate([u"Xylophon"], [u"xylophon"])

    def test_filter_transliterate_replace_codepoint_range(self):
        self.filter_transliterate([u"TUTTE"], [u"wwwwE"])

    def test_filter_transliterate_replace_codepoint_length(self):
        self.filter_transliterate([u"x" + u"\u0008" + u"z"], [u"xaz"])
        self.filter_transliterate([u"x" + u"\u0088" + u"z"], [u"xaz"])
        self.filter_transliterate([u"x" + u"\u0888" + u"z"], [u"xaz"])
        self.filter_transliterate([u"x" + u"\u8888" + u"z"], [u"xaz"])
        self.filter_transliterate([u"x" + u"\U00088888" + u"z"], [u"xaz"])
        self.filter_transliterate([u"x" + u"\U00108888" + u"z"], [u"xaz"])
def test_set_audio_file_path_absolute_error(self):
    """Assigning a non-existing audio file path to a Task is expected to raise OSError."""
    new_task = Task()
    missing_audio_path = gf.absolute_path("not_existing.mp3", __file__)
    with self.assertRaises(OSError):
        new_task.audio_file_path_absolute = missing_audio_path
def test_analyze(self):
    """Analyze each container listed in FILES and check the length of the resulting job."""
    for entry in self.FILES:
        container_path = gf.absolute_path(entry["path"], __file__)
        container = Container(container_path)
        analyzed_job = AnalyzeContainer(container).analyze()
        self.assertEqual(len(analyzed_job), entry["length"])
class TestContainer(unittest.TestCase):
    """Tests for Container: format guessing, entry listing/reading/finding, safety checks, (de)compression."""

    NOT_EXISTING = gf.absolute_path("not_existing.zip", __file__)
    EMPTY_FILES = [
        gf.absolute_path("res/container/empty_file.epub", __file__),
        gf.absolute_path("res/container/empty_file.tar", __file__),
        gf.absolute_path("res/container/empty_file.tar.bz2", __file__),
        gf.absolute_path("res/container/empty_file.tar.gz", __file__),
        gf.absolute_path("res/container/empty_file.zip", __file__)
    ]
    EXPECTED_ENTRIES = [
        "assets/p001.mp3",
        "assets/p001.xhtml",
        "assets/p002.mp3",
        "assets/p002.xhtml",
        "assets/p003.mp3",
        "assets/p003.xhtml",
        "config.txt"
    ]
    FILES = {
        "epub": {
            "path": gf.absolute_path("res/container/job.epub", __file__),
            "format": ContainerFormat.EPUB,
            "config_size": 599
        },
        "tar": {
            "path": gf.absolute_path("res/container/job.tar", __file__),
            "format": ContainerFormat.TAR,
            "config_size": 599
        },
        "tar_bz2": {
            "path": gf.absolute_path("res/container/job.tar.bz2", __file__),
            "format": ContainerFormat.TAR_BZ2,
            "config_size": 599
        },
        # This key must differ from "tar": a duplicate key in a dict literal
        # silently replaces the earlier entry, which would drop job.tar
        # from every test iterating over FILES.
        "tar_gz": {
            "path": gf.absolute_path("res/container/job.tar.gz", __file__),
            "format": ContainerFormat.TAR_GZ,
            "config_size": 599
        },
        "unpacked": {
            "path": gf.absolute_path("res/container/job", __file__),
            "format": ContainerFormat.UNPACKED,
            "config_size": 599
        },
        "zip": {
            "path": gf.absolute_path("res/container/job.zip", __file__),
            "format": ContainerFormat.ZIP,
            "config_size": 599
        },
        "zip_utf8": {
            "path": gf.absolute_path("res/container/job_utf8.zip", __file__),
            "format": ContainerFormat.ZIP,
            "config_size": 633
        },
    }

    def test_path_none(self):
        with self.assertRaises(TypeError):
            Container(file_path=None)

    def test_invalid_container_format(self):
        with self.assertRaises(ValueError):
            Container(file_path=self.FILES["zip"]["path"], container_format="foo")

    def test_constructor(self):
        for f in self.FILES.values():
            file_path = f["path"]
            container_format = f["format"]
            cont = Container(file_path, container_format)
            self.assertEqual(cont.file_path, file_path)
            self.assertEqual(cont.container_format, container_format)

    def test_guess_container(self):
        for f in self.FILES.values():
            cont = Container(f["path"])
            self.assertEqual(cont.container_format, f["format"])

    def test_exists_file_not_existing(self):
        cont = Container(self.NOT_EXISTING)
        self.assertFalse(cont.exists())

    def test_exists_empty_file(self):
        for f in self.EMPTY_FILES:
            cont = Container(f)
            self.assertTrue(cont.exists())

    def test_exists_empty_directory(self):
        output_path = gf.tmp_directory()
        try:
            cont = Container(output_path)
            self.assertTrue(cont.exists())
        finally:
            # Remove the temporary directory even if the assertion fails.
            gf.delete_directory(output_path)

    def test_entries_file_not_existing(self):
        cont = Container(self.NOT_EXISTING)
        with self.assertRaises(TypeError):
            _ = cont.entries

    def test_entries_empty_file(self):
        for f in self.EMPTY_FILES:
            cont = Container(f)
            with self.assertRaises(OSError):
                self.assertEqual(len(cont.entries), 0)

    def test_entries_empty_directory(self):
        output_path = gf.tmp_directory()
        try:
            cont = Container(output_path)
            self.assertEqual(len(cont.entries), 0)
        finally:
            gf.delete_directory(output_path)

    def test_entries(self):
        for f in self.FILES.values():
            cont = Container(f["path"])
            self.assertEqual(cont.entries, self.EXPECTED_ENTRIES)

    def test_entries_unpacked_relative(self):
        f = self.FILES["unpacked"]
        cont = Container(f["path"])
        self.assertEqual(cont.entries, self.EXPECTED_ENTRIES)

    def test_entries_unpacked_absolute(self):
        f = self.FILES["unpacked"]
        cont = Container(os.path.abspath(f["path"]))
        self.assertEqual(cont.entries, self.EXPECTED_ENTRIES)

    def test_is_safe_not_existing(self):
        cont = Container(self.NOT_EXISTING)
        with self.assertRaises(TypeError):
            self.assertTrue(cont.is_safe)

    def test_is_safe_empty_file(self):
        for f in self.EMPTY_FILES:
            cont = Container(f)
            with self.assertRaises(OSError):
                self.assertTrue(cont.is_safe)

    def test_is_safe_empty_directory(self):
        output_path = gf.tmp_directory()
        try:
            cont = Container(output_path)
            self.assertTrue(cont.is_safe)
        finally:
            gf.delete_directory(output_path)

    def test_is_safe(self):
        for f in self.FILES.values():
            cont = Container(f["path"])
            self.assertTrue(cont.is_safe)

    def test_is_entry_safe_false(self):
        cont = Container(self.FILES["unpacked"]["path"])
        for entry in [
                "../foo",
                "/foo",
                "foo/../../../../../../../../../../../../bar",
                "foo/../../../../../bar/../../../../../../baz"
        ]:
            self.assertFalse(cont.is_entry_safe(entry))

    def test_is_entry_safe_true(self):
        cont = Container(self.FILES["unpacked"]["path"])
        for entry in [
                "foo",
                "foo/bar",
                "foo/../bar",
                "foo/../bar/baz",
                "foo/../bar/../baz",
                "./foo",
                "./foo/bar",
                "foo/./bar"
        ]:
            self.assertTrue(cont.is_entry_safe(entry))

    def test_read_entry_not_existing(self):
        cont = Container(self.NOT_EXISTING)
        with self.assertRaises(TypeError):
            self.assertIsNone(cont.read_entry(self.EXPECTED_ENTRIES[0]))

    def test_read_entry_empty_file(self):
        for f in self.EMPTY_FILES:
            cont = Container(f)
            with self.assertRaises(OSError):
                self.assertIsNone(cont.read_entry(self.EXPECTED_ENTRIES[0]))

    def test_read_entry_empty_directory(self):
        output_path = gf.tmp_directory()
        try:
            cont = Container(output_path)
            self.assertIsNone(cont.read_entry(self.EXPECTED_ENTRIES[0]))
        finally:
            gf.delete_directory(output_path)

    def test_read_entry_existing(self):
        entry = "config.txt"
        for f in self.FILES.values():
            cont = Container(f["path"])
            result = cont.read_entry(entry)
            self.assertIsNotNone(result)
            self.assertEqual(len(result), f["config_size"])

    def test_find_entry_not_existing(self):
        cont = Container(self.NOT_EXISTING)
        with self.assertRaises(TypeError):
            self.assertIsNone(cont.find_entry(self.EXPECTED_ENTRIES[0]))

    def test_find_entry_empty_file(self):
        for f in self.EMPTY_FILES:
            cont = Container(f)
            with self.assertRaises(OSError):
                self.assertIsNone(cont.find_entry(self.EXPECTED_ENTRIES[0]))

    def test_find_entry_empty_directory(self):
        output_path = gf.tmp_directory()
        try:
            cont = Container(output_path)
            self.assertIsNone(cont.find_entry(self.EXPECTED_ENTRIES[0]))
        finally:
            gf.delete_directory(output_path)

    def test_find_entry_existing(self):
        entry = "config.txt"
        for f in self.FILES.values():
            cont = Container(f["path"])
            self.assertTrue(cont.find_entry(entry, exact=True))
            self.assertTrue(cont.find_entry(entry, exact=False))

    def test_find_entry_existing_not_exact(self):
        entry = "p001.xhtml"
        for f in self.FILES.values():
            cont = Container(f["path"])
            self.assertFalse(cont.find_entry(entry, exact=True))
            self.assertTrue(cont.find_entry(entry, exact=False))

    def test_read_entry_missing(self):
        entry = "config_not_existing.txt"
        for f in self.FILES.values():
            cont = Container(f["path"])
            result = cont.read_entry(entry)
            self.assertIsNone(result)

    def test_find_entry_missing(self):
        entry = "config_not_existing.txt"
        for f in self.FILES.values():
            cont = Container(f["path"])
            self.assertFalse(cont.find_entry(entry, exact=True))
            self.assertFalse(cont.find_entry(entry, exact=False))

    def test_decompress(self):
        for f in self.FILES.values():
            output_path = gf.tmp_directory()
            try:
                cont = Container(f["path"])
                cont.decompress(output_path)
                copy = Container(output_path, ContainerFormat.UNPACKED)
                self.assertEqual(copy.entries, self.EXPECTED_ENTRIES)
            finally:
                gf.delete_directory(output_path)

    def test_compress_unpacked(self):
        input_path = self.FILES["unpacked"]["path"]
        output_path = gf.tmp_directory()
        try:
            cont = Container(output_path, ContainerFormat.UNPACKED)
            cont.compress(input_path)
            self.assertFalse(os.path.isfile(output_path))
            copy = Container(output_path, ContainerFormat.UNPACKED)
            self.assertEqual(copy.entries, self.EXPECTED_ENTRIES)
        finally:
            gf.delete_directory(output_path)

    def test_compress_file(self):
        input_path = self.FILES["unpacked"]["path"]
        for f in self.FILES.values():
            fmt = f["format"]
            if fmt != ContainerFormat.UNPACKED:
                handler, output_path = gf.tmp_file(suffix="." + fmt)
                try:
                    cont = Container(output_path, fmt)
                    cont.compress(input_path)
                    self.assertTrue(os.path.isfile(output_path))
                    copy = Container(output_path, fmt)
                    self.assertEqual(copy.entries, self.EXPECTED_ENTRIES)
                finally:
                    gf.delete_file(handler, output_path)
def test_wizard_analyze_valid(self):
    """Analyze the first container in FILES with CONFIG_STRING and check the job length."""
    first_entry = self.FILES[0]
    container_path = gf.absolute_path(first_entry["path"], __file__)
    container = Container(container_path)
    analyzed_job = AnalyzeContainer(container).analyze(config_string=self.CONFIG_STRING)
    self.assertIsNotNone(analyzed_job)
    self.assertEqual(len(analyzed_job), first_entry["length"])
def perform(self, input_file_path, speech_length, nonspeech_length):
    """Run VAD on the given audio file and check the number of speech and nonspeech intervals.

    :param input_file_path: path of the audio file, relative to this file
    :param speech_length: expected number of speech intervals
    :param nonspeech_length: expected number of nonspeech intervals
    """
    absolute_input_path = gf.absolute_path(input_file_path, __file__)
    mfcc_file = AudioFileMFCC(absolute_input_path)
    mfcc_file.run_vad()
    # Speech intervals are checked first, then nonspeech ones.
    for is_speech, expected_count in ((True, speech_length), (False, nonspeech_length)):
        found = mfcc_file.intervals(speech=is_speech)
        self.assertEqual(len(found), expected_count)
def test_write_not_existing_path(self):
    """Writing the loaded audio file to NOT_EXISTING_FILE is expected to raise OSError."""
    target_path = gf.absolute_path(self.NOT_EXISTING_FILE, __file__)
    loaded_audio = self.load(self.AUDIO_FILE_WAVE, rs=True)
    # Callable form of assertRaises: equivalent to the context-manager form.
    self.assertRaises(OSError, loaded_audio.write, target_path)