def test_config_txt_to_string(self):
    # Each pair is (TXT config contents, expected config string).
    cases = (
        (u"", u""),
        (u"k1=v1", u"k1=v1"),
        (u"k1=v1\n\n", u"k1=v1"),
        (u"k1=v1\nk2=v2", u"k1=v1|k2=v2"),
        (u"k1=v1\nk2=v2\n\n\nk3=v3\n", u"k1=v1|k2=v2|k3=v3"),
        (u" k1=v1\n k2=v2 \n\n\nk3=v3 \n", u"k1=v1|k2=v2|k3=v3"),
        (u"k1=v1\nk2\nk3=v3", "k1=v1|k2|k3=v3"),
    )
    # None must be passed through as None.
    self.assertIsNone(gf.config_txt_to_string(None))
    for txt_contents, expected_string in cases:
        self.assertEqual(gf.config_txt_to_string(txt_contents), expected_string)
def check_config_txt(self, contents, is_config_string=False):
    """
    Validate a TXT config, given either as the contents of a TXT config
    file (``is_config_string`` is ``False``) or as a TXT config string
    (``is_config_string`` is ``True``): it must be well-formed
    and it must contain all the required parameters.

    The outcome is stored in ``self.result`` and returned. If the safety
    checks are disabled for this method, a fresh result is returned
    without performing any check.

    :param string contents: the TXT config file contents or TXT config string
    :param bool is_config_string: if ``True``, contents is a config string
    :rtype: :class:`~aeneas.validator.ValidatorResult`
    """
    self.log(u"Checking contents TXT config file")
    self.result = ValidatorResult()
    if self._are_safety_checks_disabled(u"check_config_txt"):
        return self.result

    if gf.is_bytes(contents):
        # byte strings are validated first, then converted to Unicode
        self.log(u"Checking that contents is well formed")
        self.check_raw_string(contents, is_bstring=True)
        if not self.result.passed:
            return self.result
        contents = gf.safe_unicode(contents)

    config_string = contents
    if not is_config_string:
        self.log(u"Converting file contents to config string")
        config_string = gf.config_txt_to_string(contents)

    self.log(u"Checking required parameters")
    required = self.TXT_REQUIRED_PARAMETERS
    found = gf.config_string_to_dict(config_string, self.result)
    self._check_required_parameters(required, found)

    self.log([u"Checking contents: returning %s", self.result.passed])
    return self.result
def check_contents_txt_config_file(self, config_contents, convert_to_string=True):
    """
    Check whether the given TXT config contents (or config string)
    is well formed and contains all the requested parameters.

    If ``convert_to_string`` is ``True``, ``config_contents`` is first
    converted to a config string; otherwise it is treated as a config
    string already and checked as is.

    :param config_contents: the TXT config file contents or a config string
    :type  config_contents: string
    :param convert_to_string: the ``config_contents`` must be converted
                              to a config string
    :type  convert_to_string: bool
    :rtype: :class:`aeneas.validator.ValidatorResult`
    """
    self._log("Checking contents TXT config file")
    result = ValidatorResult()

    # NOTE: BOM removal (gf.remove_bom) is not performed here.
    if convert_to_string:
        self._log("Converting file contents to config string")
        config_string = gf.config_txt_to_string(config_contents)
    else:
        # The caller passed a config string already: use it unchanged.
        # Without this assignment ``config_string`` would be unbound below.
        config_string = config_contents

    # check if it is well encoded
    self._log("Checking that string is well encoded")
    if not self.check_string_well_encoded(config_string):
        msg = "The TXT config is not well encoded"
        result.passed = False
        result.add_error(msg)
        self._log(msg)
        return result

    # check required parameters
    self._log("Checking required parameters")
    required_parameters = [
        gc.PPN_JOB_IS_HIERARCHY_TYPE,
        gc.PPN_JOB_IS_HIERARCHY_PREFIX,
        gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH,
        gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX,
        gc.PPN_JOB_IS_TEXT_FILE_FORMAT,
        gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH,
        gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX,
        gc.PPN_JOB_OS_FILE_NAME,
        gc.PPN_JOB_OS_CONTAINER_FORMAT,
        gc.PPN_JOB_OS_HIERARCHY_TYPE,
        gc.PPN_JOB_OS_HIERARCHY_PREFIX,
        gc.PPN_TASK_OS_FILE_NAME,
        gc.PPN_TASK_OS_FILE_FORMAT,
        gc.PPN_JOB_LANGUAGE,
    ]
    # parsing problems and missing parameters are recorded in ``result``
    parameters = gf.config_string_to_dict(config_string, result)
    self._check_required_parameters(required_parameters, parameters, result)

    # return result
    self._log(["Checking contents TXT config file: returning %s", result.passed])
    return result
def _analyze_txt_config(self, config_string=None):
    """
    Build the job described by a TXT config for the current container.

    When ``config_string`` is ``None``, the configuration is read from
    the TXT config entry of the container and converted to a config
    string; otherwise the given config string is used as is and paths
    are resolved relative to an empty directory.

    One task is added to the job for each text/audio pair found:
    in a flat hierarchy, for each match returned by
    ``_match_files_flat_hierarchy``; in a paged hierarchy, for each
    matched directory containing exactly one text file and
    exactly one audio file.

    :param string config_string: the configuration string
    :rtype: :class:`~aeneas.job.Job`
    """
    self.log(u"Analyzing container with TXT config string")
    if config_string is not None:
        self.log([u"Analyzing container with TXT config string '%s'", config_string])
        config_dir = ""
    else:
        self.log(u"Analyzing container with TXT config file")
        txt_entry = self.container.entry_config_txt
        self.log([u"Found TXT config entry '%s'", txt_entry])
        config_dir = os.path.dirname(txt_entry)
        self.log([u"Directory of TXT config entry: '%s'", config_dir])
        self.log([u"Reading TXT config entry: '%s'", txt_entry])
        raw_config = self.container.read_entry(txt_entry)
        self.log(u"Converting config contents to config string")
        config_string = gf.config_txt_to_string(gf.safe_unicode(raw_config))

    self.log(u"Creating the Job object")
    job = Job(config_string)

    self.log(u"Getting entries")
    entries = self.container.entries

    self.log(u"Converting config string into config dict")
    parameters = gf.config_string_to_dict(config_string)

    # root directories for the task assets and for the sync maps
    self.log(u"Calculating the path of the tasks root directory")
    tasks_root = gf.norm_join(config_dir, parameters[gc.PPN_JOB_IS_HIERARCHY_PREFIX])
    self.log([u"Path of the tasks root directory: '%s'", tasks_root])

    self.log(u"Calculating the path of the sync map root directory")
    sync_map_root = gf.norm_join(config_dir, parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX])
    output_hierarchy_type = parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
    self.log([u"Path of the sync map root directory: '%s'", sync_map_root])

    # relative path and compiled file name regex for text files...
    text_relative_path = parameters[gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH]
    self.log([u"Relative path for text file: '%s'", text_relative_path])
    text_pattern = parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX]
    text_regex = re.compile(r"" + text_pattern)
    self.log([u"Regex for text file: '%s'", text_pattern])

    # ...and for audio files
    audio_relative_path = parameters[gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH]
    self.log([u"Relative path for audio file: '%s'", audio_relative_path])
    audio_pattern = parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX]
    audio_regex = re.compile(r"" + audio_pattern)
    self.log([u"Regex for audio file: '%s'", audio_pattern])

    input_hierarchy_type = parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE]

    if input_hierarchy_type == HierarchyType.FLAT:
        self.log(u"Looking for text/audio pairs in flat hierarchy")
        text_files = self._find_files(
            entries, tasks_root, text_relative_path, text_regex
        )
        self.log([u"Found text files: '%s'", text_files])
        audio_files = self._find_files(
            entries, tasks_root, audio_relative_path, audio_regex
        )
        self.log([u"Found audio files: '%s'", audio_files])
        self.log(u"Matching files in flat hierarchy...")
        flat_matches = self._match_files_flat_hierarchy(text_files, audio_files)
        self.log(u"Matching files in flat hierarchy... done")
        for task_info in flat_matches:
            self.log([u"Creating task: '%s'", str(task_info)])
            job.add_task(
                self._create_task(
                    task_info, config_string, sync_map_root, output_hierarchy_type
                )
            )

    if input_hierarchy_type == HierarchyType.PAGED:
        self.log(u"Looking for text/audio pairs in paged hierarchy")
        # subdirectories of the tasks root directory whose name matches
        # gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX
        page_directories = self._match_directories(
            entries,
            tasks_root,
            parameters[gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX]
        )
        for page_directory in page_directories:
            # rebuild the full path of this directory
            page_path = gf.norm_join(tasks_root, page_directory)
            self.log([u"Looking for text/audio pairs in directory '%s'", page_path])

            # look for text and audio files inside it
            text_files = self._find_files(
                entries, page_path, text_relative_path, text_regex
            )
            self.log([u"Found text files: '%s'", text_files])
            audio_files = self._find_files(
                entries, page_path, audio_relative_path, audio_regex
            )
            self.log([u"Found audio files: '%s'", audio_files])

            # a Task is created only for an unambiguous pair
            text_count = len(text_files)
            audio_count = len(audio_files)
            if (text_count == 1) and (audio_count == 1):
                self.log([u"Exactly one text file and one audio file in '%s'", page_directory])
                task_info = [page_directory, text_files[0], audio_files[0]]
                self.log([u"Creating task: '%s'", str(task_info)])
                job.add_task(
                    self._create_task(
                        task_info, config_string, sync_map_root, output_hierarchy_type
                    )
                )
            elif text_count > 1:
                self.log([u"More than one text file in '%s'", page_directory])
            elif audio_count > 1:
                self.log([u"More than one audio file in '%s'", page_directory])
            else:
                self.log([u"No text nor audio file in '%s'", page_directory])

    return job
def check_contents_txt_config_file(
        self,
        config_contents,
        convert_to_string=True
    ):
    """
    Check whether the given TXT config contents (or config string)
    is well formed and contains all the requested parameters.

    If ``convert_to_string`` is ``True``, ``config_contents`` is first
    converted to a config string; otherwise it is treated as a config
    string already and checked as is.

    :param config_contents: the TXT config file contents or a config string
    :type  config_contents: string
    :param convert_to_string: the ``config_contents`` must be converted
                              to a config string
    :type  convert_to_string: bool
    :rtype: :class:`aeneas.validator.ValidatorResult`
    """
    self._log("Checking contents TXT config file")
    result = ValidatorResult()

    # NOTE: BOM removal (gf.remove_bom) is not performed here.
    if convert_to_string:
        self._log("Converting file contents to config string")
        config_string = gf.config_txt_to_string(config_contents)
    else:
        # The caller passed a config string already: use it unchanged.
        # Without this assignment ``config_string`` would be unbound below.
        config_string = config_contents

    # check if it is well encoded
    self._log("Checking that string is well encoded")
    if not self.check_string_well_encoded(config_string):
        msg = "The TXT config is not well encoded"
        result.passed = False
        result.add_error(msg)
        self._log(msg)
        return result

    # check required parameters
    self._log("Checking required parameters")
    required_parameters = [
        gc.PPN_JOB_IS_HIERARCHY_TYPE,
        gc.PPN_JOB_IS_HIERARCHY_PREFIX,
        gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH,
        gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX,
        gc.PPN_JOB_IS_TEXT_FILE_FORMAT,
        gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH,
        gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX,
        gc.PPN_JOB_OS_FILE_NAME,
        gc.PPN_JOB_OS_CONTAINER_FORMAT,
        gc.PPN_JOB_OS_HIERARCHY_TYPE,
        gc.PPN_JOB_OS_HIERARCHY_PREFIX,
        gc.PPN_TASK_OS_FILE_NAME,
        gc.PPN_TASK_OS_FILE_FORMAT,
        gc.PPN_JOB_LANGUAGE,
    ]
    # parsing problems and missing parameters are recorded in ``result``
    parameters = gf.config_string_to_dict(config_string, result)
    self._check_required_parameters(required_parameters, parameters, result)

    # return result
    self._log(["Checking contents TXT config file: returning %s", result.passed])
    return result
def _analyze_txt_config(self, config_string=None):
    """
    Build the job described by a TXT config for the current container.

    When ``config_string`` is ``None``, the configuration is read from
    the TXT config entry of the container and converted to a config
    string; otherwise the given config string is used as is and paths
    are resolved relative to an empty directory.

    One task is added to the job for each text/audio pair found:
    in a flat hierarchy, for each match returned by
    ``_match_files_flat_hierarchy``; in a paged hierarchy, for each
    matched directory containing exactly one text file and
    exactly one audio file.

    :param string config_string: the configuration string
    :rtype: :class:`~aeneas.job.Job`
    """
    self.log(u"Analyzing container with TXT config string")
    if config_string is not None:
        self.log([u"Analyzing container with TXT config string '%s'", config_string])
        config_dir = ""
    else:
        self.log(u"Analyzing container with TXT config file")
        txt_entry = self.container.entry_config_txt
        self.log([u"Found TXT config entry '%s'", txt_entry])
        config_dir = os.path.dirname(txt_entry)
        self.log([u"Directory of TXT config entry: '%s'", config_dir])
        self.log([u"Reading TXT config entry: '%s'", txt_entry])
        raw_config = self.container.read_entry(txt_entry)
        self.log(u"Converting config contents to config string")
        config_string = gf.config_txt_to_string(gf.safe_unicode(raw_config))

    self.log(u"Creating the Job object")
    job = Job(config_string)

    self.log(u"Getting entries")
    entries = self.container.entries

    self.log(u"Converting config string into config dict")
    parameters = gf.config_string_to_dict(config_string)

    # root directories for the task assets and for the sync maps
    self.log(u"Calculating the path of the tasks root directory")
    tasks_root = gf.norm_join(config_dir, parameters[gc.PPN_JOB_IS_HIERARCHY_PREFIX])
    self.log([u"Path of the tasks root directory: '%s'", tasks_root])

    self.log(u"Calculating the path of the sync map root directory")
    sync_map_root = gf.norm_join(config_dir, parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX])
    output_hierarchy_type = parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
    self.log([u"Path of the sync map root directory: '%s'", sync_map_root])

    # relative path and compiled file name regex for text files...
    text_relative_path = parameters[gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH]
    self.log([u"Relative path for text file: '%s'", text_relative_path])
    text_pattern = parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX]
    text_regex = re.compile(r"" + text_pattern)
    self.log([u"Regex for text file: '%s'", text_pattern])

    # ...and for audio files
    audio_relative_path = parameters[gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH]
    self.log([u"Relative path for audio file: '%s'", audio_relative_path])
    audio_pattern = parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX]
    audio_regex = re.compile(r"" + audio_pattern)
    self.log([u"Regex for audio file: '%s'", audio_pattern])

    input_hierarchy_type = parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE]

    if input_hierarchy_type == HierarchyType.FLAT:
        self.log(u"Looking for text/audio pairs in flat hierarchy")
        text_files = self._find_files(
            entries, tasks_root, text_relative_path, text_regex
        )
        self.log([u"Found text files: '%s'", text_files])
        audio_files = self._find_files(
            entries, tasks_root, audio_relative_path, audio_regex
        )
        self.log([u"Found audio files: '%s'", audio_files])
        self.log(u"Matching files in flat hierarchy...")
        flat_matches = self._match_files_flat_hierarchy(text_files, audio_files)
        self.log(u"Matching files in flat hierarchy... done")
        for task_info in flat_matches:
            self.log([u"Creating task: '%s'", str(task_info)])
            job.add_task(
                self._create_task(
                    task_info, config_string, sync_map_root, output_hierarchy_type
                )
            )

    if input_hierarchy_type == HierarchyType.PAGED:
        self.log(u"Looking for text/audio pairs in paged hierarchy")
        # subdirectories of the tasks root directory whose name matches
        # gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX
        page_directories = self._match_directories(
            entries,
            tasks_root,
            parameters[gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX]
        )
        for page_directory in page_directories:
            # rebuild the full path of this directory
            page_path = gf.norm_join(tasks_root, page_directory)
            self.log([u"Looking for text/audio pairs in directory '%s'", page_path])

            # look for text and audio files inside it
            text_files = self._find_files(
                entries, page_path, text_relative_path, text_regex
            )
            self.log([u"Found text files: '%s'", text_files])
            audio_files = self._find_files(
                entries, page_path, audio_relative_path, audio_regex
            )
            self.log([u"Found audio files: '%s'", audio_files])

            # a Task is created only for an unambiguous pair
            text_count = len(text_files)
            audio_count = len(audio_files)
            if (text_count == 1) and (audio_count == 1):
                self.log([u"Exactly one text file and one audio file in '%s'", page_directory])
                task_info = [page_directory, text_files[0], audio_files[0]]
                self.log([u"Creating task: '%s'", str(task_info)])
                job.add_task(
                    self._create_task(
                        task_info, config_string, sync_map_root, output_hierarchy_type
                    )
                )
            elif text_count > 1:
                self.log([u"More than one text file in '%s'", page_directory])
            elif audio_count > 1:
                self.log([u"More than one audio file in '%s'", page_directory])
            else:
                self.log([u"No text nor audio file in '%s'", page_directory])

    return job
def _analyze_txt_config(self, config_string=None):
    """
    Analyze the given container and return the corresponding job.

    If ``config_string`` is ``None``,
    try reading it from the TXT config file inside the container.
    Otherwise the given config string is used as is and paths are
    resolved relative to an empty directory.

    One task is added to the job for each text/audio pair found:
    in a flat hierarchy, for each match returned by
    ``_match_files_flat_hierarchy``; in a paged hierarchy, for each
    matched directory containing exactly one text file and
    exactly one audio file.

    :param config_string: the configuration string
    :type  config_string: string
    :rtype: :class:`aeneas.job.Job`
    """
    # TODO break this function down into smaller functions
    self._log("Analyzing container with TXT config string")

    # identity test: only a missing config string triggers reading the file
    if config_string is None:
        self._log("Analyzing container with TXT config file")
        config_entry = self.container.entry_config_txt
        self._log("Found TXT config entry '%s'" % config_entry)
        config_dir = os.path.dirname(config_entry)
        self._log("Directory of TXT config entry: '%s'" % config_dir)
        self._log("Reading TXT config entry: '%s'" % config_entry)
        config_contents = self.container.read_entry(config_entry)
        self._log("Converting config contents to config string")
        config_string = gf.config_txt_to_string(config_contents)
    else:
        self._log("Analyzing container with TXT config string '%s'" % config_string)
        config_dir = ""

    # create the Job object to be returned
    self._log("Creating the Job object")
    job = Job(config_string)

    # get the entries in this container
    self._log("Getting entries")
    entries = self.container.entries()

    # convert the config string to dict
    self._log("Converting config string into config dict")
    parameters = gf.config_string_to_dict(config_string)

    # compute the root directory for the task assets
    self._log("Calculating the path of the tasks root directory")
    tasks_root_directory = gf.norm_join(
        config_dir,
        parameters[gc.PPN_JOB_IS_HIERARCHY_PREFIX]
    )
    self._log("Path of the tasks root directory: '%s'" % tasks_root_directory)

    # compute the root directory for the sync map files
    self._log("Calculating the path of the sync map root directory")
    sync_map_root_directory = gf.norm_join(
        config_dir,
        parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX]
    )
    job_os_hierarchy_type = parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
    self._log("Path of the sync map root directory: '%s'" % sync_map_root_directory)

    # prepare relative path and file name regex for text and audio files
    text_file_relative_path = parameters[gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH]
    self._log("Relative path for text file: '%s'" % text_file_relative_path)
    text_file_name_regex = re.compile(parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX])
    self._log("Regex for text file: '%s'" % parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX])
    audio_file_relative_path = parameters[gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH]
    self._log("Relative path for audio file: '%s'" % audio_file_relative_path)
    audio_file_name_regex = re.compile(parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX])
    self._log("Regex for audio file: '%s'" % parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX])

    # flat hierarchy
    if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.FLAT:
        self._log("Looking for text/audio pairs in flat hierarchy")
        text_files = self._find_files(
            entries,
            tasks_root_directory,
            text_file_relative_path,
            text_file_name_regex
        )
        self._log("Found text files: '%s'" % str(text_files))
        audio_files = self._find_files(
            entries,
            tasks_root_directory,
            audio_file_relative_path,
            audio_file_name_regex
        )
        self._log("Found audio files: '%s'" % str(audio_files))

        self._log("Matching files in flat hierarchy...")
        matched_tasks = self._match_files_flat_hierarchy(
            text_files,
            audio_files
        )
        self._log("Matching files in flat hierarchy... done")

        for task_info in matched_tasks:
            self._log("Creating task: '%s'" % str(task_info))
            task = self._create_task(
                task_info,
                config_string,
                sync_map_root_directory,
                job_os_hierarchy_type
            )
            job.add_task(task)

    # paged hierarchy
    if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.PAGED:
        self._log("Looking for text/audio pairs in paged hierarchy")
        # find all subdirectories of tasks_root_directory
        # that match gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX
        matched_directories = self._match_directories(
            entries,
            tasks_root_directory,
            parameters[gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX]
        )
        for matched_directory in matched_directories:
            # rebuild the full path
            matched_directory_full_path = gf.norm_join(
                tasks_root_directory,
                matched_directory
            )
            self._log("Looking for text/audio pairs in directory '%s'" % matched_directory_full_path)

            # look for text and audio files there
            text_files = self._find_files(
                entries,
                matched_directory_full_path,
                text_file_relative_path,
                text_file_name_regex
            )
            self._log("Found text files: '%s'" % str(text_files))
            audio_files = self._find_files(
                entries,
                matched_directory_full_path,
                audio_file_relative_path,
                audio_file_name_regex
            )
            self._log("Found audio files: '%s'" % str(audio_files))

            # if we have found exactly one text and one audio file,
            # create a Task
            if (len(text_files) == 1) and (len(audio_files) == 1):
                self._log("Exactly one text file and one audio file in '%s'" % matched_directory)
                task_info = [
                    matched_directory,
                    text_files[0],
                    audio_files[0]
                ]
                self._log("Creating task: '%s'" % str(task_info))
                task = self._create_task(
                    task_info,
                    config_string,
                    sync_map_root_directory,
                    job_os_hierarchy_type
                )
                job.add_task(task)
            elif len(text_files) > 1:
                self._log("More than one text file in '%s'" % matched_directory)
            elif len(audio_files) > 1:
                self._log("More than one audio file in '%s'" % matched_directory)
            else:
                self._log("No text nor audio file in '%s'" % matched_directory)

    # return the Job
    return job