def _compute_sync_map_file_path(self, root, hierarchy_type, custom_id, file_name):
    """
    Compute the sync map file path inside the output container.

    :param root: the root of the sync map files inside the container
    :type  root: string (path)
    :param hierarchy_type: type of job output hierarchy
    :type  hierarchy_type: :class:`aeneas.hierarchytype.HierarchyType`
    :param custom_id: the task custom id (flat) or page directory name (paged)
    :type  custom_id: string
    :param file_name: the output file name for the sync map
    :type  file_name: string
    :rtype: string (path)
    """
    prefix = root
    if hierarchy_type == HierarchyType.PAGED:
        prefix = gf.norm_join(prefix, custom_id)
    file_name_joined = gf.norm_join(prefix, file_name)
    return self._replace_placeholder(file_name_joined, custom_id)
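# A minimal standalone sketch (hypothetical names, not part of the class)
# illustrating the two layouts computed above, assuming gf.norm_join behaves
# like os.path.normpath(os.path.join(...)):
#   flat:  root/file_name            (custom_id only fills placeholders)
#   paged: root/custom_id/file_name  (one subdirectory per page)
import os

def sketch_sync_map_path(root, paged, custom_id, file_name):
    prefix = os.path.join(root, custom_id) if paged else root
    return os.path.normpath(os.path.join(prefix, file_name))

# sketch_sync_map_path("OS", False, "t001", "t001.smil") => "OS/t001.smil"
# sketch_sync_map_path("OS", True, "p001", "page.smil")  => "OS/p001/page.smil"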
def _find_files(self, entries, root, relative_path, file_name_regex):
    """
    Return the elements in entries that

    1. are in ``root/relative_path``, and
    2. match ``file_name_regex``.

    :param list entries: the list of entries (file paths) in the container
    :param string root: the root directory of the container
    :param string relative_path: the relative path in which we must search
    :param regex file_name_regex: the regex matching the desired file names
    :rtype: list of strings (path)
    """
    self.log([u"Finding files within root: '%s'", root])
    target = root
    if relative_path is not None:
        self.log([u"Joining relative path: '%s'", relative_path])
        target = gf.norm_join(root, relative_path)
    self.log([u"Finding files within target: '%s'", target])
    files = []
    target_len = len(target)
    for entry in entries:
        if entry.startswith(target):
            self.log([u"Examining entry: '%s'", entry])
            entry_suffix = entry[target_len + 1:]
            self.log([u"Examining entry suffix: '%s'", entry_suffix])
            if re.search(file_name_regex, entry_suffix) is not None:
                self.log([u"Match: '%s'", entry])
                files.append(entry)
            else:
                self.log([u"No match: '%s'", entry])
    return sorted(files)
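# A self-contained illustration (hypothetical data) of the selection logic
# above: the prefix check is purely string-based, and the regex is applied to
# the path suffix *below* the target directory, not to the full entry path.
import os
import re

def sketch_find_files(entries, root, relative_path, file_name_regex):
    target = root
    if relative_path is not None:
        target = os.path.normpath(os.path.join(root, relative_path))
    return sorted(
        e for e in entries
        if e.startswith(target) and re.search(file_name_regex, e[len(target) + 1:])
    )

# sketch_find_files(
#     ["job/p001/text.xhtml", "job/p001/audio.mp3"],
#     "job/p001", None, re.compile(r"\.xhtml$")
# ) => ["job/p001/text.xhtml"]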
def test_norm_join(self):
    tests = [
        (None, None, "."),
        (None, "", "."),
        (None, "/foo", "/foo"),
        (None, "/foo.bar", "/foo.bar"),
        (None, "/foo/../bar", "/bar"),
        (None, "/foo/./bar", "/foo/bar"),
        (None, "/foo/bar/baz", "/foo/bar/baz"),
        (None, "/foo/bar/../../baz", "/baz"),
        (None, "/foo/bar/./baz", "/foo/bar/baz"),
        ("", None, "."),
        ("/foo", None, "/foo"),
        ("/foo.bar", None, "/foo.bar"),
        ("/foo/../bar", None, "/bar"),
        ("/foo/./bar", None, "/foo/bar"),
        ("/foo/bar/baz", None, "/foo/bar/baz"),
        ("/foo/bar/../../baz", None, "/baz"),
        ("/foo/bar/./baz", None, "/foo/bar/baz"),
        ("", "", "."),
        ("/", "", "/"),
        ("", "/", "/"),
        ("/", "/", "/"),
        ("/foo", "bar", "/foo/bar"),
        ("/foo", "bar/foo.baz", "/foo/bar/foo.baz"),
        ("/foo", "bar/../foo.baz", "/foo/foo.baz"),
        ("/foo", "bar/../../foo.baz", "/foo.baz"),
        ("/foo", "bar.baz", "/foo/bar.baz"),
        ("/foo/../", "bar.baz", "/bar.baz"),
        ("/foo/", "../bar.baz", "/bar.baz"),
        ("/foo/./", "bar.baz", "/foo/bar.baz"),
        ("/foo/", "./bar.baz", "/foo/bar.baz"),
        ("foo", "bar", "foo/bar"),
        ("foo", "bar/foo.baz", "foo/bar/foo.baz"),
        ("foo", "bar/../foo.baz", "foo/foo.baz"),
        ("foo", "bar/../../foo.baz", "foo.baz"),
        ("foo", "bar.baz", "foo/bar.baz"),
        ("foo/../", "bar.baz", "bar.baz"),
        ("foo/", "../bar.baz", "bar.baz"),
        ("foo/./", "bar.baz", "foo/bar.baz"),
        ("foo/", "./bar.baz", "foo/bar.baz"),
    ]
    for test in tests:
        self.assertEqual(gf.norm_join(test[0], test[1]), test[2])
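# A minimal reference implementation consistent with the table above (a
# sketch, assuming POSIX separators; not necessarily the exact code in
# aeneas.globalfunctions): a None argument falls back to normalizing the
# other one, and two None (or empty) arguments normalize to ".".
import os

def norm_join_sketch(prefix, suffix):
    if (prefix is None) and (suffix is None):
        return "."
    if prefix is None:
        return os.path.normpath(suffix)
    if suffix is None:
        return os.path.normpath(prefix)
    return os.path.normpath(os.path.join(prefix, suffix))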
def _analyze_txt_config(self, config_string=None):
    """
    Analyze the given container and return the corresponding job.

    If ``config_string`` is ``None``,
    try reading it from the TXT config file inside the container.

    :param string config_string: the configuration string
    :rtype: :class:`~aeneas.job.Job`
    """
    self.log(u"Analyzing container with TXT config string")
    if config_string is None:
        self.log(u"Analyzing container with TXT config file")
        config_entry = self.container.entry_config_txt
        self.log([u"Found TXT config entry '%s'", config_entry])
        config_dir = os.path.dirname(config_entry)
        self.log([u"Directory of TXT config entry: '%s'", config_dir])
        self.log([u"Reading TXT config entry: '%s'", config_entry])
        config_contents = self.container.read_entry(config_entry)
        self.log(u"Converting config contents to config string")
        config_contents = gf.safe_unicode(config_contents)
        config_string = gf.config_txt_to_string(config_contents)
    else:
        self.log([u"Analyzing container with TXT config string '%s'", config_string])
        config_dir = ""
    self.log(u"Creating the Job object")
    job = Job(config_string)
    self.log(u"Getting entries")
    entries = self.container.entries
    self.log(u"Converting config string into config dict")
    parameters = gf.config_string_to_dict(config_string)
    self.log(u"Calculating the path of the tasks root directory")
    tasks_root_directory = gf.norm_join(
        config_dir,
        parameters[gc.PPN_JOB_IS_HIERARCHY_PREFIX]
    )
    self.log([u"Path of the tasks root directory: '%s'", tasks_root_directory])
    self.log(u"Calculating the path of the sync map root directory")
    sync_map_root_directory = gf.norm_join(
        config_dir,
        parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX]
    )
    job_os_hierarchy_type = parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
    self.log([u"Path of the sync map root directory: '%s'", sync_map_root_directory])
    text_file_relative_path = parameters[gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH]
    self.log([u"Relative path for text file: '%s'", text_file_relative_path])
    text_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX])
    self.log([u"Regex for text file: '%s'", parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX]])
    audio_file_relative_path = parameters[gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH]
    self.log([u"Relative path for audio file: '%s'", audio_file_relative_path])
    audio_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX])
    self.log([u"Regex for audio file: '%s'", parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX]])
    if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.FLAT:
        self.log(u"Looking for text/audio pairs in flat hierarchy")
        text_files = self._find_files(
            entries,
            tasks_root_directory,
            text_file_relative_path,
            text_file_name_regex
        )
        self.log([u"Found text files: '%s'", text_files])
        audio_files = self._find_files(
            entries,
            tasks_root_directory,
            audio_file_relative_path,
            audio_file_name_regex
        )
        self.log([u"Found audio files: '%s'", audio_files])
        self.log(u"Matching files in flat hierarchy...")
        matched_tasks = self._match_files_flat_hierarchy(
            text_files,
            audio_files
        )
        self.log(u"Matching files in flat hierarchy... done")
        for task_info in matched_tasks:
            self.log([u"Creating task: '%s'", str(task_info)])
            task = self._create_task(
                task_info,
                config_string,
                sync_map_root_directory,
                job_os_hierarchy_type
            )
            job.add_task(task)
    if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.PAGED:
        self.log(u"Looking for text/audio pairs in paged hierarchy")
        # find all subdirectories of tasks_root_directory
        # that match gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX
        matched_directories = self._match_directories(
            entries,
            tasks_root_directory,
            parameters[gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX]
        )
        for matched_directory in matched_directories:
            # rebuild the full path
            matched_directory_full_path = gf.norm_join(
                tasks_root_directory,
                matched_directory
            )
            self.log([u"Looking for text/audio pairs in directory '%s'", matched_directory_full_path])
            # look for text and audio files there
            text_files = self._find_files(
                entries,
                matched_directory_full_path,
                text_file_relative_path,
                text_file_name_regex
            )
            self.log([u"Found text files: '%s'", text_files])
            audio_files = self._find_files(
                entries,
                matched_directory_full_path,
                audio_file_relative_path,
                audio_file_name_regex
            )
            self.log([u"Found audio files: '%s'", audio_files])
            # if we have found exactly one text and one audio file,
            # create a Task
            if (len(text_files) == 1) and (len(audio_files) == 1):
                self.log([u"Exactly one text file and one audio file in '%s'", matched_directory])
                task_info = [
                    matched_directory,
                    text_files[0],
                    audio_files[0]
                ]
                self.log([u"Creating task: '%s'", str(task_info)])
                task = self._create_task(
                    task_info,
                    config_string,
                    sync_map_root_directory,
                    job_os_hierarchy_type
                )
                job.add_task(task)
            elif len(text_files) > 1:
                self.log([u"More than one text file in '%s'", matched_directory])
            elif len(audio_files) > 1:
                self.log([u"More than one audio file in '%s'", matched_directory])
            else:
                self.log([u"No text nor audio file in '%s'", matched_directory])
    return job
def _analyze_xml_config(self, config_contents=None):
    """
    Analyze the given container and return the corresponding job.

    If ``config_contents`` is ``None``,
    try reading it from the XML config file inside the container.

    :param string config_contents: the contents of the XML config file
    :rtype: :class:`~aeneas.job.Job`
    """
    self.log(u"Analyzing container with XML config string")
    if config_contents is None:
        self.log(u"Analyzing container with XML config file")
        config_entry = self.container.entry_config_xml
        self.log([u"Found XML config entry '%s'", config_entry])
        config_dir = os.path.dirname(config_entry)
        self.log([u"Directory of XML config entry: '%s'", config_dir])
        self.log([u"Reading XML config entry: '%s'", config_entry])
        config_contents = self.container.read_entry(config_entry)
    else:
        self.log(u"Analyzing container with XML config contents")
        config_dir = ""
    self.log(u"Converting config contents into job config dict")
    job_parameters = gf.config_xml_to_dict(
        config_contents,
        result=None,
        parse_job=True
    )
    self.log(u"Converting config contents into tasks config dict")
    tasks_parameters = gf.config_xml_to_dict(
        config_contents,
        result=None,
        parse_job=False
    )
    self.log(u"Calculating the path of the sync map root directory")
    sync_map_root_directory = gf.norm_join(
        config_dir,
        job_parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX]
    )
    job_os_hierarchy_type = job_parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
    self.log([u"Path of the sync map root directory: '%s'", sync_map_root_directory])
    self.log(u"Converting job config dict into job config string")
    config_string = gf.config_dict_to_string(job_parameters)
    job = Job(config_string)
    for task_parameters in tasks_parameters:
        self.log(u"Converting task config dict into task config string")
        config_string = gf.config_dict_to_string(task_parameters)
        self.log([u"Creating task with config string '%s'", config_string])
        try:
            custom_id = task_parameters[gc.PPN_TASK_CUSTOM_ID]
        except KeyError:
            custom_id = ""
        task_info = [
            custom_id,
            gf.norm_join(
                config_dir,
                task_parameters[gc.PPN_TASK_IS_TEXT_FILE_XML]
            ),
            gf.norm_join(
                config_dir,
                task_parameters[gc.PPN_TASK_IS_AUDIO_FILE_XML]
            )
        ]
        self.log([u"Creating task: '%s'", str(task_info)])
        task = self._create_task(
            task_info,
            config_string,
            sync_map_root_directory,
            job_os_hierarchy_type
        )
        job.add_task(task)
    return job
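# Both _analyze_txt_config() and _analyze_xml_config() are reached through
# AnalyzeContainer.analyze(), which picks the TXT or XML branch depending on
# which config entry the container exposes. A hedged driver sketch (the
# container path is a placeholder):
from aeneas.analyzecontainer import AnalyzeContainer
from aeneas.container import Container

container = Container("/tmp/job.zip")
job = AnalyzeContainer(container).analyze()
print(len(job))  # number of tasks found in the container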
def _analyze_xml_config(self, config_contents=None):
    """
    Analyze the given container and return the corresponding job.

    If ``config_contents`` is ``None``,
    try reading it from the XML config file inside the container.

    :param config_contents: the contents of the XML config file
    :type  config_contents: string
    :rtype: :class:`aeneas.job.Job`
    """
    # TODO break this function down into smaller functions
    self._log("Analyzing container with XML config string")
    if config_contents is None:
        self._log("Analyzing container with XML config file")
        config_entry = self.container.entry_config_xml
        self._log(["Found XML config entry '%s'", config_entry])
        config_dir = os.path.dirname(config_entry)
        self._log(["Directory of XML config entry: '%s'", config_dir])
        self._log(["Reading XML config entry: '%s'", config_entry])
        config_contents = self.container.read_entry(config_entry)
    else:
        self._log("Analyzing container with XML config contents")
        config_dir = ""

    # remove BOM
    #self._log("Removing BOM")
    #config_contents = gf.remove_bom(config_contents)

    # get the job parameters and tasks parameters
    self._log("Converting config contents into job config dict")
    job_parameters = gf.config_xml_to_dict(
        config_contents,
        result=None,
        parse_job=True
    )
    self._log("Converting config contents into tasks config dict")
    tasks_parameters = gf.config_xml_to_dict(
        config_contents,
        result=None,
        parse_job=False
    )

    # compute the root directory for the sync map files
    self._log("Calculating the path of the sync map root directory")
    sync_map_root_directory = gf.norm_join(
        config_dir,
        job_parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX]
    )
    job_os_hierarchy_type = job_parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
    self._log(["Path of the sync map root directory: '%s'", sync_map_root_directory])

    # create the Job object to be returned
    self._log("Converting job config dict into job config string")
    config_string = gf.config_dict_to_string(job_parameters)
    job = Job(config_string)

    # create the Task objects
    for task_parameters in tasks_parameters:
        self._log("Converting task config dict into task config string")
        config_string = gf.config_dict_to_string(task_parameters)
        self._log(["Creating task with config string '%s'", config_string])
        try:
            custom_id = task_parameters[gc.PPN_TASK_CUSTOM_ID]
        except KeyError:
            custom_id = ""
        task_info = [
            custom_id,
            gf.norm_join(
                config_dir,
                task_parameters[gc.PPN_TASK_IS_TEXT_FILE_XML]
            ),
            gf.norm_join(
                config_dir,
                task_parameters[gc.PPN_TASK_IS_AUDIO_FILE_XML]
            )
        ]
        self._log(["Creating task: '%s'", str(task_info)])
        task = self._create_task(
            task_info,
            config_string,
            sync_map_root_directory,
            job_os_hierarchy_type
        )
        job.add_task(task)

    # return the Job
    return job
def load_job_from_container(self, container_path, config_string=None):
    """
    Validate the given container, and, if it is well formed,
    load the job from it.

    If ``config_string`` is ``None``,
    the container must contain a configuration file;
    otherwise use the provided config string
    (i.e., the wizard case).

    Return ``True`` if the job has been loaded successfully,
    ``False`` otherwise.

    :param container_path: the path to the input container
    :type  container_path: string (path)
    :param config_string: the configuration string (from wizard)
    :type  config_string: string
    :rtype: bool
    """
    self._log("Loading job from container...")

    # validate container
    self._log("Validating container...")
    validator = Validator(logger=self.logger)
    if config_string is None:
        validator_result = validator.check_container(container_path)
    else:
        validator_result = validator.check_container_from_wizard(
            container_path,
            config_string
        )
    if not validator_result.passed:
        self._log("Validating container: failed")
        self._log("Loading job from container: failed")
        return False
    self._log("Validating container: succeeded")

    try:
        # create working directory where the input container
        # will be decompressed
        self.working_directory = tempfile.mkdtemp(dir=gf.custom_tmp_dir())
        self._log("Created working directory '%s'" % self.working_directory)

        # decompress
        self._log("Decompressing input container...")
        input_container = Container(container_path, logger=self.logger)
        input_container.decompress(self.working_directory)
        self._log("Decompressing input container... done")

        # create job from the working directory
        self._log("Creating job from working directory...")
        working_container = Container(
            self.working_directory,
            logger=self.logger
        )
        analyzer = AnalyzeContainer(working_container, logger=self.logger)
        if config_string is None:
            self.job = analyzer.analyze()
        else:
            self.job = analyzer.analyze_from_wizard(config_string)
        self._log("Creating job from working directory... done")

        # set absolute path for text file and audio file
        # for each task in the job
        self._log("Setting absolute paths for tasks...")
        for task in self.job.tasks:
            task.text_file_path_absolute = gf.norm_join(
                self.working_directory,
                task.text_file_path
            )
            task.audio_file_path_absolute = gf.norm_join(
                self.working_directory,
                task.audio_file_path
            )
        self._log("Setting absolute paths for tasks... done")

        # return
        self._log("Loading job from container: succeeded")
        return True
    except Exception:
        # failure: clean and return
        self.clean()
        self._log("Loading job from container: failed")
        return False
def write_output_container(self, output_directory_path):
    """
    Write the output container for this job.

    Return a pair ``(bool, string)``, where the bool
    indicates whether the execution succeeded,
    and the string is the path to the output container.

    :param output_directory_path: the path to a directory where
                                  the output container must be created
    :type  output_directory_path: string (path)
    :rtype: (bool, string)
    """
    self._log("Writing output container for this job")

    # check if the job has tasks
    if self.job is None:
        self._log("job is None")
        return (False, None)
    if len(self.job) == 0:
        self._log("The job has no tasks")
        return (False, None)

    try:
        # create temporary directory where the sync map files
        # will be created
        # this temporary directory will be compressed into
        # the output container
        self.tmp_directory = tempfile.mkdtemp(dir=gf.custom_tmp_dir())
        self._log("Created temporary directory '%s'" % self.tmp_directory)

        for task in self.job.tasks:
            custom_id = task.configuration.custom_id
            # check if the task has sync map and sync map file path
            if task.sync_map_file_path is None:
                self._log("Task '%s' has sync_map_file_path not set" % custom_id)
                return (False, None)
            if task.sync_map is None:
                self._log("Task '%s' has sync_map not set" % custom_id)
                return (False, None)
            # output sync map
            self._log("Outputting sync map for task '%s'..." % custom_id)
            task.output_sync_map_file(self.tmp_directory)
            self._log("Outputting sync map for task '%s'... done" % custom_id)

        # get output container info
        output_container_format = self.job.configuration.os_container_format
        self._log("Output container format: '%s'" % output_container_format)
        output_file_name = self.job.configuration.os_file_name
        if ((output_container_format != ContainerFormat.UNPACKED) and
                (not output_file_name.endswith(output_container_format))):
            self._log("Adding extension to output_file_name")
            output_file_name += "." + output_container_format
        self._log("Output file name: '%s'" % output_file_name)
        output_file_path = gf.norm_join(
            output_directory_path,
            output_file_name
        )
        self._log("Output file path: '%s'" % output_file_path)

        # create output container
        self._log("Compressing...")
        container = Container(
            output_file_path,
            output_container_format,
            logger=self.logger
        )
        container.compress(self.tmp_directory)
        self._log("Compressing... done")
        self._log("Created output file: '%s'" % output_file_path)

        # clean and return
        self.clean(False)
        return (True, output_file_path)
    except Exception:
        self.clean(False)
        return (False, None)
def write_output_container(self, output_directory_path):
    """
    Write the output container for this job.

    Return the path to the output container,
    which is the concatenation of ``output_directory_path``
    and of the output container file or directory name.

    :param string output_directory_path: the path to a directory where
                                         the output container must be created
    :rtype: string
    :raises: :class:`~aeneas.executejob.ExecuteJobOutputError`: if there
             is a problem while writing the output container
    """
    self.log(u"Writing output container for this job")
    if self.job is None:
        self.log_exc(u"The job object is None", None, True, ExecuteJobOutputError)
    if len(self.job) == 0:
        self.log_exc(u"The job has no tasks", None, True, ExecuteJobOutputError)
    self.log([u"Number of tasks: '%d'", len(self.job)])

    # create temporary directory where the sync map files
    # will be created
    # this temporary directory will be compressed into
    # the output container
    self.tmp_directory = gf.tmp_directory(root=self.rconf[RuntimeConfiguration.TMP_PATH])
    self.log([u"Created temporary directory '%s'", self.tmp_directory])

    for task in self.job.tasks:
        custom_id = task.configuration["custom_id"]
        # check if the task has sync map and sync map file path
        if task.sync_map_file_path is None:
            self.log_exc(u"Task '%s' has sync_map_file_path not set" % (custom_id), None, True, ExecuteJobOutputError)
        if task.sync_map is None:
            self.log_exc(u"Task '%s' has sync_map not set" % (custom_id), None, True, ExecuteJobOutputError)
        try:
            # output sync map
            self.log([u"Outputting sync map for task '%s'...", custom_id])
            task.output_sync_map_file(self.tmp_directory)
            self.log([u"Outputting sync map for task '%s'... done", custom_id])
        except Exception as exc:
            self.log_exc(u"Error while outputting sync map for task '%s'" % (custom_id), exc, True, ExecuteJobOutputError)

    # get output container info
    output_container_format = self.job.configuration["o_container_format"]
    self.log([u"Output container format: '%s'", output_container_format])
    output_file_name = self.job.configuration["o_name"]
    if ((output_container_format != ContainerFormat.UNPACKED) and
            (not output_file_name.endswith(output_container_format))):
        self.log(u"Adding extension to output_file_name")
        output_file_name += "." + output_container_format
    self.log([u"Output file name: '%s'", output_file_name])
    output_file_path = gf.norm_join(
        output_directory_path,
        output_file_name
    )
    self.log([u"Output file path: '%s'", output_file_path])

    try:
        self.log(u"Compressing...")
        container = Container(
            output_file_path,
            output_container_format,
            logger=self.logger
        )
        container.compress(self.tmp_directory)
        self.log(u"Compressing... done")
        self.log([u"Created output file: '%s'", output_file_path])
        self.log(u"Writing output container for this job: succeeded")
        self.clean(False)
        return output_file_path
    except Exception as exc:
        self.clean(False)
        self.log_exc(u"Error while compressing", exc, True, ExecuteJobOutputError)
        return None
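# The naming rule above, isolated as a tiny standalone sketch (hypothetical
# helper): the container format is appended as an extension unless the output
# is UNPACKED (a plain directory) or the name already ends with that format.
def sketch_output_name(name, fmt, unpacked=False):
    if (not unpacked) and (not name.endswith(fmt)):
        name += "." + fmt
    return name

# sketch_output_name("output_job", "zip")     => "output_job.zip"
# sketch_output_name("output_job.zip", "zip") => "output_job.zip"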
def load_job_from_container(self, container_path, config_string=None):
    """
    Load the job from the given :class:`aeneas.container.Container` object.

    If ``config_string`` is ``None``,
    the container must contain a configuration file;
    otherwise use the provided config string
    (i.e., the wizard case).

    :param string container_path: the path to the input container
    :param string config_string: the configuration string (from wizard)
    :raises: :class:`~aeneas.executejob.ExecuteJobInputError`: if the given
             container does not contain a valid :class:`~aeneas.job.Job`
    """
    self.log(u"Loading job from container...")

    # create working directory where the input container
    # will be decompressed
    self.working_directory = gf.tmp_directory(root=self.rconf[RuntimeConfiguration.TMP_PATH])
    self.log([u"Created working directory '%s'", self.working_directory])

    try:
        self.log(u"Decompressing input container...")
        input_container = Container(container_path, logger=self.logger)
        input_container.decompress(self.working_directory)
        self.log(u"Decompressing input container... done")
    except Exception as exc:
        self.clean()
        self.log_exc(u"Unable to decompress container '%s': %s" % (container_path, exc), None, True, ExecuteJobInputError)

    try:
        self.log(u"Creating job from working directory...")
        working_container = Container(
            self.working_directory,
            logger=self.logger
        )
        analyzer = AnalyzeContainer(working_container, logger=self.logger)
        self.job = analyzer.analyze(config_string=config_string)
        self.log(u"Creating job from working directory... done")
    except Exception as exc:
        self.clean()
        self.log_exc(u"Unable to analyze container '%s': %s" % (container_path, exc), None, True, ExecuteJobInputError)

    if self.job is None:
        self.log_exc(u"The container '%s' does not contain a valid Job" % (container_path), None, True, ExecuteJobInputError)

    try:
        # set absolute path for text file and audio file
        # for each task in the job
        self.log(u"Setting absolute paths for tasks...")
        for task in self.job.tasks:
            task.text_file_path_absolute = gf.norm_join(
                self.working_directory,
                task.text_file_path
            )
            task.audio_file_path_absolute = gf.norm_join(
                self.working_directory,
                task.audio_file_path
            )
        self.log(u"Setting absolute paths for tasks... done")
        self.log(u"Loading job from container: succeeded")
    except Exception as exc:
        self.clean()
        self.log_exc(u"Error while setting absolute paths for tasks", exc, True, ExecuteJobInputError)
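# Putting the pieces together, a hedged end-to-end sketch of the ExecuteJob
# workflow these methods belong to (paths are placeholders):
from aeneas.executejob import ExecuteJob

executor = ExecuteJob()
executor.load_job_from_container("/tmp/job.zip")     # decompress + analyze
executor.execute()                                   # process all tasks
path = executor.write_output_container("/tmp/out/")  # compress sync maps
executor.clean()
print(path)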
def _analyze_txt_config(self, config_string=None):
    """
    Analyze the given container and return the corresponding job.

    If ``config_string`` is ``None``,
    try reading it from the TXT config file inside the container.

    :param config_string: the configuration string
    :type  config_string: string
    :rtype: :class:`aeneas.job.Job`
    """
    # TODO break this function down into smaller functions
    self._log("Analyzing container with TXT config string")
    if config_string is None:
        self._log("Analyzing container with TXT config file")
        config_entry = self.container.entry_config_txt
        self._log("Found TXT config entry '%s'" % config_entry)
        config_dir = os.path.dirname(config_entry)
        self._log("Directory of TXT config entry: '%s'" % config_dir)
        self._log("Reading TXT config entry: '%s'" % config_entry)
        config_contents = self.container.read_entry(config_entry)
        #self._log("Removing BOM")
        #config_contents = gf.remove_bom(config_contents)
        self._log("Converting config contents to config string")
        config_string = gf.config_txt_to_string(config_contents)
    else:
        self._log("Analyzing container with TXT config string '%s'" % config_string)
        config_dir = ""
        #self._log("Removing BOM")
        #config_string = gf.remove_bom(config_string)

    # create the Job object to be returned
    self._log("Creating the Job object")
    job = Job(config_string)

    # get the entries in this container
    self._log("Getting entries")
    entries = self.container.entries()

    # convert the config string to dict
    self._log("Converting config string into config dict")
    parameters = gf.config_string_to_dict(config_string)

    # compute the root directory for the task assets
    self._log("Calculating the path of the tasks root directory")
    tasks_root_directory = gf.norm_join(
        config_dir,
        parameters[gc.PPN_JOB_IS_HIERARCHY_PREFIX]
    )
    self._log("Path of the tasks root directory: '%s'" % tasks_root_directory)

    # compute the root directory for the sync map files
    self._log("Calculating the path of the sync map root directory")
    sync_map_root_directory = gf.norm_join(
        config_dir,
        parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX]
    )
    job_os_hierarchy_type = parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
    self._log("Path of the sync map root directory: '%s'" % sync_map_root_directory)

    # prepare relative path and file name regex for text and audio files
    text_file_relative_path = parameters[gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH]
    self._log("Relative path for text file: '%s'" % text_file_relative_path)
    text_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX])
    self._log("Regex for text file: '%s'" % parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX])
    audio_file_relative_path = parameters[gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH]
    self._log("Relative path for audio file: '%s'" % audio_file_relative_path)
    audio_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX])
    self._log("Regex for audio file: '%s'" % parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX])

    # flat hierarchy
    if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.FLAT:
        self._log("Looking for text/audio pairs in flat hierarchy")
        text_files = self._find_files(
            entries,
            tasks_root_directory,
            text_file_relative_path,
            text_file_name_regex
        )
        self._log("Found text files: '%s'" % str(text_files))
        audio_files = self._find_files(
            entries,
            tasks_root_directory,
            audio_file_relative_path,
            audio_file_name_regex
        )
        self._log("Found audio files: '%s'" % str(audio_files))
        self._log("Matching files in flat hierarchy...")
        matched_tasks = self._match_files_flat_hierarchy(
            text_files,
            audio_files
        )
        self._log("Matching files in flat hierarchy... done")
        for task_info in matched_tasks:
            self._log("Creating task: '%s'" % str(task_info))
            task = self._create_task(
                task_info,
                config_string,
                sync_map_root_directory,
                job_os_hierarchy_type
            )
            job.add_task(task)

    # paged hierarchy
    if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.PAGED:
        self._log("Looking for text/audio pairs in paged hierarchy")
        # find all subdirectories of tasks_root_directory
        # that match gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX
        matched_directories = self._match_directories(
            entries,
            tasks_root_directory,
            parameters[gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX]
        )
        for matched_directory in matched_directories:
            # rebuild the full path
            matched_directory_full_path = gf.norm_join(
                tasks_root_directory,
                matched_directory
            )
            self._log("Looking for text/audio pairs in directory '%s'" % matched_directory_full_path)
            # look for text and audio files there
            text_files = self._find_files(
                entries,
                matched_directory_full_path,
                text_file_relative_path,
                text_file_name_regex
            )
            self._log("Found text files: '%s'" % str(text_files))
            audio_files = self._find_files(
                entries,
                matched_directory_full_path,
                audio_file_relative_path,
                audio_file_name_regex
            )
            self._log("Found audio files: '%s'" % str(audio_files))
            # if we have found exactly one text and one audio file,
            # create a Task
            if (len(text_files) == 1) and (len(audio_files) == 1):
                self._log("Exactly one text file and one audio file in '%s'" % matched_directory)
                task_info = [
                    matched_directory,
                    text_files[0],
                    audio_files[0]
                ]
                self._log("Creating task: '%s'" % str(task_info))
                task = self._create_task(
                    task_info,
                    config_string,
                    sync_map_root_directory,
                    job_os_hierarchy_type
                )
                job.add_task(task)
            elif len(text_files) > 1:
                self._log("More than one text file in '%s'" % matched_directory)
            elif len(audio_files) > 1:
                self._log("More than one audio file in '%s'" % matched_directory)
            else:
                self._log("No text nor audio file in '%s'" % matched_directory)

    # return the Job
    return job