def _compute_sync_map_file_path(self, root, hierarchy_type, custom_id, file_name):
    """
    Compute the sync map file path inside the output container.

    :param root: the root of the sync map files inside the container
    :type  root: string (path)
    :param hierarchy_type: type of job output hierarchy
    :type  hierarchy_type: :class:`aeneas.hierarchytype.HierarchyType`
    :param custom_id: the task custom id (flat) or page directory name (paged)
    :type  custom_id: string
    :param file_name: the output file name for the sync map
    :type  file_name: string
    :rtype: string (path)
    """
    prefix = root
    if hierarchy_type == HierarchyType.PAGED:
        prefix = gf.norm_join(prefix, custom_id)
    file_name_joined = gf.norm_join(prefix, file_name)
    return self._replace_placeholder(file_name_joined, custom_id)
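# A minimal standalone sketch (hypothetical names, not part of the class)
# illustrating the two layouts computed above, assuming gf.norm_join behaves
# like os.path.normpath(os.path.join(...)):
#   flat:  root/file_name            (custom_id only fills placeholders)
#   paged: root/custom_id/file_name  (one subdirectory per page)
import os

def sketch_sync_map_path(root, paged, custom_id, file_name):
    prefix = os.path.join(root, custom_id) if paged else root
    return os.path.normpath(os.path.join(prefix, file_name))

# sketch_sync_map_path("OS", False, "t001", "t001.smil") => "OS/t001.smil"
# sketch_sync_map_path("OS", True, "p001", "page.smil")  => "OS/p001/page.smil"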
def _find_files(self, entries, root, relative_path, file_name_regex):
    """
    Return the elements in entries that

    1. are in ``root/relative_path``, and
    2. match ``file_name_regex``.

    :param list entries: the list of entries (file paths) in the container
    :param string root: the root directory of the container
    :param string relative_path: the relative path in which we must search
    :param regex file_name_regex: the regex matching the desired file names
    :rtype: list of strings (path)
    """
    self.log([u"Finding files within root: '%s'", root])
    target = root
    if relative_path is not None:
        self.log([u"Joining relative path: '%s'", relative_path])
        target = gf.norm_join(root, relative_path)
    self.log([u"Finding files within target: '%s'", target])
    files = []
    target_len = len(target)
    for entry in entries:
        if entry.startswith(target):
            self.log([u"Examining entry: '%s'", entry])
            entry_suffix = entry[target_len + 1:]
            self.log([u"Examining entry suffix: '%s'", entry_suffix])
            if re.search(file_name_regex, entry_suffix) is not None:
                self.log([u"Match: '%s'", entry])
                files.append(entry)
            else:
                self.log([u"No match: '%s'", entry])
    return sorted(files)
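# A self-contained illustration (hypothetical data) of the selection logic
# above: the prefix check is purely string-based, and the regex is applied to
# the path suffix *below* the target directory, not to the full entry path.
import os
import re

def sketch_find_files(entries, root, relative_path, file_name_regex):
    target = root
    if relative_path is not None:
        target = os.path.normpath(os.path.join(root, relative_path))
    return sorted(
        e for e in entries
        if e.startswith(target) and re.search(file_name_regex, e[len(target) + 1:])
    )

# sketch_find_files(
#     ["job/p001/text.xhtml", "job/p001/audio.mp3"],
#     "job/p001", None, re.compile(r"\.xhtml$")
# ) => ["job/p001/text.xhtml"]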
def test_norm_join(self):
    tests = [
        (None, None, "."),
        (None, "", "."),
        (None, "/foo", "/foo"),
        (None, "/foo.bar", "/foo.bar"),
        (None, "/foo/../bar", "/bar"),
        (None, "/foo/./bar", "/foo/bar"),
        (None, "/foo/bar/baz", "/foo/bar/baz"),
        (None, "/foo/bar/../../baz", "/baz"),
        (None, "/foo/bar/./baz", "/foo/bar/baz"),
        ("", None, "."),
        ("/foo", None, "/foo"),
        ("/foo.bar", None, "/foo.bar"),
        ("/foo/../bar", None, "/bar"),
        ("/foo/./bar", None, "/foo/bar"),
        ("/foo/bar/baz", None, "/foo/bar/baz"),
        ("/foo/bar/../../baz", None, "/baz"),
        ("/foo/bar/./baz", None, "/foo/bar/baz"),
        ("", "", "."),
        ("/", "", "/"),
        ("", "/", "/"),
        ("/", "/", "/"),
        ("/foo", "bar", "/foo/bar"),
        ("/foo", "bar/foo.baz", "/foo/bar/foo.baz"),
        ("/foo", "bar/../foo.baz", "/foo/foo.baz"),
        ("/foo", "bar/../../foo.baz", "/foo.baz"),
        ("/foo", "bar.baz", "/foo/bar.baz"),
        ("/foo/../", "bar.baz", "/bar.baz"),
        ("/foo/", "../bar.baz", "/bar.baz"),
        ("/foo/./", "bar.baz", "/foo/bar.baz"),
        ("/foo/", "./bar.baz", "/foo/bar.baz"),
        ("foo", "bar", "foo/bar"),
        ("foo", "bar/foo.baz", "foo/bar/foo.baz"),
        ("foo", "bar/../foo.baz", "foo/foo.baz"),
        ("foo", "bar/../../foo.baz", "foo.baz"),
        ("foo", "bar.baz", "foo/bar.baz"),
        ("foo/../", "bar.baz", "bar.baz"),
        ("foo/", "../bar.baz", "bar.baz"),
        ("foo/./", "bar.baz", "foo/bar.baz"),
        ("foo/", "./bar.baz", "foo/bar.baz"),
    ]
    for test in tests:
        self.assertEqual(gf.norm_join(test[0], test[1]), test[2])
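# A minimal reference implementation consistent with the table above (a
# sketch, assuming POSIX separators; not necessarily the exact code in
# aeneas.globalfunctions): a None argument falls back to normalizing the
# other one, and two None (or empty) arguments normalize to ".".
import os

def norm_join_sketch(prefix, suffix):
    if (prefix is None) and (suffix is None):
        return "."
    if prefix is None:
        return os.path.normpath(suffix)
    if suffix is None:
        return os.path.normpath(prefix)
    return os.path.normpath(os.path.join(prefix, suffix))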
def _analyze_txt_config(self, config_string=None):
    """
    Analyze the given container and return the corresponding job.

    If ``config_string`` is ``None``,
    try reading it from the TXT config file inside the container.

    :param string config_string: the configuration string
    :rtype: :class:`~aeneas.job.Job`
    """
    self.log(u"Analyzing container with TXT config string")
    if config_string is None:
        self.log(u"Analyzing container with TXT config file")
        config_entry = self.container.entry_config_txt
        self.log([u"Found TXT config entry '%s'", config_entry])
        config_dir = os.path.dirname(config_entry)
        self.log([u"Directory of TXT config entry: '%s'", config_dir])
        self.log([u"Reading TXT config entry: '%s'", config_entry])
        config_contents = self.container.read_entry(config_entry)
        self.log(u"Converting config contents to config string")
        config_contents = gf.safe_unicode(config_contents)
        config_string = gf.config_txt_to_string(config_contents)
    else:
        self.log([u"Analyzing container with TXT config string '%s'", config_string])
        config_dir = ""
    self.log(u"Creating the Job object")
    job = Job(config_string)
    self.log(u"Getting entries")
    entries = self.container.entries
    self.log(u"Converting config string into config dict")
    parameters = gf.config_string_to_dict(config_string)
    self.log(u"Calculating the path of the tasks root directory")
    tasks_root_directory = gf.norm_join(
        config_dir,
        parameters[gc.PPN_JOB_IS_HIERARCHY_PREFIX]
    )
    self.log([u"Path of the tasks root directory: '%s'", tasks_root_directory])
    self.log(u"Calculating the path of the sync map root directory")
    sync_map_root_directory = gf.norm_join(
        config_dir,
        parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX]
    )
    job_os_hierarchy_type = parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
    self.log([u"Path of the sync map root directory: '%s'", sync_map_root_directory])
    text_file_relative_path = parameters[gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH]
    self.log([u"Relative path for text file: '%s'", text_file_relative_path])
    text_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX])
    self.log([u"Regex for text file: '%s'", parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX]])
    audio_file_relative_path = parameters[gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH]
    self.log([u"Relative path for audio file: '%s'", audio_file_relative_path])
    audio_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX])
    self.log([u"Regex for audio file: '%s'", parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX]])
    if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.FLAT:
        self.log(u"Looking for text/audio pairs in flat hierarchy")
        text_files = self._find_files(
            entries,
            tasks_root_directory,
            text_file_relative_path,
            text_file_name_regex
        )
        self.log([u"Found text files: '%s'", text_files])
        audio_files = self._find_files(
            entries,
            tasks_root_directory,
            audio_file_relative_path,
            audio_file_name_regex
        )
        self.log([u"Found audio files: '%s'", audio_files])
        self.log(u"Matching files in flat hierarchy...")
        matched_tasks = self._match_files_flat_hierarchy(
            text_files,
            audio_files
        )
        self.log(u"Matching files in flat hierarchy... done")
        for task_info in matched_tasks:
            self.log([u"Creating task: '%s'", str(task_info)])
            task = self._create_task(
                task_info,
                config_string,
                sync_map_root_directory,
                job_os_hierarchy_type
            )
            job.add_task(task)
    if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.PAGED:
        self.log(u"Looking for text/audio pairs in paged hierarchy")
        # find all subdirectories of tasks_root_directory
        # that match gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX
        matched_directories = self._match_directories(
            entries,
            tasks_root_directory,
            parameters[gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX]
        )
        for matched_directory in matched_directories:
            # rebuild the full path
            matched_directory_full_path = gf.norm_join(
                tasks_root_directory,
                matched_directory
            )
            self.log([u"Looking for text/audio pairs in directory '%s'", matched_directory_full_path])
            # look for text and audio files there
            text_files = self._find_files(
                entries,
                matched_directory_full_path,
                text_file_relative_path,
                text_file_name_regex
            )
            self.log([u"Found text files: '%s'", text_files])
            audio_files = self._find_files(
                entries,
                matched_directory_full_path,
                audio_file_relative_path,
                audio_file_name_regex
            )
            self.log([u"Found audio files: '%s'", audio_files])
            # if we have found exactly one text and one audio file,
            # create a Task
            if (len(text_files) == 1) and (len(audio_files) == 1):
                self.log([u"Exactly one text file and one audio file in '%s'", matched_directory])
                task_info = [
                    matched_directory,
                    text_files[0],
                    audio_files[0]
                ]
                self.log([u"Creating task: '%s'", str(task_info)])
                task = self._create_task(
                    task_info,
                    config_string,
                    sync_map_root_directory,
                    job_os_hierarchy_type
                )
                job.add_task(task)
            elif len(text_files) > 1:
                self.log([u"More than one text file in '%s'", matched_directory])
            elif len(audio_files) > 1:
                self.log([u"More than one audio file in '%s'", matched_directory])
            else:
                self.log([u"No text nor audio file in '%s'", matched_directory])
    return job
def _analyze_xml_config(self, config_contents=None):
    """
    Analyze the given container and return the corresponding job.

    If ``config_contents`` is ``None``,
    try reading it from the XML config file inside the container.

    :param string config_contents: the contents of the XML config file
    :rtype: :class:`~aeneas.job.Job`
    """
    self.log(u"Analyzing container with XML config string")
    if config_contents is None:
        self.log(u"Analyzing container with XML config file")
        config_entry = self.container.entry_config_xml
        self.log([u"Found XML config entry '%s'", config_entry])
        config_dir = os.path.dirname(config_entry)
        self.log([u"Directory of XML config entry: '%s'", config_dir])
        self.log([u"Reading XML config entry: '%s'", config_entry])
        config_contents = self.container.read_entry(config_entry)
    else:
        self.log(u"Analyzing container with XML config contents")
        config_dir = ""
    self.log(u"Converting config contents into job config dict")
    job_parameters = gf.config_xml_to_dict(
        config_contents,
        result=None,
        parse_job=True
    )
    self.log(u"Converting config contents into tasks config dict")
    tasks_parameters = gf.config_xml_to_dict(
        config_contents,
        result=None,
        parse_job=False
    )
    self.log(u"Calculating the path of the sync map root directory")
    sync_map_root_directory = gf.norm_join(
        config_dir,
        job_parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX]
    )
    job_os_hierarchy_type = job_parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
    self.log([u"Path of the sync map root directory: '%s'", sync_map_root_directory])
    self.log(u"Converting job config dict into job config string")
    config_string = gf.config_dict_to_string(job_parameters)
    job = Job(config_string)
    for task_parameters in tasks_parameters:
        self.log(u"Converting task config dict into task config string")
        config_string = gf.config_dict_to_string(task_parameters)
        self.log([u"Creating task with config string '%s'", config_string])
        try:
            custom_id = task_parameters[gc.PPN_TASK_CUSTOM_ID]
        except KeyError:
            custom_id = ""
        task_info = [
            custom_id,
            gf.norm_join(
                config_dir,
                task_parameters[gc.PPN_TASK_IS_TEXT_FILE_XML]
            ),
            gf.norm_join(
                config_dir,
                task_parameters[gc.PPN_TASK_IS_AUDIO_FILE_XML]
            )
        ]
        self.log([u"Creating task: '%s'", str(task_info)])
        task = self._create_task(
            task_info,
            config_string,
            sync_map_root_directory,
            job_os_hierarchy_type
        )
        job.add_task(task)
    return job
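# Both _analyze_txt_config() and _analyze_xml_config() are reached through
# AnalyzeContainer.analyze(), which picks the TXT or XML branch depending on
# which config entry the container exposes. A hedged driver sketch (the
# container path is a placeholder):
from aeneas.analyzecontainer import AnalyzeContainer
from aeneas.container import Container

container = Container("/tmp/job.zip")
job = AnalyzeContainer(container).analyze()
print(len(job))  # number of tasks found in the container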
def _analyze_xml_config(self, config_contents=None):
    """
    Analyze the given container and return the corresponding job.

    If ``config_contents`` is ``None``,
    try reading it from the XML config file inside the container.

    :param config_contents: the contents of the XML config file
    :type  config_contents: string
    :rtype: :class:`aeneas.job.Job`
    """
    # TODO break this function down into smaller functions
    self._log("Analyzing container with XML config string")
    if config_contents is None:
        self._log("Analyzing container with XML config file")
        config_entry = self.container.entry_config_xml
        self._log(["Found XML config entry '%s'", config_entry])
        config_dir = os.path.dirname(config_entry)
        self._log(["Directory of XML config entry: '%s'", config_dir])
        self._log(["Reading XML config entry: '%s'", config_entry])
        config_contents = self.container.read_entry(config_entry)
    else:
        self._log("Analyzing container with XML config contents")
        config_dir = ""

    # remove BOM
    #self._log("Removing BOM")
    #config_contents = gf.remove_bom(config_contents)

    # get the job parameters and tasks parameters
    self._log("Converting config contents into job config dict")
    job_parameters = gf.config_xml_to_dict(
        config_contents,
        result=None,
        parse_job=True
    )
    self._log("Converting config contents into tasks config dict")
    tasks_parameters = gf.config_xml_to_dict(
        config_contents,
        result=None,
        parse_job=False
    )

    # compute the root directory for the sync map files
    self._log("Calculating the path of the sync map root directory")
    sync_map_root_directory = gf.norm_join(
        config_dir,
        job_parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX]
    )
    job_os_hierarchy_type = job_parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
    self._log(["Path of the sync map root directory: '%s'", sync_map_root_directory])

    # create the Job object to be returned
    self._log("Converting job config dict into job config string")
    config_string = gf.config_dict_to_string(job_parameters)
    job = Job(config_string)

    # create the Task objects
    for task_parameters in tasks_parameters:
        self._log("Converting task config dict into task config string")
        config_string = gf.config_dict_to_string(task_parameters)
        self._log(["Creating task with config string '%s'", config_string])
        try:
            custom_id = task_parameters[gc.PPN_TASK_CUSTOM_ID]
        except KeyError:
            custom_id = ""
        task_info = [
            custom_id,
            gf.norm_join(
                config_dir,
                task_parameters[gc.PPN_TASK_IS_TEXT_FILE_XML]
            ),
            gf.norm_join(
                config_dir,
                task_parameters[gc.PPN_TASK_IS_AUDIO_FILE_XML]
            )
        ]
        self._log(["Creating task: '%s'", str(task_info)])
        task = self._create_task(
            task_info,
            config_string,
            sync_map_root_directory,
            job_os_hierarchy_type
        )
        job.add_task(task)

    # return the Job
    return job
def load_job_from_container(self, container_path, config_string=None):
    """
    Validate the given container, and, if it is well formed,
    load the job from it.

    If ``config_string`` is ``None``,
    the container must contain a configuration file;
    otherwise use the provided config string
    (i.e., the wizard case).

    Return ``True`` if the job has been loaded successfully,
    ``False`` otherwise.

    :param container_path: the path to the input container
    :type  container_path: string (path)
    :param config_string: the configuration string (from wizard)
    :type  config_string: string
    :rtype: bool
    """
    self._log("Loading job from container...")

    # validate container
    self._log("Validating container...")
    validator = Validator(logger=self.logger)
    if config_string is None:
        validator_result = validator.check_container(container_path)
    else:
        validator_result = validator.check_container_from_wizard(
            container_path,
            config_string
        )
    if not validator_result.passed:
        self._log("Validating container: failed")
        self._log("Loading job from container: failed")
        return False
    self._log("Validating container: succeeded")

    try:
        # create working directory where the input container
        # will be decompressed
        self.working_directory = tempfile.mkdtemp(dir=gf.custom_tmp_dir())
        self._log("Created working directory '%s'" % self.working_directory)

        # decompress
        self._log("Decompressing input container...")
        input_container = Container(container_path, logger=self.logger)
        input_container.decompress(self.working_directory)
        self._log("Decompressing input container... done")

        # create job from the working directory
        self._log("Creating job from working directory...")
        working_container = Container(
            self.working_directory,
            logger=self.logger
        )
        analyzer = AnalyzeContainer(working_container, logger=self.logger)
        if config_string is None:
            self.job = analyzer.analyze()
        else:
            self.job = analyzer.analyze_from_wizard(config_string)
        self._log("Creating job from working directory... done")

        # set absolute path for text file and audio file
        # for each task in the job
        self._log("Setting absolute paths for tasks...")
        for task in self.job.tasks:
            task.text_file_path_absolute = gf.norm_join(
                self.working_directory,
                task.text_file_path
            )
            task.audio_file_path_absolute = gf.norm_join(
                self.working_directory,
                task.audio_file_path
            )
        self._log("Setting absolute paths for tasks... done")

        # return
        self._log("Loading job from container: succeeded")
        return True
    except Exception:
        # failure: clean and return
        self.clean()
        self._log("Loading job from container: failed")
        return False
def write_output_container(self, output_directory_path):
    """
    Write the output container for this job.

    Return a pair ``(bool, string)``, where the bool
    indicates whether the execution succeeded,
    and the string is the path to the output container.

    :param output_directory_path: the path to a directory where
                                  the output container must be created
    :type  output_directory_path: string (path)
    :rtype: (bool, string)
    """
    self._log("Writing output container for this job")

    # check if the job has tasks
    if self.job is None:
        self._log("job is None")
        return (False, None)
    if len(self.job) == 0:
        self._log("The job has no tasks")
        return (False, None)

    try:
        # create temporary directory where the sync map files
        # will be created
        # this temporary directory will be compressed into
        # the output container
        self.tmp_directory = tempfile.mkdtemp(dir=gf.custom_tmp_dir())
        self._log("Created temporary directory '%s'" % self.tmp_directory)

        for task in self.job.tasks:
            custom_id = task.configuration.custom_id
            # check if the task has sync map and sync map file path
            if task.sync_map_file_path is None:
                self._log("Task '%s' has sync_map_file_path not set" % custom_id)
                return (False, None)
            if task.sync_map is None:
                self._log("Task '%s' has sync_map not set" % custom_id)
                return (False, None)
            # output sync map
            self._log("Outputting sync map for task '%s'..." % custom_id)
            task.output_sync_map_file(self.tmp_directory)
            self._log("Outputting sync map for task '%s'... done" % custom_id)

        # get output container info
        output_container_format = self.job.configuration.os_container_format
        self._log("Output container format: '%s'" % output_container_format)
        output_file_name = self.job.configuration.os_file_name
        if ((output_container_format != ContainerFormat.UNPACKED) and
                (not output_file_name.endswith(output_container_format))):
            self._log("Adding extension to output_file_name")
            output_file_name += "." + output_container_format
        self._log("Output file name: '%s'" % output_file_name)
        output_file_path = gf.norm_join(
            output_directory_path,
            output_file_name
        )
        self._log("Output file path: '%s'" % output_file_path)

        # create output container
        self._log("Compressing...")
        container = Container(
            output_file_path,
            output_container_format,
            logger=self.logger
        )
        container.compress(self.tmp_directory)
        self._log("Compressing... done")
        self._log("Created output file: '%s'" % output_file_path)

        # clean and return
        self.clean(False)
        return (True, output_file_path)
    except Exception:
        self.clean(False)
        return (False, None)
def write_output_container(self, output_directory_path):
    """
    Write the output container for this job.

    Return the path to the output container,
    which is the concatenation of ``output_directory_path``
    and of the output container file or directory name.

    :param string output_directory_path: the path to a directory where
                                         the output container must be created
    :rtype: string
    :raises: :class:`~aeneas.executejob.ExecuteJobOutputError`: if there
             is a problem while writing the output container
    """
    self.log(u"Writing output container for this job")
    if self.job is None:
        self.log_exc(u"The job object is None", None, True, ExecuteJobOutputError)
    if len(self.job) == 0:
        self.log_exc(u"The job has no tasks", None, True, ExecuteJobOutputError)
    self.log([u"Number of tasks: '%d'", len(self.job)])

    # create temporary directory where the sync map files
    # will be created
    # this temporary directory will be compressed into
    # the output container
    self.tmp_directory = gf.tmp_directory(root=self.rconf[RuntimeConfiguration.TMP_PATH])
    self.log([u"Created temporary directory '%s'", self.tmp_directory])

    for task in self.job.tasks:
        custom_id = task.configuration["custom_id"]
        # check if the task has sync map and sync map file path
        if task.sync_map_file_path is None:
            self.log_exc(u"Task '%s' has sync_map_file_path not set" % (custom_id), None, True, ExecuteJobOutputError)
        if task.sync_map is None:
            self.log_exc(u"Task '%s' has sync_map not set" % (custom_id), None, True, ExecuteJobOutputError)
        try:
            # output sync map
            self.log([u"Outputting sync map for task '%s'...", custom_id])
            task.output_sync_map_file(self.tmp_directory)
            self.log([u"Outputting sync map for task '%s'... done", custom_id])
        except Exception as exc:
            self.log_exc(u"Error while outputting sync map for task '%s'" % (custom_id), exc, True, ExecuteJobOutputError)

    # get output container info
    output_container_format = self.job.configuration["o_container_format"]
    self.log([u"Output container format: '%s'", output_container_format])
    output_file_name = self.job.configuration["o_name"]
    if ((output_container_format != ContainerFormat.UNPACKED) and
            (not output_file_name.endswith(output_container_format))):
        self.log(u"Adding extension to output_file_name")
        output_file_name += "." + output_container_format
    self.log([u"Output file name: '%s'", output_file_name])
    output_file_path = gf.norm_join(
        output_directory_path,
        output_file_name
    )
    self.log([u"Output file path: '%s'", output_file_path])

    try:
        self.log(u"Compressing...")
        container = Container(
            output_file_path,
            output_container_format,
            logger=self.logger
        )
        container.compress(self.tmp_directory)
        self.log(u"Compressing... done")
        self.log([u"Created output file: '%s'", output_file_path])
        self.log(u"Writing output container for this job: succeeded")
        self.clean(False)
        return output_file_path
    except Exception as exc:
        self.clean(False)
        self.log_exc(u"Error while compressing", exc, True, ExecuteJobOutputError)
        return None
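# The naming rule above, isolated as a tiny standalone sketch (hypothetical
# helper): the container format is appended as an extension unless the output
# is UNPACKED (a plain directory) or the name already ends with that format.
def sketch_output_name(name, fmt, unpacked=False):
    if (not unpacked) and (not name.endswith(fmt)):
        name += "." + fmt
    return name

# sketch_output_name("output_job", "zip")     => "output_job.zip"
# sketch_output_name("output_job.zip", "zip") => "output_job.zip"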
def load_job_from_container(self, container_path, config_string=None):
    """
    Load the job from the given :class:`aeneas.container.Container` object.

    If ``config_string`` is ``None``,
    the container must contain a configuration file;
    otherwise use the provided config string
    (i.e., the wizard case).

    :param string container_path: the path to the input container
    :param string config_string: the configuration string (from wizard)
    :raises: :class:`~aeneas.executejob.ExecuteJobInputError`: if the given
             container does not contain a valid :class:`~aeneas.job.Job`
    """
    self.log(u"Loading job from container...")

    # create working directory where the input container
    # will be decompressed
    self.working_directory = gf.tmp_directory(root=self.rconf[RuntimeConfiguration.TMP_PATH])
    self.log([u"Created working directory '%s'", self.working_directory])

    try:
        self.log(u"Decompressing input container...")
        input_container = Container(container_path, logger=self.logger)
        input_container.decompress(self.working_directory)
        self.log(u"Decompressing input container... done")
    except Exception as exc:
        self.clean()
        self.log_exc(u"Unable to decompress container '%s': %s" % (container_path, exc), None, True, ExecuteJobInputError)

    try:
        self.log(u"Creating job from working directory...")
        working_container = Container(
            self.working_directory,
            logger=self.logger
        )
        analyzer = AnalyzeContainer(working_container, logger=self.logger)
        self.job = analyzer.analyze(config_string=config_string)
        self.log(u"Creating job from working directory... done")
    except Exception as exc:
        self.clean()
        self.log_exc(u"Unable to analyze container '%s': %s" % (container_path, exc), None, True, ExecuteJobInputError)

    if self.job is None:
        self.log_exc(u"The container '%s' does not contain a valid Job" % (container_path), None, True, ExecuteJobInputError)

    try:
        # set absolute path for text file and audio file
        # for each task in the job
        self.log(u"Setting absolute paths for tasks...")
        for task in self.job.tasks:
            task.text_file_path_absolute = gf.norm_join(
                self.working_directory,
                task.text_file_path
            )
            task.audio_file_path_absolute = gf.norm_join(
                self.working_directory,
                task.audio_file_path
            )
        self.log(u"Setting absolute paths for tasks... done")
        self.log(u"Loading job from container: succeeded")
    except Exception as exc:
        self.clean()
        self.log_exc(u"Error while setting absolute paths for tasks", exc, True, ExecuteJobInputError)
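# Putting the pieces together, a hedged end-to-end sketch of the ExecuteJob
# workflow these methods belong to (paths are placeholders):
from aeneas.executejob import ExecuteJob

executor = ExecuteJob()
executor.load_job_from_container("/tmp/job.zip")     # decompress + analyze
executor.execute()                                   # process all tasks
path = executor.write_output_container("/tmp/out/")  # compress sync maps
executor.clean()
print(path)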
def _analyze_txt_config(self, config_string=None):
    """
    Analyze the given container and return the corresponding job.

    If ``config_string`` is ``None``,
    try reading it from the TXT config file inside the container.

    :param config_string: the configuration string
    :type  config_string: string
    :rtype: :class:`aeneas.job.Job`
    """
    # TODO break this function down into smaller functions
    self._log("Analyzing container with TXT config string")
    if config_string is None:
        self._log("Analyzing container with TXT config file")
        config_entry = self.container.entry_config_txt
        self._log("Found TXT config entry '%s'" % config_entry)
        config_dir = os.path.dirname(config_entry)
        self._log("Directory of TXT config entry: '%s'" % config_dir)
        self._log("Reading TXT config entry: '%s'" % config_entry)
        config_contents = self.container.read_entry(config_entry)
        #self._log("Removing BOM")
        #config_contents = gf.remove_bom(config_contents)
        self._log("Converting config contents to config string")
        config_string = gf.config_txt_to_string(config_contents)
    else:
        self._log("Analyzing container with TXT config string '%s'" % config_string)
        config_dir = ""
        #self._log("Removing BOM")
        #config_string = gf.remove_bom(config_string)

    # create the Job object to be returned
    self._log("Creating the Job object")
    job = Job(config_string)

    # get the entries in this container
    self._log("Getting entries")
    entries = self.container.entries()

    # convert the config string to dict
    self._log("Converting config string into config dict")
    parameters = gf.config_string_to_dict(config_string)

    # compute the root directory for the task assets
    self._log("Calculating the path of the tasks root directory")
    tasks_root_directory = gf.norm_join(
        config_dir,
        parameters[gc.PPN_JOB_IS_HIERARCHY_PREFIX]
    )
    self._log("Path of the tasks root directory: '%s'" % tasks_root_directory)

    # compute the root directory for the sync map files
    self._log("Calculating the path of the sync map root directory")
    sync_map_root_directory = gf.norm_join(
        config_dir,
        parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX]
    )
    job_os_hierarchy_type = parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
    self._log("Path of the sync map root directory: '%s'" % sync_map_root_directory)

    # prepare relative path and file name regex for text and audio files
    text_file_relative_path = parameters[gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH]
    self._log("Relative path for text file: '%s'" % text_file_relative_path)
    text_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX])
    self._log("Regex for text file: '%s'" % parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX])
    audio_file_relative_path = parameters[gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH]
    self._log("Relative path for audio file: '%s'" % audio_file_relative_path)
    audio_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX])
    self._log("Regex for audio file: '%s'" % parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX])

    # flat hierarchy
    if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.FLAT:
        self._log("Looking for text/audio pairs in flat hierarchy")
        text_files = self._find_files(
            entries,
            tasks_root_directory,
            text_file_relative_path,
            text_file_name_regex
        )
        self._log("Found text files: '%s'" % str(text_files))
        audio_files = self._find_files(
            entries,
            tasks_root_directory,
            audio_file_relative_path,
            audio_file_name_regex
        )
        self._log("Found audio files: '%s'" % str(audio_files))
        self._log("Matching files in flat hierarchy...")
        matched_tasks = self._match_files_flat_hierarchy(
            text_files,
            audio_files
        )
        self._log("Matching files in flat hierarchy... done")
        for task_info in matched_tasks:
            self._log("Creating task: '%s'" % str(task_info))
            task = self._create_task(
                task_info,
                config_string,
                sync_map_root_directory,
                job_os_hierarchy_type
            )
            job.add_task(task)

    # paged hierarchy
    if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.PAGED:
        self._log("Looking for text/audio pairs in paged hierarchy")
        # find all subdirectories of tasks_root_directory
        # that match gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX
        matched_directories = self._match_directories(
            entries,
            tasks_root_directory,
            parameters[gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX]
        )
        for matched_directory in matched_directories:
            # rebuild the full path
            matched_directory_full_path = gf.norm_join(
                tasks_root_directory,
                matched_directory
            )
            self._log("Looking for text/audio pairs in directory '%s'" % matched_directory_full_path)
            # look for text and audio files there
            text_files = self._find_files(
                entries,
                matched_directory_full_path,
                text_file_relative_path,
                text_file_name_regex
            )
            self._log("Found text files: '%s'" % str(text_files))
            audio_files = self._find_files(
                entries,
                matched_directory_full_path,
                audio_file_relative_path,
                audio_file_name_regex
            )
            self._log("Found audio files: '%s'" % str(audio_files))
            # if we have found exactly one text and one audio file,
            # create a Task
            if (len(text_files) == 1) and (len(audio_files) == 1):
                self._log("Exactly one text file and one audio file in '%s'" % matched_directory)
                task_info = [
                    matched_directory,
                    text_files[0],
                    audio_files[0]
                ]
                self._log("Creating task: '%s'" % str(task_info))
                task = self._create_task(
                    task_info,
                    config_string,
                    sync_map_root_directory,
                    job_os_hierarchy_type
                )
                job.add_task(task)
            elif len(text_files) > 1:
                self._log("More than one text file in '%s'" % matched_directory)
            elif len(audio_files) > 1:
                self._log("More than one audio file in '%s'" % matched_directory)
            else:
                self._log("No text nor audio file in '%s'" % matched_directory)

    # return the Job
    return job