Example #1
0
    def check_job_configuration(self, config_string):
        """
        Validate a job configuration string.

        The string is first passed to ``check_string_well_encoded``:
        if that check does not pass, its result is returned immediately.
        Otherwise the string is parsed with ``gf.config_string_to_dict``
        and the presence of the required job parameters is verified.

        :param config_string: the job configuration string to be checked
        :type  config_string: string
        :rtype: :class:`aeneas.validator.ValidatorResult`
        """
        self._log(["Checking job configuration '%s'", config_string])

        # NOTE: BOM removal is currently disabled in this method

        # encoding check: bail out as soon as it fails
        self._log("Checking that string is well encoded")
        outcome = self.check_string_well_encoded(config_string)
        if not outcome.passed:
            self._log("Failed")
            return outcome

        # parameters that every job configuration must define
        self._log("Checking required parameters")
        mandatory = [
            gc.PPN_JOB_LANGUAGE,
            gc.PPN_JOB_OS_FILE_NAME,
            gc.PPN_JOB_OS_CONTAINER_FORMAT,
        ]
        self._check_required_parameters(
            mandatory,
            gf.config_string_to_dict(config_string, outcome),
            outcome
        )

        self._log(["Checking job configuration: returning %s", outcome.passed])
        return outcome
Example #2
0
    def check_config_txt(self, contents, is_config_string=False):
        """
        Validate a TXT config, given either as the contents of
        a TXT config file (``is_config_string`` is ``False``)
        or directly as a TXT config string
        (``is_config_string`` is ``True``).

        A fresh :class:`~aeneas.validator.ValidatorResult` is stored
        in ``self.result`` and returned. If safety checks are disabled,
        it is returned without performing any check.

        :param string contents: the TXT config file contents or TXT config string
        :param bool is_config_string: if ``True``, contents is a config string
        :rtype: :class:`~aeneas.validator.ValidatorResult`
        """
        self.log(u"Checking contents TXT config file")
        self.result = ValidatorResult()
        if self._are_safety_checks_disabled(u"check_config_txt"):
            return self.result

        # byte strings are checked first, then converted to Unicode
        if gf.is_bytes(contents):
            self.log(u"Checking that contents is well formed")
            self.check_raw_string(contents, is_bstring=True)
            if not self.result.passed:
                return self.result
            contents = gf.safe_unicode(contents)

        # obtain the config string to be parsed
        if is_config_string:
            config_string = contents
        else:
            self.log(u"Converting file contents to config string")
            config_string = gf.config_txt_to_string(contents)

        self.log(u"Checking required parameters")
        required = self.TXT_REQUIRED_PARAMETERS
        found = gf.config_string_to_dict(config_string, self.result)
        self._check_required_parameters(required, found)

        self.log([u"Checking contents: returning %s", self.result.passed])
        return self.result
Example #3
0
    def __init__(self, config_string=None):
        """
        Set up the field tables from ``self.FIELDS`` and,
        if a configuration string is given, override the defaults
        with the values it contains.

        :param config_string: the configuration string, or ``None``
        :raises TypeError: if ``config_string`` is not ``None``
                           and it is not a Unicode string
        """
        if not ((config_string is None) or gf.is_unicode(config_string)):
            raise TypeError(u"config_string is not a Unicode string")

        # value, type and alias tables, one entry per declared field
        self.data = {}
        self.types = {}
        self.aliases = {}
        for name, (default, kind, alternate_names) in self.FIELDS:
            self.data[name] = default
            self.types[name] = kind
            for alternate in alternate_names:
                self.aliases[alternate] = name

        if config_string is None:
            return

        # drop a matching pair of enclosing " or ' characters
        quotes = [u"\"", u"'"]
        if (
                config_string and
                (config_string[0] in quotes) and
                (config_string[0] == config_string[-1])
        ):
            config_string = config_string[1:-1]

        # copy the values of the known fields only:
        # keys not declared in FIELDS are ignored
        properties = gf.config_string_to_dict(config_string)
        for key, value in properties.items():
            if key in self.data:
                self.data[key] = value
Example #4
0
    def __init__(self, config_string=None):
        """
        Declare the task fields, all initially ``None``, and
        fill them with the values found in ``config_string``, if given.

        Keys of the configuration string that are not task fields
        are ignored.

        :param config_string: the configuration string, or ``None``
        """
        # names of all the task fields
        self.field_names = [
            gc.PPN_TASK_DESCRIPTION,
            gc.PPN_TASK_LANGUAGE,
            gc.PPN_TASK_CUSTOM_ID,
            gc.PPN_TASK_ADJUST_BOUNDARY_ALGORITHM,
            gc.PPN_TASK_ADJUST_BOUNDARY_AFTERCURRENT_VALUE,
            gc.PPN_TASK_ADJUST_BOUNDARY_BEFORENEXT_VALUE,
            gc.PPN_TASK_ADJUST_BOUNDARY_OFFSET_VALUE,
            gc.PPN_TASK_ADJUST_BOUNDARY_PERCENT_VALUE,
            gc.PPN_TASK_ADJUST_BOUNDARY_RATE_VALUE,
            gc.PPN_TASK_IS_AUDIO_FILE_DETECT_HEAD_MIN,
            gc.PPN_TASK_IS_AUDIO_FILE_DETECT_HEAD_MAX,
            gc.PPN_TASK_IS_AUDIO_FILE_DETECT_TAIL_MIN,
            gc.PPN_TASK_IS_AUDIO_FILE_DETECT_TAIL_MAX,
            gc.PPN_TASK_IS_AUDIO_FILE_HEAD_LENGTH,
            gc.PPN_TASK_IS_AUDIO_FILE_PROCESS_LENGTH,
            gc.PPN_TASK_IS_TEXT_FILE_FORMAT,
            gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_SORT,
            gc.PPN_TASK_OS_FILE_FORMAT,
            gc.PPN_TASK_OS_FILE_NAME,
            gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF,
            gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF,
            gc.PPN_TASK_OS_FILE_HEAD_TAIL_FORMAT,
        ]

        # every field starts unset
        self.fields = dict.fromkeys(self.field_names)

        # nothing else to do without a configuration string
        if config_string is None:
            return

        # copy the values of the known fields only
        parsed = gf.config_string_to_dict(config_string)
        for name in parsed:
            if name in self.field_names:
                self.fields[name] = parsed[name]
Example #5
0
    def __init__(self, config_string=None):
        """
        Declare the job fields, all initially ``None``, and
        fill them with the values found in ``config_string``, if given.

        Keys of the configuration string that are not job fields
        are ignored.

        :param config_string: the configuration string, or ``None``
        """
        self.field_names = [
            # general
            gc.PPN_JOB_DESCRIPTION,
            gc.PPN_JOB_LANGUAGE,
            # input (IS) parameters
            gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX,
            gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH,
            gc.PPN_JOB_IS_HIERARCHY_PREFIX,
            gc.PPN_JOB_IS_HIERARCHY_TYPE,
            gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX,
            gc.PPN_JOB_IS_TEXT_FILE_FORMAT,
            gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX,
            gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH,
            gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX,
            gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX,
            gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT,
            # output (OS) parameters
            gc.PPN_JOB_OS_FILE_NAME,
            gc.PPN_JOB_OS_CONTAINER_FORMAT,
            gc.PPN_JOB_OS_HIERARCHY_TYPE,
            gc.PPN_JOB_OS_HIERARCHY_PREFIX,
        ]

        # every field starts unset
        self.fields = dict.fromkeys(self.field_names)

        if config_string is not None:
            # copy the values of the known fields only
            for name, value in gf.config_string_to_dict(config_string).items():
                if name in self.field_names:
                    self.fields[name] = value
Example #6
0
    def check_task_configuration(self, config_string):
        """
        Validate a task configuration string.

        The string must pass ``check_string_well_encoded``; if it does,
        it is parsed with ``gf.config_string_to_dict`` and the presence
        of the required task parameters is verified.

        :param config_string: the task configuration string to be checked
        :type  config_string: string
        :rtype: :class:`aeneas.validator.ValidatorResult`
        """
        self._log(["Checking task configuration '%s'", config_string])

        # NOTE: BOM removal is currently disabled in this method

        self._log("Checking that string is well encoded")
        verdict = self.check_string_well_encoded(config_string)
        if verdict.passed:
            # well encoded: look for the parameters every task must define
            self._log("Checking required parameters")
            mandatory = [
                gc.PPN_TASK_IS_TEXT_FILE_FORMAT,
                gc.PPN_TASK_LANGUAGE,
                gc.PPN_TASK_OS_FILE_NAME,
                gc.PPN_TASK_OS_FILE_FORMAT,
            ]
            found = gf.config_string_to_dict(config_string, verdict)
            self._check_required_parameters(mandatory, found, verdict)
            self._log(
                ["Checking task configuration: returning %s", verdict.passed]
            )
        else:
            # badly encoded: the encoding check result is returned as is
            self._log("Failed")
        return verdict
Example #7
0
    def __init__(self, config_string=None):
        """
        Set up the field tables (values, types, aliases, descriptions)
        from ``self.FIELDS`` and, if a configuration string is given,
        override the defaults with the values it contains.

        :param config_string: the configuration string, or ``None``
        :raises TypeError: if ``config_string`` is not ``None``
                           and it is not a Unicode string
        """
        has_config = config_string is not None
        if has_config and (not gf.is_unicode(config_string)):
            raise TypeError(u"config_string is not a Unicode string")

        # one entry per declared field in each table
        self.data = {}
        self.types = {}
        self.aliases = {}
        self.desc = {}
        for name, (default, kind, alternate_names, description) in self.FIELDS:
            self.data[name] = default
            self.types[name] = kind
            self.desc[name] = description
            for alternate in alternate_names:
                self.aliases[alternate] = name

        if has_config:
            # drop a matching pair of enclosing " or ' characters
            if len(config_string) > 0:
                first, last = config_string[0], config_string[-1]
                if (first == last) and (first in [u"\"", u"'"]):
                    config_string = config_string[1:-1]

            # copy the values of the known fields only:
            # keys not declared in FIELDS are ignored
            properties = gf.config_string_to_dict(config_string)
            known = set(self.data.keys())
            for key in known.intersection(properties.keys()):
                self.data[key] = properties[key]
Example #8
0
    def __init__(self, config_string=None):
        """
        Declare the job fields, all initially ``None``, and
        fill them with the values found in ``config_string``, if given.

        Keys of the configuration string that are not job fields
        are ignored.

        :param config_string: the configuration string, or ``None``
        """
        # names of all the job fields
        self.field_names = [
            gc.PPN_JOB_DESCRIPTION, gc.PPN_JOB_LANGUAGE,
            gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX,
            gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH,
            gc.PPN_JOB_IS_HIERARCHY_PREFIX, gc.PPN_JOB_IS_HIERARCHY_TYPE,
            gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX,
            gc.PPN_JOB_IS_TEXT_FILE_FORMAT,
            gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX,
            gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH,
            gc.PPN_JOB_IS_TEXT_UNPARSED_CLASS_REGEX,
            gc.PPN_JOB_IS_TEXT_UNPARSED_ID_REGEX,
            gc.PPN_JOB_IS_TEXT_UNPARSED_ID_SORT,
            gc.PPN_JOB_OS_FILE_NAME, gc.PPN_JOB_OS_CONTAINER_FORMAT,
            gc.PPN_JOB_OS_HIERARCHY_TYPE, gc.PPN_JOB_OS_HIERARCHY_PREFIX,
        ]

        # every field starts unset
        self.fields = {name: None for name in self.field_names}

        # without a configuration string the fields stay unset
        if config_string is None:
            return

        # copy the values of the known fields only
        parsed = gf.config_string_to_dict(config_string)
        for name in parsed:
            if name in self.field_names:
                self.fields[name] = parsed[name]
Example #9
0
File: task.py Project: cbeer/aeneas
    def __init__(self, config_string=None):
        """
        Declare the task fields, all initially ``None``, and
        fill them with the values found in ``config_string``, if given.

        Keys of the configuration string that are not task fields
        are ignored.

        :param config_string: the configuration string, or ``None``
        """
        # task fields
        self.field_names = [
            gc.PPN_TASK_DESCRIPTION,
            gc.PPN_TASK_LANGUAGE,
            gc.PPN_TASK_CUSTOM_ID,

            gc.PPN_TASK_IS_AUDIO_FILE_HEAD_LENGTH,
            gc.PPN_TASK_IS_AUDIO_FILE_PROCESS_LENGTH,
            gc.PPN_TASK_IS_TEXT_FILE_FORMAT,
            gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_SORT,

            gc.PPN_TASK_OS_FILE_FORMAT,
            gc.PPN_TASK_OS_FILE_NAME,
            gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF,
            gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF
        ]

        # every field starts unset
        self.fields = dict.fromkeys(self.field_names)

        # populate values from config_string;
        # None is a singleton, so test identity rather than equality
        if config_string is not None:
            properties = gf.config_string_to_dict(config_string)
            for key in properties:
                if key in self.field_names:
                    self.fields[key] = properties[key]
Example #10
0
    def _create_task(
            self,
            task_info,
            config_string,
            sync_map_root_directory,
            job_os_hierarchy_type
        ):
        """
        Build a task from the ``task_info`` found while analyzing
        the container entries and from the given ``config_string``.

        The task language is taken from the task language parameter
        of the configuration string, falling back to the job language
        parameter when the former is missing. The first element of
        ``task_info`` is used as the custom identifier of the task.

        :param list task_info: the task information: ``[prefix, text_path, audio_path]``
        :param string config_string: the configuration string
        :param string sync_map_root_directory: the root directory for the sync map files
        :param job_os_hierarchy_type: type of job output hierarchy
        :type  job_os_hierarchy_type: :class:`~aeneas.hierarchytype.HierarchyType`
        :rtype: :class:`~aeneas.task.Task`
        """
        self.log(u"Converting config string to config dict")
        parameters = gf.config_string_to_dict(config_string)

        self.log(u"Creating task")
        task = Task(config_string, logger=self.logger)

        # the prefix doubles as description suffix and custom id
        custom_id = task_info[0]
        task.configuration["description"] = "Task %s" % custom_id
        self.log([u"Task description: %s", task.configuration["description"]])

        # language: task-level value first, job-level value as fallback
        try:
            task.configuration["language"] = parameters[gc.PPN_TASK_LANGUAGE]
            self.log([u"Set language from task: '%s'", task.configuration["language"]])
        except KeyError:
            task.configuration["language"] = parameters[gc.PPN_JOB_LANGUAGE]
            self.log([u"Set language from job: '%s'", task.configuration["language"]])

        task.configuration["custom_id"] = custom_id
        self.log([u"Task custom_id: %s", task.configuration["custom_id"]])

        # input and output paths
        task.text_file_path = task_info[1]
        self.log([u"Task text file path: %s", task.text_file_path])
        task.audio_file_path = task_info[2]
        self.log([u"Task audio file path: %s", task.audio_file_path])
        output_name = task.configuration["o_name"]
        task.sync_map_file_path = self._compute_sync_map_file_path(
            sync_map_root_directory, job_os_hierarchy_type, custom_id, output_name
        )
        self.log([u"Task sync map file path: %s", task.sync_map_file_path])

        # pass both SMIL references through the placeholder replacement
        smil_steps = (
            (u"Replacing placeholder in os_file_smil_audio_ref", "o_smil_audio_ref"),
            (u"Replacing placeholder in os_file_smil_page_ref", "o_smil_page_ref"),
        )
        for message, key in smil_steps:
            self.log(message)
            task.configuration[key] = self._replace_placeholder(
                task.configuration[key], custom_id
            )

        self.log(u"Returning task")
        return task
Example #11
0
    def check_contents_txt_config_file(self,
                                       config_contents,
                                       convert_to_string=True):
        """
        Check whether the given TXT config contents (or config string)
        is well formed and contains all the requested parameters.

        :param config_contents: the contents of the TXT config file or,
                                if ``convert_to_string`` is ``False``,
                                a config string
        :type  config_contents: string
        :param convert_to_string: the ``config_contents`` must be converted
                                  to a config string
        :type convert_to_string: bool
        :rtype: :class:`aeneas.validator.ValidatorResult`
        """
        self._log("Checking contents TXT config file")

        result = ValidatorResult()
        if convert_to_string:
            self._log("Converting file contents to config string")
            config_string = gf.config_txt_to_string(config_contents)
        else:
            # the caller passed a config string already: use it as is
            # (previously config_string was left unbound on this path,
            # raising UnboundLocalError on the first use below)
            config_string = config_contents

        # check if it is well encoded
        # NOTE(review): sibling validators read ``.passed`` on the value
        # returned by check_string_well_encoded; confirm that its
        # truthiness is the intended test here
        self._log("Checking that string is well encoded")
        if not self.check_string_well_encoded(config_string):
            msg = "The TXT config is not well encoded"
            result.passed = False
            result.add_error(msg)
            self._log(msg)
            return result

        # check required parameters
        self._log("Checking required parameters")
        required_parameters = [
            gc.PPN_JOB_IS_HIERARCHY_TYPE, gc.PPN_JOB_IS_HIERARCHY_PREFIX,
            gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH,
            gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX, gc.PPN_JOB_IS_TEXT_FILE_FORMAT,
            gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH,
            gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX, gc.PPN_JOB_OS_FILE_NAME,
            gc.PPN_JOB_OS_CONTAINER_FORMAT, gc.PPN_JOB_OS_HIERARCHY_TYPE,
            gc.PPN_JOB_OS_HIERARCHY_PREFIX, gc.PPN_TASK_OS_FILE_NAME,
            gc.PPN_TASK_OS_FILE_FORMAT, gc.PPN_JOB_LANGUAGE
        ]
        parameters = gf.config_string_to_dict(config_string, result)
        self._check_required_parameters(required_parameters, parameters,
                                        result)

        # return result
        self._log(
            ["Checking contents TXT config file: returning %s", result.passed])
        return result
Example #12
0
    def _create_task(self, task_info, config_string, sync_map_root_directory,
                     job_os_hierarchy_type):
        """
        Build a task from the ``task_info`` found while analyzing
        the container entries and from the given ``config_string``.

        The task language is taken from the task language parameter
        of the configuration string, falling back to the job language
        parameter when the former is missing. The first element of
        ``task_info`` is used as the custom identifier of the task.

        :param task_info: the task information: ``[prefix, text_path, audio_path]``
        :type  task_info: list of strings
        :param config_string: the configuration string
        :type  config_string: string
        :param sync_map_root_directory: the root directory for the sync map files
        :type  sync_map_root_directory: string (path)
        :param job_os_hierarchy_type: type of job output hierarchy
        :type  job_os_hierarchy_type: :class:`aeneas.hierarchytype.HierarchyType`
        :rtype: :class:`aeneas.task.Task`
        """
        self._log("Converting config string to config dict")
        parameters = gf.config_string_to_dict(config_string)

        self._log("Creating task")
        task = Task(config_string)

        # the prefix doubles as description suffix and custom id
        custom_id = task_info[0]
        task.configuration.description = "Task %s" % custom_id
        self._log(["Task description: %s", task.configuration.description])

        # language: task-level value first, job-level value as fallback
        try:
            task.configuration.language = parameters[gc.PPN_TASK_LANGUAGE]
            self._log(
                ["Set language from task: '%s'", task.configuration.language]
            )
        except KeyError:
            task.configuration.language = parameters[gc.PPN_JOB_LANGUAGE]
            self._log(
                ["Set language from job: '%s'", task.configuration.language]
            )

        task.configuration.custom_id = custom_id
        self._log(["Task custom_id: %s", task.configuration.custom_id])

        # input and output paths
        task.text_file_path = task_info[1]
        self._log(["Task text file path: %s", task.text_file_path])
        task.audio_file_path = task_info[2]
        self._log(["Task audio file path: %s", task.audio_file_path])
        output_name = task.configuration.os_file_name
        task.sync_map_file_path = self._compute_sync_map_file_path(
            sync_map_root_directory,
            job_os_hierarchy_type,
            custom_id,
            output_name
        )
        self._log(["Task sync map file path: %s", task.sync_map_file_path])

        # pass both SMIL references through the placeholder replacement
        self._log("Replacing placeholder in os_file_smil_audio_ref")
        audio_ref = self._replace_placeholder(
            task.configuration.os_file_smil_audio_ref,
            custom_id
        )
        task.configuration.os_file_smil_audio_ref = audio_ref

        self._log("Replacing placeholder in os_file_smil_page_ref")
        page_ref = self._replace_placeholder(
            task.configuration.os_file_smil_page_ref,
            custom_id
        )
        task.configuration.os_file_smil_page_ref = page_ref

        self._log("Returning task")
        return task
 def test_config_string_to_dict(self):
     """
     Check ``gf.config_string_to_dict`` against a table of
     (config string, expected dictionary) pairs.
     """
     cases = [
         # no input at all
         (None, {}),
         (u"", {}),
         # a single pair, with optional leading/trailing separators
         (u"k1=v1", {u"k1": u"v1"}),
         (u"k1=v1|", {u"k1": u"v1"}),
         (u"|k1=v1|", {u"k1": u"v1"}),
         (u"|k1=v1", {u"k1": u"v1"}),
         # a repeated key keeps the last value
         (u"k1=v1|k1=v2", {u"k1": u"v2"}),
         (u"k1=v1|k2=v2", {u"k1": u"v1", u"k2": u"v2"}),
         (u"k1=v1|k2=v2|k1=v3", {u"k1": u"v3", u"k2": u"v2"}),
         # an empty segment between separators is skipped
         (u"k1=v1||k2=v2", {u"k1": u"v1", u"k2": u"v2"}),
         (u"k1=v1|k2=v2|k3=v3", {u"k1": u"v1", u"k2": u"v2", u"k3": u"v3"}),
         # pairs with an empty value or an empty key are dropped
         (u"k1=v1|k2=|k3=v3", {u"k1": u"v1", u"k3": u"v3"}),
         (u"k1=v1|=v2|k3=v3", {u"k1": u"v1", u"k3": u"v3"}),
     ]
     for config_string, expected in cases:
         actual = gf.config_string_to_dict(config_string)
         self.assertEqual(actual, expected)
Example #14
0
 def test_config_string_to_dict(self):
     """
     Feed ``gf.config_string_to_dict`` a series of config strings
     and compare each returned dictionary with the expected one.
     """
     # expected dictionaries shared by several inputs
     only_k1 = {u"k1": u"v1"}
     k1_and_k2 = {u"k1": u"v1", u"k2": u"v2"}
     k1_and_k3 = {u"k1": u"v1", u"k3": u"v3"}

     expectations = (
         (None, {}),
         (u"", {}),
         (u"k1=v1", only_k1),
         (u"k1=v1|", only_k1),
         (u"|k1=v1|", only_k1),
         (u"|k1=v1", only_k1),
         (u"k1=v1|k1=v2", {u"k1": u"v2"}),
         (u"k1=v1|k2=v2", k1_and_k2),
         (u"k1=v1|k2=v2|k1=v3", {u"k1": u"v3", u"k2": u"v2"}),
         (u"k1=v1||k2=v2", k1_and_k2),
         (u"k1=v1|k2=v2|k3=v3", {u"k1": u"v1", u"k2": u"v2", u"k3": u"v3"}),
         (u"k1=v1|k2=|k3=v3", k1_and_k3),
         (u"k1=v1|=v2|k3=v3", k1_and_k3),
     )
     for given, wanted in expectations:
         self.assertEqual(gf.config_string_to_dict(given), wanted)
Example #15
0
    def check_configuration_string(
            self,
            config_string,
            is_job=True,
            external_name=False
    ):
        """
        Validate a job or task configuration string.

        If ``config_string`` is a byte string, it is first checked
        with ``check_raw_string`` and then converted to Unicode.
        The string is then parsed and the presence of the required
        parameters (job, task, or task with external name) is verified.

        A fresh :class:`~aeneas.validator.ValidatorResult` is stored
        in ``self.result`` and returned. If safety checks are disabled,
        it is returned without performing any check.

        :param string config_string: the byte string or Unicode string to be checked
        :param bool is_job: if ``True``, ``config_string`` is a job config string
        :param bool external_name: if ``True``, the task name is provided externally,
                                   and it is not required to appear
                                   in the config string
        :rtype: :class:`~aeneas.validator.ValidatorResult`
        """
        self.log(
            u"Checking job configuration string" if is_job
            else u"Checking task configuration string"
        )
        self.result = ValidatorResult()
        if self._are_safety_checks_disabled(u"check_configuration_string"):
            return self.result

        # select the set of parameters that must be present
        if is_job:
            required = self.JOB_REQUIRED_PARAMETERS
        else:
            required = (
                self.TASK_REQUIRED_PARAMETERS_EXTERNAL_NAME if external_name
                else self.TASK_REQUIRED_PARAMETERS
            )

        # byte strings are checked first, then converted to Unicode
        if gf.is_bytes(config_string):
            self.log(u"Checking that config_string is well formed")
            self.check_raw_string(config_string, is_bstring=True)
            if not self.result.passed:
                return self.result
            config_string = gf.safe_unicode(config_string)

        self.log(u"Checking required parameters")
        found = gf.config_string_to_dict(config_string, self.result)
        self._check_required_parameters(required, found)

        self.log([u"Checking config_string: returning %s", self.result.passed])
        return self.result
Example #16
0
    def __init__(self, config_string=None):
        """
        Declare the task fields, all initially ``None``, and
        fill them with the values found in ``config_string``, if given.

        Keys of the configuration string that are not task fields
        are ignored.

        :param config_string: the configuration string, or ``None``
        """
        general = [
            gc.PPN_TASK_DESCRIPTION,
            gc.PPN_TASK_LANGUAGE,
            gc.PPN_TASK_CUSTOM_ID,
        ]
        adjust_boundary = [
            gc.PPN_TASK_ADJUST_BOUNDARY_ALGORITHM,
            gc.PPN_TASK_ADJUST_BOUNDARY_AFTERCURRENT_VALUE,
            gc.PPN_TASK_ADJUST_BOUNDARY_BEFORENEXT_VALUE,
            gc.PPN_TASK_ADJUST_BOUNDARY_OFFSET_VALUE,
            gc.PPN_TASK_ADJUST_BOUNDARY_PERCENT_VALUE,
            gc.PPN_TASK_ADJUST_BOUNDARY_RATE_VALUE,
        ]
        input_side = [
            gc.PPN_TASK_IS_AUDIO_FILE_DETECT_HEAD_MIN,
            gc.PPN_TASK_IS_AUDIO_FILE_DETECT_HEAD_MAX,
            gc.PPN_TASK_IS_AUDIO_FILE_DETECT_TAIL_MIN,
            gc.PPN_TASK_IS_AUDIO_FILE_DETECT_TAIL_MAX,
            gc.PPN_TASK_IS_AUDIO_FILE_HEAD_LENGTH,
            gc.PPN_TASK_IS_AUDIO_FILE_PROCESS_LENGTH,
            gc.PPN_TASK_IS_TEXT_FILE_FORMAT,
            gc.PPN_TASK_IS_TEXT_UNPARSED_CLASS_REGEX,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_REGEX,
            gc.PPN_TASK_IS_TEXT_UNPARSED_ID_SORT,
        ]
        output_side = [
            gc.PPN_TASK_OS_FILE_FORMAT,
            gc.PPN_TASK_OS_FILE_NAME,
            gc.PPN_TASK_OS_FILE_SMIL_AUDIO_REF,
            gc.PPN_TASK_OS_FILE_SMIL_PAGE_REF,
            gc.PPN_TASK_OS_FILE_HEAD_TAIL_FORMAT,
        ]

        # names of all the task fields, in declaration order
        self.field_names = general + adjust_boundary + input_side + output_side

        # every field starts unset
        self.fields = {}
        for name in self.field_names:
            self.fields[name] = None

        # copy the values of the known fields only
        if config_string is not None:
            parsed = gf.config_string_to_dict(config_string)
            for name, value in parsed.items():
                if name in self.field_names:
                    self.fields[name] = value
Example #17
0
    def _analyze_txt_config(self, config_string=None):
        """
        Analyze the given container and return the corresponding job.

        If ``config_string`` is ``None``,
        try reading it from the TXT config file inside the container.

        The job input hierarchy type parameter selects how tasks are found:
        with a flat hierarchy, text and audio files are searched under
        the tasks root directory and then matched;
        with a paged hierarchy, each matching subdirectory
        of the tasks root directory yields a task only if it contains
        exactly one text file and exactly one audio file.

        The job and hierarchy parameters are read from the parsed
        configuration by direct subscription, with no fallback value.

        :param string config_string: the configuration string
        :rtype: :class:`~aeneas.job.Job`
        """
        self.log(u"Analyzing container with TXT config string")

        if config_string is None:
            # no config string given: read it from the container,
            # and resolve paths relative to the directory of the config entry
            self.log(u"Analyzing container with TXT config file")
            config_entry = self.container.entry_config_txt
            self.log([u"Found TXT config entry '%s'", config_entry])
            config_dir = os.path.dirname(config_entry)
            self.log([u"Directory of TXT config entry: '%s'", config_dir])
            self.log([u"Reading TXT config entry: '%s'", config_entry])
            config_contents = self.container.read_entry(config_entry)
            self.log(u"Converting config contents to config string")
            config_contents = gf.safe_unicode(config_contents)
            config_string = gf.config_txt_to_string(config_contents)
        else:
            # config string given: paths are joined with an empty directory
            self.log([u"Analyzing container with TXT config string '%s'", config_string])
            config_dir = ""

        self.log(u"Creating the Job object")
        job = Job(config_string)

        self.log(u"Getting entries")
        entries = self.container.entries

        self.log(u"Converting config string into config dict")
        parameters = gf.config_string_to_dict(config_string)

        self.log(u"Calculating the path of the tasks root directory")
        tasks_root_directory = gf.norm_join(
            config_dir,
            parameters[gc.PPN_JOB_IS_HIERARCHY_PREFIX]
        )
        self.log([u"Path of the tasks root directory: '%s'", tasks_root_directory])

        self.log(u"Calculating the path of the sync map root directory")
        sync_map_root_directory = gf.norm_join(
            config_dir,
            parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX]
        )
        job_os_hierarchy_type = parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
        self.log([u"Path of the sync map root directory: '%s'", sync_map_root_directory])

        # relative paths and compiled file name regexes,
        # used to locate the text and audio files
        text_file_relative_path = parameters[gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH]
        self.log([u"Relative path for text file: '%s'", text_file_relative_path])
        text_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX])
        self.log([u"Regex for text file: '%s'", parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX]])
        audio_file_relative_path = parameters[gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH]
        self.log([u"Relative path for audio file: '%s'", audio_file_relative_path])
        audio_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX])
        self.log([u"Regex for audio file: '%s'", parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX]])

        # flat hierarchy: all the files are searched under
        # the tasks root directory, then text and audio files are matched
        if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.FLAT:
            self.log(u"Looking for text/audio pairs in flat hierarchy")
            text_files = self._find_files(
                entries,
                tasks_root_directory,
                text_file_relative_path,
                text_file_name_regex
            )
            self.log([u"Found text files: '%s'", text_files])
            audio_files = self._find_files(
                entries,
                tasks_root_directory,
                audio_file_relative_path,
                audio_file_name_regex
            )
            self.log([u"Found audio files: '%s'", audio_files])

            self.log(u"Matching files in flat hierarchy...")
            matched_tasks = self._match_files_flat_hierarchy(
                text_files,
                audio_files
            )
            self.log(u"Matching files in flat hierarchy... done")

            # one task per matched element
            for task_info in matched_tasks:
                self.log([u"Creating task: '%s'", str(task_info)])
                task = self._create_task(
                    task_info,
                    config_string,
                    sync_map_root_directory,
                    job_os_hierarchy_type
                )
                job.add_task(task)

        # paged hierarchy: each matching subdirectory is examined on its own
        if parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE] == HierarchyType.PAGED:
            self.log(u"Looking for text/audio pairs in paged hierarchy")
            # find all subdirectories of tasks_root_directory
            # that match gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX
            matched_directories = self._match_directories(
                entries,
                tasks_root_directory,
                parameters[gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX]
            )
            for matched_directory in matched_directories:
                # rebuild the full path
                matched_directory_full_path = gf.norm_join(
                    tasks_root_directory,
                    matched_directory
                )
                self.log([u"Looking for text/audio pairs in directory '%s'", matched_directory_full_path])

                # look for text and audio files there
                text_files = self._find_files(
                    entries,
                    matched_directory_full_path,
                    text_file_relative_path,
                    text_file_name_regex
                )
                self.log([u"Found text files: '%s'", text_files])
                audio_files = self._find_files(
                    entries,
                    matched_directory_full_path,
                    audio_file_relative_path,
                    audio_file_name_regex
                )
                self.log([u"Found audio files: '%s'", audio_files])

                # if we have found exactly one text and one audio file,
                # create a Task; otherwise the directory is skipped
                # and only a message is logged
                if (len(text_files) == 1) and (len(audio_files) == 1):
                    self.log([u"Exactly one text file and one audio file in '%s'", matched_directory])
                    # the directory name is the first element of task_info
                    task_info = [
                        matched_directory,
                        text_files[0],
                        audio_files[0]
                    ]
                    self.log([u"Creating task: '%s'", str(task_info)])
                    task = self._create_task(
                        task_info,
                        config_string,
                        sync_map_root_directory,
                        job_os_hierarchy_type
                    )
                    job.add_task(task)
                elif len(text_files) > 1:
                    self.log([u"More than one text file in '%s'", matched_directory])
                elif len(audio_files) > 1:
                    self.log([u"More than one audio file in '%s'", matched_directory])
                else:
                    # reached when at least one of the two lists is empty,
                    # i.e. also when only the text or only the audio is missing
                    self.log([u"No text nor audio file in '%s'", matched_directory])

        return job
Example #18
0
    def _analyze_txt_config(self, config_string=None):
        """
        Build the job described by a TXT configuration for the container.

        When ``config_string`` is ``None``, the configuration is read
        from the TXT config entry of the container, and the task and
        sync map root directories are resolved relative to the directory
        of that entry. Otherwise the given string is used as it is,
        and the root directories are resolved relative to ``""``.

        :param string config_string: the configuration string
        :rtype: :class:`~aeneas.job.Job`
        """
        self.log(u"Analyzing container with TXT config string")

        if config_string is not None:
            self.log([u"Analyzing container with TXT config string '%s'", config_string])
            config_dir = ""
        else:
            self.log(u"Analyzing container with TXT config file")
            txt_entry = self.container.entry_config_txt
            self.log([u"Found TXT config entry '%s'", txt_entry])
            config_dir = os.path.dirname(txt_entry)
            self.log([u"Directory of TXT config entry: '%s'", config_dir])
            self.log([u"Reading TXT config entry: '%s'", txt_entry])
            raw_contents = self.container.read_entry(txt_entry)
            self.log(u"Converting config contents to config string")
            config_string = gf.config_txt_to_string(gf.safe_unicode(raw_contents))

        self.log(u"Creating the Job object")
        job = Job(config_string)

        self.log(u"Getting entries")
        entries = self.container.entries

        self.log(u"Converting config string into config dict")
        params = gf.config_string_to_dict(config_string)

        # root directory holding the task assets
        self.log(u"Calculating the path of the tasks root directory")
        tasks_root = gf.norm_join(config_dir, params[gc.PPN_JOB_IS_HIERARCHY_PREFIX])
        self.log([u"Path of the tasks root directory: '%s'", tasks_root])

        # root directory for the sync map files
        self.log(u"Calculating the path of the sync map root directory")
        sync_map_root = gf.norm_join(config_dir, params[gc.PPN_JOB_OS_HIERARCHY_PREFIX])
        os_hierarchy_type = params[gc.PPN_JOB_OS_HIERARCHY_TYPE]
        self.log([u"Path of the sync map root directory: '%s'", sync_map_root])

        # relative path and compiled file name regex for the text files
        text_relative_path = params[gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH]
        self.log([u"Relative path for text file: '%s'", text_relative_path])
        text_regex_source = params[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX]
        text_regex = re.compile(r"" + text_regex_source)
        self.log([u"Regex for text file: '%s'", text_regex_source])

        # relative path and compiled file name regex for the audio files
        audio_relative_path = params[gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH]
        self.log([u"Relative path for audio file: '%s'", audio_relative_path])
        audio_regex_source = params[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX]
        audio_regex = re.compile(r"" + audio_regex_source)
        self.log([u"Regex for audio file: '%s'", audio_regex_source])

        def find(directory, relative_path, name_regex):
            # look up the container entries matching the given location and regex
            return self._find_files(entries, directory, relative_path, name_regex)

        def add_task(task_info):
            # log, create and attach one task to the job being built
            self.log([u"Creating task: '%s'", str(task_info)])
            job.add_task(
                self._create_task(
                    task_info, config_string, sync_map_root, os_hierarchy_type
                )
            )

        hierarchy_type = params[gc.PPN_JOB_IS_HIERARCHY_TYPE]

        if hierarchy_type == HierarchyType.FLAT:
            self.log(u"Looking for text/audio pairs in flat hierarchy")
            text_files = find(tasks_root, text_relative_path, text_regex)
            self.log([u"Found text files: '%s'", text_files])
            audio_files = find(tasks_root, audio_relative_path, audio_regex)
            self.log([u"Found audio files: '%s'", audio_files])

            self.log(u"Matching files in flat hierarchy...")
            pairs = self._match_files_flat_hierarchy(text_files, audio_files)
            self.log(u"Matching files in flat hierarchy... done")

            for pair in pairs:
                add_task(pair)

        elif hierarchy_type == HierarchyType.PAGED:
            self.log(u"Looking for text/audio pairs in paged hierarchy")
            # visit every subdirectory of the tasks root directory
            # whose name matches gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX
            for directory in self._match_directories(
                    entries,
                    tasks_root,
                    params[gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX]):
                directory_full_path = gf.norm_join(tasks_root, directory)
                self.log([u"Looking for text/audio pairs in directory '%s'", directory_full_path])

                text_files = find(directory_full_path, text_relative_path, text_regex)
                self.log([u"Found text files: '%s'", text_files])
                audio_files = find(directory_full_path, audio_relative_path, audio_regex)
                self.log([u"Found audio files: '%s'", audio_files])

                text_count = len(text_files)
                audio_count = len(audio_files)

                # a task is created only for an unambiguous text/audio pair
                if text_count == 1 and audio_count == 1:
                    self.log([u"Exactly one text file and one audio file in '%s'", directory])
                    add_task([directory, text_files[0], audio_files[0]])
                    continue

                if text_count > 1:
                    message = u"More than one text file in '%s'"
                elif audio_count > 1:
                    message = u"More than one audio file in '%s'"
                else:
                    message = u"No text nor audio file in '%s'"
                self.log([message, directory])

        return job
Example #19
0
    def check_contents_txt_config_file(
            self,
            config_contents,
            convert_to_string=True
        ):
        """
        Check whether the given TXT config contents (or config string)
        is well formed and contains all the requested parameters.

        :param config_contents: the contents of the TXT config file or,
                                if ``convert_to_string`` is ``False``,
                                an already built config string
        :type  config_contents: string
        :param convert_to_string: if ``True``, ``config_contents`` is converted
                                  to a config string before being checked;
                                  if ``False``, it is checked as it is
        :type convert_to_string: bool
        :rtype: :class:`aeneas.validator.ValidatorResult`
        """
        self._log("Checking contents TXT config file")

        result = ValidatorResult()
        if convert_to_string:
            self._log("Converting file contents to config string")
            config_string = gf.config_txt_to_string(config_contents)
        else:
            # the caller already passed a config string: use it unchanged,
            # otherwise config_string would be unbound in the checks below
            config_string = config_contents

        # check if it is well encoded
        self._log("Checking that string is well encoded")
        if not self.check_string_well_encoded(config_string):
            msg = "The TXT config is not well encoded"
            result.passed = False
            result.add_error(msg)
            self._log(msg)
            return result

        # check required parameters
        self._log("Checking required parameters")
        required_parameters = [
            gc.PPN_JOB_IS_HIERARCHY_TYPE,
            gc.PPN_JOB_IS_HIERARCHY_PREFIX,
            gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH,
            gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX,
            gc.PPN_JOB_IS_TEXT_FILE_FORMAT,
            gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH,
            gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX,
            gc.PPN_JOB_OS_FILE_NAME,
            gc.PPN_JOB_OS_CONTAINER_FORMAT,
            gc.PPN_JOB_OS_HIERARCHY_TYPE,
            gc.PPN_JOB_OS_HIERARCHY_PREFIX,
            gc.PPN_TASK_OS_FILE_NAME,
            gc.PPN_TASK_OS_FILE_FORMAT,
            gc.PPN_JOB_LANGUAGE
        ]
        # the same result object is handed to both helpers and then returned
        parameters = gf.config_string_to_dict(config_string, result)
        self._check_required_parameters(required_parameters, parameters, result)

        # return result
        self._log(["Checking contents TXT config file: returning %s", result.passed])
        return result
Example #20
0
    def _analyze_txt_config(self, config_string=None):
        """
        Analyze the given container and return the corresponding job.

        If ``config_string`` is ``None``,
        try reading it from the TXT config file inside the container;
        in that case the task and sync map root directories are resolved
        relative to the directory of the TXT config entry.
        Otherwise they are resolved relative to ``""``.

        Tasks are created only for the flat and paged hierarchy types;
        for any other value the returned job has no tasks added by this method.

        :param config_string: the configuration string
        :type  config_string: string
        :rtype: :class:`aeneas.job.Job`
        """
        # TODO break this function down into smaller functions
        self._log("Analyzing container with TXT config string")

        # identity test: None is a singleton, and "==" could be hijacked
        # by a custom __eq__ on the argument
        if config_string is None:
            self._log("Analyzing container with TXT config file")
            config_entry = self.container.entry_config_txt
            self._log("Found TXT config entry '%s'" % config_entry)
            config_dir = os.path.dirname(config_entry)
            self._log("Directory of TXT config entry: '%s'" % config_dir)
            self._log("Reading TXT config entry: '%s'" % config_entry)
            config_contents = self.container.read_entry(config_entry)
            self._log("Converting config contents to config string")
            config_string = gf.config_txt_to_string(config_contents)
        else:
            self._log("Analyzing container with TXT config string '%s'" % config_string)
            config_dir = ""

        # create the Job object to be returned
        self._log("Creating the Job object")
        job = Job(config_string)

        # get the entries in this container
        self._log("Getting entries")
        entries = self.container.entries()

        # convert the config string to dict
        self._log("Converting config string into config dict")
        parameters = gf.config_string_to_dict(config_string)

        # compute the root directory for the task assets
        self._log("Calculating the path of the tasks root directory")
        tasks_root_directory = gf.norm_join(
            config_dir,
            parameters[gc.PPN_JOB_IS_HIERARCHY_PREFIX]
        )
        self._log("Path of the tasks root directory: '%s'" % tasks_root_directory)

        # compute the root directory for the sync map files
        self._log("Calculating the path of the sync map root directory")
        sync_map_root_directory = gf.norm_join(
            config_dir,
            parameters[gc.PPN_JOB_OS_HIERARCHY_PREFIX]
        )
        job_os_hierarchy_type = parameters[gc.PPN_JOB_OS_HIERARCHY_TYPE]
        self._log("Path of the sync map root directory: '%s'" % sync_map_root_directory)

        # prepare relative path and file name regex for text and audio files
        text_file_relative_path = parameters[gc.PPN_JOB_IS_TEXT_FILE_RELATIVE_PATH]
        self._log("Relative path for text file: '%s'" % text_file_relative_path)
        text_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX])
        self._log("Regex for text file: '%s'" % parameters[gc.PPN_JOB_IS_TEXT_FILE_NAME_REGEX])
        audio_file_relative_path = parameters[gc.PPN_JOB_IS_AUDIO_FILE_RELATIVE_PATH]
        self._log("Relative path for audio file: '%s'" % audio_file_relative_path)
        audio_file_name_regex = re.compile(r"" + parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX])
        self._log("Regex for audio file: '%s'" % parameters[gc.PPN_JOB_IS_AUDIO_FILE_NAME_REGEX])

        # the two hierarchy types are mutually exclusive:
        # look the value up once and branch on it
        job_is_hierarchy_type = parameters[gc.PPN_JOB_IS_HIERARCHY_TYPE]

        if job_is_hierarchy_type == HierarchyType.FLAT:
            # flat hierarchy
            self._log("Looking for text/audio pairs in flat hierarchy")
            text_files = self._find_files(
                entries,
                tasks_root_directory,
                text_file_relative_path,
                text_file_name_regex
            )
            self._log("Found text files: '%s'" % str(text_files))
            audio_files = self._find_files(
                entries,
                tasks_root_directory,
                audio_file_relative_path,
                audio_file_name_regex
            )
            self._log("Found audio files: '%s'" % str(audio_files))

            self._log("Matching files in flat hierarchy...")
            matched_tasks = self._match_files_flat_hierarchy(
                text_files,
                audio_files
            )
            self._log("Matching files in flat hierarchy... done")

            for task_info in matched_tasks:
                self._log("Creating task: '%s'" % str(task_info))
                task = self._create_task(
                    task_info,
                    config_string,
                    sync_map_root_directory,
                    job_os_hierarchy_type
                )
                job.add_task(task)

        elif job_is_hierarchy_type == HierarchyType.PAGED:
            # paged hierarchy
            self._log("Looking for text/audio pairs in paged hierarchy")
            # find all subdirectories of tasks_root_directory
            # that match gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX
            matched_directories = self._match_directories(
                entries,
                tasks_root_directory,
                parameters[gc.PPN_JOB_IS_TASK_DIRECTORY_NAME_REGEX]
            )
            for matched_directory in matched_directories:
                # rebuild the full path
                matched_directory_full_path = gf.norm_join(
                    tasks_root_directory,
                    matched_directory
                )
                self._log("Looking for text/audio pairs in directory '%s'" % matched_directory_full_path)

                # look for text and audio files there
                text_files = self._find_files(
                    entries,
                    matched_directory_full_path,
                    text_file_relative_path,
                    text_file_name_regex
                )
                self._log("Found text files: '%s'" % str(text_files))
                audio_files = self._find_files(
                    entries,
                    matched_directory_full_path,
                    audio_file_relative_path,
                    audio_file_name_regex
                )
                self._log("Found audio files: '%s'" % str(audio_files))

                # if we have found exactly one text and one audio file,
                # create a Task; any other combination is only logged
                # and the directory is skipped
                if (len(text_files) == 1) and (len(audio_files) == 1):
                    self._log("Exactly one text file and one audio file in '%s'" % matched_directory)
                    task_info = [
                        matched_directory,
                        text_files[0],
                        audio_files[0]
                    ]
                    self._log("Creating task: '%s'" % str(task_info))
                    task = self._create_task(
                        task_info,
                        config_string,
                        sync_map_root_directory,
                        job_os_hierarchy_type
                    )
                    job.add_task(task)
                elif len(text_files) > 1:
                    self._log("More than one text file in '%s'" % matched_directory)
                elif len(audio_files) > 1:
                    self._log("More than one audio file in '%s'" % matched_directory)
                else:
                    self._log("No text nor audio file in '%s'" % matched_directory)

        # return the Job
        return job