Beispiel #1
0
    def _get_key(self, string_tag, tag):
        """Return the key that corresponds to a given translatable string.

        Uses the attributes of the outer `string_tag` and the inner `tag`
        in order to construct a key.

        :param NewDumbXml string_tag: the outer XML tag that
            may contain various translatable strings
        :param NewDumbXml tag: the inner XML tag that contains
            one translatable string
        :return: the key of the corresponding translatable string
        :rtype: str
        """
        try:
            # Base string
            if tag.tag == self.base_string_name:
                return string_tag.attrib[self.string_key_name]

            # Variant
            else:
                return "{key}{separator}{variant_id}".format(
                    key=string_tag.attrib[self.string_key_name],
                    separator=COMPOSITE_KEY_SEPARATOR,
                    variant_id=tag.attrib[self.variant_string_context_name],
                )
        except KeyError:
            raise ParseError(
                'Missing "{key}" attribute in <{string}> node, '
                'parent of "<{tag.tag}>{tag.content}</{tag.tag}>"'.format(
                    key=self.string_key_name,
                    string=self.string_name,
                    tag=tag,
                ))
Beispiel #2
0
    def _get_yaml_data_to_parse(self, yaml_data):
        keys = list(six.iterkeys(yaml_data))
        if len(keys) > 1:
            raise ParseError("YAML file contains more than one root keys.")

        root_key = keys[0]
        return yaml_data[root_key][0]
Beispiel #3
0
    def _load_yaml(self, content, loader):
        """
        Loads a YAML stream and returns the representation that the
        given loader builds for the YAML data

        Args:
            content: A string, YAML content
            loader: YAML Loader class or None. When None,
                `yaml.SafeLoader` is used.

        Returns:
            Whatever `yaml.load` produces for `content` with the loader

        Raises:
            ParseError: if loading fails for any reason; the message is
                the text of the underlying exception
        """
        if loader is None:
            # Passing `Loader=None` through to `yaml.load` is not portable
            # across PyYAML releases (some try to call None, some fall
            # back to a non-safe loader with a warning). Fall back to the
            # safe loader explicitly instead.
            loader = yaml.SafeLoader

        # NOTE(security): `yaml.load` can build arbitrary objects when a
        # non-safe loader is supplied; callers handling untrusted content
        # must pass a loader derived from `yaml.SafeLoader`.
        try:
            return yaml.load(content, Loader=loader)
        except Exception as e:
            # Scanner errors are plain `Exception` subclasses, so a single
            # handler covers them along with every other failure.
            raise ParseError(six.text_type(e))
Beispiel #4
0
    def _get_root_node_pos(self, string):
        """Return the position of the root node in the given string.

        :param str string: the content to search for the root node.
        :return: the integer position
        :rtype: int
        :raise ParseError: if the root node is not found
        """
        try:
            return string.index('<{}'.format(self.root_name))
        except ValueError:
            raise ParseError('Root node "<{root}>" not found'.format(
                root=self.root_name, ))
Beispiel #5
0
    def _validate_plural_content_format(self, key, serialized_strings,
                                        all_matches):
        """
        Make sure the serialized content is properly formatted
        as one or more pluralized strings.
        :param key: the string key
            { item_count, plural, zero {...} one {...} other {...}}
        :param serialized_strings: the part of the value that holds the
            string information only, e.g.
            zero {...} one {...} other {...}
        :param all_matches: a pyparsing element that matches all strings
            formatted like '<alphanumeric> {...}'

        :raise ParseError: if the given string has an invalid structure
        """
        # Replace all matches with spaces in the given string.
        remaining_str = serialized_strings
        for match in all_matches:
            remaining_str = remaining_str.replace(match[0], '')

        # Then make sure all whitespace is removed as well
        # Special characters may be present with double backslashes,
        # e.g. \\n
        remaining_str = remaining_str.replace('\\n', '\n')\
            .replace('\\t', '\t')\
            .strip()

        if len(remaining_str) > 0:
            raise ParseError(
                'Invalid format of pluralized entry '
                'with key: "{key}", serialized translations: "{trans_str}". '
                'Could not parse the string at or near '
                'the following chunk: "{chunk}". '
                'It contains either invalid braces ("{{", "}}") '
                'or invalid characters.'.format(
                    key=key,
                    trans_str=serialized_strings,
                    chunk=remaining_str,
                ))
Beispiel #6
0
    def construct_sequence(self, node, deep=True):
        """
        Override `yaml.SafeLoader.construct_sequence` to return a list of
        `Node` objects instead of the default which is a list of values.

        Items are skipped when they are tagged as binary, when they cannot
        be constructed, or when the constructed value is not text, bytes,
        a list or a dict.

        :param node: the YAML node to construct; must be a SequenceNode
        :param deep: forwarded to `construct_object` for every item
        :return: a list of `Node(value, start, end, style, None)` objects
        :raise ParseError: if node is not a SequenceNode
        """
        if not isinstance(node, yaml.SequenceNode):
            raise ParseError(
                "Expected a sequence node, but found {}".format(node.id)
            )
        values = []
        supported_types = (six.text_type, six.binary_type, list, dict)

        for value_node in node.value:
            # don't process binary values
            if value_node.tag == YAML_BINARY_ID:
                continue
            try:
                value = self.construct_object(value_node, deep=deep)
            except ConstructorError as e:
                # Best effort: report the problem and skip the item
                print("During parsing YAML file: {}".format(six.text_type(e)))
                continue
            if not isinstance(value, supported_types):
                continue

            if isinstance(value, (dict, list)):
                # Collections only distinguish flow from block style
                style = 'flow' if value_node.flow_style else 'block'
            else:
                # Scalars keep the style reported by the YAML node
                style = value_node.style

            values.append(Node(
                value,
                value_node.start_mark.index,
                value_node.end_mark.index,
                style,
                None,
            ))
        return values
Beispiel #7
0
    def _handle_invalid_plural_format(self, serialized_strings,
                                      any_plural_item, key, value):
        """
        Report a badly formatted plural entry by raising a ParseError
        that lists the plural rules which are not recognised.

        This method never returns normally.

        :param serialized_strings: the serialized plural strings that are
            searched for rule names
        :param any_plural_item: a forgiving pyparsing element that matches all
            strings formatted like '<alphanumeric> {...}'
        :param key: the key of the entry, used in the error message
        :param value: the value of the entry, used in the error message

        :raise: ParseError
        """
        found_items = any_plural_item.searchString(serialized_strings)
        found_rules = [item[0] for item in found_items]

        allowed_rules = list(six.iterkeys(Handler._RULES_ATOI))
        unknown_rules = []
        for rule in found_rules:
            if rule not in allowed_rules:
                unknown_rules.append(rule)

        message = ('Invalid plural rule(s): "{}" in pluralized entry '
                   'with key: {}, value: "{}". '
                   'Allowed values are: {}')
        raise ParseError(message.format(
            ', '.join(unknown_rules), key, value,
            ', '.join(allowed_rules)))
Beispiel #8
0
    def _parse_section(self, offset, section):
        """Parse a single subtitle section into an `OpenString`.

        A section consists of three newline-separated parts: an order
        number, a timings line following the '[start] --> [end]
        (position)' pattern, and the subtitle content.

        As a side effect, `self.max_order` is set to the order number of
        this section once it has been validated.

        :param offset: a number that is advanced by the length of the
            order and timings lines (presumably the position of the
            section within the whole content -- confirm against caller)
        :param section: the text of the section
        :return: a tuple of the advanced offset and the `OpenString`
            built for the section
        :raise ParseError: if a part is missing, the order number is not
            a positive integer or is not ascending, the timings are
            malformed, or the subtitle content is blank
        """
        parts = section.split('\n', 2)
        if len(parts) != 3:
            raise ParseError(
                u"Not enough data on subtitle section on line {}. Order "
                u"number, timings and subtitle content are needed".
                format(self.transcriber.line_number)
            )
        order_str, timings, string = parts

        # first line, order
        try:
            order_int = int(order_str.strip())
        except ValueError:
            order_int = None
        if order_int is None or order_int <= 0:
            raise ParseError(
                u"Order number on line {line_no} ({order_no}) must be a "
                u"positive integer".format(
                    line_no=self.transcriber.line_number,
                    order_no=order_str,
                )
            )
        if self.max_order is not None and order_int <= self.max_order:
            raise ParseError(
                u"Order numbers must be in ascending order; number in line "
                u"{line_no} ({order_no}) is wrong".format(
                    line_no=self.transcriber.line_number,
                    order_no=order_int,
                )
            )
        self.max_order = order_int

        # second line, timings: at least "start", "-->" and "end" are
        # needed; anything after "end" is ignored
        timing_fields = timings.split(None, 3)
        if len(timing_fields) < 3 or timing_fields[1] != u"-->":
            raise ParseError(
                u"Timings on line {} don't follow '[start] --> [end] "
                "(position)' pattern".format(
                    self.transcriber.line_number + 1
                )
            )
        raw_start, raw_end = timing_fields[0], timing_fields[2]
        try:
            start = self._format_timing(raw_start)
        except ValueError:
            raise ParseError(
                u"Problem with start of timing at line {line_no}: '{start}'".
                format(line_no=self.transcriber.line_number + 1,
                       start=raw_start)
            )
        try:
            end = self._format_timing(raw_end)
        except ValueError:
            raise ParseError(
                u"Problem with end of timing at line {line_no}: '{end}'".
                format(line_no=self.transcriber.line_number + 1,
                       end=raw_end)
            )

        # Content
        if not string.strip():
            raise ParseError(u"Subtitle is empty on line {}".
                             format(self.transcriber.line_number + 2))

        open_string = OpenString(order_str.strip(), string, order=order_int,
                                 occurrences="{},{}".format(start, end))
        # Skip the order line and the timings line, each followed by the
        # newline that separated it from the next part
        new_offset = offset + len(order_str) + 1 + len(timings) + 1
        return new_offset, open_string
Beispiel #9
0
    def construct_mapping(self, node, deep=True):
        """
        Override `yaml.SafeLoader.construct_mapping` to return for each item
        of the mapping a tuple of the form `(key, (value, start, end, style,
        tag))` instead of the default which is `(key, value)`.

        Items are skipped when the value is tagged as binary, when the key
        or the value cannot be constructed, when the key is not hashable,
        or when the value is not text, bytes, a list or a dict.

        :param node: the YAML node to construct; must be a MappingNode
        :param deep: forwarded to `construct_object` for keys and values
        :return: a list of `(key, Node)` pairs
        :raise ParseError: if node is not a MappingNode
            or duplicate keys are found.
        """
        if not isinstance(node, yaml.MappingNode):
            raise ParseError(
                "Expected a mapping node, but found {}".format(node.id),
            )
        pairs = []
        for key_node, value_node in node.value:
            # don't process binary values
            if value_node.tag == YAML_BINARY_ID:
                continue
            try:
                key = self.construct_object(key_node, deep=deep)
                value = self.construct_object(value_node, deep=deep)
            except ConstructorError as e:
                # Best effort: report the problem and skip the item
                print("During parsing YAML file: {}".format(six.text_type(e)))
                continue

            # skip (and report) items whose key cannot be hashed
            try:
                hash(key)
            except TypeError as e:
                print("Error while constructing a mapping, found unacceptable"
                      " key ({})".format(six.text_type(e)))
                continue

            if not isinstance(
                value, (six.text_type, six.binary_type, list, dict)
            ):
                continue
            start = value_node.start_mark.index
            end = value_node.end_mark.index
            style = ''

            # take into account key strings that translate into
            # boolean objects: use the key text as written in the stream
            if isinstance(key, bool):
                key = self.stream[key_node.start_mark.index:
                                  key_node.end_mark.index]

            if isinstance(value, (list, dict)):
                if value_node.flow_style:
                    style = 'flow'
                else:
                    style = 'block'
                    # move the start back by the column distance between
                    # the value and its key
                    start = (start -
                             (value_node.start_mark.column -
                              key_node.start_mark.column))

                    # re calculate end position taking into account
                    # comments after a block node (seq or mapping)
                    end = self._calculate_block_end_pos(start, end)
            else:
                # text or bytes: keep the style reported by the YAML node
                style = value_node.style

            # Setup the node's tag
            tag = None
            if (
                hasattr(value_node, 'tag')
                and self._is_custom_tag(value_node.tag)
            ):
                tag = six.text_type(value_node.tag)

            pairs.append((key, Node(value, start, end, style, tag)))

        # If there are duplicate keys, throw an exception. Duplicates are
        # collected in order of appearance so the message is deterministic.
        seen = set()
        duplicates = []
        for pair_key, _ in pairs:
            if pair_key not in seen:
                seen.add(pair_key)
            elif pair_key not in duplicates:
                duplicates.append(pair_key)

        if duplicates:
            # Keys are not necessarily strings (e.g. integers), so they
            # are converted to text before being joined.
            error_duplicate_keys = ', '.join(
                six.text_type(pair_key) for pair_key in duplicates
            )
            raise ParseError(
                "Duplicate keys found ({})".format(error_duplicate_keys)
            )

        return pairs