Ejemplo n.º 1
0
    def useProperty(self, summary, property, is_doc_summary):
        """Copy one summary property into a metadata attribute of ``self``.

        The field is looked up with ``summary.getFieldByAddress()`` at
        ``property["offset"].value * 8`` (presumably a byte offset turned
        into a bit address -- confirm against the parser).  Nothing is
        stored when the field is missing, has no "value" child, has no
        value, normalizes to an empty string, or when the property
        identifier is in the ignore set.

        ``is_doc_summary`` selects which pair of tables is used:
        ``DOC_SUMMARY_ID_TO_ATTR`` / ``IGNORE_DOC_SUMMARY`` when true,
        ``SUMMARY_ID_TO_ATTR`` / ``IGNORE_SUMMARY`` otherwise.  Identifiers
        missing from the mapping are stored under "comment" as
        "<display name>: <value>".
        """
        container = summary.getFieldByAddress(property["offset"].value * 8)
        if not container or "value" not in container:
            return
        field = container["value"]
        if not field.hasValue():
            return

        # Read the value; strings are normalized and dropped when empty
        value = field.value
        if isinstance(value, (str, unicode)):
            value = normalizeString(value)
            if not value:
                return

        # Pick the lookup tables matching the kind of summary
        prop_id = property["id"].value
        if is_doc_summary:
            id_to_attr, ignore = (
                self.DOC_SUMMARY_ID_TO_ATTR, self.IGNORE_DOC_SUMMARY)
        else:
            id_to_attr, ignore = (
                self.SUMMARY_ID_TO_ATTR, self.IGNORE_SUMMARY)
        if prop_id in ignore:
            return

        # Resolve the metadata key
        # NOTE(review): both "null" checks below test the truthiness of the
        # field object, not of ``value`` -- confirm that this is intended.
        try:
            key = id_to_attr[prop_id]
        except LookupError:
            # Unknown identifier: keep it as a comment labelled with the
            # property's display name
            prefix = property["id"].display
            if prefix in ("TotalEditingTime", "LastPrinted") and not field:
                # Ignore null time delta
                return
            key = "comment"
            value = "%s: %s" % (prefix, value)
        else:
            if key == "last_modification" and not field:
                # Ignore null timestamp
                return
        setattr(self, key, value)
Ejemplo n.º 2
0
    def useProperty(self, summary, property, is_doc_summary):
        """Store the value of one summary property as a metadata attribute.

        The property's field is fetched through
        ``summary.getFieldByAddress(property["offset"].value * 8)``
        (presumably bytes converted to bits -- confirm).  The method returns
        without storing anything when that field is falsy, lacks a "value"
        child, reports no value, holds a string that normalizes to nothing,
        or when the property identifier belongs to the ignore set.

        When ``is_doc_summary`` is true the ``DOC_SUMMARY_*`` tables are
        used, otherwise the ``SUMMARY_*`` ones.  An identifier absent from
        the mapping is recorded under "comment", prefixed with the
        property's display name.
        """
        address = property["offset"].value * 8
        parent = summary.getFieldByAddress(address)
        if not parent or "value" not in parent:
            return
        field = parent["value"]
        if not field.hasValue():
            return

        # Fetch the value, normalizing strings and skipping empty ones
        value = field.value
        if isinstance(value, (str, unicode)):
            value = normalizeString(value)
            if not value:
                return

        # Select the identifier tables for this kind of summary
        prop_id = property["id"].value
        if is_doc_summary:
            id_to_attr = self.DOC_SUMMARY_ID_TO_ATTR
            ignore = self.IGNORE_DOC_SUMMARY
        else:
            id_to_attr = self.SUMMARY_ID_TO_ATTR
            ignore = self.IGNORE_SUMMARY
        if prop_id in ignore:
            return

        # Map the identifier to a metadata key
        # NOTE(review): the "null" tests below check the field object's
        # truthiness rather than ``value`` -- confirm this is intended.
        try:
            key = id_to_attr[prop_id]
        except LookupError:
            # No dedicated attribute: fall back to a labelled comment
            label = property["id"].display
            if label in ("TotalEditingTime", "LastPrinted") and not field:
                # Ignore null time delta
                return
            key = "comment"
            value = "%s: %s" % (label, value)
        else:
            if key == "last_modification" and not field:
                # Ignore null timestamp
                return
        setattr(self, key, value)
Ejemplo n.º 3
0
    def add(self, value):
        """Normalize and convert a candidate value for this data item.

        ``value`` is either a bare value or a ``(value, text)`` tuple.
        ``None`` values, and strings that ``normalizeString()`` reduces to
        something falsy, are dropped silently.  When ``self.conversion`` is
        set it is called as ``self.conversion(self.metadata, self.key,
        value)``; a ``HACHOIR_ERRORS`` failure or a ``None`` result is
        reported through ``self.metadata.warning()`` and the value is
        dropped.  A tuple returned by the conversion supplies the value and,
        unless the caller already gave one, the text.

        Raises ValueError if a tuple argument does not have exactly two
        elements.

        NOTE(review): the body visible here ends after the conversion step;
        the resulting ``value``/``text`` are neither stored nor returned --
        confirm against the complete method.
        """
        if isinstance(value, tuple):
            if len(value) != 2:
                raise ValueError("Data.add() only accept tuple of 2 elements: (value,text)")
            value, text = value
        else:
            text = None

        # Skip value 'None'
        if value is None:
            return

        # Normalize strings and skip the ones that end up empty
        if isinstance(value, (str, unicode)):
            value = normalizeString(value)
            if not value:
                return

        # Run the key-specific conversion callback, if any
        if self.conversion:
            try:
                new_value = self.conversion(self.metadata, self.key, value)
            except HACHOIR_ERRORS as err:
                # "except ... as ..." is valid on Python 2.6+ and Python 3,
                # unlike the old "except X, err" spelling
                self.metadata.warning("Error during conversion of %r value: %s" % (
                    self.key, err))
                return
            if new_value is None:
                dest_types = " or ".join(str(item.__name__) for item in self.type)
                self.metadata.warning("Unable to convert %s=%r (%s) to %s" % (
                    self.key, value, type(value).__name__, dest_types))
                return
            if isinstance(new_value, tuple):
                if text:
                    # A text supplied by the caller takes precedence over
                    # the one produced by the conversion
                    value = new_value[0]
                else:
                    value, text = new_value
            else:
                value = new_value
Ejemplo n.º 4
0
    def add(self, value):
        """Prepare a candidate value: unpack, normalize, then convert it.

        Accepts a bare value or a ``(value, text)`` pair.  The method
        returns early, without reporting anything, for ``None`` and for
        strings that are falsy after ``normalizeString()``.  If
        ``self.conversion`` is set, it is invoked with ``(self.metadata,
        self.key, value)``; when it raises one of ``HACHOIR_ERRORS`` or
        returns ``None``, a warning is emitted via
        ``self.metadata.warning()`` and the method returns.  A tuple result
        provides the new value and, if no text was passed in, the text.

        Raises ValueError if ``value`` is a tuple whose length is not 2.

        NOTE(review): the body visible here stops after the conversion
        step; ``value`` and ``text`` are computed but not stored or
        returned -- confirm against the complete method.
        """
        if isinstance(value, tuple):
            if len(value) != 2:
                raise ValueError("Data.add() only accept tuple of 2 elements: (value,text)")
            value, text = value
        else:
            text = None

        # Skip value 'None'
        if value is None:
            return

        # Normalize strings; drop them when nothing is left
        if isinstance(value, (str, unicode)):
            value = normalizeString(value)
            if not value:
                return

        # Apply the conversion callback configured for this key, if any
        if self.conversion:
            try:
                new_value = self.conversion(self.metadata, self.key, value)
            except HACHOIR_ERRORS as err:
                # "as" form: accepted by Python 2.6+ and by Python 3
                self.metadata.warning("Error during conversion of %r value: %s" % (self.key, err))
                return
            if new_value is None:
                dest_types = " or ".join(str(item.__name__) for item in self.type)
                self.metadata.warning(
                    "Unable to convert %s=%r (%s) to %s" % (self.key, value, type(value).__name__, dest_types)
                )
                return
            if isinstance(new_value, tuple):
                if text:
                    # Keep the caller's text, take only the converted value
                    value = new_value[0]
                else:
                    value, text = new_value
            else:
                value = new_value
Ejemplo n.º 5
0
    def add(self, value):
        """Add a value (optionally with a display text) to this data item.

        ``value`` is either a bare value or a ``(value, text)`` tuple.  The
        value goes through these steps, any of which may drop it:

        * ``None`` and strings that are falsy after ``normalizeString()``
          are skipped silently;
        * if ``self.conversion`` is set it is called with ``(self.metadata,
          self.key, value)``; a ``HACHOIR_ERRORS`` failure or a ``None``
          result is reported with ``self.metadata.warning()``.  Without a
          conversion, a ``str`` is decoded as ISO-8859-1;
        * if ``self.type`` is set the value must be an instance of it,
          otherwise a warning is emitted;
        * unicode strings go through ``normalizeNewline()`` and are cut to
          ``config.MAX_STR_LENGTH`` characters plus "(...)" when that limit
          is set and exceeded;
        * values already contained in ``self`` or rejected by
          ``self.filter`` are skipped (the latter with a warning);
        * a unicode value that extends a stored unicode value replaces it,
          and one that is a prefix of a stored value is skipped.

        Anything that survives is appended to ``self.values`` as the result
        of ``self._createItem(value, text)``.

        Raises ValueError if a tuple argument does not have exactly two
        elements.
        """
        text = None
        if isinstance(value, tuple):
            if len(value) != 2:
                raise ValueError("Data.add() only accept tuple of 2 elements: (value,text)")
            value, text = value

        # Nothing to add
        if value is None:
            return

        # Normalize strings, dropping the ones that end up empty
        if isinstance(value, (str, unicode)):
            value = normalizeString(value)
            if not value:
                return

        if self.conversion:
            # Key-specific conversion callback
            try:
                converted = self.conversion(self.metadata, self.key, value)
            except HACHOIR_ERRORS as err:
                message = "Error during conversion of %r value: %s" % (
                    self.key, err)
                self.metadata.warning(message)
                return
            if converted is None:
                type_names = " or ".join(
                    str(cls.__name__) for cls in self.type)
                message = "Unable to convert %s=%r (%s) to %s" % (
                    self.key, value, type(value).__name__, type_names)
                self.metadata.warning(message)
                return
            if not isinstance(converted, tuple):
                value = converted
            elif text:
                # Caller's text wins over the one from the conversion
                value = converted[0]
            else:
                value, text = converted
        elif isinstance(value, str):
            # No conversion: decode byte strings using charset ISO-8859-1
            value = unicode(value, "ISO-8859-1")

        # Enforce the expected type(s), when declared
        if self.type and not isinstance(value, self.type):
            type_names = " or ".join(str(cls.__name__) for cls in self.type)
            message = "Key %r: value %r type (%s) is not %s" % (
                self.key, value, type(value).__name__, type_names)
            self.metadata.warning(message)
            return

        # Normalize newlines and truncate overlong strings
        if isinstance(value, unicode):
            value = normalizeNewline(value)
            if config.MAX_STR_LENGTH and len(value) > config.MAX_STR_LENGTH:
                value = value[:config.MAX_STR_LENGTH] + "(...)"

        # Skip duplicates
        if value in self:
            return

        # Apply the optional filter
        if self.filter and not self.filter(value):
            self.metadata.warning("Skip value %s=%r (filter)" % (self.key, value))
            return

        # For strings, if both "verylongtext" and "verylo" show up,
        # keep only the longer one
        if isinstance(value, unicode):
            for position, entry in enumerate(self.values):
                stored = entry.value
                if not isinstance(stored, unicode):
                    continue
                if value.startswith(stored):
                    # New value extends the stored one: replace it
                    self.values[position] = self._createItem(value, text)
                    return
                if stored.startswith(value):
                    # New value is a truncated form of a stored one: skip
                    return

        # Add new value
        self.values.append(self._createItem(value, text))