Esempio n. 1
0
    def _extract_description_link(self, publication):
        """Build a Hyperlink.DESCRIPTION link from the publication's description.

        :param publication: Publication object
        :type publication: opds2_ast.Publication

        :return: LinkData object carrying the description as plain-text content,
            or None when the publication has no (non-empty) description
        :rtype: Optional[LinkData]
        """
        description = publication.metadata.description

        self._logger.debug(
            "Started extracting a description link from {0}".format(
                encode(description)))

        if description:
            description_link = LinkData(
                rel=Hyperlink.DESCRIPTION,
                media_type=MediaTypes.TEXT_PLAIN,
                content=description,
            )
        else:
            description_link = None

        self._logger.debug(
            "Finished extracting a description link from {0}: {1}".format(
                encode(description), encode(description_link)))

        return description_link
    def visit(self, node):  # pylint: disable=E0102
        """Perform semantic analysis of an OPDS 2.0 feed.

        Runs the parent analysis first, records an error when the feed has
        neither publications, navigation nor groups, and then visits every
        sub-collection that is present.

        :param node: OPDS 2.0 feed
        :type node: OPDS2Feed
        """
        self._logger.debug(u"Started processing {0}".format(encode(node)))

        super(OPDS2SemanticAnalyzer, self).visit(node)

        sub_collections = (node.publications, node.navigation, node.groups)

        if all(sub_collection is None for sub_collection in sub_collections):
            with self._record_errors():
                raise MISSING_REQUIRED_FEED_SUB_COLLECTIONS(node=node,
                                                            node_property=None)

        # Each sub-collection is visited in its own error-recording context
        # so that a failure in one does not prevent visiting the others.
        for sub_collection in sub_collections:
            if sub_collection is None:
                continue

            with self._record_errors():
                sub_collection.accept(self)

        self._logger.debug(u"Finished processing {0}".format(encode(node)))
Esempio n. 3
0
    def _extract_links(self, publication, feed_self_url):
        """Collect all links of a publication as LinkData objects.

        The result contains, in order: the publication's own links, the
        description link (if any) and the image links (if any).

        :param publication: Publication object
        :type publication: ast_core.Publication

        :param feed_self_url: Feed's self URL
        :type feed_self_url: str

        :return: List of links metadata
        :rtype: List[LinkData]
        """
        self._logger.debug("Started extracting links from {0}".format(
            encode(publication.links)))

        links = [
            self._extract_link(raw_link, feed_self_url)
            for raw_link in publication.links
        ]

        description_link = self._extract_description_link(publication)
        if description_link:
            links.append(description_link)

        # A falsy result (None or an empty list) contributes nothing.
        links.extend(
            self._extract_image_links(publication, feed_self_url) or [])

        self._logger.debug("Finished extracting links from {0}: {1}".format(
            encode(publication.links), encode(links)))

        return links
    def parse(self, value):
        """Return the result of the first inner parser that accepts the value.

        Inner parsers are tried in order. If all of them fail, the error
        raised by the first one is re-raised. If no inner parsers are
        configured at all, a ValueParsingError is raised as well.

        :param value: Value
        :type value: Any

        :return: First valid value
        :rtype: Any

        :raise: ValueParsingError
        """
        first_validation_error = None

        for parser in self._inner_parsers:
            self._logger.debug(u"Running {0} parser".format(parser))

            # Keep the try body minimal: only the inner parser may
            # legitimately signal a parsing failure.
            try:
                result = parser.parse(value)
            except ValueParsingError as error:
                self._logger.debug(u"Parser {0} failed".format(encode(parser)))

                if first_validation_error is None:
                    first_validation_error = error
            else:
                self._logger.debug(u"Parser {0} succeeded: {1}".format(
                    parser, encode(result)))

                return result

        self._logger.debug(u"All parsers failed")

        if first_validation_error is None:
            # There were no inner parsers, so there is no recorded error;
            # `raise None` would fail with an unrelated TypeError.
            raise ValueParsingError(
                u"Value '{0}' cannot be parsed: there are no inner parsers".
                format(encode(value)))

        raise first_validation_error
    def parse(self, value):
        """Parse the value into a list of parsed values.

        Every item is parsed with the item parser. When unique items are
        required, a duplicate parsed item makes the whole value invalid.
        Parsed items do not have to be hashable: unhashable items (for
        example, nested lists or dictionaries) are compared by equality.

        :param value: Value
        :type value: Any

        :return: List consisting of parsed items
        :rtype: List

        :raise: ValueParsingError
        """
        if not isinstance(value, list):
            raise ValueParsingError(u"Value '{0}' must be a list".format(
                encode(value)))

        result = []
        seen = set()

        for item in value:
            item = self._item_parser.parse(item)

            # Uniqueness bookkeeping is done only when it is requested,
            # so unhashable items never break a non-unique array.
            if self._unique_items:
                try:
                    is_duplicate = item in seen
                    is_hashable = True
                except TypeError:
                    # Unhashable item: fall back to a linear equality scan.
                    is_duplicate = item in result
                    is_hashable = False

                if is_duplicate:
                    raise ValueParsingError(u"Item '{0}' is not unique".format(
                        encode(item)))

                if is_hashable:
                    seen.add(item)

            result.append(item)

        return result
    def parse(self, value):
        """Parse a JSON object into a Python dictionary.

        Keys must be strings and, when a properties regex is configured,
        must match it. Values are parsed with the properties parser.

        :param value: Value
        :type value: Any

        :return: Python dictionary containing parsed items
        :rtype: Dict

        :raise: ValueParsingError
        """
        if not isinstance(value, dict):
            raise ValueParsingError("Value must be a dictionary")

        parsed_items = {}

        for name, raw_item in value.items():
            if not isinstance(name, str):
                raise ValueParsingError(u"Key '{0}' must be a string".format(
                    encode(name)))

            pattern = self._properties_regex

            if pattern and not pattern.match(name):
                raise ValueParsingError(
                    u"Key '{0}' does not match the pattern '{1}'".format(
                        encode(name), pattern))

            parsed_items[name] = self._properties_parser.parse(raw_item)

        return parsed_items
    def visit(self, node):  # pylint: disable=E0102
        """Perform semantic analysis of the OPDS 2.0 group.

        A group may not contain publications and navigation at the same time.

        :param node: OPDS 2.0 group
        :type node: OPDS2Group
        """
        self._logger.debug(u"Started processing {0}".format(encode(node)))

        # FIXME: It seems that group definition relaxes requirements for having metadata
        # It means we have to override default behaviour
        # super(OPDS2SemanticAnalyzer, self).visit(node)

        metadata = node.metadata
        if metadata:
            metadata.accept(self)

        if node.publications and node.navigation:
            raise WRONG_GROUP_STRUCTURE

        # Visit the remaining children, skipping those that are missing/empty.
        for child_name in ("publications", "navigation", "links"):
            child = getattr(node, child_name)

            if child:
                child.accept(self)

        self._logger.debug(u"Finished processing {0}".format(encode(node)))
    def visit(self, node):  # pylint: disable=E0102
        """Perform semantic analysis of the OPDS 2.0 publication.

        After the parent analysis, makes sure that at least one of the
        publication's links has an acquisition relation.

        :param node: OPDS 2.0 publication
        :type node: OPDS2Publication
        """
        self._logger.debug(u"Started processing {0}".format(encode(node)))

        super(OPDS2SemanticAnalyzer, self).visit(node)

        acquisition_rels = {
            OPDS2LinkRelationsRegistry.PREVIEW.key,
            OPDS2LinkRelationsRegistry.ACQUISITION.key,
            OPDS2LinkRelationsRegistry.BUY.key,
            OPDS2LinkRelationsRegistry.OPEN_ACCESS.key,
            OPDS2LinkRelationsRegistry.BORROW.key,
            OPDS2LinkRelationsRegistry.SAMPLE.key,
            OPDS2LinkRelationsRegistry.SUBSCRIBE.key,
        }

        has_acquisition_link = any(
            link.rels is not None and acquisition_rels.intersection(link.rels)
            for link in node.links
        )

        if not has_acquisition_link:
            raise MISSING_ACQUISITION_LINK

        self._logger.debug(u"Finished processing {0}".format(encode(node)))
    def visit(self, node):
        """Perform semantic analysis of the manifest node.

        Visits metadata and links, requires every link to have a relation,
        requires a self link whose href is a valid URI, and finally visits
        the sub-collections.

        :param node: Manifest-like node
        :type node: Manifestlike
        """
        self._logger.debug(u"Started processing {0}".format(encode(node)))

        node.metadata.accept(self)
        node.links.accept(self)

        if any(not link.rels for link in node.links):
            raise MISSING_MANIFEST_LINK_REL_PROPERTY_ERROR

        self_links = node.links.get_by_rel(RWPMLinkRelationsRegistry.SELF.key)
        self_link = first_or_default(self_links)

        if self_link is None:
            raise MISSING_SELF_LINK_ERROR

        uri_parser = URIParser()

        try:
            uri_parser.parse(self_link.href)
        except ValueParsingError:
            raise WRONG_SELF_LINK_HREF_FORMAT

        node.sub_collections.accept(self)

        self._logger.debug(u"Finished processing {0}".format(encode(node)))
    def visit(self, node):  # pylint: disable=E0102
        """Perform semantic analysis of the manifest's metadata.

        No checks are performed for metadata; only the start and the end of
        the processing are logged.

        :param node: Manifest's metadata
        :type node: Metadata
        """
        started_message = u"Started processing {0}"
        finished_message = u"Finished processing {0}"

        self._logger.debug(started_message.format(encode(node)))

        self._logger.debug(finished_message.format(encode(node)))
Esempio n. 11
0
    def _extract_image_links(self, publication, feed_self_url):
        """Extract cover and thumbnail LinkData objects from the publication's artwork.

        The largest image (by width, then height) becomes the cover and the
        second-largest one becomes the thumbnail; these relations are only
        defaults passed to _extract_link.

        :param publication: Publication object
        :type publication: ast_core.Publication

        :param feed_self_url: Feed's self URL
        :type feed_self_url: str

        :return: List of links metadata
        :rtype: List[LinkData]
        """
        self._logger.debug("Started extracting image links from {0}".format(
            encode(publication.images)))

        if not publication.images:
            return []

        # FIXME: This code most likely will not work in general.
        # There's no guarantee that these images have the same media type,
        # or that the second-largest image isn't far too large to use as a thumbnail.
        # Instead of using the second-largest image as a thumbnail,
        # find the image that would make the best thumbnail
        # because of its dimensions, media type, and aspect ratio:
        #       IDEAL_COVER_ASPECT_RATIO = 2.0/3
        #       IDEAL_IMAGE_HEIGHT = 240
        #       IDEAL_IMAGE_WIDTH = 160

        def image_size(raw_link):
            # Missing dimensions are treated as zero.
            return (raw_link.width or 0, raw_link.height or 0)

        # Sort ascending and then reverse, so that the largest image is first.
        largest_first = sorted(publication.images.links, key=image_size)
        largest_first.reverse()

        # zip() stops at the shorter sequence: one image yields only a cover,
        # two or more images yield a cover and a thumbnail.
        default_rels = (Hyperlink.IMAGE, Hyperlink.THUMBNAIL_IMAGE)
        image_links = [
            self._extract_link(
                raw_link,
                feed_self_url,
                default_link_rel=default_rel,
            )
            for raw_link, default_rel in zip(largest_first, default_rels)
        ]

        self._logger.debug(
            "Finished extracting image links from {0}: {1}".format(
                encode(publication.images), encode(image_links)))

        return image_links
    def visit(self, node):  # pylint: disable=E0102
        """Perform semantic analysis of the compact collection node.

        Only the collection's links are visited.

        :param node: Collection node
        :type node: CompactCollection
        """
        self._logger.debug(u"Started processing {0}".format(encode(node)))

        collection_links = node.links
        collection_links.accept(self)

        self._logger.debug(u"Finished processing {0}".format(encode(node)))
    def visit(self, node):  # pylint: disable=E0102
        """Perform semantic analysis of the list of sub-collections.

        Each collection in the list is visited in iteration order.

        :param node: CollectionList node
        :type node: CollectionList
        """
        self._logger.debug(u"Started processing {0}".format(encode(node)))

        for sub_collection in node:
            sub_collection.accept(self)

        self._logger.debug(u"Finished processing {0}".format(encode(node)))
    def visit(self, node):  # pylint: disable=E0102
        """Perform semantic analysis of the list of links.

        Each link in the list is visited in iteration order.

        :param node: List of links
        :type node: LinkList
        """
        self._logger.debug(u"Started processing {0}".format(encode(node)))

        for list_item in node:
            list_item.accept(self)

        self._logger.debug(u"Finished processing {0}".format(encode(node)))
    def visit(self, node):  # pylint: disable=E0102
        """Perform semantic analysis of the link node.

        The href of a non-templated link is run through URIReferenceParser;
        any error raised by the parser propagates to the caller. Templated
        links are not checked.

        :param node: Link node
        :type node: Link
        """
        self._logger.debug(u"Started processing {0}".format(encode(node)))

        is_templated = node.templated

        if not is_templated:
            URIReferenceParser().parse(node.href)

        self._logger.debug(u"Finished processing {0}".format(encode(node)))
    def visit(self, node):  # pylint: disable=E0102
        """Perform semantic analysis of the OPDS 2.0 navigation.

        The node is first analysed as a CompactCollection, then every
        navigation link is required to have a title.

        :param node: OPDS 2.0 navigation
        :type node: OPDS2Navigation
        """
        self._logger.debug(u"Started processing {0}".format(encode(node)))

        compact_collection = cast(node, CompactCollection)
        self.visit(compact_collection)

        if any(link.title is None for link in node.links):
            raise MISSING_NAVIGATION_LINK_TITLE_ERROR

        self._logger.debug(u"Finished processing {0}".format(encode(node)))
Esempio n. 17
0
    def _set_scalar_value(self, json_content, ast_object):
        """Parse a scalar string value and initialize an object's property with it.

        A scalar can only be mapped onto an object that has exactly one
        required property: that property receives the scalar, and all the
        other properties are initialized through _set_non_scalar_value.

        :param json_content: Scalar string value containing a required object's property
        :type json_content: str

        :param ast_object: AST object
        :type ast_object: Node

        :raise: BaseSyntaxError when the object's class does not have
            exactly one required property
        """
        required_object_properties = PropertiesGrouping.get_required_class_properties(
            ast_object.__class__)

        if len(required_object_properties) != 1:
            # The closing parenthesis after the value was missing in the message.
            raise BaseSyntaxError(
                u"There are {0} required properties in {1} but only a single value ({2}) was provided"
                .format(len(required_object_properties), encode(ast_object),
                        json_content))

        required_object_property_name, required_object_property = first_or_default(
            required_object_properties)

        self._set_property_value(
            ast_object,
            required_object_property_name,
            required_object_property,
            json_content,
        )

        # We need to initialize other properties with default values
        self._set_non_scalar_value(None, ast_object,
                                   {required_object_property_name})
Esempio n. 18
0
    def _set_property_value(self, ast_object, object_property_name,
                            object_property, property_value):
        """Set the value of the specified property.

        A missing value is replaced with the property's default (if any);
        a value that is still missing is an error for required properties.
        Present values are parsed with the property's parser, and the result
        is formatted before being assigned to the object.

        :param ast_object: AST object
        :type ast_object: Node

        :param object_property_name: Name of the property
        :type object_property_name: str

        :param object_property: Object's property
        :type object_property: Property

        :param property_value: Value to be set
        :type property_value: Any

        :raise: MissingPropertyError when a required property has no value
        """
        self._logger.debug(
            u"Property '{0}' has the following value: {1}".format(
                object_property.key, encode(property_value)))

        value = property_value

        if value is None:
            default_value = object_property.default_value

            if default_value is not None:
                value = default_value

        if value is None:
            if object_property.required:
                raise MissingPropertyError(ast_object.__class__,
                                           object_property)
        else:
            value = object_property.parser.parse(value)

        formatted_value = self._format_property_value(value, object_property)

        setattr(ast_object, object_property_name, formatted_value)
    def visit(self, node):  # pylint: disable=E0102
        """Perform semantic analysis of the collection node.

        Metadata, links and sub-collections are visited in that order, each
        inside its own error-recording context.

        :param node: Collection node
        :type node: Collection
        """
        self._logger.debug(u"Started processing {0}".format(encode(node)))

        for child_name in ("metadata", "links", "sub_collections"):
            with self._record_errors():
                getattr(node, child_name).accept(self)

        self._logger.debug(u"Finished processing {0}".format(encode(node)))
Esempio n. 20
0
    def _extract_media_types_and_drm_scheme_from_link(self, link):
        """Extract information about content's media type and used DRM schema from the link.

        For a link with properties, every indirect acquisition object yields
        one pair: the media type is the type of the innermost object in its
        chain of children, and the DRM scheme is the type of the outermost
        object when it is a known DRM type (NO_DRM otherwise). Nothing is
        extracted when the link's availability state is not "available".

        For a link without properties, the link's own type is used (without
        DRM) when it is a known book or audiobook media type.

        :param link: Link object
        :type link: ast_core.Link

        :return: List of 2-tuples containing information about the content's media type and its DRM schema
        :rtype: List[Tuple[str, str]]
        """
        self._logger.debug(
            "Started extracting media types and a DRM scheme from {0}".format(
                encode(link)))

        media_types_and_drm_scheme = []

        if link.properties:
            availability = link.properties.availability

            if (not availability or availability.state
                    == opds2_ast.OPDS2AvailabilityType.AVAILABLE.value):
                # A missing list of indirect acquisitions is treated as empty.
                indirect_acquisitions = (
                    link.properties.indirect_acquisition or [])

                for acquisition_object in indirect_acquisitions:
                    nested_acquisition_object = acquisition_object

                    # Walk down to the innermost acquisition object.
                    # The step must start from the current nested object:
                    # restarting from the top-level object never gets past
                    # the first level and loops forever on deeper chains.
                    while nested_acquisition_object.child:
                        nested_acquisition_object = first_or_default(
                            nested_acquisition_object.child)

                    drm_scheme = (acquisition_object.type
                                  if acquisition_object.type
                                  in DeliveryMechanism.KNOWN_DRM_TYPES else
                                  DeliveryMechanism.NO_DRM)

                    media_types_and_drm_scheme.append(
                        (nested_acquisition_object.type, drm_scheme))
        else:
            if (link.type in MediaTypes.BOOK_MEDIA_TYPES
                    or link.type in MediaTypes.AUDIOBOOK_MEDIA_TYPES):
                media_types_and_drm_scheme.append(
                    (link.type, DeliveryMechanism.NO_DRM))

        self._logger.debug(
            "Finished extracting media types and a DRM scheme from {0}: {1}".
            format(encode(link), encode(media_types_and_drm_scheme)))

        return media_types_and_drm_scheme
Esempio n. 21
0
    def _extract_contributors(self,
                              contributors,
                              default_role=Contributor.AUTHOR_ROLE):
        """Convert webpub-manifest-parser contributors into ContributorData objects.

        :param contributors: Parsed contributor objects
        :type contributors: List[core_ast.Contributor]

        :param default_role: Role used for contributors without roles of their own
        :type default_role: Optional[str]

        :return: List of contributors metadata
        :rtype: List[ContributorData]
        """
        self._logger.debug("Started extracting contributors metadata")

        extracted = []

        for raw_contributor in contributors:
            self._logger.debug(
                "Started extracting contributor metadata from {0}".format(
                    encode(raw_contributor)))

            # Fall back to the default role when the contributor has none.
            roles = raw_contributor.roles or default_role

            metadata = ContributorData(
                sort_name=raw_contributor.sort_as,
                display_name=raw_contributor.name,
                family_name=None,
                wikipedia_name=None,
                roles=roles,
            )

            self._logger.debug(
                "Finished extracting contributor metadata from {0}: {1}".
                format(encode(raw_contributor), encode(metadata)))

            extracted.append(metadata)

        self._logger.debug(
            "Finished extracting contributors metadata: {0}".format(
                encode(extracted)))

        return extracted
Esempio n. 22
0
    def visit(self, node):  # pylint: disable=E0102
        """Perform semantic analysis of the ODL 2.0 publication.

        A publication must contain either licenses or an open-access
        acquisition link; otherwise an error is recorded. When licenses are
        present, they are visited.

        :param node: ODL 2.0 publication
        :type node: ODLPublication
        """
        self._logger.debug(u"Started processing {0}".format(encode(node)))

        licenses_missing = not node.licenses or len(node.licenses) == 0
        # The links check must look at the links themselves: testing the
        # licenses here again made the open-access branch unreachable, so
        # open-access publications without licenses were always rejected.
        links_missing = not node.links or len(node.links) == 0

        if licenses_missing and (
            links_missing
            or not node.links.get_by_rel(OPDS2LinkRelationsRegistry.OPEN_ACCESS.key)
        ):
            with self._record_errors():
                raise ODL_PUBLICATION_MUST_CONTAIN_EITHER_LICENSES_OR_OA_ACQUISITION_LINK_ERROR(
                    node=node, node_property=None
                )
        elif node.licenses:
            node.licenses.accept(self)

        self._logger.debug(u"Finished processing {0}".format(encode(node)))
Esempio n. 23
0
    def _parse_nested_object(self, property_value, object_property):
        """Parse nested object(s) (if any) and return the result of parsing.

        The property's parser is searched for type parsers. A list value is
        parsed item by item with the first type parser that sits under an
        array parser. Otherwise the value is parsed as a single object with
        the first type parser that does not sit under an array parser. When
        neither applies, the value is returned unchanged.

        :param property_value: Raw property's value (probably) containing nested object(s)
        :type property_value: Any

        :param object_property: Object's property
        :type object_property: properties.Property

        :return: Nested object's value
        :rtype: Any
        """
        if property_value is None:
            return None

        self._logger.debug(
            u"Started looking for nested property {0}".format(object_property))

        type_parsers_result = find_parser(object_property.parser, TypeParser)

        self._logger.debug(u"Found the following type parsers: {0}".format(
            type_parsers_result))

        is_nested = False
        parsed_value = property_value

        for parent_parser, type_parser in type_parsers_result:
            if isinstance(parent_parser, ArrayParser) and isinstance(
                    parsed_value, list):
                parsed_value = [
                    self._parse_object(raw_item, type_parser.type)
                    for raw_item in parsed_value
                ]
                is_nested = True
                break
        else:
            # No array-of-objects match: try to parse a single nested object.
            for parent_parser, type_parser in type_parsers_result:
                if isinstance(parent_parser, ArrayParser):
                    continue

                parsed_value = self._parse_object(parsed_value,
                                                  type_parser.type)
                is_nested = True
                break

        if is_nested:
            self._logger.debug(
                u"Finished parsing nested property {0}: {1}".format(
                    object_property, encode(parsed_value)))
        else:
            self._logger.debug(
                u"Property {0} is not nested".format(object_property))

        return parsed_value
Esempio n. 24
0
    def _extract_subjects(self, subjects):
        """Convert webpub-manifest-parser subjects into SubjectData objects.

        :param subjects: Parsed subject objects
        :type subjects: List[core_ast.Subject]

        :return: List of subjects metadata
        :rtype: List[SubjectData]
        """
        self._logger.debug("Started extracting subjects metadata")

        extracted = []

        for raw_subject in subjects:
            self._logger.debug(
                "Started extracting subject metadata from {0}".format(
                    encode(raw_subject)))

            # We can't represent a subject whose scheme we don't know,
            # so such a subject is just treated as a tag.
            subject_type = Subject.by_uri.get(raw_subject.scheme) or Subject.TAG

            metadata = SubjectData(
                type=subject_type,
                identifier=raw_subject.code,
                name=raw_subject.name,
                weight=1,
            )
            extracted.append(metadata)

            self._logger.debug(
                "Finished extracting subject metadata from {0}: {1}".format(
                    encode(raw_subject), encode(metadata)))

        self._logger.debug("Finished extracting subjects metadata: {0}".format(
            encode(extracted)))

        return extracted
Esempio n. 25
0
    def _extract_link(self, link, feed_self_url, default_link_rel=None):
        """Convert a webpub-manifest-parser link into a LinkData object.

        The first relation of the link is used, falling back to the default
        one. An href without a network location is resolved against the
        feed's self URL when that URL is available.

        :param link: webpub-manifest-parser's link
        :type link: ast_core.Link

        :param feed_self_url: Feed's self URL
        :type feed_self_url: str

        :param default_link_rel: Default link's relation
        :type default_link_rel: Optional[str]

        :return: Link metadata
        :rtype: LinkData
        """
        self._logger.debug("Started extracting link metadata from {0}".format(
            encode(link)))

        # FIXME: It seems that OPDS 2.0 spec doesn't contain information about rights so we use the default one.
        default_rights_uri = RightsStatus.rights_uri_from_string("")
        link_rel = first_or_default(link.rels, default_link_rel)

        absolute_href = link.href

        if feed_self_url:
            if not urlparse(absolute_href).netloc:
                # This link is relative, so we need to get the absolute url
                absolute_href = urljoin(feed_self_url, absolute_href)

        link_metadata = LinkData(
            rel=link_rel,
            href=absolute_href,
            media_type=link.type,
            rights_uri=default_rights_uri,
            content=None,
        )

        self._logger.debug(
            "Finished extracting link metadata from {0}: {1}".format(
                encode(link), encode(link_metadata)))

        return link_metadata
Esempio n. 26
0
    def _extract_image_links(self, publication, feed_self_url):
        """Extracts a list of LinkData objects containing information about artwork.

        Every image of the publication produces one cover link
        (Hyperlink.IMAGE) that carries a thumbnail link
        (Hyperlink.THUMBNAIL_IMAGE) built from the same image.
        A publication without images produces an empty list.

        :param publication: Publication object
        :type publication: ast_core.Publication

        :param feed_self_url: Feed's self URL
        :type feed_self_url: str

        :return: List of links metadata
        :rtype: List[LinkData]
        """
        self._logger.debug("Started extracting image links from {0}".format(
            encode(publication.images)))

        image_links = []

        # The images collection may be missing: do not dereference it then.
        raw_image_links = publication.images.links if publication.images else []

        for image_link in raw_image_links:
            thumbnail_link = self._extract_link(
                image_link,
                feed_self_url,
                default_link_rel=Hyperlink.THUMBNAIL_IMAGE,
            )
            # The relation is forced regardless of the link's own relations.
            thumbnail_link.rel = Hyperlink.THUMBNAIL_IMAGE

            cover_link = self._extract_link(
                image_link,
                feed_self_url,
                default_link_rel=Hyperlink.IMAGE,
            )
            cover_link.rel = Hyperlink.IMAGE
            cover_link.thumbnail = thumbnail_link
            image_links.append(cover_link)

        self._logger.debug(
            "Finished extracting image links from {0}: {1}".format(
                encode(publication.images), encode(image_links)))

        return image_links
    def visit(self, node):
        """Perform semantic analysis of the manifest node.

        The analyzer's context is reset first. Metadata, links, the
        manifest's self link and sub-collections are then checked in that
        order, each inside its own error-recording context.

        :param node: Manifest-like node
        :type node: Manifestlike
        """
        self._logger.debug(u"Started processing {0}".format(encode(node)))

        self.context.reset()

        # The steps are deferred so that each one, including the attribute
        # access, runs inside its own error-recording context.
        analysis_steps = (
            lambda: node.metadata.accept(self),
            lambda: node.links.accept(self),
            lambda: self._check_manifest_self_link(node),
            lambda: node.sub_collections.accept(self),
        )

        for analysis_step in analysis_steps:
            with self._record_errors():
                analysis_step()

        self._logger.debug(u"Finished processing {0}".format(encode(node)))
    def parse(self, value):
        """Make sure that the value is a string and return it unchanged.

        :param value: Value
        :type value: Any

        :return: Parsed string value
        :rtype: str

        :raise: ValueParsingError
        """
        if is_string(value):
            return value

        raise ValueParsingError(u"Value '{0}' must be a string".format(
            encode(value)))
    def parse(self, value):
        """Make sure that the value is an instance of the expected type.

        :param value: Value
        :type value: Any

        :return: The same value, unchanged
        :rtype: Any

        :raise: ValueParsingError
        """
        expected_type = self._type

        if isinstance(value, expected_type):
            return value

        raise ValueParsingError(
            u"Value '{0}' must be an instance of '{1}'".format(
                encode(value), expected_type))
    def parse(self, value):
        """Make sure that the value is a part of the enumeration and return it back.

        The value is first parsed by the parent parser; the result must then
        be one of the enumeration's items.

        :param value: Value
        :type value: Any

        :return: Parsed value that belongs to the enumeration
        :rtype: Any

        :raise: ValueParsingError
        """
        parsed_value = super(EnumParser, self).parse(value)

        if parsed_value in self._items:
            return parsed_value

        raise ValueParsingError(u"Value '{0}' is not among {1}".format(
            encode(parsed_value), self._items))