def _extract_description_link(self, publication):
    """Build a Hyperlink.DESCRIPTION LinkData from the publication's description.

    :param publication: Publication object
    :type publication: opds2_ast.Publication

    :return: LinkData carrying the description, or None when the publication has none
    :rtype: LinkData
    """
    self._logger.debug(
        "Started extracting a description link from {0}".format(
            encode(publication.metadata.description)))

    description = publication.metadata.description
    link = (
        LinkData(
            rel=Hyperlink.DESCRIPTION,
            media_type=MediaTypes.TEXT_PLAIN,
            content=description,
        )
        if description
        else None
    )

    self._logger.debug(
        "Finished extracting a description link from {0}: {1}".format(
            encode(publication.metadata.description), encode(link)))

    return link
def visit(self, node):  # pylint: disable=E0102
    """Perform semantic analysis of the OPDS 2.0 feed node.

    A feed must contain at least one of: publications, navigation, groups.

    :param node: Manifest's metadata
    :type node: OPDS2Feed
    """
    self._logger.debug(u"Started processing {0}".format(encode(node)))

    super(OPDS2SemanticAnalyzer, self).visit(node)

    sub_collections = (node.publications, node.navigation, node.groups)

    # At least one sub-collection is required by the spec.
    if all(collection is None for collection in sub_collections):
        with self._record_errors():
            raise MISSING_REQUIRED_FEED_SUB_COLLECTIONS(node=node, node_property=None)

    # Visit whichever sub-collections are present, recording (not raising)
    # any errors found inside each one.
    for collection in sub_collections:
        if collection is not None:
            with self._record_errors():
                collection.accept(self)

    self._logger.debug(u"Finished processing {0}".format(encode(node)))
def _extract_links(self, publication, feed_self_url):
    """Collect LinkData objects for all of a publication's links,
    plus its description link and artwork links.

    :param publication: Publication object
    :type publication: ast_core.Publication
    :param feed_self_url: Feed's self URL
    :type feed_self_url: str

    :return: List of links metadata
    :rtype: List[LinkData]
    """
    self._logger.debug("Started extracting links from {0}".format(
        encode(publication.links)))

    links = [
        self._extract_link(link, feed_self_url)
        for link in publication.links
    ]

    description_link = self._extract_description_link(publication)
    if description_link:
        links.append(description_link)

    # _extract_image_links may return an empty/falsy result; extending
    # with an empty list is a no-op.
    links.extend(self._extract_image_links(publication, feed_self_url) or [])

    self._logger.debug("Finished extracting links from {0}: {1}".format(
        encode(publication.links), encode(links)))

    return links
def parse(self, value):
    """Try each inner parser in turn and return the first successful result.

    If every inner parser fails, re-raise the error produced by the first one.

    :param value: Value
    :type value: Any

    :return: First valid value
    :rtype: Any

    :raise: ValidationError
    """
    first_error = None

    for parser in self._inner_parsers:
        self._logger.debug(u"Running {0} parser".format(parser))

        try:
            result = parser.parse(value)
        except ValueParsingError as error:
            self._logger.debug(u"Parser {0} failed".format(encode(parser)))
            # Remember only the earliest failure for the final re-raise.
            if first_error is None:
                first_error = error
        else:
            self._logger.debug(u"Parser {0} succeeded: {1}".format(
                parser, encode(result)))
            return result

    self._logger.debug(u"All parsers failed")

    raise first_error
def parse(self, value):
    """Parse the value into a list of parsed values.

    :param value: Value
    :type value: Any

    :return: List consisting of parsed items
    :rtype: List

    :raise: ValidationError
    """
    if not isinstance(value, list):
        raise ValueParsingError(u"Value '{0}' must be a list".format(
            encode(value)))

    result = []
    # BUG FIX: track previously seen items only when uniqueness is enforced.
    # Previously `seen.add(item)` ran unconditionally, so parsing a list of
    # unhashable items (dicts, lists) raised TypeError even when
    # `unique_items` was disabled.
    seen = set()

    for item in value:
        item = self._item_parser.parse(item)

        if self._unique_items:
            if item in seen:
                raise ValueParsingError(u"Item '{0}' is not unique".format(
                    encode(item)))
            seen.add(item)

        result.append(item)

    # `result` is already a fresh list; no defensive copy needed.
    return result
def parse(self, value):
    """Parse a JSON object into a Python dictionary.

    :param value: Value
    :type value: Any

    :return: Python dictionary containing parsed items
    :rtype: Dict

    :raise: ValidationError
    """
    if not isinstance(value, dict):
        raise ValueParsingError("Value must be a dictionary")

    parsed = {}

    for key, raw_item in value.items():
        if not isinstance(key, str):
            raise ValueParsingError(u"Key '{0}' must be a string".format(
                encode(key)))

        # When a key pattern was configured, every key must match it.
        if self._properties_regex and not self._properties_regex.match(
                key):
            raise ValueParsingError(
                u"Key '{0}' does not match the pattern '{1}'".format(
                    encode(key), self._properties_regex))

        parsed[key] = self._properties_parser.parse(raw_item)

    return parsed
def visit(self, node):  # pylint: disable=E0102
    """Perform semantic analysis of the OPDS 2.0 group.

    :param node: OPDS 2.0 group
    :type node: OPDS2Group
    """
    self._logger.debug(u"Started processing {0}".format(encode(node)))

    # FIXME: Group definitions relax the metadata requirement, so the
    # default (super-class) behaviour is deliberately NOT invoked here;
    # metadata is only visited when present.
    if node.metadata:
        node.metadata.accept(self)

    # A group may carry publications or navigation, but never both.
    if node.publications and node.navigation:
        raise WRONG_GROUP_STRUCTURE

    if node.publications:
        node.publications.accept(self)
    if node.navigation:
        node.navigation.accept(self)
    if node.links:
        node.links.accept(self)

    self._logger.debug(u"Finished processing {0}".format(encode(node)))
def visit(self, node):  # pylint: disable=E0102
    """Perform semantic analysis of the OPDS 2.0 publication.

    A publication must carry at least one acquisition-type link.

    :param node: OPDS 2.0 publication
    :type node: OPDS2Publication
    """
    self._logger.debug(u"Started processing {0}".format(encode(node)))

    super(OPDS2SemanticAnalyzer, self).visit(node)

    # Relations that count as acquisition links.
    acquisition_rels = {
        OPDS2LinkRelationsRegistry.PREVIEW.key,
        OPDS2LinkRelationsRegistry.ACQUISITION.key,
        OPDS2LinkRelationsRegistry.BUY.key,
        OPDS2LinkRelationsRegistry.OPEN_ACCESS.key,
        OPDS2LinkRelationsRegistry.BORROW.key,
        OPDS2LinkRelationsRegistry.SAMPLE.key,
        OPDS2LinkRelationsRegistry.SUBSCRIBE.key,
    }

    has_acquisition_link = any(
        link.rels is not None and acquisition_rels.intersection(link.rels)
        for link in node.links
    )
    if not has_acquisition_link:
        raise MISSING_ACQUISITION_LINK

    self._logger.debug(u"Finished processing {0}".format(encode(node)))
def visit(self, node):
    """Perform semantic analysis of the manifest node.

    Validates that every link has a `rel`, that a `self` link exists,
    and that the `self` link's href is a well-formed URI.

    :param node: Manifest-like node
    :type node: Manifestlike
    """
    self._logger.debug(u"Started processing {0}".format(encode(node)))

    node.metadata.accept(self)
    node.links.accept(self)

    # Every manifest link must declare at least one relation.
    if any(not link.rels for link in node.links):
        raise MISSING_MANIFEST_LINK_REL_PROPERTY_ERROR

    self_link = first_or_default(
        node.links.get_by_rel(RWPMLinkRelationsRegistry.SELF.key))
    if self_link is None:
        raise MISSING_SELF_LINK_ERROR

    # The self link's href must be a syntactically valid URI.
    try:
        URIParser().parse(self_link.href)
    except ValueParsingError:
        raise WRONG_SELF_LINK_HREF_FORMAT

    node.sub_collections.accept(self)

    self._logger.debug(u"Finished processing {0}".format(encode(node)))
def visit(self, node):  # pylint: disable=E0102
    """Perform semantic analysis of the manifest's metadata.

    Metadata nodes require no additional semantic checks; only trace
    logging happens here.

    :param node: Manifest's metadata
    :type node: Metadata
    """
    self._logger.debug(u"Started processing {0}".format(encode(node)))
    self._logger.debug(u"Finished processing {0}".format(encode(node)))
def _extract_image_links(self, publication, feed_self_url):
    """Extract LinkData objects describing the publication's artwork.

    The largest image becomes the cover; the second-largest (if any)
    becomes the thumbnail.

    :param publication: Publication object
    :type publication: ast_core.Publication
    :param feed_self_url: Feed's self URL
    :type feed_self_url: str

    :return: List of links metadata
    :rtype: List[LinkData]
    """
    self._logger.debug("Started extracting image links from {0}".format(
        encode(publication.images)))

    if not publication.images:
        return []

    # FIXME: This code most likely will not work in general.
    # There's no guarantee that these images share a media type, or that
    # the second-largest image isn't far too large to use as a thumbnail.
    # Ideally we'd pick the image whose dimensions, media type and aspect
    # ratio make the best thumbnail, e.g.:
    #   IDEAL_COVER_ASPECT_RATIO = 2.0/3
    #   IDEAL_IMAGE_HEIGHT = 240
    #   IDEAL_IMAGE_WIDTH = 160
    by_size_descending = list(
        reversed(
            sorted(
                publication.images.links,
                key=lambda link: (link.width or 0, link.height or 0),
            )))

    image_links = []
    # zip truncates to the shorter sequence, so 0, 1 or 2+ images are all
    # handled: first image -> cover, second image -> thumbnail.
    for raw_link, rel in zip(
            by_size_descending, (Hyperlink.IMAGE, Hyperlink.THUMBNAIL_IMAGE)):
        image_links.append(
            self._extract_link(raw_link, feed_self_url, default_link_rel=rel))

    self._logger.debug(
        "Finished extracting image links from {0}: {1}".format(
            encode(publication.images), encode(image_links)))

    return image_links
def visit(self, node):  # pylint: disable=E0102
    """Perform semantic analysis of the compact collection node.

    Only the collection's links require validation.

    :param node: Collection node
    :type node: CompactCollection
    """
    self._logger.debug(u"Started processing {0}".format(encode(node)))

    node.links.accept(self)

    self._logger.debug(u"Finished processing {0}".format(encode(node)))
def visit(self, node):  # pylint: disable=E0102
    """Perform semantic analysis of the list of sub-collections.

    Dispatches the visitor into every contained collection.

    :param node: CollectionList node
    :type node: CollectionList
    """
    self._logger.debug(u"Started processing {0}".format(encode(node)))

    for sub_collection in node:
        sub_collection.accept(self)

    self._logger.debug(u"Finished processing {0}".format(encode(node)))
def visit(self, node):  # pylint: disable=E0102
    """Perform semantic analysis of the list of links.

    Dispatches the visitor into every contained link.

    :param node: Manifest's metadata
    :type node: LinkList
    """
    self._logger.debug(u"Started processing {0}".format(encode(node)))

    for contained_link in node:
        contained_link.accept(self)

    self._logger.debug(u"Finished processing {0}".format(encode(node)))
def visit(self, node):  # pylint: disable=E0102
    """Perform semantic analysis of the link node.

    :param node: Link node
    :type node: Link
    """
    self._logger.debug(u"Started processing {0}".format(encode(node)))

    # Templated links hold URI templates, not URIs, so only validate
    # the href of non-templated links.
    if not node.templated:
        URIReferenceParser().parse(node.href)

    self._logger.debug(u"Finished processing {0}".format(encode(node)))
def visit(self, node):  # pylint: disable=E0102
    """Perform semantic analysis of the OPDS 2.0 navigation.

    Validates the navigation as a compact collection and additionally
    requires every navigation link to carry a title.

    :param node: OPDS 2.0 navigation
    :type node: OPDS2Navigation
    """
    self._logger.debug(u"Started processing {0}".format(encode(node)))

    # Re-dispatch to the CompactCollection overload to validate the links.
    # NOTE(review): the argument order `cast(node, CompactCollection)` is
    # value-first, so `cast` is presumably a project helper that converts
    # the node (not typing.cast, whose signature is cast(type, value) and
    # which would recurse into this same overload) — confirm.
    self.visit(cast(node, CompactCollection))

    # Navigation links must be human-readable: each needs a title.
    for link in node.links:
        if link.title is None:
            raise MISSING_NAVIGATION_LINK_TITLE_ERROR

    self._logger.debug(u"Finished processing {0}".format(encode(node)))
def _set_scalar_value(self, json_content, ast_object):
    """Parse a scalar string value and initialize an object's property with it.

    A scalar can populate an AST object only when that object has exactly
    one required property; any remaining properties get their defaults.

    :param json_content: Scalar string value containing a required object's property
    :type json_content: str
    :param ast_object: AST object
    :type ast_object: Node

    :raise: BaseSyntaxError when the object does not have exactly one required property
    """
    required_object_properties = PropertiesGrouping.get_required_class_properties(
        ast_object.__class__)

    if len(required_object_properties) != 1:
        # BUG FIX: the message previously read "({2} was provided" with an
        # unbalanced parenthesis.
        raise BaseSyntaxError(
            u"There are {0} required properties in {1} but only a single value ({2}) was provided"
            .format(len(required_object_properties), encode(ast_object),
                    json_content))

    required_object_property_name, required_object_property = first_or_default(
        required_object_properties)

    self._set_property_value(
        ast_object,
        required_object_property_name,
        required_object_property,
        json_content,
    )

    # We need to initialize other properties with default values
    self._set_non_scalar_value(None, ast_object, {required_object_property_name})
def _set_property_value(self, ast_object, object_property_name, object_property, property_value):
    """Set the value of the specified property.

    Applies the property's default, enforces required-ness, parses and
    formats the value, then assigns it onto the AST object.

    :param ast_object: AST object
    :type ast_object: Node
    :param object_property_name: Name of the property
    :type object_property_name: str
    :param object_property: Object's property
    :type object_property: Property
    :param property_value: Value to be set
    :type property_value: Any

    :raise: MissingPropertyError when a required property has no value
    """
    self._logger.debug(
        u"Property '{0}' has the following value: {1}".format(
            object_property.key, encode(property_value)))

    # Fall back to the declared default when no value was supplied.
    if property_value is None and object_property.default_value is not None:
        property_value = object_property.default_value

    if object_property.required and property_value is None:
        raise MissingPropertyError(ast_object.__class__, object_property)

    if property_value is not None:
        parsed_value = object_property.parser.parse(property_value)
        property_value = self._format_property_value(
            parsed_value, object_property)

    setattr(ast_object, object_property_name, property_value)
def visit(self, node):  # pylint: disable=E0102
    """Perform semantic analysis of the collection node.

    Visits metadata, links and sub-collections in order, recording any
    errors instead of aborting on the first one.

    :param node: Collection node
    :type node: Collection
    """
    self._logger.debug(u"Started processing {0}".format(encode(node)))

    for child in (node.metadata, node.links, node.sub_collections):
        with self._record_errors():
            child.accept(self)

    self._logger.debug(u"Finished processing {0}".format(encode(node)))
def _extract_media_types_and_drm_scheme_from_link(self, link):
    """Extract information about content's media type and used DRM schema from the link.

    :param link: Link object
    :type link: ast_core.Link

    :return: 2-tuples of (content media type, DRM scheme)
    :rtype: List[Tuple[str, str]]
    """
    self._logger.debug(
        "Started extracting media types and a DRM scheme from {0}".format(
            encode(link)))

    media_types_and_drm_scheme = []

    if link.properties:
        # Only consider links that are (or default to) available.
        if (not link.properties.availability
                or link.properties.availability.state
                == opds2_ast.OPDS2AvailabilityType.AVAILABLE.value):
            for acquisition_object in link.properties.indirect_acquisition:
                # Descend to the innermost acquisition object: it carries
                # the content's media type, while the outermost one carries
                # the DRM scheme.
                nested_acquisition_object = acquisition_object
                while nested_acquisition_object.child:
                    # BUG FIX: advance from the *current* nested object
                    # rather than `acquisition_object.child`; the old code
                    # never descended past the first level and spun forever
                    # on chains deeper than one level.
                    nested_acquisition_object = first_or_default(
                        nested_acquisition_object.child)

                drm_scheme = (acquisition_object.type
                              if acquisition_object.type
                              in DeliveryMechanism.KNOWN_DRM_TYPES
                              else DeliveryMechanism.NO_DRM)

                media_types_and_drm_scheme.append(
                    (nested_acquisition_object.type, drm_scheme))
    else:
        # No properties: the link itself names the content type, DRM-free.
        if (link.type in MediaTypes.BOOK_MEDIA_TYPES
                or link.type in MediaTypes.AUDIOBOOK_MEDIA_TYPES):
            media_types_and_drm_scheme.append(
                (link.type, DeliveryMechanism.NO_DRM))

    self._logger.debug(
        "Finished extracting media types and a DRM scheme from {0}: {1}".
        format(encode(link), encode(media_types_and_drm_scheme)))

    return media_types_and_drm_scheme
def _extract_contributors(self, contributors, default_role=Contributor.AUTHOR_ROLE):
    """Extract a list of ContributorData objects from the webpub-manifest-parser's contributor.

    :param contributors: Parsed contributor object
    :type contributors: List[core_ast.Contributor]
    :param default_role: Default role
    :type default_role: Optional[str]

    :return: List of contributors metadata
    :rtype: List[ContributorData]
    """
    self._logger.debug("Started extracting contributors metadata")

    contributor_metadata_list = []

    for contributor in contributors:
        self._logger.debug(
            "Started extracting contributor metadata from {0}".format(
                encode(contributor)))

        # Fall back to the default role when the contributor declares none.
        roles = contributor.roles if contributor.roles else default_role
        metadata = ContributorData(
            sort_name=contributor.sort_as,
            display_name=contributor.name,
            family_name=None,
            wikipedia_name=None,
            roles=roles,
        )

        self._logger.debug(
            "Finished extracting contributor metadata from {0}: {1}".
            format(encode(contributor), encode(metadata)))

        contributor_metadata_list.append(metadata)

    self._logger.debug(
        "Finished extracting contributors metadata: {0}".format(
            encode(contributor_metadata_list)))

    return contributor_metadata_list
def visit(self, node):  # pylint: disable=E0102
    """Perform semantic analysis of the ODL 2.0 publication.

    A publication must carry either licenses or an open-access
    acquisition link.

    :param node: ODL 2.0 publication
    :type node: ODLPublication
    """
    self._logger.debug(u"Started processing {0}".format(encode(node)))

    has_licenses = bool(node.licenses) and len(node.licenses) > 0
    # BUG FIX: the second guard previously re-tested `node.licenses`
    # instead of `node.links`; since this branch is only reached when
    # licenses are absent, that made the open-access check unreachable and
    # an OA link could never satisfy the requirement.
    has_open_access_link = bool(
        node.links
        and len(node.links) > 0
        and node.links.get_by_rel(OPDS2LinkRelationsRegistry.OPEN_ACCESS.key)
    )

    if not has_licenses and not has_open_access_link:
        with self._record_errors():
            raise ODL_PUBLICATION_MUST_CONTAIN_EITHER_LICENSES_OR_OA_ACQUISITION_LINK_ERROR(
                node=node, node_property=None
            )
    elif has_licenses:
        node.licenses.accept(self)

    self._logger.debug(u"Finished processing {0}".format(encode(node)))
def _parse_nested_object(self, property_value, object_property):
    """Parse nested object(s) (if any) and return the result of parsing.

    :param property_value: Raw property's value (probably) containing nested object(s)
    :type property_value: Any
    :param object_property: Object's property
    :type object_property: properties.Property

    :return: Nested object's value
    :rtype: Any
    """
    if property_value is None:
        return property_value

    self._logger.debug(
        u"Started looking for nested property {0}".format(object_property))

    # Find every TypeParser inside this property's parser tree, paired
    # with its immediate parent parser.
    type_parsers_result = find_parser(object_property.parser, TypeParser)

    self._logger.debug(u"Found the following type parsers: {0}".format(
        type_parsers_result))

    found = False

    # First pass: if the raw value is a list and some TypeParser sits
    # under an ArrayParser, parse every list item as that nested type.
    for parent_parser, type_parser in type_parsers_result:
        if isinstance(parent_parser, ArrayParser) and isinstance(
                property_value, list):
            processed_items = []

            for item in property_value:
                processed_item = self._parse_object(item, type_parser.type)
                processed_items.append(processed_item)

            found = True
            property_value = processed_items
            break
    else:
        # NOTE: this `else` belongs to the `for` above — it runs only when
        # the first loop completed without a `break` (no array match).
        # Second pass: parse the scalar value with the first TypeParser
        # that is NOT under an ArrayParser.
        for parent_parser, type_parser in type_parsers_result:
            if not isinstance(parent_parser, ArrayParser):
                found = True
                property_value = self._parse_object(
                    property_value, type_parser.type)
                break

    if found:
        self._logger.debug(
            u"Finished parsing nested property {0}: {1}".format(
                object_property, encode(property_value)))
    else:
        self._logger.debug(
            u"Property {0} is not nested".format(object_property))

    return property_value
def _extract_subjects(self, subjects):
    """Extract a list of SubjectData objects from the webpub-manifest-parser's subject.

    :param subjects: Parsed subject object
    :type subjects: List[core_ast.Subject]

    :return: List of subjects metadata
    :rtype: List[SubjectMetadata]
    """
    self._logger.debug("Started extracting subjects metadata")

    subject_metadata_list = []

    for subject in subjects:
        self._logger.debug(
            "Started extracting subject metadata from {0}".format(
                encode(subject)))

        # Unknown schemes can't be represented faithfully, so fall back
        # to treating the subject as a plain tag.
        subject_type = Subject.by_uri.get(subject.scheme) or Subject.TAG

        subject_metadata = SubjectData(
            type=subject_type,
            identifier=subject.code,
            name=subject.name,
            weight=1,
        )
        subject_metadata_list.append(subject_metadata)

        self._logger.debug(
            "Finished extracting subject metadata from {0}: {1}".format(
                encode(subject), encode(subject_metadata)))

    self._logger.debug("Finished extracting subjects metadata: {0}".format(
        encode(subject_metadata_list)))

    return subject_metadata_list
def _extract_link(self, link, feed_self_url, default_link_rel=None):
    """Extract a LinkData object from webpub-manifest-parser's link.

    :param link: webpub-manifest-parser's link
    :type link: ast_core.Link
    :param feed_self_url: Feed's self URL
    :type feed_self_url: str
    :param default_link_rel: Default link's relation
    :type default_link_rel: Optional[str]

    :return: Link metadata
    :rtype: LinkData
    """
    self._logger.debug("Started extracting link metadata from {0}".format(
        encode(link)))

    # FIXME: It seems that OPDS 2.0 spec doesn't contain information about rights so we use the default one.
    rights_uri = RightsStatus.rights_uri_from_string("")

    href = link.href
    # Resolve relative hrefs against the feed's own URL.
    if feed_self_url and not urlparse(href).netloc:
        href = urljoin(feed_self_url, href)

    link_metadata = LinkData(
        rel=first_or_default(link.rels, default_link_rel),
        href=href,
        media_type=link.type,
        rights_uri=rights_uri,
        content=None,
    )

    self._logger.debug(
        "Finished extracting link metadata from {0}: {1}".format(
            encode(link), encode(link_metadata)))

    return link_metadata
def _extract_image_links(self, publication, feed_self_url):
    """Extracts a list of LinkData objects containing information about artwork.

    Each image yields a cover LinkData with an attached thumbnail LinkData
    pointing at the same resource.

    :param publication: Publication object
    :type publication: ast_core.Publication
    :param feed_self_url: Feed's self URL
    :type feed_self_url: str

    :return: List of links metadata
    :rtype: List[LinkData]
    """
    self._logger.debug("Started extracting image links from {0}".format(
        encode(publication.images)))

    # BUG FIX: guard against publications without artwork, matching the
    # sibling implementation; previously `publication.images.links` raised
    # AttributeError when `images` was None.
    if not publication.images:
        return []

    image_links = []

    for image_link in publication.images.links:
        thumbnail_link = self._extract_link(
            image_link,
            feed_self_url,
            default_link_rel=Hyperlink.THUMBNAIL_IMAGE,
        )
        thumbnail_link.rel = Hyperlink.THUMBNAIL_IMAGE

        cover_link = self._extract_link(
            image_link,
            feed_self_url,
            default_link_rel=Hyperlink.IMAGE,
        )
        cover_link.rel = Hyperlink.IMAGE
        cover_link.thumbnail = thumbnail_link

        image_links.append(cover_link)

    self._logger.debug(
        "Finished extracting image links from {0}: {1}".format(
            encode(publication.images), encode(image_links)))

    return image_links
def visit(self, node):
    """Perform semantic analysis of the manifest node.

    Resets the analysis context, then runs each validation step under
    error recording so a failure in one step doesn't stop the others.

    :param node: Manifest-like node
    :type node: Manifestlike
    """
    self._logger.debug(u"Started processing {0}".format(encode(node)))

    self.context.reset()

    validation_steps = (
        lambda: node.metadata.accept(self),
        lambda: node.links.accept(self),
        lambda: self._check_manifest_self_link(node),
        lambda: node.sub_collections.accept(self),
    )
    for step in validation_steps:
        with self._record_errors():
            step()

    self._logger.debug(u"Finished processing {0}".format(encode(node)))
def parse(self, value):
    """Parse a string value.

    :param value: Value
    :type value: Any

    :return: Parsed string value
    :rtype: str

    :raise: ValidationError
    """
    if is_string(value):
        return value

    raise ValueParsingError(u"Value '{0}' must be a string".format(
        encode(value)))
def parse(self, value):
    """Check that the value has the correct type.

    :param value: Value
    :type value: Any

    :return: Value
    :rtype: Any

    :raise: ValidationError
    """
    if isinstance(value, self._type):
        return value

    raise ValueParsingError(
        u"Value '{0}' must be an instance of '{1}'".format(
            encode(value), self._type))
def parse(self, value):
    """Make sure that the value is a part of the enumeration and return it back.

    :param value: Value
    :type value: Any

    :return: Parsed string value
    :rtype: int

    :raise: ValidationError
    """
    parsed = super(EnumParser, self).parse(value)

    if parsed in self._items:
        return parsed

    raise ValueParsingError(u"Value '{0}' is not among {1}".format(
        encode(parsed), self._items))