def special_quad_patterns(cls, alias_maker, discussion_id):
    """Return the extra quad-map patterns declared by this class.

    A single ``FOAF.homepage`` pattern is produced. Its object is an IRI
    built from the global base url, the slug of the related discussion
    and the id column of this class.

    :param alias_maker: object providing ``get_reln_alias``; used to
        obtain an alias for the ``cls.discussion`` relationship.
    :param discussion_id: accepted as part of the signature but not
        referenced in this body.
    :return: a list holding one ``QuadMapPatternS``.
    """
    # The slug lives on the discussion, so we go through the alias of
    # the relationship rather than through this class.
    related_discussion = alias_maker.get_reln_alias(cls.discussion)

    external_link_iri = PatternIriClass(
        QUADNAMES.post_external_link_iri,
        # TODO: Use discussion.get_base_url.
        # This should be computed outside the DB.
        get_global_base_url() + '/%s/posts/local:Content/%d',
        None,
        ('slug', Unicode, False),
        ('id', Integer, False))

    homepage_pattern = QuadMapPatternS(
        None,
        FOAF.homepage,
        external_link_iri.apply(related_discussion.slug, cls.id),
        name=QUADNAMES.post_external_link_map)

    return [homepage_pattern]
class Extract(IdeaContentPositiveLink):
    """
    An extracted part of a Content. A quotation to be referenced by an
    `Idea`.

    Stored in the ``extract`` table, joined to
    ``idea_content_positive_link`` through the shared primary key.
    """
    __tablename__ = 'extract'
    rdf_class = CATALYST.Excerpt

    # Extract ID represents both the oa:Annotation and the oa:SpecificResource
    # TODO: This iri is not yet dereferencable.
    specific_resource_iri = PatternIriClass(
        QUADNAMES.oa_specific_resource_iri,
        get_global_base_url() + '/data/SpecificResource/%d',
        None,
        ('id', Integer, False))

    # Primary key, also a foreign key to the parent link row.
    id = Column(
        Integer,
        ForeignKey('idea_content_positive_link.id',
                   ondelete='CASCADE', onupdate='CASCADE'),
        primary_key=True,
        info={'rdf': QuadMapPatternS(None, ASSEMBL.db_id)})

    # IRI pattern of the per-extract named graph.
    graph_iri_class = PatternIriClass(
        QUADNAMES.ExcerptGraph_iri,
        get_global_base_url() + '/data/ExcerptGraph/%d',
        None,
        ('id', Integer, False))

    # TODO: body was misused to contain the extract fragment content,
    # which should belong in the TextFragmentIdentifier,
    # whereas it was meant to be a comment on the extract
    # if used from the Web annotator. I'll have to migrate it.
    body = Column(UnicodeText, nullable=False)
    # info={'rdf': QuadMapPatternS(None, OA.hasBody)})

    discussion_id = Column(
        Integer,
        ForeignKey('discussion.id', ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
        index=True,
        info={'rdf': QuadMapPatternS(None, CATALYST.relevantToConversation)})

    discussion = relationship(
        Discussion,
        backref=backref('extracts', cascade="all, delete-orphan"),
        info={'rdf': QuadMapPatternS(None, ASSEMBL.in_conversation)})

    important = Column('important', Boolean, server_default='0')

    def extract_graph_name(self):
        """Return the ``URIRef`` of this extract's graph.

        The host comes from the ``public_hostname`` setting of the
        current pyramid registry; the scheme is hard-coded to ``http``.
        """
        # Imported locally, as in the original code.
        from pyramid.threadlocal import get_current_registry
        reg = get_current_registry()
        host = reg.settings['public_hostname']
        return URIRef('http://%s/data/ExcerptGraph/%d' % (host, self.id))

    def extract_graph_iri(self):
        """Return the ``QUADNAMES`` attribute named ``extract_<id>_iri``."""
        return getattr(QUADNAMES, 'extract_%d_iri' % self.id)

    @classmethod
    def special_quad_patterns(cls, alias_maker, discussion_id):
        """Return the extra quad-map patterns for extracts.

        Neither ``alias_maker`` nor ``discussion_id`` is referenced in
        this body.

        :return: list of ``QuadMapPatternS``.
        """
        # NOTE: the ``!= None`` / ``== None`` comparisons below are kept on
        # purpose. They are applied to mapped attributes to build SQL
        # conditions, so they must not be rewritten as ``is (not) None``.
        return [
            QuadMapPatternS(
                None, OA.hasBody,
                cls.graph_iri_class.apply(cls.id),
                name=QUADNAMES.oa_hasBody,
                conditions=((cls.idea_id != None),
                            (Idea.tombstone_date == None))),
            QuadMapPatternS(
                #Content.iri_class().apply(cls.content_id),
                cls.specific_resource_iri.apply(cls.id),
                # It would be better to use CATALYST.expressesIdea,
                # but Virtuoso hates the redundancy.
                ASSEMBL.resourceExpressesIdea,
                Idea.iri_class().apply(cls.idea_id),
                name=QUADNAMES.assembl_postExtractRelatedToIdea,
                conditions=(
                    (cls.idea_id != None),
                    (Idea.tombstone_date == None)
                    # and it's a post extract... treat webpages separately.
                )),
            QuadMapPatternS(
                None, OA.hasTarget,
                cls.specific_resource_iri.apply(cls.id),
                name=QUADNAMES.oa_hasTarget),
            QuadMapPatternS(
                cls.specific_resource_iri.apply(cls.id),
                RDF.type, OA.SpecificResource,
                name=QUADNAMES.oa_SpecificResource_type),
            QuadMapPatternS(
                cls.specific_resource_iri.apply(cls.id),
                ASSEMBL.in_conversation,
                Discussion.iri_class().apply(cls.discussion_id),
                name=QUADNAMES.oa_SpecificResource_in_conversation),
            QuadMapPatternS(
                cls.specific_resource_iri.apply(cls.id),
                OA.hasSource,
                Content.iri_class().apply(cls.content_id),
                name=QUADNAMES.oa_hasSource),
            # TODO: Paths
            # QuadMapPatternS(
            #     AgentProfile.iri_class().apply((cls.content_id, Post.creator_id)),
            #     DCTERMS.contributor,
            #     Idea.iri_class().apply(cls.idea_id),
            #     name=QUADNAMES.assembl_idea_contributor,
            #     conditions=(cls.idea_id != None,)),
        ]

    annotation_text = Column(UnicodeText)

    owner_id = Column(
        Integer,
        ForeignKey('agent_profile.id'),
        nullable=False,
    )

    owner = relationship(
        'AgentProfile', foreign_keys=[owner_id], backref='extracts_owned')

    extract_source = relationship(Content, backref="extracts")
    extract_ideas = relationship(Idea, backref="extracts")

    __mapper_args__ = {
        'polymorphic_identity': 'assembl:postExtractRelatedToIdea',
    }

    @property
    def target(self):
        """Describe the content this extract points at, as a dict.

        ``@type`` is always set; ``@id`` is added for a ``Post`` and
        ``url`` for content whose ``type`` is ``'webpage'``.
        """
        retval = {'@type': self.content.external_typename()}
        if isinstance(self.content, Post):
            retval['@id'] = Post.uri_generic(self.content.id)
        elif self.content.type == 'webpage':
            retval['url'] = self.content.url
        return retval

    def __repr__(self):
        """Parent repr with the first 20 characters of ``body`` inserted
        before the final character."""
        r = super(Extract, self).__repr__()
        body = self.body or ""
        # NOTE(review): concatenating the result of ``.encode`` with ``r``
        # relies on Python 2 string semantics; on Python 3 this mixes
        # ``str`` and ``bytes``. Left unchanged.
        return r[:-1] + body[:20].encode("ascii", "ignore") + ">"

    def get_target(self):
        """Return the content object this extract is taken from."""
        return self.content

    def get_post(self):
        """Return the content if it is a ``Post``, otherwise ``None``."""
        if isinstance(self.content, Post):
            return self.content

    def infer_text_fragment(self):
        """Locate the quote inside the post and return a text fragment.

        :return: the ``TextFragmentIdentifier`` produced by
            ``_infer_text_fragment_inner``, or ``None`` when the content
            is not a ``Post`` or the quote cannot be found.
        """
        post = self.get_post()
        if post is None:
            # get_post() yields None for non-Post content; the original
            # code then failed on ``None.id``.
            return None
        return self._infer_text_fragment_inner(
            self.content.get_title(), self.content.get_body(), post.id)

    def _infer_text_fragment_inner(self, title, body, post_id):
        """Find ``self.body`` in ``body`` or ``title`` and record offsets.

        Side effect: ``self.body`` is replaced by its sanitized form when
        sanitizing changes it.

        :param title: title text, searched when the body has no match.
        :param body: post body; sanitized before searching.
        :param post_id: id passed to ``Post.uri_generic`` for the xpath.
        :return: a new or existing ``TextFragmentIdentifier`` with xpath
            and offsets set, or ``None`` if the quote is in neither text.
        """
        # dead code? If not needs to be refactored with langstrings
        body = IMAPMailbox.sanitize_html(body, [])
        quote = self.body.replace("\r", "")
        try:
            # for historical reasons
            quote = quopri.decodestring(quote)
        except Exception:
            # Best effort: keep the undecoded quote. Narrowed from a bare
            # ``except`` so KeyboardInterrupt/SystemExit still propagate.
            pass
        quote = IMAPMailbox.sanitize_html(quote, [])
        if quote != self.body:
            self.body = quote
        quote = quote.replace("\n", "")

        start = body.find(quote)
        lookin = 'message-body'
        if start < 0:
            # Not in the body: fall back to the title. (An xpath assigned
            # here was always overwritten below and has been removed.)
            start = title.find(quote)
            if start < 0:
                return None
            lookin = 'message-subject'
        xpath = "//div[@id='message-%s']//div[@class='%s']" % (
            Post.uri_generic(post_id), lookin)

        # Reuse the extract's existing fragment identifier when present.
        tfi = self.db.query(TextFragmentIdentifier).filter_by(
            extract=self).first()
        if not tfi:
            tfi = TextFragmentIdentifier(extract=self)
        tfi.xpath_start = tfi.xpath_end = xpath
        tfi.offset_start = start
        tfi.offset_end = start + len(quote)
        return tfi

    def send_to_changes(self, connection=None, operation=CrudOperation.UPDATE,
                        discussion_id=None, view_def="changes"):
        """Forward to the parent implementation, then notify the model
        watcher of the update, deletion or creation of this extract."""
        super(Extract, self).send_to_changes(
            connection, operation, discussion_id, view_def)
        watcher = get_model_watcher()
        if operation == CrudOperation.UPDATE:
            watcher.processExtractModified(self.id, 0)  # no versions yet.
        elif operation == CrudOperation.DELETE:
            watcher.processExtractDeleted(self.id)
        elif operation == CrudOperation.CREATE:
            watcher.processExtractCreated(self.id)

    def get_discussion_id(self):
        """Return the ``discussion_id`` column value."""
        return self.discussion_id

    @classmethod
    def get_discussion_conditions(cls, discussion_id, alias_maker=None):
        """Return a 1-tuple restricting extracts to ``discussion_id``."""
        return (cls.discussion_id == discussion_id, )

    @classmethod
    def base_conditions(cls, alias=None, alias_maker=None):
        """Return no base conditions."""
        # Allow idea-less extracts
        return ()

    @classmethod
    def restrict_to_owners(cls, query, user_id):
        "filter query according to object owners"
        return query.filter(cls.owner_id == user_id)

    crud_permissions = CrudPermissions(
        P_ADD_EXTRACT, P_READ, P_EDIT_EXTRACT, P_EDIT_EXTRACT,
        P_EDIT_MY_EXTRACT, P_EDIT_MY_EXTRACT)
class Extract(IdeaContentPositiveLink):
    """
    An extracted part of a Content. A quotation to be referenced by an
    `Idea`.

    Stored in the ``extract`` table, joined to
    ``idea_content_positive_link`` through the shared primary key. The
    quoted text itself is exposed by the ``quote`` property, which joins
    the ``body`` of the objects in ``self.selectors``.
    """
    __tablename__ = 'extract'
    # NOTE(review): a double-underscore name inside a class body is
    # name-mangled by Python (to ``_Extract__external_typename``);
    # confirm that whatever reads this value expects the mangled name.
    __external_typename = "Excerpt"
    rdf_class = CATALYST.Excerpt

    # Extract ID represents both the oa:Annotation and the oa:SpecificResource
    # TODO: This iri is not yet dereferencable.
    specific_resource_iri = PatternIriClass(
        QUADNAMES.oa_specific_resource_iri,
        get_global_base_url() + '/data/SpecificResource/%d',
        None,
        ('id', Integer, False))

    # Primary key, also a foreign key to the parent link row.
    id = Column(
        Integer,
        ForeignKey('idea_content_positive_link.id',
                   ondelete='CASCADE', onupdate='CASCADE'),
        primary_key=True,
        info={'rdf': QuadMapPatternS(None, ASSEMBL.db_id)})

    # IRI pattern of the per-extract named graph.
    graph_iri_class = PatternIriClass(
        QUADNAMES.ExcerptGraph_iri,
        get_global_base_url() + '/data/ExcerptGraph/%d',
        None,
        ('id', Integer, False))

    annotation_text = Column(UnicodeText)
    # info={'rdf': QuadMapPatternS(None, OA.hasBody)})

    discussion_id = Column(
        Integer,
        ForeignKey('discussion.id', ondelete="CASCADE", onupdate="CASCADE"),
        nullable=False,
        index=True,
        info={'rdf': QuadMapPatternS(None, CATALYST.relevantToConversation)})

    discussion = relationship(
        Discussion,
        backref=backref('extracts', cascade="all, delete-orphan"),
        info={'rdf': QuadMapPatternS(None, ASSEMBL.in_conversation)})

    important = Column(Boolean, server_default='0')

    external_url = Column(URLString)

    attributed_to_id = Column(
        Integer,
        ForeignKey(AgentProfile.id, ondelete='SET NULL', onupdate='CASCADE'))

    def local_uri_as_graph(self):
        """Return the ``local:``-prefixed URI of this extract's graph."""
        return 'local:ExcerptGraph/%d' % (self.id, )

    def local_uri_as_resource(self):
        """Return the ``local:``-prefixed URI of the specific resource."""
        return 'local:SpecificResource/%d' % (self.id, )

    def extract_graph_name(self):
        """Return the ``URIRef`` of this extract's graph.

        The host comes from the ``public_hostname`` setting of the
        current pyramid registry; the scheme is hard-coded to ``http``.
        """
        # Imported locally, as in the original code.
        from pyramid.threadlocal import get_current_registry
        reg = get_current_registry()
        host = reg.settings['public_hostname']
        return URIRef('http://%s/data/ExcerptGraph/%d' % (host, self.id))

    def fragements_as_web_annotatation(self):
        """Concatenate ``as_web_annotation()`` of every selector.

        The method name (including its spelling) is kept as-is because
        it is part of the class interface.
        """
        # Starting the sum from [] means each result is added with
        # list ``+``.
        return sum((tfi.as_web_annotation() for tfi in self.selectors), [])

    def extract_graph_json(self):
        """Return the graph of this extract as a JSON-able dict linking
        the specific resource to the idea it expresses."""
        return {
            "@graph": [{
                "expressesIdea": Idea.uri_generic(self.idea_id),
                "@id": self.local_uri_as_resource()
            }],
            "@id": self.local_uri_as_graph()
        }

    def extract_graph_json_wrap(self):
        """Wrap ``extract_graph_json`` with a ``@context`` that maps the
        ``local`` prefix to the global base url."""
        return {
            "@context": [context_url, {'local': get_global_base_url()}],
            "@graph": [self.extract_graph_json()]
        }

    def extract_graph_json_wrap_flat(self):
        """Return ``extract_graph_json_wrap`` serialized as a JSON string."""
        return json.dumps(self.extract_graph_json_wrap())

    def extract_graph_iri(self):
        """Return the ``QUADNAMES`` attribute named ``extract_<id>_iri``."""
        return getattr(QUADNAMES, 'extract_%d_iri' % self.id)

    @classmethod
    def special_quad_patterns(cls, alias_maker, discussion_id):
        """Return the extra quad-map patterns for extracts.

        Neither ``alias_maker`` nor ``discussion_id`` is referenced in
        this body.

        :return: list of ``QuadMapPatternS``.
        """
        # NOTE: the ``!= None`` / ``== None`` comparisons below are kept on
        # purpose. They are applied to mapped attributes to build SQL
        # conditions, so they must not be rewritten as ``is (not) None``.
        return [
            QuadMapPatternS(
                None, OA.hasBody,
                cls.graph_iri_class.apply(cls.id),
                name=QUADNAMES.oa_hasBody,
                conditions=((cls.idea_id != None),
                            (Idea.tombstone_date == None))),
            QuadMapPatternS(
                #Content.iri_class().apply(cls.content_id),
                cls.specific_resource_iri.apply(cls.id),
                # It would be better to use CATALYST.expressesIdea,
                # but Virtuoso hates the redundancy.
                ASSEMBL.resourceExpressesIdea,
                Idea.iri_class().apply(cls.idea_id),
                name=QUADNAMES.assembl_postExtractRelatedToIdea,
                conditions=(
                    (cls.idea_id != None),
                    (Idea.tombstone_date == None)
                    # and it's a post extract... treat webpages separately.
                )),
            QuadMapPatternS(
                None, OA.hasTarget,
                cls.specific_resource_iri.apply(cls.id),
                name=QUADNAMES.oa_hasTarget),
            QuadMapPatternS(
                cls.specific_resource_iri.apply(cls.id),
                RDF.type, OA.SpecificResource,
                name=QUADNAMES.oa_SpecificResource_type),
            QuadMapPatternS(
                cls.specific_resource_iri.apply(cls.id),
                ASSEMBL.in_conversation,
                Discussion.iri_class().apply(cls.discussion_id),
                name=QUADNAMES.oa_SpecificResource_in_conversation),
            QuadMapPatternS(
                cls.specific_resource_iri.apply(cls.id),
                OA.hasSource,
                Content.iri_class().apply(cls.content_id),
                name=QUADNAMES.oa_hasSource),
            # TODO: Paths
            # QuadMapPatternS(
            #     AgentProfile.iri_class().apply((cls.content_id, Post.creator_id)),
            #     DCTERMS.contributor,
            #     Idea.iri_class().apply(cls.idea_id),
            #     name=QUADNAMES.assembl_idea_contributor,
            #     conditions=(cls.idea_id != None,)),
        ]

    owner_id = Column(
        Integer,
        ForeignKey('agent_profile.id'),
        nullable=False,
    )

    owner = relationship(
        AgentProfile, foreign_keys=[owner_id], backref='extracts_owned')

    attributed_to = relationship(
        AgentProfile, foreign_keys=[attributed_to_id],
        backref='extracts_attributed')

    extract_source = relationship(Content, backref="extracts")
    extract_ideas = relationship(Idea, backref="extracts")

    __mapper_args__ = {
        'polymorphic_identity': 'assembl:postExtractRelatedToIdea',
    }

    @property
    def target(self):
        """Describe the content this extract points at, as a dict.

        ``@type`` is always set; ``@id`` is added for a ``Post``. For
        content whose ``type`` is ``'webpage'``, ``url`` is added, plus
        ``title`` when the content has a subject.
        """
        retval = {'@type': self.content.external_typename()}
        if isinstance(self.content, Post):
            retval['@id'] = Post.uri_generic(self.content.id)
        elif self.content.type == 'webpage':
            retval['url'] = self.content.url
            # NOTE(review): the subject/title lookup is assumed to belong
            # to the webpage branch; confirm against version control.
            subject = self.content.subject
            if subject:
                retval['title'] = subject.first_original().value
        return retval

    @as_native_str()
    def __repr__(self):
        """Parent repr with the first 20 characters of ``quote`` inserted
        before the final character."""
        r = super(Extract, self).__repr__()
        body = self.quote or ""
        return r[:-1] + body[:20] + ">"

    def populate_from_context(self, context):
        """Default ``owner_id`` to the context's user when no owner is
        set, then defer to the parent implementation."""
        if not (self.owner or self.owner_id):
            self.owner_id = context.get_user_id()
        super(Extract, self).populate_from_context(context)

    def get_target(self):
        """Return the content object this extract is taken from."""
        return self.content

    def get_post(self):
        """Return the content if it is a ``Post``, otherwise ``None``."""
        if isinstance(self.content, Post):
            return self.content

    def infer_text_fragment(self):
        """Locate the quote inside the post and return a text fragment.

        :return: the ``TextFragmentIdentifier`` produced by
            ``_infer_text_fragment_inner``, or ``None`` when the content
            is not a ``Post`` or the quote cannot be found.
        """
        post = self.get_post()
        if post is None:
            # get_post() yields None for non-Post content; the original
            # code then failed on ``None.id``.
            return None
        return self._infer_text_fragment_inner(
            self.content.get_title(), self.content.get_body(), post.id)

    @property
    def quote(self):
        """Space-joined ``body`` of every selector that has a truthy one."""
        return ' '.join(
            (tf.body for tf in self.selectors if getattr(tf, 'body', None)))

    def _infer_text_fragment_inner(self, title, body, post_id):
        """Find ``self.quote`` in ``body`` or ``title`` and record offsets.

        :param title: title text, searched when the body has no match.
        :param body: post body; sanitized before searching.
        :param post_id: id passed to ``Post.uri_generic`` for the xpath.
        :return: a new or existing ``TextFragmentIdentifier`` with xpath
            and offsets set, or ``None`` if the quote is in neither text.
        """
        # dead code? If not needs to be refactored with langstrings
        # and moved within text_fragment, maybe?
        body = sanitize_html(body, [])
        quote = self.quote.replace("\r", "")
        try:
            # for historical reasons
            quote = quopri.decodestring(quote)
        except Exception:
            # Best effort: keep the undecoded quote. Narrowed from a bare
            # ``except`` so KeyboardInterrupt/SystemExit still propagate.
            pass
        quote = sanitize_html(quote, [])
        # NOTE(review): this class declares no ``body`` attribute (the
        # text is exposed through the read-only ``quote`` property), so
        # this read may fail unless a parent class provides ``body``.
        # Left unchanged; confirm whether this method is still reachable.
        if quote != self.body:
            self.body = quote
        quote = quote.replace("\n", "")

        start = body.find(quote)
        lookin = 'message-body'
        if start < 0:
            # Not in the body: fall back to the title. (An xpath assigned
            # here was always overwritten below and has been removed.)
            start = title.find(quote)
            if start < 0:
                return None
            lookin = 'message-subject'
        xpath = "//div[@id='message-%s']//div[@class='%s']" % (
            Post.uri_generic(post_id), lookin)

        # Reuse the extract's existing fragment identifier when present.
        tfi = self.db.query(TextFragmentIdentifier).filter_by(
            extract=self).first()
        if not tfi:
            tfi = TextFragmentIdentifier(extract=self)
        tfi.xpath_start = tfi.xpath_end = xpath
        tfi.offset_start = start
        tfi.offset_end = start + len(quote)
        return tfi

    def send_to_changes(self, connection=None, operation=CrudOperation.UPDATE,
                        discussion_id=None, view_def="changes"):
        """Forward to the parent implementation, then notify the model
        watcher of the update, deletion or creation of this extract."""
        super(Extract, self).send_to_changes(
            connection, operation, discussion_id, view_def)
        watcher = get_model_watcher()
        if operation == CrudOperation.UPDATE:
            watcher.processExtractModified(self.id, 0)  # no versions yet.
        elif operation == CrudOperation.DELETE:
            watcher.processExtractDeleted(self.id)
        elif operation == CrudOperation.CREATE:
            watcher.processExtractCreated(self.id)

    def get_discussion_id(self):
        """Return ``discussion_id``, falling back to ``discussion.id``
        when the column value is falsy."""
        return self.discussion_id or self.discussion.id

    @classmethod
    def get_discussion_conditions(cls, discussion_id, alias_maker=None):
        """Return a 1-tuple restricting extracts to ``discussion_id``."""
        return (cls.discussion_id == discussion_id, )

    @classmethod
    def base_conditions(cls, alias=None, alias_maker=None):
        """Return no base conditions."""
        # Allow idea-less extracts
        return ()

    @classmethod
    def restrict_to_owners_condition(cls, query, user_id, alias=None,
                                     alias_maker=None):
        """Return ``(query, condition)`` where the condition matches rows
        owned by ``user_id``.

        The condition is built on ``alias`` when given, else on the
        alias obtained from ``alias_maker``, else on the class itself.
        The query is returned untouched.
        """
        if not alias:
            if alias_maker:
                alias = alias_maker.alias_from_class(cls)
            else:
                alias = cls
        return (query, alias.owner_id == user_id)

    crud_permissions = CrudPermissions(
        P_ADD_EXTRACT, P_READ, P_EDIT_EXTRACT, P_EDIT_EXTRACT)