def _normalize_document_uris_window(session, window):
    """
    Re-normalize the DocumentURI rows last updated inside ``window``.

    Rows are visited in ascending ``updated`` order. For each row:

    * when more than one document is found for the row's URI, those
      documents are merged with ``models.merge_documents``;
    * when a different DocumentURI row with the same document, type and
      content type already holds the normalized claimant and URI, the
      current row is deleted;
    * otherwise the row's normalized claimant and URI are recomputed.

    The session is flushed after each row has been handled.

    :param session: database session used to query, delete and flush
    :param window: object whose ``start`` and ``end`` attributes bound
        ``DocumentURI.updated``
    """
    doc_uri_cls = models.DocumentURI

    rows = session.query(doc_uri_cls)
    rows = rows.filter(doc_uri_cls.updated.between(window.start, window.end))
    rows = rows.order_by(doc_uri_cls.updated.asc())

    for row in rows:
        matching_docs = models.Document.find_by_uris(session, [row.uri])
        if matching_docs.count() > 1:
            models.merge_documents(session, matching_docs)

        # Look for a *different* row that would collide with this one once
        # its claimant and URI are normalized.
        candidates = session.query(doc_uri_cls)
        collision_criteria = (
            doc_uri_cls.id != row.id,
            doc_uri_cls.document_id == row.document_id,
            doc_uri_cls.claimant_normalized == uri.normalize(row.claimant),
            doc_uri_cls.uri_normalized == uri.normalize(row.uri),
            doc_uri_cls.type == row.type,
            doc_uri_cls.content_type == row.content_type,
        )
        has_collision = candidates.filter(*collision_criteria).count() > 0

        if not has_collision:
            row._claimant_normalized = uri.normalize(row.claimant)
            row._uri_normalized = uri.normalize(row.uri)
        else:
            session.delete(row)

        session.flush()
def _normalize_document_uris_window(session, window):
    """
    Re-normalize the DocumentURI rows last updated inside ``window``.

    Rows are visited in ascending ``updated`` order. For each row:

    * when more than one document is found for the row's URI, those
      documents are merged with ``models.merge_documents``;
    * when a different DocumentURI row with the same document, type and
      content type already holds the normalized claimant and URI, the
      current row is deleted;
    * otherwise the row's normalized claimant and URI are recomputed.

    The session is flushed after each row has been handled.

    :param session: database session used to query, delete and flush
    :param window: object whose ``start`` and ``end`` attributes bound
        ``DocumentURI.updated``
    """
    doc_uri_cls = models.DocumentURI

    rows = session.query(doc_uri_cls)
    rows = rows.filter(doc_uri_cls.updated.between(window.start, window.end))
    rows = rows.order_by(doc_uri_cls.updated.asc())

    for row in rows:
        matching_docs = models.Document.find_by_uris(session, [row.uri])
        if matching_docs.count() > 1:
            models.merge_documents(session, matching_docs)

        # Look for a *different* row that would collide with this one once
        # its claimant and URI are normalized.
        candidates = session.query(doc_uri_cls)
        collision_criteria = (
            doc_uri_cls.id != row.id,
            doc_uri_cls.document_id == row.document_id,
            doc_uri_cls.claimant_normalized == uri.normalize(row.claimant),
            doc_uri_cls.uri_normalized == uri.normalize(row.uri),
            doc_uri_cls.type == row.type,
            doc_uri_cls.content_type == row.content_type,
        )
        has_collision = candidates.filter(*collision_criteria).count() > 0

        if not has_collision:
            row._claimant_normalized = uri.normalize(row.claimant)
            row._uri_normalized = uri.normalize(row.uri)
        else:
            session.delete(row)

        session.flush()
def _normalize_document_meta_window(session, window):
    """
    Re-normalize the DocumentMeta rows last updated inside ``window``.

    Rows are visited in ascending ``updated`` order. A row is deleted when
    a different DocumentMeta row of the same type already holds the
    normalized claimant; otherwise the row's normalized claimant is
    recomputed. The session is flushed after each row has been handled.

    :param session: database session used to query, delete and flush
    :param window: object whose ``start`` and ``end`` attributes bound
        ``DocumentMeta.updated``
    """
    meta_cls = models.DocumentMeta

    rows = session.query(meta_cls)
    rows = rows.filter(meta_cls.updated.between(window.start, window.end))
    rows = rows.order_by(meta_cls.updated.asc())

    for row in rows:
        # Look for a *different* row that would collide with this one once
        # its claimant is normalized.
        candidates = session.query(meta_cls)
        collision_criteria = (
            meta_cls.id != row.id,
            meta_cls.claimant_normalized == uri.normalize(row.claimant),
            meta_cls.type == row.type,
        )
        has_collision = candidates.filter(*collision_criteria).count() > 0

        if not has_collision:
            row._claimant_normalized = uri.normalize(row.claimant)
        else:
            session.delete(row)

        session.flush()
def _normalize_document_meta_window(session, window):
    """
    Re-normalize the DocumentMeta rows last updated inside ``window``.

    Rows are visited in ascending ``updated`` order. A row is deleted when
    a different DocumentMeta row of the same type already holds the
    normalized claimant; otherwise the row's normalized claimant is
    recomputed. The session is flushed after each row has been handled.

    :param session: database session used to query, delete and flush
    :param window: object whose ``start`` and ``end`` attributes bound
        ``DocumentMeta.updated``
    """
    meta_cls = models.DocumentMeta

    rows = session.query(meta_cls)
    rows = rows.filter(meta_cls.updated.between(window.start, window.end))
    rows = rows.order_by(meta_cls.updated.asc())

    for row in rows:
        # Look for a *different* row that would collide with this one once
        # its claimant is normalized.
        candidates = session.query(meta_cls)
        collision_criteria = (
            meta_cls.id != row.id,
            meta_cls.claimant_normalized == uri.normalize(row.claimant),
            meta_cls.type == row.type,
        )
        has_collision = candidates.filter(*collision_criteria).count() > 0

        if not has_collision:
            row._claimant_normalized = uri.normalize(row.claimant)
        else:
            session.delete(row)

        session.flush()
def _normalize_annotations_window(session, window):
    """
    Re-normalize the target URIs of annotations updated inside ``window``.

    Annotations are visited in ascending ``updated`` order. Whenever the
    freshly normalized target URI differs from the stored one, the stored
    value is overwritten and the annotation id is recorded.

    :param session: database session used to query annotations
    :param window: object whose ``start`` and ``end`` attributes bound
        ``Annotation.updated``
    :returns: ids of the annotations whose normalized target URI changed
    :rtype: set
    """
    annotation_cls = models.Annotation

    annotations = session.query(annotation_cls)
    annotations = annotations.filter(
        annotation_cls.updated.between(window.start, window.end))
    annotations = annotations.order_by(annotation_cls.updated.asc())

    changed_ids = set()
    for annotation in annotations:
        fresh = uri.normalize(annotation.target_uri)
        if fresh == annotation.target_uri_normalized:
            # Already up to date; leave the row untouched.
            continue
        annotation._target_uri_normalized = fresh
        changed_ids.add(annotation.id)

    return changed_ids
def __call__(self, params):
    """
    Turn the ``uri`` search parameters into a ``terms`` filter.

    Returns ``None`` when ``params`` has no ``uri`` key. Otherwise every
    value stored under ``uri`` is collected, the key is removed from
    ``params``, each value is expanded with ``storage.expand_uri`` and the
    normalized results are gathered without duplicates.

    :param params: mapping of search parameters; mutated in place (the
        ``uri`` key is deleted). Presumably a multi-dict, since all items
        keyed ``uri`` are read -- confirm against the caller.
    :returns: ``{"terms": {"target.scope": [...]}}`` or ``None``
    """
    if 'uri' not in params:
        return None

    requested = [value for key, value in params.items() if key == 'uri']
    del params['uri']

    scopes = set()
    for requested_uri in requested:
        for expanded_uri in storage.expand_uri(self.request.db, requested_uri):
            scopes.add(uri.normalize(expanded_uri))

    return {"terms": {"target.scope": list(scopes)}}
def list(self, user, group=None, uris=None):
    """
    Build the query selecting the flags made by the given user.

    :param user: The user whose flags are selected.
    :type user: h.models.User

    :param group: Optional annotation group pubid; when given, only flags
        on annotations in that group are selected.
    :type group: unicode

    :param uris: Optional annotation uris; when non-empty, only flags on
        annotations whose normalized target uri matches one of the
        expanded, normalized uris are selected.
    :type uris: list of unicode

    :returns: the query over ``h.models.Flag`` with the requested filters
        applied; it is returned as built, without being materialised
        into a list here
    """
    flags = self.session.query(models.Flag).filter_by(user=user)

    if group is not None:
        flags = flags.join(models.Annotation)
        flags = flags.filter(models.Annotation.groupid == group)

    if uris:
        wanted_uris = set()
        for requested_uri in uris:
            for expanded_uri in storage.expand_uri(self.session, requested_uri):
                wanted_uris.add(uri.normalize(expanded_uri))

        # The annotation table is joined at most once: the group filter
        # above has already joined it whenever a group was supplied.
        if group is None:
            flags = flags.join(models.Annotation)

        flags = flags.filter(
            models.Annotation.target_uri_normalized.in_(wanted_uris))

    return flags
def _fetch_annotations(session, uri_):
    """
    Fetch all annotations whose normalized target URI equals the
    normalized form of ``uri_``.

    :param session: database session used for the query
    :param uri_: URI to normalize and match against
    :returns: the result of ``.all()`` on the filtered annotation query
    """
    annotations = session.query(models.Annotation)
    same_target = (
        models.Annotation.target_uri_normalized == uri.normalize(uri_))
    return annotations.filter(same_target).all()
def target_uri(self, value):
    """
    Store ``value`` as the target URI and refresh its normalized form.

    :param value: the new target URI
    """
    # The raw value is stored before normalizing, so it is kept even if
    # normalization raises.
    self._target_uri = value
    normalized = uri.normalize(value)
    self._target_uri_normalized = normalized
def test_normalize(url_in, url_out):
    """Check that normalizing ``url_in`` yields exactly ``url_out``."""
    actual = uri.normalize(url_in)
    assert actual == url_out
def test_normalize_returns_unicode(url, _):
    """Check that the normalized URL is an instance of ``text_type``."""
    normalized = uri.normalize(url)
    assert isinstance(normalized, text_type)
def _fetch_document_uri_canonical_self_claim(session, uri_):
    """
    Fetch the DocumentURI rows for ``uri_`` whose type is ``self-claim``
    or ``rel-canonical``.

    :param session: database session used for the query
    :param uri_: URI to normalize and match against ``uri_normalized``
    :returns: the result of ``.all()`` on the filtered DocumentURI query
    """
    doc_uris = session.query(models.DocumentURI)
    same_uri = models.DocumentURI.uri_normalized == uri.normalize(uri_)
    wanted_type = models.DocumentURI.type.in_(
        [u'self-claim', u'rel-canonical'])
    return doc_uris.filter(same_uri, wanted_type).all()
def _fetch_document_uri_claimants(session, uri_):
    """
    Fetch the DocumentURI rows whose normalized claimant equals the
    normalized form of ``uri_``.

    :param session: database session used for the query
    :param uri_: URI to normalize and match against ``claimant_normalized``
    :returns: the result of ``.all()`` on the filtered DocumentURI query
    """
    doc_uris = session.query(models.DocumentURI)
    same_claimant = (
        models.DocumentURI.claimant_normalized == uri.normalize(uri_))
    return doc_uris.filter(same_claimant).all()