Beispiel #1
0
def _normalize_document_uris_window(session, window):
    """
    Re-normalize the DocumentURI rows whose ``updated`` lies in ``window``.

    Rows are visited in ascending ``updated`` order. For each row:

    * documents found for the row's URI are merged when more than one exists;
    * the row is deleted when another row of the same document already has
      the normalized claimant and URI with the same type and content type;
    * otherwise the row's normalized claimant/URI attributes are rewritten.

    The session is flushed after every row.

    :param session: database session used for querying, deleting, flushing
    :param window: object providing ``start`` and ``end`` bounds
    """
    doc_uri_cls = models.DocumentURI
    rows = (session.query(doc_uri_cls)
            .filter(doc_uri_cls.updated.between(window.start, window.end))
            .order_by(doc_uri_cls.updated.asc()))

    for row in rows:
        matching_docs = models.Document.find_by_uris(session, [row.uri])
        if matching_docs.count() > 1:
            models.merge_documents(session, matching_docs)

        # Criteria describing a *different* row that would be identical to
        # this one once its claimant and URI are normalized.
        duplicate_criteria = [
            doc_uri_cls.id != row.id,
            doc_uri_cls.document_id == row.document_id,
            doc_uri_cls.claimant_normalized == uri.normalize(row.claimant),
            doc_uri_cls.uri_normalized == uri.normalize(row.uri),
            doc_uri_cls.type == row.type,
            doc_uri_cls.content_type == row.content_type,
        ]
        duplicates = session.query(doc_uri_cls).filter(*duplicate_criteria)

        if duplicates.count() > 0:
            session.delete(row)
        else:
            row._claimant_normalized = uri.normalize(row.claimant)
            row._uri_normalized = uri.normalize(row.uri)

        session.flush()
Beispiel #2
0
def _normalize_document_uris_window(session, window):
    """
    Normalize (or drop) each DocumentURI updated within ``window``.

    Processing happens in ascending ``updated`` order and the session is
    flushed after each row. When several documents are found for a row's
    URI they are merged first. A row whose normalized form would duplicate
    another row (same document, normalized claimant, normalized URI, type
    and content type) is deleted; any other row gets its normalized
    claimant and URI attributes refreshed.

    :param session: database session used for all queries and writes
    :param window: object providing ``start`` and ``end`` bounds
    """
    updated_col = models.DocumentURI.updated
    candidates = session.query(models.DocumentURI)
    candidates = candidates.filter(
        updated_col.between(window.start, window.end))
    candidates = candidates.order_by(updated_col.asc())

    for current in candidates:
        found = models.Document.find_by_uris(session, [current.uri])
        if found.count() > 1:
            models.merge_documents(session, found)

        clashes = session.query(models.DocumentURI).filter(
            models.DocumentURI.id != current.id,
            models.DocumentURI.document_id == current.document_id,
            models.DocumentURI.claimant_normalized == uri.normalize(
                current.claimant),
            models.DocumentURI.uri_normalized == uri.normalize(current.uri),
            models.DocumentURI.type == current.type,
            models.DocumentURI.content_type == current.content_type)

        if clashes.count() == 0:
            # Nothing else occupies the normalized slot: update in place.
            current._claimant_normalized = uri.normalize(current.claimant)
            current._uri_normalized = uri.normalize(current.uri)
        else:
            # An equivalent row already exists, so this one is redundant.
            session.delete(current)

        session.flush()
Beispiel #3
0
def _normalize_document_meta_window(session, window):
    """
    Re-normalize the DocumentMeta rows whose ``updated`` lies in ``window``.

    Rows are visited in ascending ``updated`` order. A row is deleted when
    a different row already has the same type and the normalized form of
    this row's claimant; otherwise the row's normalized claimant attribute
    is rewritten. The session is flushed after every row.

    :param session: database session used for querying, deleting, flushing
    :param window: object providing ``start`` and ``end`` bounds
    """
    meta_cls = models.DocumentMeta
    rows = (session.query(meta_cls)
            .filter(meta_cls.updated.between(window.start, window.end))
            .order_by(meta_cls.updated.asc()))

    for row in rows:
        duplicate_criteria = [
            meta_cls.id != row.id,
            meta_cls.claimant_normalized == uri.normalize(row.claimant),
            meta_cls.type == row.type,
        ]
        duplicates = session.query(meta_cls).filter(*duplicate_criteria)

        if duplicates.count() > 0:
            session.delete(row)
        else:
            row._claimant_normalized = uri.normalize(row.claimant)

        session.flush()
Beispiel #4
0
def _normalize_document_meta_window(session, window):
    """
    Normalize (or drop) each DocumentMeta updated within ``window``.

    Processing happens in ascending ``updated`` order with a flush after
    each row. A row is removed if some other row shares its type and
    already stores the normalized version of its claimant; otherwise its
    normalized claimant attribute is refreshed.

    :param session: database session used for all queries and writes
    :param window: object providing ``start`` and ``end`` bounds
    """
    updated_col = models.DocumentMeta.updated
    candidates = session.query(models.DocumentMeta)
    candidates = candidates.filter(
        updated_col.between(window.start, window.end))
    candidates = candidates.order_by(updated_col.asc())

    for current in candidates:
        clashes = session.query(models.DocumentMeta).filter(
            models.DocumentMeta.id != current.id,
            models.DocumentMeta.claimant_normalized == uri.normalize(
                current.claimant),
            models.DocumentMeta.type == current.type)

        if clashes.count() == 0:
            # No other row holds this normalized claimant/type pair.
            current._claimant_normalized = uri.normalize(current.claimant)
        else:
            session.delete(current)

        session.flush()
Beispiel #5
0
def _normalize_annotations_window(session, window):
    """
    Refresh the normalized target URI of annotations updated in ``window``.

    Annotations are visited in ascending ``updated`` order. Whenever the
    freshly normalized ``target_uri`` differs from the stored
    ``target_uri_normalized``, the stored value is overwritten and the
    annotation id is recorded.

    :param session: database session used to query annotations
    :param window: object providing ``start`` and ``end`` bounds

    :returns: ids of the annotations whose normalized URI was changed
    :rtype: set
    """
    annotation_cls = models.Annotation
    rows = (session.query(annotation_cls)
            .filter(annotation_cls.updated.between(window.start, window.end))
            .order_by(annotation_cls.updated.asc()))

    changed_ids = set()
    for annotation in rows:
        fresh = uri.normalize(annotation.target_uri)
        if fresh != annotation.target_uri_normalized:
            annotation._target_uri_normalized = fresh
            changed_ids.add(annotation.id)

    return changed_ids
Beispiel #6
0
def _normalize_annotations_window(session, window):
    """
    Re-normalize target URIs for annotations updated within ``window``.

    Walks the matching annotations in ascending ``updated`` order and, for
    each one whose stored normalized target URI no longer equals the
    normalization of its ``target_uri``, stores the new value.

    :param session: database session used to query annotations
    :param window: object providing ``start`` and ``end`` bounds

    :returns: the ids of all annotations that were modified
    :rtype: set
    """
    updated_col = models.Annotation.updated
    candidates = session.query(models.Annotation)
    candidates = candidates.filter(
        updated_col.between(window.start, window.end))
    candidates = candidates.order_by(updated_col.asc())

    touched = set()
    for annotation in candidates:
        recomputed = uri.normalize(annotation.target_uri)
        if not (recomputed != annotation.target_uri_normalized):
            # Stored value is already up to date; leave the row alone.
            continue
        annotation._target_uri_normalized = recomputed
        touched.add(annotation.id)

    return touched
Beispiel #7
0
    def __call__(self, params):
        """
        Build a ``terms`` filter on ``target.scope`` from the ``uri`` params.

        Every value stored under the ``uri`` key is expanded with
        ``storage.expand_uri`` and each expanded URI is normalized; the
        distinct results become the terms. The ``uri`` key is removed from
        ``params`` as a side effect.

        :param params: mapping of search parameters; mutated in place
            (presumably multi-valued, since all ``uri`` pairs are collected
            -- confirm against the caller)

        :returns: the filter dict, or ``None`` when ``params`` has no ``uri``
        """
        if 'uri' not in params:
            return None

        requested = [value for key, value in params.items() if key == 'uri']
        del params['uri']

        scopes = set()
        for requested_uri in requested:
            equivalents = storage.expand_uri(self.request.db, requested_uri)
            scopes.update([uri.normalize(item) for item in equivalents])

        terms = {"target.scope": list(scopes)}
        return {"terms": terms}
Beispiel #8
0
    def list(self, user, group=None, uris=None):
        """
        Build the query for flags made by the given user.

        The ``models.Annotation`` join is added at most once, and only when
        a group or a non-empty ``uris`` filter is supplied.

        :param user: The user to filter flags on.
        :type user: h.models.User

        :param group: The annotation group pubid for filtering flags.
        :type group: unicode

        :param uris: A list of annotation uris for filtering flags. Each one
            is expanded via ``storage.expand_uri`` and normalized before
            being matched against the annotations' normalized target uri.
        :type uris: list of unicode

        :returns: the query selecting the matching ``h.models.Flag`` rows
            (this method does not materialise it into a Python list)
        """
        flags = self.session.query(models.Flag).filter_by(user=user)
        annotation_joined = False

        if group is not None:
            flags = flags.join(models.Annotation)
            flags = flags.filter(models.Annotation.groupid == group)
            annotation_joined = True

        if uris:
            normalized_uris = set()
            for given in uris:
                equivalents = storage.expand_uri(self.session, given)
                normalized_uris.update(
                    [uri.normalize(item) for item in equivalents])

            # Avoid joining the annotation table a second time.
            if not annotation_joined:
                flags = flags.join(models.Annotation)
                annotation_joined = True

            uri_matches = models.Annotation.target_uri_normalized.in_(
                normalized_uris)
            flags = flags.filter(uri_matches)

        return flags
Beispiel #9
0
def _fetch_annotations(session, uri_):
    """
    Load all annotations whose normalized target URI matches ``uri_``.

    :param session: database session to query with
    :param uri_: URI to look up; normalized before comparison

    :returns: the result of ``.all()`` on the filtered Annotation query
    """
    annotations = session.query(models.Annotation)
    same_target = (
        models.Annotation.target_uri_normalized == uri.normalize(uri_))
    return annotations.filter(same_target).all()
Beispiel #10
0
 def target_uri(self, value):
     """
     Store ``value`` as the target URI together with its normalized form.

     NOTE(review): presumably the setter half of a ``target_uri`` property;
     the decorator is not visible in this excerpt.

     :param value: the new target URI
     """
     self._target_uri = value
     normalized = uri.normalize(value)
     self._target_uri_normalized = normalized
Beispiel #11
0
def test_normalize(url_in, url_out):
    """``uri.normalize`` maps ``url_in`` to exactly ``url_out``."""
    # NOTE(review): arguments presumably come from a parametrize decorator
    # that is outside this excerpt.
    normalized = uri.normalize(url_in)
    assert normalized == url_out
Beispiel #12
0
def test_normalize_returns_unicode(url, _):
    """``uri.normalize`` returns a ``text_type`` instance for ``url``."""
    # The second argument is deliberately ignored by this test.
    normalized = uri.normalize(url)
    assert isinstance(normalized, text_type)
Beispiel #13
0
 def target_uri(self, value):
     """
     Record a new target URI and keep the normalized copy in sync.

     NOTE(review): looks like a property setter for ``target_uri``; the
     decorator is not part of this excerpt, so confirm.

     :param value: the target URI to store
     """
     self._target_uri = value
     normalized_value = uri.normalize(value)
     self._target_uri_normalized = normalized_value
Beispiel #14
0
def _fetch_document_uri_canonical_self_claim(session, uri_):
    """
    Load DocumentURI rows for ``uri_`` typed self-claim or rel-canonical.

    :param session: database session to query with
    :param uri_: URI to look up; normalized before comparison

    :returns: the result of ``.all()`` on the filtered DocumentURI query
    """
    document_uris = session.query(models.DocumentURI)
    same_uri = models.DocumentURI.uri_normalized == uri.normalize(uri_)
    wanted_type = models.DocumentURI.type.in_(
        [u'self-claim', u'rel-canonical'])
    return document_uris.filter(same_uri, wanted_type).all()
Beispiel #15
0
def test_normalize_returns_unicode(url, _):
    """The value returned by ``uri.normalize(url)`` is a ``text_type``."""
    # ``_`` is an unused placeholder argument.
    result = uri.normalize(url)
    assert isinstance(result, text_type)
Beispiel #16
0
def _fetch_annotations(session, uri_):
    """
    Return the annotations stored against the normalized form of ``uri_``.

    :param session: database session to query with
    :param uri_: URI whose normalized form is matched against
        ``target_uri_normalized``

    :returns: whatever ``.all()`` yields for the filtered Annotation query
    """
    query = session.query(models.Annotation)
    query = query.filter(
        models.Annotation.target_uri_normalized == uri.normalize(uri_))
    return query.all()
Beispiel #17
0
def _fetch_document_uri_canonical_self_claim(session, uri_):
    """
    Return self-claim / rel-canonical DocumentURI rows matching ``uri_``.

    :param session: database session to query with
    :param uri_: URI whose normalized form is matched against
        ``uri_normalized``

    :returns: whatever ``.all()`` yields for the filtered DocumentURI query
    """
    query = session.query(models.DocumentURI)
    criteria = [
        models.DocumentURI.uri_normalized == uri.normalize(uri_),
        models.DocumentURI.type.in_([u'self-claim', u'rel-canonical']),
    ]
    return query.filter(*criteria).all()
Beispiel #18
0
def _fetch_document_uri_claimants(session, uri_):
    """
    Load DocumentURI rows whose normalized claimant matches ``uri_``.

    :param session: database session to query with
    :param uri_: claimant URI to look up; normalized before comparison

    :returns: the result of ``.all()`` on the filtered DocumentURI query
    """
    document_uris = session.query(models.DocumentURI)
    same_claimant = (
        models.DocumentURI.claimant_normalized == uri.normalize(uri_))
    return document_uris.filter(same_claimant).all()
Beispiel #19
0
def _fetch_document_uri_claimants(session, uri_):
    """
    Return the DocumentURI rows claimed by the normalized form of ``uri_``.

    :param session: database session to query with
    :param uri_: URI whose normalized form is matched against
        ``claimant_normalized``

    :returns: whatever ``.all()`` yields for the filtered DocumentURI query
    """
    query = session.query(models.DocumentURI)
    query = query.filter(
        models.DocumentURI.claimant_normalized == uri.normalize(uri_))
    return query.all()
Beispiel #20
0
def test_normalize(url_in, url_out):
    """Normalizing ``url_in`` must produce ``url_out``."""
    # NOTE(review): inputs presumably supplied by a parametrize decorator
    # that is not visible in this excerpt.
    result = uri.normalize(url_in)
    assert result == url_out