Exemple #1
0
    def test_expand_uri_document_uris(self, db_session):
        """Expanding one URI of a stored document yields both of its URIs."""
        claimant = 'http://bar.com'
        uris = [
            DocumentURI(uri='http://foo.com/', claimant=claimant),
            DocumentURI(uri='http://bar.com/', claimant=claimant),
        ]
        document = Document(document_uris=uris)
        db_session.add(document)
        db_session.flush()

        expanded = storage.expand_uri(db_session, 'http://foo.com/')

        assert expanded == ['http://foo.com/', 'http://bar.com/']
Exemple #2
0
def test_expand_uri_postgres_document_uris(postgres_enabled):
    """With postgres enabled, expand_uri returns both URIs of the document."""
    request = DummyRequest(db=db.Session)
    postgres_enabled.return_value = True

    claimant = 'http://bar.com'
    uris = [
        DocumentURI(uri='http://foo.com/', claimant=claimant),
        DocumentURI(uri='http://bar.com/', claimant=claimant),
    ]
    document = Document(document_uris=uris)
    db.Session.add(document)
    db.Session.flush()

    expanded = storage.expand_uri(request, 'http://foo.com/')

    assert expanded == ['http://foo.com/', 'http://bar.com/']
Exemple #3
0
    def test_expand_uri_document_doesnt_expand_canonical_uris(
            self, db_session):
        """Expanding a URI stored as 'rel-canonical' returns only that URI."""
        claimant = 'http://example.com'
        canonical = DocumentURI(uri='http://example.com/',
                                type='rel-canonical',
                                claimant=claimant)
        uris = [
            DocumentURI(uri='http://foo.com/', claimant=claimant),
            DocumentURI(uri='http://bar.com/', claimant=claimant),
            canonical,
        ]
        document = Document(document_uris=uris)
        db_session.add(document)
        db_session.flush()

        expanded = storage.expand_uri(db_session, "http://example.com/")

        assert expanded == ["http://example.com/"]
Exemple #4
0
def test_document_not_found(annotation):
    """annotation.document is None when the only stored URI is unrelated."""
    unrelated = 'something-else'
    uris = [DocumentURI(claimant=unrelated, uri=unrelated)]
    document = Document(document_uris=uris)
    db.Session.add(document)
    db.Session.flush()

    found = annotation.document

    assert found is None
Exemple #5
0
def test_document(annotation):
    """annotation.document is the document holding the annotation's target URI."""
    target = annotation.target_uri
    uris = [DocumentURI(claimant=target, uri=target)]
    expected = Document(document_uris=uris)
    db.Session.add(expected)
    db.Session.flush()

    found = annotation.document

    assert found == expected
Exemple #6
0
def test_expand_uri_postgres_document_doesnt_expand_canonical_uris(
        postgres_enabled):
    """With postgres enabled, a 'rel-canonical' URI expands only to itself."""
    request = DummyRequest(db=db.Session)
    postgres_enabled.return_value = True

    claimant = 'http://example.com'
    canonical = DocumentURI(uri='http://example.com/',
                            type='rel-canonical',
                            claimant=claimant)
    uris = [
        DocumentURI(uri='http://foo.com/', claimant=claimant),
        DocumentURI(uri='http://bar.com/', claimant=claimant),
        canonical,
    ]
    document = Document(document_uris=uris)
    db.Session.add(document)
    db.Session.flush()

    expanded = storage.expand_uri(request, "http://example.com/")

    assert expanded == ["http://example.com/"]
Exemple #7
0
def test_document_find_by_uris_no_matches():
    """find_by_uris yields no rows when none of the given URIs is stored."""
    stored_uri = 'https://en.wikipedia.org/wiki/Main_Page'
    document = Document()
    docuri = DocumentURI(claimant=stored_uri, uri=stored_uri)
    document.document_uris.append(docuri)
    db.Session.add(document)
    db.Session.flush()

    lookup = ['https://de.wikipedia.org/wiki/Hauptseite']
    matches = Document.find_by_uris(db.Session, lookup)

    assert matches.count() == 0
Exemple #8
0
def test_document_find_by_uris():
    """find_by_uris returns only the document owning one of the given URIs."""
    german_uri = 'https://de.wikipedia.org/wiki/Hauptseite'
    german_doc = Document()
    german_doc.document_uris.append(
        DocumentURI(claimant=german_uri, uri=german_uri))

    english_uri = 'https://en.wikipedia.org/wiki/Main_Page'
    english_doc = Document()
    english_doc.document_uris.append(
        DocumentURI(claimant=english_uri, uri=english_uri))
    # Same URI again, but claimed by a different page.
    other_claimant = 'https://en.wikipedia.org'
    english_doc.document_uris.append(
        DocumentURI(claimant=other_claimant, uri=english_uri))

    db.Session.add_all([german_doc, english_doc])
    db.Session.flush()

    lookup = [
        'https://en.wikipedia.org/wiki/Main_Page',
        'https://m.en.wikipedia.org/wiki/Main_Page',
    ]
    matches = Document.find_by_uris(db.Session, lookup)

    assert matches.count() == 1
    assert matches.first() == english_doc
Exemple #9
0
def create_or_update_document_uri(es_docuri, pg_document):
    """Ensure a DocumentURI matching ``es_docuri`` exists and refresh it.

    Looks up a DocumentURI whose normalized claimant, normalized URI, type
    and content type all equal those of ``es_docuri``.

    * If none is found, a new DocumentURI is built from ``es_docuri``'s
      fields, attached to ``pg_document`` and added to ``Session``.
    * If one is found but it belongs to a different document than
      ``pg_document``, a warning is logged; the row is not re-attached.

    In every case the row's ``updated`` value is set from ``es_docuri``.

    :param es_docuri: source object providing ``claimant``, ``uri``,
        ``type``, ``content_type``, ``created``, ``updated`` and the two
        ``*_normalized`` attributes (presumably the Elasticsearch-side
        record -- confirm against the caller).
    :param pg_document: the document the DocumentURI is expected to
        belong to.
    """
    docuri = DocumentURI.query.filter(
            DocumentURI.claimant_normalized == es_docuri.claimant_normalized,
            DocumentURI.uri_normalized == es_docuri.uri_normalized,
            DocumentURI.type == es_docuri.type,
            DocumentURI.content_type == es_docuri.content_type).first()

    if docuri is None:
        docuri = DocumentURI(claimant=es_docuri.claimant,
                             uri=es_docuri.uri,
                             type=es_docuri.type,
                             content_type=es_docuri.content_type,
                             document=pg_document,
                             created=es_docuri.created,
                             updated=es_docuri.updated)
        Session.add(docuri)
    elif not docuri.document == pg_document:
        # Logging merges positional args into the message with %-formatting,
        # so the placeholders must be %-style; str.format-style '{:d}' fields
        # would never be filled in (assumes `log` is a stdlib logger).
        log.warn(
            'Found DocumentURI with id %d does not match expected document '
            'with id %d',
            docuri.id, pg_document.id)

    docuri.updated = es_docuri.updated
Exemple #10
0
def test_document_find_or_create_by_uris():
    """find_or_create_by_uris returns the existing document for known URIs."""
    main_page = 'https://en.wikipedia.org/wiki/Main_Page'
    archived = 'https://en.wikipedia.org/wiki/http/en.m.wikipedia.org/wiki/Main_Page'

    document = Document()
    self_claimed = DocumentURI(claimant=main_page,
                               uri=main_page,
                               document=document)
    claimed_by_archive = DocumentURI(claimant=archived,
                                     uri=main_page,
                                     document=document)

    db.Session.add(self_claimed)
    db.Session.add(claimed_by_archive)
    db.Session.flush()

    other_uris = [
        'https://en.wikipedia.org/wiki/http/en.m.wikipedia.org/wiki/Main_Page',
        'https://m.en.wikipedia.org/wiki/Main_Page',
    ]
    matches = Document.find_or_create_by_uris(
        db.Session, 'https://en.wikipedia.org/wiki/Main_Page', other_uris)

    assert matches.count() == 1
    assert matches.first() == document
Exemple #11
0
def merge_data(request):
    """Store two documents sharing one URI and return ``(master, duplicate)``.

    ``master`` holds the Main_Page URI as a 'self-claim'; ``duplicate`` holds
    the same URI as 'rel-canonical', claimed by the mobile page. Each also
    carries a 'title' DocumentMeta from its own claimant. Both are added to
    ``db.Session`` and flushed before being returned. ``request`` is unused.
    """
    desktop_page = 'https://en.wikipedia.org/wiki/Main_Page'
    mobile_page = 'https://m.en.wikipedia.org/wiki/Main_Page'
    title = 'Wikipedia, the free encyclopedia'

    master_uris = [DocumentURI(claimant=desktop_page,
                               uri=desktop_page,
                               type='self-claim')]
    master_meta = [DocumentMeta(claimant=desktop_page,
                                type='title',
                                value=title)]
    master = Document(document_uris=master_uris, meta=master_meta)

    duplicate_uris = [DocumentURI(claimant=mobile_page,
                                  uri=desktop_page,
                                  type='rel-canonical')]
    duplicate_meta = [DocumentMeta(claimant=mobile_page,
                                   type='title',
                                   value=title)]
    duplicate = Document(document_uris=duplicate_uris, meta=duplicate_meta)

    db.Session.add_all([master, duplicate])
    db.Session.flush()
    return master, duplicate
Exemple #12
0
def create_or_update_document_uri(es_docuri, pg_document):
    """Find or create the DocumentURI matching ``es_docuri`` and touch it.

    A row matches when its normalized claimant, normalized URI, type and
    content type equal those of ``es_docuri``. A missing row is created from
    ``es_docuri`` and attached to ``pg_document``; an existing row attached
    to some other document only triggers a warning. Either way the row's
    ``updated`` value is then set from ``es_docuri``.
    """
    lookup = DocumentURI.query
    criteria = (
        DocumentURI.claimant_normalized == es_docuri.claimant_normalized,
        DocumentURI.uri_normalized == es_docuri.uri_normalized,
        DocumentURI.type == es_docuri.type,
        DocumentURI.content_type == es_docuri.content_type,
    )
    existing = lookup.filter(*criteria).first()

    if existing is not None:
        row = existing
        if not row.document == pg_document:
            log.warn(
                'Found DocumentURI with id %d does not match expected document with id %d',
                row.id, pg_document.id)
    else:
        row = DocumentURI(claimant=es_docuri.claimant,
                          uri=es_docuri.uri,
                          type=es_docuri.type,
                          content_type=es_docuri.content_type,
                          document=pg_document,
                          created=es_docuri.created,
                          updated=es_docuri.updated)
        Session.add(row)

    row.updated = es_docuri.updated
Exemple #13
0
def test_document_find_or_create_by_uris_no_results():
    """find_or_create_by_uris creates a document when no stored URI matches.

    The result is expected to hold exactly one Document with a single
    'self-claim' DocumentURI for the claimant URI passed in.
    """
    main_page = 'https://en.wikipedia.org/wiki/Main_Page'
    existing_doc = Document()
    existing_uri = DocumentURI(claimant=main_page,
                               uri=main_page,
                               document=existing_doc)

    db.Session.add(existing_uri)
    db.Session.flush()

    documents = Document.find_or_create_by_uris(
        db.Session,
        'https://en.wikipedia.org/wiki/Pluto',
        ['https://m.en.wikipedia.org/wiki/Pluto'])

    assert documents.count() == 1

    created = documents.first()
    assert isinstance(created, Document)
    assert len(created.document_uris) == 1

    created_uri = created.document_uris[0]
    assert created_uri.claimant == 'https://en.wikipedia.org/wiki/Pluto'
    assert created_uri.uri == 'https://en.wikipedia.org/wiki/Pluto'
    assert created_uri.type == 'self-claim'