def test_it_merges_documents_when_duplicates_found(req):
    """Two documents should be merged into one by ``normalize_document_uris``.

    Each document starts with a single DocumentURI. After the normalization
    pass only one ``Document`` row is expected to remain.

    NOTE(review): the two URIs differ only by scheme, so this presumably
    relies on the URI normalizer used under the ``req`` fixture mapping both
    to the same value -- confirm against the fixture setup.
    """
    http_self_claim = models.DocumentURI(
        claimant='http://example.org/',
        uri='http://example.org/',
        type='self-claim',
    )
    https_canonical = models.DocumentURI(
        claimant='https://example.net/',
        uri='https://example.org/',
        type='rel-canonical',
    )

    # One document per URI, so the two URIs start out on separate documents.
    first_document = models.Document(document_uris=[http_self_claim])
    second_document = models.Document(document_uris=[https_canonical])
    req.db.add_all([first_document, second_document])
    req.db.flush()

    normalize_uris.normalize_document_uris(req)

    remaining_documents = req.db.query(models.Document).count()
    assert remaining_documents == 1
def test_it_deletes_duplicate_document_uri_objects(req):
    """Only one DocumentURI row should survive ``normalize_document_uris``.

    A single document is given two ``self-claim`` DocumentURIs whose stored
    values differ only by scheme (http vs. https). After the normalization
    pass exactly one ``DocumentURI`` row is expected.
    """

    def make_self_claim(raw, normalized):
        # Claimant and URI share the same raw/normalized values in this test.
        return models.DocumentURI(
            _claimant=raw,
            _claimant_normalized=normalized,
            _uri=raw,
            _uri_normalized=normalized,
            type='self-claim',
        )

    http_uri = make_self_claim('http://example.org/', 'http://example.org')
    https_uri = make_self_claim('https://example.org/', 'https://example.org')

    document = models.Document(document_uris=[http_uri, https_uri])
    req.db.add(document)
    req.db.flush()

    normalize_uris.normalize_document_uris(req)

    remaining_uris = req.db.query(models.DocumentURI).count()
    assert remaining_uris == 1
def test_it_normalizes_document_uris_claimant(req):
    """``claimant_normalized`` should be rewritten on every DocumentURI.

    Both DocumentURIs are stored with a pre-set ``_claimant_normalized``;
    after ``normalize_document_uris`` runs, each is expected to report
    ``'httpx://example.org'``.

    NOTE(review): the ``httpx`` scheme presumably comes from a stubbed
    normalizer supplied by the ``req`` fixture -- confirm against the fixture.
    """
    # Both URIs share the same claimant values; only uri and type differ.
    claimant_fields = {
        '_claimant': 'http://example.org/',
        '_claimant_normalized': 'http://example.org',
    }
    self_claim = models.DocumentURI(
        **claimant_fields,
        _uri='http://example.org/',
        _uri_normalized='http://example.org',
        type='self-claim',
    )
    canonical = models.DocumentURI(
        **claimant_fields,
        _uri='https://example.org/',
        _uri_normalized='https://example.org',
        type='rel-canonical',
    )

    document = models.Document(document_uris=[self_claim, canonical])
    req.db.add(document)
    req.db.flush()

    normalize_uris.normalize_document_uris(req)

    for docuri in (self_claim, canonical):
        assert docuri.claimant_normalized == 'httpx://example.org'
# Example #4
# 0
def test_it_normalizes_document_uris_claimant(req):
    """Check that the normalization pass updates each URI's normalized claimant.

    One document holds two DocumentURIs with identical claimant values. Once
    ``normalize_document_uris`` has run, ``claimant_normalized`` on both is
    expected to equal ``"httpx://example.org"``.

    NOTE(review): the ``httpx`` value presumably originates from a patched
    normalizer in the ``req`` fixture -- verify against the fixture.
    """
    # (uri, uri_normalized, type) for each DocumentURI, in creation order.
    uri_specs = [
        ("http://example.org/", "http://example.org", "self-claim"),
        ("https://example.org/", "https://example.org", "rel-canonical"),
    ]
    document_uris = [
        models.DocumentURI(
            _claimant="http://example.org/",
            _claimant_normalized="http://example.org",
            _uri=raw_uri,
            _uri_normalized=normalized_uri,
            type=uri_type,
        )
        for raw_uri, normalized_uri, uri_type in uri_specs
    ]

    # Hand the model a fresh list so the local one stays a plain list.
    req.db.add(models.Document(document_uris=list(document_uris)))
    req.db.flush()

    normalize_uris.normalize_document_uris(req)

    for document_uri in document_uris:
        assert document_uri.claimant_normalized == "httpx://example.org"
# Example #5
# 0
def test_it_normalizes_document_uris_claimant(req):
    """Verify ``claimant_normalized`` after running ``normalize_document_uris``.

    Two DocumentURIs (a ``self-claim`` and a ``rel-canonical``) are attached
    to one document with the same stored claimant. After the normalization
    pass both are expected to expose ``"httpx://example.org"`` as their
    normalized claimant.

    NOTE(review): the expected ``httpx`` scheme suggests the ``req`` fixture
    swaps in a fake normalizer -- confirm against the fixture.
    """
    stored_claimant = "http://example.org/"
    stored_claimant_normalized = "http://example.org"
    expected_claimant_normalized = "httpx://example.org"

    self_claim_uri = models.DocumentURI(
        _claimant=stored_claimant,
        _claimant_normalized=stored_claimant_normalized,
        _uri="http://example.org/",
        _uri_normalized="http://example.org",
        type="self-claim",
    )
    canonical_uri = models.DocumentURI(
        _claimant=stored_claimant,
        _claimant_normalized=stored_claimant_normalized,
        _uri="https://example.org/",
        _uri_normalized="https://example.org",
        type="rel-canonical",
    )

    owning_document = models.Document(
        document_uris=[self_claim_uri, canonical_uri]
    )
    req.db.add(owning_document)
    req.db.flush()

    normalize_uris.normalize_document_uris(req)

    observed = (
        self_claim_uri.claimant_normalized,
        canonical_uri.claimant_normalized,
    )
    assert observed == (
        expected_claimant_normalized,
        expected_claimant_normalized,
    )