Ejemplo n.º 1
0
def test_it_merges_documents_when_duplicates_found(req):
    """Two stored documents should collapse into one after normalization.

    Each document is created with a single DocumentURI; the two ``uri``
    values differ only in scheme (http vs. https).  Once
    ``normalize_document_uris`` has run, exactly one Document row must
    remain in the database.
    """
    # NOTE(review): presumably both URIs normalize to the same value, which
    # is what makes the two documents duplicates -- confirm against the
    # normalizer.
    http_uri = models.DocumentURI(
        claimant='http://example.org/',
        uri='http://example.org/',
        type='self-claim',
    )
    https_uri = models.DocumentURI(
        claimant='https://example.net/',
        uri='https://example.org/',
        type='rel-canonical',
    )
    documents = [
        models.Document(document_uris=[http_uri]),
        models.Document(document_uris=[https_uri]),
    ]

    req.db.add_all(documents)
    req.db.flush()

    normalize_uris.normalize_document_uris(req)

    remaining_documents = req.db.query(models.Document).count()
    assert remaining_documents == 1
Ejemplo n.º 2
0
def test_it_deletes_duplicate_document_meta_objects(req):
    """Only one DocumentMeta row should survive ``normalize_document_meta``.

    Two documents each get one 'title' meta object with the same value;
    the claimants differ only in scheme (http vs. https).  After the
    normalization run the DocumentMeta table must contain a single row.
    """
    http_meta = models.DocumentMeta(
        _claimant='http://example.org/',
        type='title',
        value=['Test Title'],
    )
    # The normalized claimant is assigned after construction, mirroring the
    # stored (pre-migration) state of the row.
    http_meta._claimant_normalized = 'http://example.org'

    https_meta = models.DocumentMeta(
        _claimant='https://example.org/',
        type='title',
        value=['Test Title'],
    )
    https_meta._claimant_normalized = 'https://example.org'

    documents = [
        models.Document(meta=[http_meta]),
        models.Document(meta=[https_meta]),
    ]
    req.db.add_all(documents)
    req.db.flush()

    normalize_uris.normalize_document_meta(req)

    remaining_meta = req.db.query(models.DocumentMeta).count()
    assert remaining_meta == 1
Ejemplo n.º 3
0
def _annotation(**kwargs):
    """Build a ``models.Annotation`` pre-filled with default field values.

    Any keyword argument replaces the default of the same name; keyword
    arguments without a default are forwarded to the constructor as-is.
    """
    # NOTE(review): the userid literal looks like it was altered by e-mail
    # obfuscation -- confirm the intended account id.
    defaults = {
        'userid': 'acct:[email protected]',
        'created': datetime.utcnow(),
        'updated': datetime.utcnow(),
        'target_selectors': [],
        'document': models.Document(),
    }
    # Caller-supplied values take precedence over the defaults.
    fields = dict(defaults, **kwargs)
    return models.Annotation(**fields)
Ejemplo n.º 4
0
class TestDocumentSearchIndexPresenter(object):
    """Checks for ``DocumentSearchIndexPresenter.asdict``."""

    @pytest.mark.parametrize('document,expected', [
        # A non-empty title is rendered as a one-element list.
        (
            models.Document(title='Foo'),
            {'title': ['Foo']},
        ),
        # Empty or missing titles are left out entirely.
        (
            models.Document(title=''),
            {},
        ),
        (
            models.Document(title=None),
            {},
        ),
        # A non-empty web_uri is rendered as a plain string.
        (
            models.Document(web_uri='http://foo.org'),
            {'web_uri': 'http://foo.org'},
        ),
        # Empty or missing web_uris are left out entirely.
        (
            models.Document(web_uri=''),
            {},
        ),
        (
            models.Document(web_uri=None),
            {},
        ),
        # Both fields present.
        (
            models.Document(title='Foo', web_uri='http://foo.org'),
            {'title': ['Foo'], 'web_uri': 'http://foo.org'},
        ),
        # No document at all yields an empty dict.
        (
            None,
            {},
        )
    ])
    def test_asdict(self, document, expected):
        """``asdict()`` must equal the expected dict for every case."""
        presenter = DocumentSearchIndexPresenter(document)
        actual = presenter.asdict()
        assert expected == actual
Ejemplo n.º 5
0
    def test_asdict(self, db_session):
        """``asdict()`` of a persisted document contains only its title.

        The document is stored together with two DocumentURI rows; the
        expected output has no entry for them.
        """
        uris = [
            models.DocumentURI(
                uri='http://foo.com',
                claimant='http://foo.com',
            ),
            models.DocumentURI(
                uri='http://foo.org',
                claimant='http://foo.com',
                type='rel-canonical',
            ),
        ]
        doc = models.Document(title='Foo', document_uris=uris)

        # Persist the document before presenting it.
        db_session.add(doc)
        db_session.flush()

        rendered = DocumentJSONPresenter(doc).asdict()
        assert {'title': ['Foo']} == rendered
Ejemplo n.º 6
0
    def test_asdict_does_not_render_other_meta_than_title(self, db_session):
        """Only the 'title' meta entry may show up in ``asdict()``.

        The document carries three meta objects ('title', 'twitter.url'
        and 'facebook.title'); the expected output holds the title alone.
        """
        title_meta = models.DocumentMeta(
            type='title',
            value=['Foo'],
            claimant='http://foo.com',
        )
        twitter_meta = models.DocumentMeta(
            type='twitter.url',
            value=['http://foo.com'],
            claimant='http://foo.com',
        )
        facebook_meta = models.DocumentMeta(
            type='facebook.title',
            value=['FB Title'],
            claimant='http://foo.com',
        )
        doc = models.Document(meta=[title_meta, twitter_meta, facebook_meta])

        # Persist the document before presenting it.
        db_session.add(doc)
        db_session.flush()

        rendered = DocumentJSONPresenter(doc).asdict()
        assert {'title': ['Foo']} == rendered
Ejemplo n.º 7
0
def test_it_normalizes_document_meta_claimant(req):
    """``normalize_document_meta`` must rewrite ``claimant_normalized``.

    Two 'title' meta objects with different claimants are attached to one
    document.  After the run, each ``claimant_normalized`` is expected to
    carry the ``httpx`` scheme and no trailing slash.
    """
    org_meta = models.DocumentMeta(
        _claimant='http://example.org/',
        _claimant_normalized='http://example.org',
        type='title',
        value=['Test Title'],
    )
    net_meta = models.DocumentMeta(
        _claimant='http://example.net/',
        _claimant_normalized='http://example.net',
        type='title',
        value=['Test Title'],
    )

    document = models.Document(meta=[org_meta, net_meta])
    req.db.add(document)
    req.db.flush()

    normalize_uris.normalize_document_meta(req)

    assert org_meta.claimant_normalized == 'httpx://example.org'
    assert net_meta.claimant_normalized == 'httpx://example.net'
Ejemplo n.º 8
0
def test_it_deletes_duplicate_document_uri_objects(req):
    """Only one DocumentURI row should survive ``normalize_document_uris``.

    A single document is stored with two 'self-claim' DocumentURI objects
    whose claimant and uri differ only in scheme (http vs. https).  After
    the normalization run the DocumentURI table must contain one row.
    """
    http_uri = models.DocumentURI(
        _claimant='http://example.org/',
        _claimant_normalized='http://example.org',
        _uri='http://example.org/',
        _uri_normalized='http://example.org',
        type='self-claim',
    )
    https_uri = models.DocumentURI(
        _claimant='https://example.org/',
        _claimant_normalized='https://example.org',
        _uri='https://example.org/',
        _uri_normalized='https://example.org',
        type='self-claim',
    )

    document = models.Document(document_uris=[http_uri, https_uri])
    req.db.add(document)
    req.db.flush()

    normalize_uris.normalize_document_uris(req)

    remaining_uris = req.db.query(models.DocumentURI).count()
    assert remaining_uris == 1
Ejemplo n.º 9
0
def test_it_normalizes_document_uris_claimant(req):
    """``normalize_document_uris`` must rewrite ``claimant_normalized``.

    One document is stored with a 'self-claim' and a 'rel-canonical'
    DocumentURI that share the same claimant.  After the run, both
    ``claimant_normalized`` values are expected to be
    ``httpx://example.org``.
    """
    self_claim = models.DocumentURI(
        _claimant='http://example.org/',
        _claimant_normalized='http://example.org',
        _uri='http://example.org/',
        _uri_normalized='http://example.org',
        type='self-claim',
    )
    canonical = models.DocumentURI(
        _claimant='http://example.org/',
        _claimant_normalized='http://example.org',
        _uri='https://example.org/',
        _uri_normalized='https://example.org',
        type='rel-canonical',
    )

    document = models.Document(document_uris=[self_claim, canonical])
    req.db.add(document)
    req.db.flush()

    normalize_uris.normalize_document_uris(req)

    assert self_claim.claimant_normalized == 'httpx://example.org'
    assert canonical.claimant_normalized == 'httpx://example.org'