Example #1
0
    def merge_data(self, request):
        """Build, add and flush two Documents that share one URI.

        Both documents carry a DocumentURI whose ``uri`` is the desktop
        Wikipedia main page and a ``title`` DocumentMeta with the same
        value; they differ in claimant (desktop vs. mobile URL) and in the
        DocumentURI type (``self-claim`` vs. ``rel-canonical``).

        :param request: not used in the body (presumably the pytest
            ``request`` fixture -- the decorator is not visible here).
        :returns: a ``(master, duplicate)`` tuple of the flushed Documents.
        """
        desktop_url = 'https://en.wikipedia.org/wiki/Main_Page'
        mobile_url = 'https://m.en.wikipedia.org/wiki/Main_Page'
        page_title = 'Wikipedia, the free encyclopedia'

        # The master document: the desktop page claiming its own URI.
        master_uri = document.DocumentURI(
            claimant=desktop_url, uri=desktop_url, type='self-claim')
        master_title = document.DocumentMeta(
            claimant=desktop_url, type='title', value=page_title)
        master = document.Document(
            document_uris=[master_uri], meta=[master_title])

        # The duplicate: the mobile page pointing at the same desktop URI.
        duplicate_uri = document.DocumentURI(
            claimant=mobile_url, uri=desktop_url, type='rel-canonical')
        duplicate_title = document.DocumentMeta(
            claimant=mobile_url, type='title', value=page_title)
        duplicate = document.Document(
            document_uris=[duplicate_uri], meta=[duplicate_title])

        session = db.Session
        session.add_all([master, duplicate])
        session.flush()
        return (master, duplicate)
Example #2
0
    def test_it_returns_the_value_of_the_first_title_DocumentMeta(self):
        """With two title DocumentMetas, ``title`` is the first one's value.

        Each meta stores its title as a one-element list; the expected
        ``title`` is the string from the meta that was attached first.
        """
        subject = document.Document()

        # Order matters: the US title is attached before the UK one.
        titles_and_claimants = (
            ('The US Title', 'http://example.com'),
            ('The UK Title', 'http://example.co.uk'),
        )
        for title_text, claimant_url in titles_and_claimants:
            document.DocumentMeta(type='title',
                                  value=[title_text],
                                  document=subject,
                                  claimant=claimant_url)

        session = db.Session
        session.add(subject)
        session.flush()

        assert subject.title == 'The US Title'
Example #3
0
    def test_it_updates_an_existing_DocumentMeta_if_there_is_one(self):
        """An existing DocumentMeta with the same claimant/type is updated.

        After calling ``create_or_update_document_meta`` with a matching
        claimant and type, the existing row should have the new ``value``
        and ``updated``, keep its original ``created`` and ``document``,
        and remain the only DocumentMeta in the session's database.
        """
        claimant = 'http://example.com/claimant'
        type_ = 'title'
        original_value = 'the title'
        original_document = document.Document()
        original_created = yesterday()
        original_updated = now()

        existing_meta = document.DocumentMeta(
            claimant=claimant,
            type=type_,
            value=original_value,
            document=original_document,
            created=original_created,
            updated=original_updated,
        )
        db.Session.add(existing_meta)

        new_updated = now()
        # Passed in on purpose: the call is expected to ignore both of these
        # because a matching DocumentMeta already exists.
        ignored_document = document.Document()
        ignored_created = now()
        document.create_or_update_document_meta(
            session=db.Session,
            claimant=claimant,
            type=type_,
            value='new value',
            document=ignored_document,
            created=ignored_created,
            updated=new_updated,
        )

        assert existing_meta.value == 'new value'
        assert existing_meta.updated == new_updated
        assert existing_meta.created == original_created, (
            "It shouldn't update created")
        assert existing_meta.document == original_document, (
            "It shouldn't update document")

        all_metas = db.Session.query(document.DocumentMeta).all()
        assert len(all_metas) == 1, (
            "It shouldn't have added any new objects to the db")
Example #4
0
    def test_it_returns_None_if_there_are_no_title_DocumentMetas(self):
        """``title`` is None when the only DocumentMeta is not a title."""
        subject = document.Document()

        # A single meta whose type is 'other', so there is no title meta.
        non_title_fields = dict(
            type='other',
            value='something',
            document=subject,
            claimant='http://example.com',
        )
        document.DocumentMeta(**non_title_fields)

        session = db.Session
        session.add(subject)
        session.flush()

        assert subject.title is None
Example #5
0
    def test_it_returns_the_value_of_the_one_title_DocumentMeta(self):
        """With a single title DocumentMeta, ``title`` is that meta's value.

        The meta stores its title as a one-element list; ``title`` is
        expected to be the string inside it.
        """
        subject = document.Document()

        title_fields = dict(
            type='title',
            value=['The Title'],
            document=subject,
            claimant='http://example.com',
        )
        document.DocumentMeta(**title_fields)

        session = db.Session
        session.add(subject)
        session.flush()

        assert subject.title == 'The Title'
Example #6
0
    def test_it_creates_a_new_DocumentMeta_if_there_is_no_existing_one(self):
        """A new DocumentMeta is created when none matches claimant/type.

        A decoy DocumentMeta with the same claimant but a different type is
        added first; ``create_or_update_document_meta`` must leave it alone
        and create a separate DocumentMeta carrying exactly the values that
        were passed in.
        """
        claimant = 'http://example.com/claimant'
        type_ = 'title'
        value = 'the title'
        document_ = document.Document()
        created = yesterday()
        updated = now()

        # Add one non-matching DocumentMeta to the database.
        # This should be ignored.
        db.Session.add(
            document.DocumentMeta(
                claimant=claimant,
                # Different type means this should not match the query.
                type='different',
                value=value,
                document=document_,
                created=created,
                updated=updated,
            ))

        document.create_or_update_document_meta(
            session=db.Session,
            claimant=claimant,
            type=type_,
            value=value,
            document=document_,
            created=created,
            updated=updated,
        )

        # Pick the new row by its type rather than by position: the query
        # has no ORDER BY, so the last row returned is not guaranteed to be
        # the one that was just created (it could be the decoy above).
        created_metas = [
            meta
            for meta in db.Session.query(document.DocumentMeta).all()
            if meta.type == type_
        ]
        assert len(created_metas) == 1, (
            "It should have created exactly one DocumentMeta of the "
            "requested type")
        document_meta = created_metas[0]

        assert document_meta.claimant == claimant
        assert document_meta.type == type_
        assert document_meta.value == value
        assert document_meta.document == document_
        assert document_meta.created == created
        assert document_meta.updated == updated