Esempio n. 1
0
    def merge_data(self, db_session, request):
        """Persist a master document and one duplicate of it.

        Both documents carry a DocumentURI for the same ``uri`` (the
        Wikipedia main page) and a ``title`` DocumentMeta with the same
        value; they differ in claimant and in the DocumentURI type.

        ``request`` is accepted but not used in the body.

        Returns a ``(master, duplicate)`` tuple after both objects have
        been added to ``db_session`` and flushed.
        """
        main_page = 'https://en.wikipedia.org/wiki/Main_Page'
        mobile_page = 'https://m.en.wikipedia.org/wiki/Main_Page'
        page_title = 'Wikipedia, the free encyclopedia'

        # The master claims its own URL.
        master_uri = document.DocumentURI(
            claimant=main_page, uri=main_page, type='self-claim')
        master_title = document.DocumentMeta(
            claimant=main_page, type='title', value=page_title)
        master = document.Document(
            document_uris=[master_uri], meta=[master_title])

        # The duplicate is claimed by the mobile page, but points at the
        # same uri as the master through a rel-canonical claim.
        duplicate_uri = document.DocumentURI(
            claimant=mobile_page, uri=main_page, type='rel-canonical')
        duplicate_title = document.DocumentMeta(
            claimant=mobile_page, type='title', value=page_title)
        duplicate = document.Document(
            document_uris=[duplicate_uri], meta=[duplicate_title])

        db_session.add_all([master, duplicate])
        db_session.flush()
        return master, duplicate
Esempio n. 2
0
    def merge_data(self, db_session, request):
        """Persist a master document, two duplicates and six annotations.

        All three documents carry a DocumentURI for the same ``uri`` (the
        Wikipedia main page) and a ``title`` DocumentMeta with the same
        value; they differ in claimant and DocumentURI type. The documents
        are flushed so their ids can be used, then two annotations per
        document are added to the session (they are not flushed here).

        ``request`` is accepted but not used in the body.

        Returns a ``(master, duplicate_1, duplicate_2)`` tuple.
        """
        main_page = 'https://en.wikipedia.org/wiki/Main_Page'

        def build_document(claimant, uri_type):
            # One DocumentURI + one title DocumentMeta, both by `claimant`.
            claimed_uri = document.DocumentURI(
                claimant=claimant, uri=main_page, type=uri_type)
            title = document.DocumentMeta(
                claimant=claimant,
                type='title',
                value='Wikipedia, the free encyclopedia')
            return document.Document(document_uris=[claimed_uri], meta=[title])

        master = build_document(main_page, 'self-claim')
        duplicate_1 = build_document(
            'https://m.en.wikipedia.org/wiki/Main_Page', 'rel-canonical')
        duplicate_2 = build_document(
            'https://en.wikipedia.org/wiki/Home', 'rel-canonical')

        db_session.add_all([master, duplicate_1, duplicate_2])
        # Flush before reading the document ids used by the annotations.
        db_session.flush()

        annotation_owners = [
            ('luke', master),
            ('alice', master),
            ('lucy', duplicate_1),
            ('bob', duplicate_1),
            ('amy', duplicate_2),
            ('dan', duplicate_2),
        ]
        db_session.add_all([
            models.Annotation(userid=userid, document_id=owner.id)
            for userid, owner in annotation_owners
        ])
        return master, duplicate_1, duplicate_2
Esempio n. 3
0
    def test_it_returns_the_value_of_the_first_title_DocumentMeta(
            self, db_session):
        """With two title DocumentMetas, ``title`` is the first one's value."""
        subject = document.Document()

        # Creation order matters: the US title is attached first.
        titles_by_claimant = (
            ('http://example.com', 'The US Title'),
            ('http://example.co.uk', 'The UK Title'),
        )
        for claimant, title in titles_by_claimant:
            document.DocumentMeta(claimant=claimant,
                                  document=subject,
                                  type='title',
                                  value=[title])

        db_session.add(subject)
        db_session.flush()

        assert subject.title == 'The US Title'
Esempio n. 4
0
    def test_it_updates_an_existing_DocumentMeta_if_there_is_one(self, db_session):
        """An existing DocumentMeta with the same claimant and type is updated.

        The call passes a new value, a new ``updated`` time, a different
        document and a different ``created`` time; only the value and
        ``updated`` are expected to change, and no extra row may appear.
        """
        # The claimant/type pair shared by the stored row and the call.
        identity = {'claimant': 'http://example.com/claimant', 'type': 'title'}
        original_document = document.Document()
        original_created = yesterday()
        original_updated = now()
        existing_meta = document.DocumentMeta(
            value='the title',
            document=original_document,
            created=original_created,
            updated=original_updated,
            **identity
        )
        db_session.add(existing_meta)

        later_updated = now()
        document.create_or_update_document_meta(
            session=db_session,
            value='new value',
            document=document.Document(),  # This should be ignored.
            created=now(),  # This should be ignored.
            updated=later_updated,
            **identity
        )

        assert existing_meta.value == 'new value'
        assert existing_meta.updated == later_updated
        assert existing_meta.created == original_created, (
            "It shouldn't update created")
        assert existing_meta.document == original_document, (
            "It shouldn't update document")

        stored_metas = db_session.query(document.DocumentMeta).all()
        assert len(stored_metas) == 1, (
            "It shouldn't have added any new objects to the db")
Esempio n. 5
0
    def test_it_returns_None_if_there_are_no_title_DocumentMetas(
            self, db_session):
        """A document whose only DocumentMeta is not a title has no title."""
        subject = document.Document()
        # Attach a single non-title DocumentMeta to the document.
        document.DocumentMeta(claimant='http://example.com',
                              document=subject,
                              type='other',
                              value='something')

        db_session.add(subject)
        db_session.flush()

        assert subject.title is None
Esempio n. 6
0
    def test_it_returns_the_value_of_the_one_title_DocumentMeta(
            self, db_session):
        """With a single title DocumentMeta, ``title`` is that meta's value."""
        subject = document.Document()
        # The value is given as a one-element list; title yields the string.
        document.DocumentMeta(claimant='http://example.com',
                              document=subject,
                              type='title',
                              value=['The Title'])

        db_session.add(subject)
        db_session.flush()

        assert subject.title == 'The Title'
Esempio n. 7
0
    def test_it_creates_a_new_DocumentMeta_if_there_is_no_existing_one(
            self, db_session):
        """A new DocumentMeta is created when no stored one matches.

        A DocumentMeta with the same claimant but a different type is
        stored first; it must not be treated as a match, so the call is
        expected to add a second row carrying exactly the given values.
        """
        claimant = 'http://example.com/claimant'
        type_ = 'title'
        value = 'the title'
        document_ = document.Document()
        created = yesterday()
        updated = now()

        # Add one non-matching DocumentMeta to the database.
        # This should be ignored.
        db_session.add(
            document.DocumentMeta(
                claimant=claimant,
                # Different type means this should not match the query.
                type='different',
                value=value,
                document=document_,
                created=created,
                updated=updated,
            ))

        document.create_or_update_document_meta(
            session=db_session,
            claimant=claimant,
            type=type_,
            value=value,
            document=document_,
            created=created,
            updated=updated,
        )

        # Select the new row by its type instead of taking the last element
        # of an unordered query: without ORDER BY the row order is not
        # guaranteed, so ``.all()[-1]`` could return the 'different' row.
        # ``one()`` also fails if more than one matching row was created.
        document_meta = (
            db_session.query(document.DocumentMeta)
            .filter_by(type=type_)
            .one()
        )
        assert document_meta.claimant == claimant
        assert document_meta.type == type_
        assert document_meta.value == value
        assert document_meta.document == document_
        assert document_meta.created == created
        assert document_meta.updated == updated