Esempio n. 1
0
    def merge_data(self, db_session, request):
        """Populate the session with a master document and two duplicates.

        Each of the three documents carries one DocumentURI for the same
        Wikipedia URI (with its own claimant and type) and one title
        DocumentMeta made by that same claimant. After the documents are
        flushed, two annotations are added for each document, referencing it
        by ``document_id``.

        :param db_session: session the documents and annotations are added to
        :param request: not used by this method
        :returns: the tuple ``(master, duplicate_1, duplicate_2)``
        """
        shared_uri = 'https://en.wikipedia.org/wiki/Main_Page'

        def build_document(claimant, uri_type):
            # One URI claim and one title claim, both made by `claimant`.
            return document.Document(
                document_uris=[
                    document.DocumentURI(
                        claimant=claimant,
                        uri=shared_uri,
                        type=uri_type)
                ],
                meta=[
                    document.DocumentMeta(
                        claimant=claimant,
                        type='title',
                        value='Wikipedia, the free encyclopedia')
                ])

        master = build_document(shared_uri, 'self-claim')
        duplicate_1 = build_document(
            'https://m.en.wikipedia.org/wiki/Main_Page', 'rel-canonical')
        duplicate_2 = build_document(
            'https://en.wikipedia.org/wiki/Home', 'rel-canonical')

        db_session.add_all([master, duplicate_1, duplicate_2])
        # Flush before the documents' ids are read for the annotations below.
        db_session.flush()

        annotators = (
            (master, ('luke', 'alice')),
            (duplicate_1, ('lucy', 'bob')),
            (duplicate_2, ('amy', 'dan')),
        )
        db_session.add_all([
            models.Annotation(userid=userid, document_id=doc.id)
            for doc, userids in annotators
            for userid in userids
        ])
        return (master, duplicate_1, duplicate_2)
Esempio n. 2
0
    def test_it_updates_an_existing_DocumentMeta_if_there_is_one(
            self, db_session):
        """An existing DocumentMeta with the same claimant and type is updated.

        The ``value`` and ``updated`` fields are expected to change, while
        ``created`` and ``document`` must keep their original values and no
        additional DocumentMeta row may appear.
        """
        shared = {
            "claimant": "http://example.com/claimant",
            "type": "title",
        }
        original_document = document.Document()
        original_created = yesterday()
        existing_meta = document.DocumentMeta(
            value="the title",
            document=original_document,
            created=original_created,
            updated=now(),
            **shared
        )
        db_session.add(existing_meta)

        later = now()
        document.create_or_update_document_meta(
            session=db_session,
            value="new value",
            document=document.Document(),  # This should be ignored.
            created=now(),  # This should be ignored.
            updated=later,
            **shared
        )

        assert existing_meta.value == "new value"
        assert existing_meta.updated == later
        assert existing_meta.created == original_created, (
            "It shouldn't update created")
        assert existing_meta.document == original_document, (
            "It shouldn't update document")

        stored = db_session.query(document.DocumentMeta).all()
        assert len(stored) == 1, (
            "It shouldn't have added any new objects to the db")
Esempio n. 3
0
    def test_it_creates_a_new_DocumentMeta_if_there_is_no_existing_one(
            self, db_session):
        """A new DocumentMeta is created when none matches claimant and type.

        A DocumentMeta with the same claimant but a different type is added
        first; it must not be treated as a match, so the call is expected to
        produce a second row carrying the requested values.
        """
        claimant = "http://example.com/claimant"
        type_ = "title"
        value = "the title"
        document_ = document.Document()
        created = yesterday()
        updated = now()

        # Add one non-matching DocumentMeta to the database.
        # This should be ignored.
        db_session.add(
            document.DocumentMeta(
                claimant=claimant,
                # Different type means this should not match the query.
                type="different",
                value=value,
                document=document_,
                created=created,
                updated=updated,
            ))

        document.create_or_update_document_meta(
            session=db_session,
            claimant=claimant,
            type=type_,
            value=value,
            document=document_,
            created=created,
            updated=updated,
        )

        # Select the new row by its type rather than by its position in the
        # result list: the query has no ORDER BY, so row order is not
        # guaranteed and ``.all()[-1]`` could return the non-matching row.
        matching = [
            meta for meta in db_session.query(document.DocumentMeta).all()
            if meta.type == type_
        ]
        assert len(matching) == 1, (
            "Expected exactly one DocumentMeta of the requested type")
        document_meta = matching[0]

        assert document_meta.claimant == claimant
        assert document_meta.type == type_
        assert document_meta.value == value
        assert document_meta.document == document_
        assert document_meta.created == created
        assert document_meta.updated == updated
Esempio n. 4
0
    def merge_data(self, db_session, request):
        """Add a master document, two duplicates and their annotations.

        All three documents hold a single DocumentURI for the same Wikipedia
        URI, differing only in claimant and URI type, together with a title
        DocumentMeta from the same claimant. Once the documents have been
        flushed, two annotations per document are added, each referencing its
        document through ``document_id``.

        :param db_session: session that receives the documents and annotations
        :param request: not used by this method
        :returns: the tuple ``(master, duplicate_1, duplicate_2)``
        """
        main_page = "https://en.wikipedia.org/wiki/Main_Page"

        # (claimant, URI type) for master, duplicate_1, duplicate_2 in order.
        claims = [
            (main_page, "self-claim"),
            ("https://m.en.wikipedia.org/wiki/Main_Page", "rel-canonical"),
            ("https://en.wikipedia.org/wiki/Home", "rel-canonical"),
        ]
        documents = [
            document.Document(
                document_uris=[
                    document.DocumentURI(
                        claimant=claimant,
                        uri=main_page,
                        type=uri_type,
                    )
                ],
                meta=[
                    document.DocumentMeta(
                        claimant=claimant,
                        type="title",
                        value="Wikipedia, the free encyclopedia",
                    )
                ],
            )
            for claimant, uri_type in claims
        ]
        master, duplicate_1, duplicate_2 = documents

        db_session.add_all(documents)
        # Flush before the documents' ids are read for the annotations below.
        db_session.flush()

        userid_pairs = [("luke", "alice"), ("lucy", "bob"), ("amy", "dan")]
        annotations = []
        for doc, userids in zip(documents, userid_pairs):
            for userid in userids:
                annotations.append(
                    models.Annotation(userid=userid, document_id=doc.id))
        db_session.add_all(annotations)

        return (master, duplicate_1, duplicate_2)