def merge_data(self, request):
    """Persist two Documents that both claim the Wikipedia main-page URI.

    The first document ("master") is claimed by the desktop page itself
    through a ``self-claim`` URI.  The second ("duplicate") is claimed by the
    mobile page through a ``rel-canonical`` URI that points at the same
    desktop URL.  Both carry the same ``title`` meta value.

    ``request`` is accepted but never read in this body (presumably supplied
    by the test framework -- confirm against the decorator/caller).

    Returns the pair ``(master, duplicate)`` once both objects have been
    added to ``db.Session`` and the session has been flushed.
    """
    desktop_url = 'https://en.wikipedia.org/wiki/Main_Page'
    mobile_url = 'https://m.en.wikipedia.org/wiki/Main_Page'
    page_title = 'Wikipedia, the free encyclopedia'

    # Master: the desktop page describing itself.
    master_uri = document.DocumentURI(
        claimant=desktop_url,
        uri=desktop_url,
        type='self-claim')
    master_title = document.DocumentMeta(
        claimant=desktop_url,
        type='title',
        value=page_title)
    master = document.Document(
        document_uris=[master_uri],
        meta=[master_title])

    # Duplicate: the mobile page naming the desktop URL as its canonical URI.
    duplicate_uri = document.DocumentURI(
        claimant=mobile_url,
        uri=desktop_url,
        type='rel-canonical')
    duplicate_title = document.DocumentMeta(
        claimant=mobile_url,
        type='title',
        value=page_title)
    duplicate = document.Document(
        document_uris=[duplicate_uri],
        meta=[duplicate_title])

    session = db.Session
    session.add_all([master, duplicate])
    session.flush()

    return master, duplicate
def test_it_returns_the_value_of_the_first_title_DocumentMeta(self):
    """With two title metas attached, ``title`` is the first one's value."""
    doc = document.Document()

    # Attach the metas in this exact order: the US title must come first.
    titled_claims = (
        ('http://example.com', 'The US Title'),
        ('http://example.co.uk', 'The UK Title'),
    )
    for claimant_url, title_text in titled_claims:
        document.DocumentMeta(type='title',
                              value=[title_text],
                              document=doc,
                              claimant=claimant_url)

    session = db.Session
    session.add(doc)
    session.flush()

    assert doc.title == 'The US Title'
def test_it_updates_an_existing_DocumentMeta_if_there_is_one(self):
    """A matching claimant/type pair updates the stored row in place.

    An existing DocumentMeta is added to the session, then
    ``create_or_update_document_meta`` is called with the same claimant and
    type but a new value, a new ``updated`` timestamp, and a different
    ``document`` and ``created``.  The test asserts that only ``value`` and
    ``updated`` change, and that no second DocumentMeta row exists.
    """
    claimant = 'http://example.com/claimant'
    type_ = 'title'
    original_document = document.Document()
    original_created = yesterday()
    original_updated = now()

    existing_meta = document.DocumentMeta(
        claimant=claimant,
        type=type_,
        value='the title',
        document=original_document,
        created=original_created,
        updated=original_updated,
    )
    db.Session.add(existing_meta)

    new_updated = now()
    # These two are passed in only to prove that they get ignored.
    ignored_document = document.Document()
    ignored_created = now()

    document.create_or_update_document_meta(
        session=db.Session,
        claimant=claimant,
        type=type_,
        value='new value',
        document=ignored_document,
        created=ignored_created,
        updated=new_updated,
    )

    assert existing_meta.value == 'new value'
    assert existing_meta.updated == new_updated
    assert existing_meta.created == original_created, (
        "It shouldn't update created")
    assert existing_meta.document == original_document, (
        "It shouldn't update document")

    stored_metas = db.Session.query(document.DocumentMeta).all()
    assert len(stored_metas) == 1, (
        "It shouldn't have added any new objects to the db")
def test_it_returns_None_if_there_are_no_title_DocumentMetas(self):
    """A document whose only meta has a non-title type has ``title`` None."""
    untitled = document.Document()

    # The only meta attached has type 'other', so there is no title entry.
    document.DocumentMeta(
        type='other',
        value='something',
        document=untitled,
        claimant='http://example.com',
    )

    session = db.Session
    session.add(untitled)
    session.flush()

    assert untitled.title is None
def test_it_returns_the_value_of_the_one_title_DocumentMeta(self):
    """A document with a single title meta reports that meta's title."""
    expected_title = 'The Title'
    titled = document.Document()

    # The value is stored as a one-element list; ``title`` is asserted to be
    # the bare string.
    document.DocumentMeta(
        type='title',
        value=[expected_title],
        document=titled,
        claimant='http://example.com',
    )

    session = db.Session
    session.add(titled)
    session.flush()

    assert titled.title == expected_title
def test_it_creates_a_new_DocumentMeta_if_there_is_no_existing_one(self):
    """With no matching row, a new DocumentMeta is created from the args.

    A DocumentMeta with the same claimant but a different type is added
    first; it must not be treated as a match.  After calling
    ``create_or_update_document_meta`` the last DocumentMeta returned by the
    query is checked field by field against the values that were passed in.
    """
    owning_document = document.Document()
    # Field values handed to create_or_update_document_meta and expected back
    # on the resulting row.  Order matches the assertions below.
    expected = {
        'claimant': 'http://example.com/claimant',
        'type': 'title',
        'value': 'the title',
        'document': owning_document,
        'created': yesterday(),
        'updated': now(),
    }

    # Add one non-matching DocumentMeta to the database.
    # This should be ignored.
    decoy_meta = document.DocumentMeta(
        claimant=expected['claimant'],
        # Different type means this should not match the query.
        type='different',
        value=expected['value'],
        document=expected['document'],
        created=expected['created'],
        updated=expected['updated'],
    )
    db.Session.add(decoy_meta)

    document.create_or_update_document_meta(session=db.Session, **expected)

    # NOTE(review): this picks the last row of an unordered query and so
    # relies on the new row being returned last -- confirm that is dependable.
    created_meta = db.Session.query(document.DocumentMeta).all()[-1]

    for field_name, expected_value in expected.items():
        assert getattr(created_meta, field_name) == expected_value