def test_index_institution_refs(add_citation, add_institution):
    """
    A citation whose document is linked to an institution should be indexed
    with that institution's id, state, and country.
    """
    citation = add_citation()
    inst = add_institution(state='CA', country='US')

    # Attach the institution to the citation's document.
    Institution_Document.create(institution=inst, document=citation.document)

    Citation_Index.es_insert()

    # Pull the indexed citation back out and inspect its stored fields.
    indexed = config.es.get(index='citation', id=citation.id)['_source']

    assert indexed['institution_id'] == inst.id
    assert indexed['state'] == 'CA'
    assert indexed['country'] == 'US'
def test_institution(add_doc, add_institution, add_citation):
    """
    `citation.institution` should resolve to the institution linked to the
    citation's document.
    """
    doc = add_doc()
    inst = add_institution()

    # Attach the institution to the document before citing it.
    Institution_Document.create(institution=inst, document=doc)

    cited = add_citation(document=doc)

    assert cited.institution.id == inst.id
def test_unique_pairs(add_doc, add_institution):
    """
    Linking the same document -> institution pair twice should be rejected
    with an IntegrityError.
    """
    pair = dict(institution=add_institution(), document=add_doc())

    # First link succeeds.
    Institution_Document.create(**pair)

    # An identical second link must fail.
    with pytest.raises(IntegrityError):
        Institution_Document.create(**pair)
def doc_to_inst(doc_id):
    """
    Link a document to the institution that shares its syllabus domain.

    The document is fetched by id, and the first institution whose `domain`
    equals the document's `syllabus.domain` is looked up. When such an
    institution exists, an Institution_Document row is created for the pair;
    otherwise nothing is written.
    """
    document = Document.get(Document.id == doc_id)

    same_domain = Institution.domain == document.syllabus.domain
    match = Institution.select().where(same_domain).first()

    # No institution for this domain -> nothing to link.
    if not match:
        return

    Institution_Document.create(institution=match, document=document)
def test_country_facets(add_institution, add_citation):
    """
    country_facets() should return one label / value / count dict per
    country, in the order asserted below.
    """
    australia = add_institution(country='AU')
    canada = add_institution(country='CA')
    new_zealand = add_institution(country='NZ')

    # (institution, number of citations to link to it)
    plan = ((australia, 3), (canada, 2), (new_zealand, 1))

    for inst, n_citations in plan:
        for _ in range(n_citations):
            cited = add_citation()
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    Citation_Index.es_insert()

    facets = country_facets()

    assert facets == [
        dict(label='Australia', value=australia.country, count=3),
        dict(label='Canada', value=canada.country, count=2),
        dict(label='New Zealand', value=new_zealand.country, count=1),
    ]
def test_filter_country(add_text, add_citation, add_institution):
    """
    A `country` filter should limit the ranking to texts cited by documents
    linked to an institution in that country.
    """
    text_a = add_text()
    text_b = add_text()
    text_c = add_text()

    usa = add_institution(country='USA')
    canada = add_institution(country='CAN')

    # (text, institution, number of citations)
    plan = (
        (text_a, usa, 3),
        (text_b, usa, 2),
        (text_c, canada, 1),
    )

    for text, inst, n_citations in plan:
        for _ in range(n_citations):
            cited = add_citation(text=text)
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    Citation_Index.es_insert()

    query = dict(country='USA')
    ranks = Citation_Index.compute_ranking(query)

    # Only the two texts cited from the 'USA' institution are ranked.
    assert ranks == {
        str(text_a.id): 3,
        str(text_b.id): 2,
    }
def test_filter_institution(add_text, add_citation, add_institution):
    """
    An `institution_id` filter should limit the ranking to texts cited by
    documents linked to that institution.
    """
    text_a = add_text()
    text_b = add_text()
    text_c = add_text()

    target = add_institution()
    other = add_institution()

    # (text, institution, number of citations)
    plan = (
        (text_a, target, 3),
        (text_b, target, 2),
        (text_c, other, 1),
    )

    for text, inst, n_citations in plan:
        for _ in range(n_citations):
            cited = add_citation(text=text)
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    Citation_Index.es_insert()

    query = dict(institution_id=target.id)
    ranks = Citation_Index.compute_ranking(query)

    # The text cited only from the other institution is excluded.
    assert ranks == {
        str(text_a.id): 3,
        str(text_b.id): 2,
    }
def test_merge_included(add_institution, add_citation):
    """
    Institutions requested via `include` that fall outside the default page
    should be merged into the baseline facet list, without duplicating any
    that are already present.
    """
    first = add_institution(name='Institution 1')
    second = add_institution(name='Institution 2')
    third = add_institution(name='Institution 3')

    # (institution, number of citations to link to it)
    plan = ((first, 3), (second, 2), (third, 1))

    for inst, n_citations in plan:
        for _ in range(n_citations):
            cited = add_citation()
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    Citation_Index.es_insert()
    Institution_Index.es_insert()

    requested = [second.id, third.id]
    facets = institution_facets(depth=2, include=requested)

    assert facets == [
        dict(label='Institution 1', value=first.id, count=3),
        # Requested via `include`, but listed only once.
        dict(label='Institution 2', value=second.id, count=2),
        # Requested via `include` and appended to the list.
        dict(label='Institution 3', value=third.id, count=1),
    ]
def test_state_facets(add_institution, add_citation):
    """
    state_facets() should return one label / value / count dict per state,
    in the order asserted below.
    """
    california = add_institution(state='CA')
    alabama = add_institution(state='AL')
    massachusetts = add_institution(state='MA')

    # (institution, number of citations to link to it)
    plan = ((california, 3), (alabama, 2), (massachusetts, 1))

    for inst, n_citations in plan:
        for _ in range(n_citations):
            cited = add_citation()
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    Citation_Index.es_insert()

    facets = state_facets()

    assert facets == [
        dict(label='California', value=california.state, count=3),
        dict(label='Alabama', value=alabama.state, count=2),
        dict(label='Massachusetts', value=massachusetts.state, count=1),
    ]
def test_filter_country(add_text, add_citation, add_institution):
    """
    Ranking with a `country` keyword should count only citations whose
    documents are linked to an institution in that country.
    """
    def cite(text, inst, times):
        # Create `times` citations of `text`, each linked to `inst`.
        for _ in range(times):
            cited = add_citation(text=text)
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    first_text = add_text()
    second_text = add_text()
    third_text = add_text()

    usa = add_institution(country='USA')
    canada = add_institution(country='CAN')

    cite(first_text, usa, 3)
    cite(second_text, usa, 2)
    cite(third_text, canada, 1)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking(dict(country='USA'))

    expected = {
        str(first_text.id): 3,
        str(second_text.id): 2,
    }
    assert ranks == expected
def test_filter_institution(add_text, add_citation, add_institution):
    """
    Ranking with an `institution_id` should count only citations whose
    documents are linked to that institution.
    """
    def cite(text, inst, times):
        # Create `times` citations of `text`, each linked to `inst`.
        for _ in range(times):
            cited = add_citation(text=text)
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    first_text = add_text()
    second_text = add_text()
    third_text = add_text()

    target = add_institution()
    other = add_institution()

    cite(first_text, target, 3)
    cite(second_text, target, 2)
    cite(third_text, other, 1)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking(dict(institution_id=target.id))

    expected = {
        str(first_text.id): 3,
        str(second_text.id): 2,
    }
    assert ranks == expected
def test_merge_included_facets(add_institution, add_citation):
    """
    Facets passed via `include` that are already present in the ranking
    should not be listed a second time.
    """
    first = add_institution(name='Institution 1')
    second = add_institution(name='Institution 2')
    third = add_institution(name='Institution 3')

    # (institution, number of citations to link to it)
    plan = ((first, 3), (second, 2), (third, 1))

    for inst, n_citations in plan:
        for _ in range(n_citations):
            cited = add_citation()
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    Citation_Index.es_insert()

    requested = [second.id, third.id]
    counts = Citation_Index.count_facets('institution_id', include=requested)

    # Institutions 2 and 3 appear once each despite being requested.
    assert counts == [
        (first.id, 3),
        (second.id, 2),
        (third.id, 1),
    ]
def test_institution_facets(add_institution, add_citation):
    """
    institution_facets() should return one label / value / count dict per
    institution, in the order asserted below.
    """
    first = add_institution(name='Institution 1')
    second = add_institution(name='Institution 2')
    third = add_institution(name='Institution 3')

    # (institution, number of citations to link to it)
    plan = ((first, 3), (second, 2), (third, 1))

    for inst, n_citations in plan:
        for _ in range(n_citations):
            cited = add_citation()
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    Citation_Index.es_insert()
    Institution_Index.es_insert()

    facets = institution_facets()

    assert facets == [
        dict(label='Institution 1', value=first.id, count=3),
        dict(label='Institution 2', value=second.id, count=2),
        dict(label='Institution 3', value=third.id, count=1),
    ]
def test_append_included_facets(add_institution, add_citation):
    """
    When "included" facets have counts that place them below the baseline
    ranking, they should be appended to the bottom of the list.
    """
    first = add_institution(name='Institution 1')
    second = add_institution(name='Institution 2')
    third = add_institution(name='Institution 3')
    fourth = add_institution(name='Institution 4')

    # (institution, number of citations to link to it)
    plan = ((first, 4), (second, 3), (third, 2), (fourth, 1))

    for inst, n_citations in plan:
        for _ in range(n_citations):
            cited = add_citation()
            Institution_Document.create(
                institution=inst,
                document=cited.document,
            )

    Citation_Index.es_insert()

    requested = [third.id, fourth.id]
    counts = Citation_Index.count_facets(
        'institution_id',
        include=requested,
        depth=2,
    )

    assert counts == [
        # Baseline ranking at depth=2.
        (first.id, 4),
        (second.id, 3),
        # Requested via `include`, appended after the baseline.
        (third.id, 2),
        (fourth.id, 1),
    ]