Example #1
0
def test_state_facets(add_institution, add_citation):
    """
    state_facets() should return label/value/count dicts, one per state.

    Institutions in CA, AL and MA are linked to 3, 2 and 1 cited documents
    respectively, and the facets are asserted in that order.
    """

    ca = add_institution(state='CA')
    al = add_institution(state='AL')
    ma = add_institution(state='MA')

    # (institution, number of documents linked to it)
    for institution, doc_count in ((ca, 3), (al, 2), (ma, 1)):
        for _ in range(doc_count):
            citation = add_citation()
            Institution_Document.create(
                institution=institution,
                document=citation.document,
            )

    Citation_Index.es_insert()

    facets = state_facets()

    assert facets == [
        {'label': 'California', 'value': ca.state, 'count': 3},
        {'label': 'Alabama', 'value': al.state, 'count': 2},
        {'label': 'Massachusetts', 'value': ma.state, 'count': 1},
    ]
def test_country_facets(add_institution, add_citation):
    """
    country_facets() should return label/value/count dicts, one per country.

    Institutions in AU, CA and NZ are linked to 3, 2 and 1 cited documents
    respectively, and the facets are asserted in that order.
    """

    australia = add_institution(country='AU')
    canada = add_institution(country='CA')
    new_zealand = add_institution(country='NZ')

    def link_documents(institution, how_many):
        # Create `how_many` citations and tie each document to the institution.
        for _ in range(how_many):
            citation = add_citation()
            Institution_Document.create(
                institution=institution,
                document=citation.document,
            )

    link_documents(australia, 3)
    link_documents(canada, 2)
    link_documents(new_zealand, 1)

    Citation_Index.es_insert()

    facets = country_facets()

    assert facets == [
        {'label': 'Australia', 'value': australia.country, 'count': 3},
        {'label': 'Canada', 'value': canada.country, 'count': 2},
        {'label': 'New Zealand', 'value': new_zealand.country, 'count': 1},
    ]
def test_index_institution_refs(add_citation, add_institution):
    """
    An indexed citation whose document is linked to an institution should
    carry that institution's id, state and country.
    """

    cited = add_citation()
    inst = add_institution(state='CA', country='US')

    # Associate the institution with the citation's document.
    Institution_Document.create(institution=inst, document=cited.document)

    Citation_Index.es_insert()

    source = config.es.get(index='citation', id=cited.id)['_source']

    assert source['institution_id'] == inst.id
    assert source['state'] == 'CA'
    assert source['country'] == 'US'
def test_size(add_text, add_doc, add_citation):
    """
    The 'size' argument of assigned_with() should cap the number of hits.

    The seed text shares 3, 2 and 1 documents with three other texts; with
    size=2 only the first two of those texts are expected back.
    """

    seed = add_text()

    t2 = add_text()
    t3 = add_text()
    t4 = add_text()

    # (co-assigned text, number of documents shared with the seed)
    for companion, shared_docs in ((t2, 3), (t3, 2), (t4, 1)):
        for _ in range(shared_docs):
            syllabus = add_doc()
            add_citation(text=seed, document=syllabus)
            add_citation(text=companion, document=syllabus)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    hits = assigned_with(seed.id, size=2)['hits']

    assert len(hits) == 2
    assert hits[0]['_id'] == str(t2.id)
    assert hits[1]['_id'] == str(t3.id)
def test_count_facets(add_citation, add_subfield, add_subfield_document):
    """
    Citation_Index.count_facets() should return (value, count) tuples for
    the requested field.
    """

    subfields = [add_subfield(), add_subfield(), add_subfield()]
    doc_counts = [3, 2, 1]

    for subfield, doc_count in zip(subfields, doc_counts):
        for _ in range(doc_count):
            citation = add_citation()
            add_subfield_document(
                subfield=subfield,
                document=citation.document,
            )

    Citation_Index.es_insert()

    counts = Citation_Index.count_facets('subfield_id')

    assert counts == [
        (subfield.id, doc_count)
        for subfield, doc_count in zip(subfields, doc_counts)
    ]
def test_search_filter(add_text, add_citation):
    """
    rank_texts() should apply the free-text query.

    Only the two texts whose titles contain 'match' are expected back.
    """

    match_one = add_text(title='match one')
    two = add_text(title='two')
    match_three = add_text(title='match three')
    four = add_text(title='four')

    # (text, number of citations)
    cited = ((match_one, 4), (two, 3), (match_three, 2), (four, 1))

    for text, n_citations in cited:
        for _ in range(n_citations):
            add_citation(text)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    hits = rank_texts(query='match')['hits']

    assert len(hits) == 2
    assert hits[0]['_id'] == str(match_one.id)
    assert hits[1]['_id'] == str(match_three.id)
Example #7
0
def test_unfiltered(add_text, add_citation):
    """
    With neither filters nor a query, rank_texts() should return the
    overall ranking of all texts.
    """

    first, second, third = add_text(), add_text(), add_text()

    # (text, number of citations)
    for text, n_citations in ((first, 3), (second, 2), (third, 1)):
        for _ in range(n_citations):
            add_citation(text)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    hits = rank_texts()['hits']

    assert len(hits) == 3
    assert hits[0]['_id'] == str(first.id)
    assert hits[1]['_id'] == str(second.id)
    assert hits[2]['_id'] == str(third.id)
def test_subfield_facets(add_citation, add_subfield, add_subfield_document):
    """
    subfield_facets() should return label/value/count dicts, one per
    subfield.
    """

    first = add_subfield(name="Subfield 1")
    second = add_subfield(name="Subfield 2")
    third = add_subfield(name="Subfield 3")

    # (subfield, number of documents tagged with it)
    for subfield, doc_count in ((first, 3), (second, 2), (third, 1)):
        for _ in range(doc_count):
            citation = add_citation()
            add_subfield_document(
                subfield=subfield,
                document=citation.document,
            )

    Citation_Index.es_insert()
    Subfield_Index.es_insert()

    facets = subfield_facets()

    assert facets == [
        {'label': "Subfield 1", 'value': first.id, 'count': 3},
        {'label': "Subfield 2", 'value': second.id, 'count': 2},
        {'label': "Subfield 3", 'value': third.id, 'count': 1},
    ]
Example #9
0
def test_metadata_filters(add_text, add_citation):
    """
    rank_texts() should apply citation metadata filters.

    Filtering on corpus='corpus2' is expected to return only the two
    texts from that corpus.
    """

    t1 = add_text(corpus='corpus1')
    t2 = add_text(corpus='corpus2')
    t3 = add_text(corpus='corpus1')
    t4 = add_text(corpus='corpus2')

    # (text, number of citations)
    for text, n_citations in ((t1, 4), (t2, 3), (t3, 2), (t4, 1)):
        for _ in range(n_citations):
            add_citation(text)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    corpus_filter = {'corpus': 'corpus2'}
    hits = rank_texts(filters=corpus_filter)['hits']

    assert len(hits) == 2
    assert hits[0]['_id'] == str(t2.id)
    assert hits[1]['_id'] == str(t4.id)
def assigned_with(text_id, size=200):

    """
    Given a "seed" text, rank other texts assigned on the same syllabi.

    Args:
        text_id (int): The text id.
        size (int): Forwarded to Text_Index.materialize_ranking() as `size`.

    Returns:
        dict: Elasticsearch hits.
    """

    # Get syllabi that assign the text.
    doc_ids = Citation_Index.docs_with_text(text_id)

    # Rank texts assigned by those syllabi.
    # NOTE(review): if `doc_ids` is empty, compute_ranking() may treat the
    # filter as absent and rank everything -- confirm the intended behavior.
    ranks = Citation_Index.compute_ranking(dict(
        document_id=doc_ids
    ))

    # Omit the seed text. A default is supplied so that a seed id which is
    # absent from the ranking does not raise KeyError.
    ranks.pop(str(text_id), None)

    # Materialize the text metadata.
    texts = Text_Index.materialize_ranking(ranks, size=size)

    return texts
def test_institution_facets(add_institution, add_citation):
    """
    institution_facets() should return label/value/count dicts, one per
    institution.
    """

    names = ('Institution 1', 'Institution 2', 'Institution 3')
    doc_counts = (3, 2, 1)

    institutions = [add_institution(name=name) for name in names]

    for institution, doc_count in zip(institutions, doc_counts):
        for _ in range(doc_count):
            citation = add_citation()
            Institution_Document.create(
                institution=institution,
                document=citation.document,
            )

    Citation_Index.es_insert()
    Institution_Index.es_insert()

    facets = institution_facets()

    assert facets == [
        {'label': name, 'value': institution.id, 'count': doc_count}
        for name, institution, doc_count
        in zip(names, institutions, doc_counts)
    ]
def test_filter_corpus(add_text, add_citation):
    """
    compute_ranking() should filter on corpus as a keyword value.

    Only the two 'corpus1' texts are expected in the ranking.
    """

    t1 = add_text(corpus='corpus1')
    t2 = add_text(corpus='corpus1')
    t3 = add_text(corpus='corpus2')

    # (text, number of citations)
    for text, n_citations in ((t1, 3), (t2, 2), (t3, 1)):
        for _ in range(n_citations):
            add_citation(text=text)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking({'corpus': 'corpus1'})

    expected = {str(t1.id): 3, str(t2.id): 2}
    assert ranks == expected
def test_index_institution_refs(add_citation, add_institution):
    """
    When a citation's document is linked with an institution, the indexed
    citation should include the institution id, state and country.
    """

    citation = add_citation()
    institution = add_institution(state='CA', country='US')

    # Link inst -> citation.
    link = dict(institution=institution, document=citation.document)
    Institution_Document.create(**link)

    Citation_Index.es_insert()

    indexed = config.es.get(index='citation', id=citation.id)
    source = indexed['_source']

    assert source['institution_id'] == institution.id
    assert source['state'] == 'CA'
    assert source['country'] == 'US'
def test_institution_facets(add_institution, add_citation):
    """
    institution_facets() should return label/value/count dicts.

    Three institutions are linked to 3, 2 and 1 cited documents, and the
    facets are asserted in that order.
    """

    inst_1 = add_institution(name='Institution 1')
    inst_2 = add_institution(name='Institution 2')
    inst_3 = add_institution(name='Institution 3')

    def link_documents(institution, how_many):
        # Create `how_many` citations and tie each document to the institution.
        for _ in range(how_many):
            citation = add_citation()
            Institution_Document.create(
                institution=institution,
                document=citation.document,
            )

    link_documents(inst_1, 3)
    link_documents(inst_2, 2)
    link_documents(inst_3, 1)

    Citation_Index.es_insert()
    Institution_Index.es_insert()

    facets = institution_facets()

    assert facets == [
        {'label': 'Institution 1', 'value': inst_1.id, 'count': 3},
        {'label': 'Institution 2', 'value': inst_2.id, 'count': 2},
        {'label': 'Institution 3', 'value': inst_3.id, 'count': 1},
    ]
def test_merge_included_facets(add_institution, add_citation):
    """
    Included facets that are already present in the ranking should not be
    duplicated in the counts.
    """

    inst_1 = add_institution(name='Institution 1')
    inst_2 = add_institution(name='Institution 2')
    inst_3 = add_institution(name='Institution 3')

    # (institution, number of documents linked to it)
    for institution, doc_count in ((inst_1, 3), (inst_2, 2), (inst_3, 1)):
        for _ in range(doc_count):
            citation = add_citation()
            Institution_Document.create(
                institution=institution,
                document=citation.document,
            )

    Citation_Index.es_insert()

    included = [inst_2.id, inst_3.id]
    counts = Citation_Index.count_facets('institution_id', include=included)

    # Institutions 2 and 3 each appear exactly once.
    assert counts == [
        (inst_1.id, 3),
        (inst_2.id, 2),
        (inst_3.id, 1),
    ]
def test_size(add_text, add_citation):
    """
    The 'size' argument of rank_texts() should control the page length.

    Three texts are cited; with size=2 only the first two are expected.
    """

    texts = [add_text(), add_text(), add_text()]

    for text, n_citations in zip(texts, (3, 2, 1)):
        for _ in range(n_citations):
            add_citation(text)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    hits = rank_texts(size=2)['hits']

    assert len(hits) == 2
    assert hits[0]['_id'] == str(texts[0].id)
    assert hits[1]['_id'] == str(texts[1].id)
Example #17
0
def assigned_with(text_id, size=1000):

    """
    Given a "seed" text, rank other texts assigned on the same syllabi.

    Args:
        text_id (int): The text id.
        size (int): Forwarded to Text_Index.materialize_ranking() as `size`.

    Returns:
        dict: Elasticsearch hits.
    """

    # Get syllabi that assign the text.
    doc_ids = Citation_Index.docs_with_text(text_id)

    # Rank texts assigned by those syllabi.
    # NOTE(review): if `doc_ids` is empty, compute_ranking() may treat the
    # filter as absent and rank everything -- confirm the intended behavior.
    ranks = Citation_Index.compute_ranking(dict(
        document_id=doc_ids
    ))

    # Omit the seed text. A default is supplied so that a seed id which is
    # absent from the ranking does not raise KeyError.
    ranks.pop(str(text_id), None)

    # Materialize the text metadata.
    texts = Text_Index.materialize_ranking(ranks, size=size)

    return texts
def test_unfiltered(add_text, add_citation):
    """
    When no filters or query are passed, rank_texts() should return the
    overall rankings.
    """

    texts = [add_text(), add_text(), add_text()]

    # Cite the texts 3, 2 and 1 times respectively.
    for text, n_citations in zip(texts, (3, 2, 1)):
        for _ in range(n_citations):
            add_citation(text)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    hits = rank_texts()['hits']

    assert len(hits) == 3
    assert hits[0]['_id'] == str(texts[0].id)
    assert hits[1]['_id'] == str(texts[1].id)
    assert hits[2]['_id'] == str(texts[2].id)
def test_state_facets(add_institution, add_citation):
    """
    state_facets() should return label/value/count dicts.

    The expected labels are the full state names for the 'CA', 'AL' and
    'MA' values stored on the institutions.
    """

    california = add_institution(state='CA')
    alabama = add_institution(state='AL')
    massachusetts = add_institution(state='MA')

    def link_documents(institution, how_many):
        # Create `how_many` citations and tie each document to the institution.
        for _ in range(how_many):
            citation = add_citation()
            Institution_Document.create(
                institution=institution,
                document=citation.document,
            )

    link_documents(california, 3)
    link_documents(alabama, 2)
    link_documents(massachusetts, 1)

    Citation_Index.es_insert()

    facets = state_facets()

    assert facets == [
        {'label': 'California', 'value': california.state, 'count': 3},
        {'label': 'Alabama', 'value': alabama.state, 'count': 2},
        {'label': 'Massachusetts', 'value': massachusetts.state, 'count': 1},
    ]
def test_metadata_filters(add_text, add_citation):
    """
    Citation metadata filters passed to rank_texts() should be applied.
    """

    corpora = ('corpus1', 'corpus2', 'corpus1', 'corpus2')
    texts = [add_text(corpus=corpus) for corpus in corpora]

    # Cite the texts 4, 3, 2 and 1 times respectively.
    for text, n_citations in zip(texts, (4, 3, 2, 1)):
        for _ in range(n_citations):
            add_citation(text)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    result = rank_texts(filters={'corpus': 'corpus2'})
    hits = result['hits']

    # Only the second and fourth texts belong to corpus2.
    assert len(hits) == 2
    assert hits[0]['_id'] == str(texts[1].id)
    assert hits[1]['_id'] == str(texts[3].id)
def test_unfiltered(add_text, add_citation):
    """
    Without filters, compute_ranking() should return the total citation
    count of every text, keyed by the text id as a string.
    """

    texts = [add_text(), add_text(), add_text()]
    citation_counts = (3, 2, 1)

    for text, n_citations in zip(texts, citation_counts):
        for _ in range(n_citations):
            add_citation(text=text)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking()

    assert ranks == {
        str(text.id): n_citations
        for text, n_citations in zip(texts, citation_counts)
    }
def test_count_facets(add_citation, add_subfield, add_subfield_document):
    """
    Citation_Index.count_facets() should return (value, count) tuples for
    a given field.
    """

    first = add_subfield()
    second = add_subfield()
    third = add_subfield()

    def tag_documents(subfield, how_many):
        # Create `how_many` citations and tag each document with the subfield.
        for _ in range(how_many):
            citation = add_citation()
            add_subfield_document(
                subfield=subfield,
                document=citation.document,
            )

    tag_documents(first, 3)
    tag_documents(second, 2)
    tag_documents(third, 1)

    Citation_Index.es_insert()

    counts = Citation_Index.count_facets('subfield_id')

    assert counts == [(first.id, 3), (second.id, 2), (third.id, 1)]
Example #23
0
def test_country_facets(add_institution, add_citation):
    """
    country_facets() should return label/value/count dicts.

    The expected labels are the full country names for the 'AU', 'CA' and
    'NZ' values stored on the institutions.
    """

    au = add_institution(country='AU')
    ca = add_institution(country='CA')
    nz = add_institution(country='NZ')

    # (institution, number of documents linked to it)
    for institution, doc_count in ((au, 3), (ca, 2), (nz, 1)):
        for _ in range(doc_count):
            citation = add_citation()
            Institution_Document.create(
                institution=institution,
                document=citation.document,
            )

    Citation_Index.es_insert()

    facets = country_facets()

    assert facets == [
        {'label': 'Australia', 'value': au.country, 'count': 3},
        {'label': 'Canada', 'value': ca.country, 'count': 2},
        {'label': 'New Zealand', 'value': nz.country, 'count': 1},
    ]
Example #24
0
def test_filter_institution(add_text, add_citation, add_institution):
    """
    compute_ranking() should filter by institution id.

    Two texts are cited on documents of the first institution and one on a
    document of the second; filtering on the first keeps only its two texts.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    wanted = add_institution()
    other = add_institution()

    # (text, institution, number of citations)
    plan = ((t1, wanted, 3), (t2, wanted, 2), (t3, other, 1))

    for text, institution, n_citations in plan:
        for _ in range(n_citations):
            citation = add_citation(text=text)
            Institution_Document.create(
                institution=institution,
                document=citation.document,
            )

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking({'institution_id': wanted.id})

    assert ranks == {str(t1.id): 3, str(t2.id): 2}
def test_ignore_filters_with_empty_values(empty, add_text, add_citation):
    """
    A filter whose value is the `empty` fixture should be ignored, so the
    ranking covers every text.
    """

    t1 = add_text(corpus='corpus1')
    t2 = add_text(corpus='corpus2')
    t3 = add_text(corpus='corpus3')

    # (text, number of citations)
    for text, n_citations in ((t1, 3), (t2, 2), (t3, 1)):
        for _ in range(n_citations):
            add_citation(text=text)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking({'corpus': empty})

    assert ranks == {
        str(t1.id): 3,
        str(t2.id): 2,
        str(t3.id): 1,
    }
Example #26
0
def test_filter_country(add_text, add_citation, add_institution):
    """
    compute_ranking() should filter on country as a keyword value.

    Only texts cited on documents of the 'USA' institution are expected.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    usa = add_institution(country='USA')
    can = add_institution(country='CAN')

    # (text, institution, number of citations)
    plan = ((t1, usa, 3), (t2, usa, 2), (t3, can, 1))

    for text, institution, n_citations in plan:
        for _ in range(n_citations):
            citation = add_citation(text=text)
            Institution_Document.create(
                institution=institution,
                document=citation.document,
            )

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking({'country': 'USA'})

    assert ranks == {str(t1.id): 3, str(t2.id): 2}
def test_filter_multiple_values(add_text, add_citation):
    """
    When a filter key maps to a list of values, citations matching _any_
    of those values should be counted.
    """

    t1 = add_text(corpus='corpus1')
    t2 = add_text(corpus='corpus2')
    t3 = add_text(corpus='corpus3')

    # (text, number of citations)
    for text, n_citations in ((t1, 3), (t2, 2), (t3, 1)):
        for _ in range(n_citations):
            add_citation(text=text)

    Citation_Index.es_insert()

    wanted_corpora = ['corpus1', 'corpus3']
    ranks = Citation_Index.compute_ranking({'corpus': wanted_corpora})

    # Both `corpus1` and `corpus3` citations are counted; `corpus2` is not.
    assert ranks == {str(t1.id): 3, str(t3.id): 1}
Example #28
0
def test_filter_multiple_values(add_text, add_citation):
    """
    A list of values for one filter key should match citations that carry
    _any_ of the listed values.
    """

    corpora = ('corpus1', 'corpus2', 'corpus3')
    texts = [add_text(corpus=corpus) for corpus in corpora]

    # Cite the texts 3, 2 and 1 times respectively.
    for text, n_citations in zip(texts, (3, 2, 1)):
        for _ in range(n_citations):
            add_citation(text=text)

    Citation_Index.es_insert()

    filters = {'corpus': ['corpus1', 'corpus3']}
    ranks = Citation_Index.compute_ranking(filters)

    # Count both `corpus1` and `corpus3` citations.
    in_corpus1, _, in_corpus3 = texts
    assert ranks == {
        str(in_corpus1.id): 3,
        str(in_corpus3.id): 1,
    }
Example #29
0
def test_docs_with_text(add_text, add_doc, add_citation):
    """
    Citation_Index.docs_with_text() should return the ids of the documents
    that assign a given text.
    """

    wanted = add_text()
    other = add_text()

    d1, d2, d3, d4 = add_doc(), add_doc(), add_doc(), add_doc()

    # The wanted text is assigned by d1 and d2 only.
    assignments = ((wanted, d1), (wanted, d2), (other, d3), (other, d4))

    for text, document in assignments:
        add_citation(text=text, document=document)

    Citation_Index.es_insert()

    doc_ids = Citation_Index.docs_with_text(wanted.id)

    assert doc_ids == [d1.id, d2.id]
Example #30
0
def test_ignore_filters_with_empty_values(empty, add_text, add_citation):
    """
    Filters with empty values should be ignored by compute_ranking().
    """

    corpora = ('corpus1', 'corpus2', 'corpus3')
    citation_counts = (3, 2, 1)

    texts = [add_text(corpus=corpus) for corpus in corpora]

    for text, n_citations in zip(texts, citation_counts):
        for _ in range(n_citations):
            add_citation(text=text)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking({'corpus': empty})

    # Every text is ranked, as if no filter had been passed.
    assert ranks == {
        str(text.id): n_citations
        for text, n_citations in zip(texts, citation_counts)
    }
def test_filter_country(add_text, add_citation, add_institution):
    """
    Filtering the ranking on a country keyword should keep only texts
    cited on documents of institutions in that country.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    american = add_institution(country='USA')
    canadian = add_institution(country='CAN')

    def cite_at(text, institution, how_many):
        # Cite `text` `how_many` times on documents tied to `institution`.
        for _ in range(how_many):
            citation = add_citation(text=text)
            Institution_Document.create(
                institution=institution,
                document=citation.document,
            )

    cite_at(t1, american, 3)
    cite_at(t2, american, 2)
    cite_at(t3, canadian, 1)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking({'country': 'USA'})

    assert ranks == {
        str(t1.id): 3,
        str(t2.id): 2,
    }
Example #32
0
def test_filter_corpus(add_text, add_citation):
    """
    Filtering the ranking on a corpus keyword should keep only texts from
    that corpus.
    """

    corpora = ('corpus1', 'corpus1', 'corpus2')
    texts = [add_text(corpus=corpus) for corpus in corpora]

    # Cite the texts 3, 2 and 1 times respectively.
    for text, n_citations in zip(texts, (3, 2, 1)):
        for _ in range(n_citations):
            add_citation(text=text)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking({'corpus': 'corpus1'})

    assert ranks == {
        str(texts[0].id): 3,
        str(texts[1].id): 2,
    }
Example #33
0
def test_size(add_text, add_citation):
    """
    rank_texts(size=N) should return at most N hits.
    """

    top, middle, bottom = add_text(), add_text(), add_text()

    # (text, number of citations)
    for text, n_citations in ((top, 3), (middle, 2), (bottom, 1)):
        for _ in range(n_citations):
            add_citation(text)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    page = rank_texts(size=2)
    hits = page['hits']

    assert len(hits) == 2
    assert hits[0]['_id'] == str(top.id)
    assert hits[1]['_id'] == str(middle.id)
Example #34
0
def test_unfiltered(add_text, add_citation):
    """
    When no filters are provided, compute_ranking() should return the
    total citation count for each text.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    def cite(text, how_many):
        # Add `how_many` citations of `text`.
        for _ in range(how_many):
            add_citation(text=text)

    cite(t1, 3)
    cite(t2, 2)
    cite(t3, 1)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking()

    assert ranks == {str(t1.id): 3, str(t2.id): 2, str(t3.id): 1}
Example #35
0
def test_search_filter(add_text, add_citation):
    """
    The free-text search query passed to rank_texts() should be applied.
    """

    titles = ('match one', 'two', 'match three', 'four')
    texts = [add_text(title=title) for title in titles]

    # Cite the texts 4, 3, 2 and 1 times respectively.
    for text, n_citations in zip(texts, (4, 3, 2, 1)):
        for _ in range(n_citations):
            add_citation(text)

    Citation_Index.es_insert()
    Text_Index.es_insert()

    hits = rank_texts(query='match')['hits']

    # Only the first and third titles contain 'match'.
    assert len(hits) == 2
    assert hits[0]['_id'] == str(texts[0].id)
    assert hits[1]['_id'] == str(texts[2].id)
Example #36
0
def test_subfield_facets(add_citation, add_subfield, add_subfield_document):
    """
    subfield_facets() should return label/value/count dicts.

    Three subfields are attached to 3, 2 and 1 cited documents, and the
    facets are asserted in that order.
    """

    names = ('Subfield 1', 'Subfield 2', 'Subfield 3')
    doc_counts = (3, 2, 1)

    subfields = [add_subfield(name=name) for name in names]

    for subfield, doc_count in zip(subfields, doc_counts):
        for _ in range(doc_count):
            citation = add_citation()
            add_subfield_document(
                subfield=subfield,
                document=citation.document,
            )

    Citation_Index.es_insert()
    Subfield_Index.es_insert()

    facets = subfield_facets()

    assert facets == [
        {'label': name, 'value': subfield.id, 'count': doc_count}
        for name, subfield, doc_count in zip(names, subfields, doc_counts)
    ]
def test_filter_institution(add_text, add_citation, add_institution):
    """
    Filtering the ranking by institution id should keep only texts cited
    on that institution's documents.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    i1 = add_institution()
    i2 = add_institution()

    def cite_at(text, institution, how_many):
        # Cite `text` `how_many` times on documents tied to `institution`.
        for _ in range(how_many):
            citation = add_citation(text=text)
            Institution_Document.create(
                institution=institution,
                document=citation.document,
            )

    cite_at(t1, i1, 3)
    cite_at(t2, i1, 2)
    cite_at(t3, i2, 1)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking({'institution_id': i1.id})

    assert ranks == {
        str(t1.id): 3,
        str(t2.id): 2,
    }
def test_merge_included(add_institution, add_citation):
    """
    When ids are passed for institutions that fall outside of the default
    page, the extra facets should be merged into the baseline ranking.
    """

    inst_1 = add_institution(name='Institution 1')
    inst_2 = add_institution(name='Institution 2')
    inst_3 = add_institution(name='Institution 3')

    # (institution, number of documents linked to it)
    for institution, doc_count in ((inst_1, 3), (inst_2, 2), (inst_3, 1)):
        for _ in range(doc_count):
            citation = add_citation()
            Institution_Document.create(
                institution=institution,
                document=citation.document,
            )

    Citation_Index.es_insert()
    Institution_Index.es_insert()

    included = [inst_2.id, inst_3.id]
    facets = institution_facets(depth=2, include=included)

    assert facets == [
        {'label': 'Institution 1', 'value': inst_1.id, 'count': 3},
        # Institution 2 is listed only once.
        {'label': 'Institution 2', 'value': inst_2.id, 'count': 2},
        # Institution 3 is appended.
        {'label': 'Institution 3', 'value': inst_3.id, 'count': 1},
    ]
def rank_texts(filters=None, query=None, size=1000, page=1):

    """
    Filter and rank texts.

    Args:
        filters (dict): Citation metadata filters. None (the default) is
            treated the same as an empty dict.
        query (str): A text metadata search query.
        size (int): Return N results.
        page (int): 1-indexed page offset.

    Returns:
        dict: Elasticsearch hits.
    """

    # Default to None rather than a shared mutable `{}`, so no state can leak
    # between calls through the default argument.
    has_filters = filters is not None and any(filters.values())

    # Filter citation counts, if non-empty filters. Otherwise pass no
    # ranking to the materializer.
    if has_filters:
        ranks = Citation_Index.compute_ranking(filters)

    else:
        ranks = None

    # Materialize the text metadata.
    texts = Text_Index.materialize_ranking(ranks, query, size, page)

    return texts
def test_only_index_citations_with_displayed_texts(add_text, add_citation):
    """
    Only citations whose text has display=True should be indexed; texts
    with display=None or display=False are left out.
    """

    unset = add_text(display=None)
    hidden = add_text(display=False)
    shown = add_text(display=True)

    add_citation(text=unset)
    add_citation(text=hidden)
    indexed = add_citation(text=shown)

    Citation_Index.es_insert()

    assert config.es.get(index='citation', id=indexed.id)
    assert Citation_Index.es_count() == 1
def test_append_included_facets(add_institution, add_citation):
    """
    "Included" facets whose counts put them below the baseline ranking
    should be appended to the bottom of the list.
    """

    names = (
        'Institution 1',
        'Institution 2',
        'Institution 3',
        'Institution 4',
    )
    i1, i2, i3, i4 = [add_institution(name=name) for name in names]

    # (institution, number of documents linked to it)
    for institution, doc_count in ((i1, 4), (i2, 3), (i3, 2), (i4, 1)):
        for _ in range(doc_count):
            citation = add_citation()
            Institution_Document.create(
                institution=institution,
                document=citation.document,
            )

    Citation_Index.es_insert()

    counts = Citation_Index.count_facets(
        'institution_id',
        include=[i3.id, i4.id],
        depth=2,
    )

    baseline = [(i1.id, 4), (i2.id, 3)]

    # Institutions 3 and 4 come from `include`.
    appended = [(i3.id, 2), (i4.id, 1)]

    assert counts == baseline + appended
def test_only_index_citations_with_valid_texts(add_text, add_citation):
    """
    Only citations whose text has valid=True should be indexed; texts with
    valid=None or valid=False are left out.
    """

    unchecked = add_text(valid=None)
    rejected = add_text(valid=False)
    validated = add_text(valid=True)

    add_citation(text=unchecked)
    add_citation(text=rejected)
    indexed = add_citation(text=validated)

    Citation_Index.es_insert()

    assert config.es.get(index='citation', id=indexed.id)
    assert Citation_Index.es_count() == 1
def test_only_index_citations_with_displayed_texts(add_text, add_citation):
    """
    Citations should be indexed only when the linked text is marked for
    display.
    """

    flags = (None, False, True)
    texts = [add_text(display=flag) for flag in flags]

    citations = [add_citation(text=text) for text in texts]

    Citation_Index.es_insert()

    # Only the citation of the display=True text is expected in the index.
    displayed = citations[-1]

    assert config.es.get(index='citation', id=displayed.id)
    assert Citation_Index.es_count() == 1
def test_only_index_citations_with_valid_texts(add_text, add_citation):
    """
    Citations should be indexed only when the linked text is validated.
    """

    flags = (None, False, True)
    texts = [add_text(valid=flag) for flag in flags]

    citations = [add_citation(text=text) for text in texts]

    Citation_Index.es_insert()

    # Only the citation of the valid=True text is expected in the index.
    validated = citations[-1]

    assert config.es.get(index='citation', id=validated.id)
    assert Citation_Index.es_count() == 1
def test_index_citation_fields(add_citation):
    """
    The Elasticsearch document for a citation should carry the citation's
    text_id and document_id, and the corpus of its text.
    """

    citation = add_citation()

    Citation_Index.es_insert()

    # Fetch the indexed document and keep only its stored fields.
    source = config.es.get(index='citation', id=citation.id)['_source']

    assert source['text_id'] == citation.text_id
    assert source['document_id'] == citation.document_id
    assert source['corpus'] == citation.text.corpus
def test_compute_scores(add_text, add_citation):
    """
    Each text's score should equal the square root of its citation count
    divided by the square root of the largest citation count (3 here).
    """

    texts = [add_text() for _ in range(6)]

    # (citations per text, texts receiving that many citations)
    tiers = [
        (3, texts[:1]),
        (2, texts[1:3]),
        (1, texts[3:]),
    ]

    # Within a tier, citations are added round-robin across its texts.
    for count, tier in tiers:
        for _ in range(count):
            for text in tier:
                add_citation(text=text)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_scores()

    expected = {}
    for count, tier in tiers:
        for text in tier:
            expected[str(text.id)] = np.sqrt(count) / np.sqrt(3)

    assert ranks == expected
def field_facets():
    """
    Fetch the `field_id` facet counts from the citation index and pass them
    to the field index for materialization.

    Returns:
        The result of `Field_Index.materialize_facets` - presumably
        label / value / count entries; confirm against that method.
    """

    return Field_Index.materialize_facets(
        Citation_Index.count_facets('field_id')
    )
def test_index_citation_fields(add_citation):
    """
    After indexing, the stored citation document should expose text_id and
    document_id from the citation row and corpus from the linked text.
    """

    citation = add_citation()

    Citation_Index.es_insert()

    lookup = dict(index='citation', id=citation.id)
    indexed = config.es.get(**lookup)
    fields = indexed['_source']

    assert fields['text_id'] == citation.text_id
    assert fields['document_id'] == citation.document_id
    assert fields['corpus'] == citation.text.corpus
def corpus_facets():
    """
    Fetch the `corpus` facet counts from the citation index and pass them
    to the text index for materialization.

    Returns:
        The result of `Text_Index.materialize_corpus_facets` - presumably
        label / value / count entries; confirm against that method.
    """

    return Text_Index.materialize_corpus_facets(
        Citation_Index.count_facets('corpus')
    )
def country_facets():
    """
    Fetch the `country` facet counts from the citation index and pass them
    to the institution index for materialization.

    Returns:
        list: label / value / count dicts, one per country.
    """

    return Institution_Index.materialize_country_facets(
        Citation_Index.count_facets('country')
    )
Example #51
0
def institution_facets():
    """
    Fetch the `institution_id` facet counts from the citation index and pass
    them to the institution index for materialization.

    Returns:
        The result of `Institution_Index.materialize_institution_facets` -
        presumably label / value / count entries; confirm against that
        method.
    """

    return Institution_Index.materialize_institution_facets(
        Citation_Index.count_facets('institution_id')
    )
def country_facets():
    """
    Materialize country facets with counts.

    The per-country counts come from the citation index; the institution
    index turns them into the final facet entries.

    Returns:
        list: label / value / count dicts, one per country.
    """

    per_country = Citation_Index.count_facets("country")
    facets = Institution_Index.materialize_country_facets(per_country)
    return facets
def corpus_facets():
    """
    Materialize corpus facets with counts.

    The per-corpus counts come from the citation index; the text index
    turns them into the final facet entries.

    Returns:
        The result of `Text_Index.materialize_corpus_facets` - presumably
        label / value / count entries; confirm against that method.
    """

    per_corpus = Citation_Index.count_facets("corpus")
    facets = Text_Index.materialize_corpus_facets(per_corpus)
    return facets
def field_facets():
    """
    Materialize field facets with counts.

    The per-field counts come from the citation index; the field index
    turns them into the final facet entries.

    Returns:
        The result of `Field_Index.materialize_facets` - presumably
        label / value / count entries; confirm against that method.
    """

    per_field = Citation_Index.count_facets("field_id")
    facets = Field_Index.materialize_facets(per_field)
    return facets
def institution_facets():
    """
    Materialize institution facets with counts.

    The per-institution counts come from the citation index; the
    institution index turns them into the final facet entries.

    Returns:
        The result of `Institution_Index.materialize_institution_facets` -
        presumably label / value / count entries; confirm against that
        method.
    """

    per_institution = Citation_Index.count_facets("institution_id")
    facets = Institution_Index.materialize_institution_facets(
        per_institution
    )
    return facets
def test_filter_subfield(
    add_text,
    add_citation,
    add_subfield,
    add_subfield_document,
):
    """
    Ranking with a `subfield_id` filter should only count citations whose
    documents are linked to that subfield.
    """

    first_text, second_text, third_text = [add_text() for _ in range(3)]
    wanted, other = [add_subfield() for _ in range(2)]

    # (text, subfield linked to each citation's document, citation count)
    setup = [
        (first_text, wanted, 3),
        (second_text, wanted, 2),
        (third_text, other, 1),
    ]

    for text, subfield, count in setup:
        for _ in range(count):
            citation = add_citation(text=text)
            add_subfield_document(
                subfield=subfield,
                document=citation.document,
            )

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking({'subfield_id': wanted.id})

    # The third text is only cited under the other subfield, so it is absent.
    expected = {
        str(first_text.id): 3,
        str(second_text.id): 2,
    }

    assert ranks == expected
def test_index_field_refs(add_citation, add_subfield, add_subfield_document):

    """
    When the document is linked with a subfield, subfield / field references
    should be included in the document.
    """

    citation = add_citation()
    subfield = add_subfield()

    # Link the subfield to the citation's document.
    add_subfield_document(subfield=subfield, document=citation.document)

    Citation_Index.es_insert()

    # Fetch the indexed citation by id from the 'citation' index.
    doc = config.es.get(
        index='citation',
        id=citation.id,
    )

    # Both the subfield and its parent field id should be stored.
    assert doc['_source']['subfield_id'] == subfield.id
    assert doc['_source']['field_id'] == subfield.field_id