Beispiel #1
0
def test_ignore_filters_with_empty_values(empty, add_text, add_citation):
    """
    Ignore filters with empty values.
    """

    t1 = add_text(corpus='corpus1')
    t2 = add_text(corpus='corpus2')
    t3 = add_text(corpus='corpus3')

    for i in range(3):
        add_citation(text=t1)

    for i in range(2):
        add_citation(text=t2)

    for i in range(1):
        add_citation(text=t3)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking(dict(corpus=empty))

    assert ranks == {
        str(t1.id): 3,
        str(t2.id): 2,
        str(t3.id): 1,
    }
Beispiel #2
0
def test_filter_multiple_values(add_text, add_citation):
    """
    When a list of values is provided for a filter key, match citations that
    include _any_ of the provided values for the key.
    """

    t1 = add_text(corpus='corpus1')
    t2 = add_text(corpus='corpus2')
    t3 = add_text(corpus='corpus3')

    for i in range(3):
        add_citation(text=t1)

    for i in range(2):
        add_citation(text=t2)

    for i in range(1):
        add_citation(text=t3)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking(dict(corpus=['corpus1', 'corpus3']))

    # Count both `corpus1` and `corpus3` citations.
    assert ranks == {
        str(t1.id): 3,
        str(t3.id): 1,
    }
Beispiel #3
0
def test_filter_country(add_text, add_citation, add_institution):
    """
    Filter on country as a keyword value.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    i1 = add_institution(country='USA')
    i2 = add_institution(country='CAN')

    for i in range(3):
        c = add_citation(text=t1)
        Institution_Document.create(institution=i1, document=c.document)

    for i in range(2):
        c = add_citation(text=t2)
        Institution_Document.create(institution=i1, document=c.document)

    for i in range(1):
        c = add_citation(text=t3)
        Institution_Document.create(institution=i2, document=c.document)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking(dict(country='USA'))

    assert ranks == {
        str(t1.id): 3,
        str(t2.id): 2,
    }
Beispiel #4
0
def test_filter_institution(add_text, add_citation, add_institution):
    """
    Filter by institution.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    i1 = add_institution()
    i2 = add_institution()

    for i in range(3):
        c = add_citation(text=t1)
        Institution_Document.create(institution=i1, document=c.document)

    for i in range(2):
        c = add_citation(text=t2)
        Institution_Document.create(institution=i1, document=c.document)

    for i in range(1):
        c = add_citation(text=t3)
        Institution_Document.create(institution=i2, document=c.document)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking(dict(institution_id=i1.id))

    assert ranks == {
        str(t1.id): 3,
        str(t2.id): 2,
    }
Beispiel #5
0
def test_filter_corpus(add_text, add_citation):
    """
    Filter on corpus as a keyword value.
    """

    t1 = add_text(corpus='corpus1')
    t2 = add_text(corpus='corpus1')
    t3 = add_text(corpus='corpus2')

    for i in range(3):
        add_citation(text=t1)

    for i in range(2):
        add_citation(text=t2)

    for i in range(1):
        add_citation(text=t3)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking(dict(corpus='corpus1'))

    assert ranks == {
        str(t1.id): 3,
        str(t2.id): 2,
    }
Beispiel #6
0
def test_unfiltered(add_text, add_citation):
    """
    When no filters are provided, return total counts.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    for i in range(3):
        add_citation(text=t1)

    for i in range(2):
        add_citation(text=t2)

    for i in range(1):
        add_citation(text=t3)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking()

    assert ranks == {
        str(t1.id): 3,
        str(t2.id): 2,
        str(t3.id): 1,
    }
Beispiel #7
0
def assigned_with(text_id, size=1000):

    """
    Given a "seed" text, rank other texts assigned on the same syllabi.

    Args:
        text_id (int): The text id.

    Returns:
        dict: Elasticsearch hits.
    """

    # Get syllabi that assign the text.
    doc_ids = Citation_Index.docs_with_text(text_id)

    # Rank texts assigned by those sylalbi.
    ranks = Citation_Index.compute_ranking(dict(
        document_id=doc_ids
    ))

    # Omit the seed text.
    ranks.pop(str(text_id))

    # Materialize the text metadata.
    texts = Text_Index.materialize_ranking(ranks, size=size)

    return texts
def rank_texts(filters={}, query=None, size=1000, page=1):

    """
    Filter and rank texts.

    Args:
        filters (dict): Citation metadata filters.
        query (str): A text metadata search query.
        size (str): Return N results.
        page (str): 1-indexed page offset.

    Returns:
        dict: Elasticsearch hits.
    """

    # Filter citation counts, if non-empty filters.
    if any(filters.values()):
        ranks = Citation_Index.compute_ranking(filters)

    else:
        ranks = None

    # Materialize the text metadata.
    texts = Text_Index.materialize_ranking(ranks, query, size, page)

    return texts
def test_filter_corpus(add_text, add_citation):

    """
    Filter on corpus as a keyword value.
    """

    t1 = add_text(corpus='corpus1')
    t2 = add_text(corpus='corpus1')
    t3 = add_text(corpus='corpus2')

    for i in range(3):
        add_citation(text=t1)

    for i in range(2):
        add_citation(text=t2)

    for i in range(1):
        add_citation(text=t3)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking(dict(
        corpus='corpus1'
    ))

    assert ranks == {
        str(t1.id): 3,
        str(t2.id): 2,
    }
def test_filter_multiple_values(add_text, add_citation):

    """
    When a list of values is provided for a filter key, match citations that
    include _any_ of the provided values for the key.
    """

    t1 = add_text(corpus='corpus1')
    t2 = add_text(corpus='corpus2')
    t3 = add_text(corpus='corpus3')

    for i in range(3):
        add_citation(text=t1)

    for i in range(2):
        add_citation(text=t2)

    for i in range(1):
        add_citation(text=t3)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking(dict(
        corpus=['corpus1', 'corpus3']
    ))

    # Count both `corpus1` and `corpus3` citations.
    assert ranks == {
        str(t1.id): 3,
        str(t3.id): 1,
    }
def test_ignore_filters_with_empty_values(empty, add_text, add_citation):

    """
    Ignore filters with empty values.
    """

    t1 = add_text(corpus='corpus1')
    t2 = add_text(corpus='corpus2')
    t3 = add_text(corpus='corpus3')

    for i in range(3):
        add_citation(text=t1)

    for i in range(2):
        add_citation(text=t2)

    for i in range(1):
        add_citation(text=t3)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking(dict(
        corpus=empty
    ))

    assert ranks == {
        str(t1.id): 3,
        str(t2.id): 2,
        str(t3.id): 1,
    }
def test_filter_country(add_text, add_citation, add_institution):

    """
    Filter on country as a keyword value.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    i1 = add_institution(country='USA')
    i2 = add_institution(country='CAN')

    for i in range(3):
        c = add_citation(text=t1)
        Institution_Document.create(institution=i1, document=c.document)

    for i in range(2):
        c = add_citation(text=t2)
        Institution_Document.create(institution=i1, document=c.document)

    for i in range(1):
        c = add_citation(text=t3)
        Institution_Document.create(institution=i2, document=c.document)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking(dict(
        country='USA'
    ))

    assert ranks == {
        str(t1.id): 3,
        str(t2.id): 2,
    }
def test_filter_institution(add_text, add_citation, add_institution):

    """
    Filter by institution.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    i1 = add_institution()
    i2 = add_institution()

    for i in range(3):
        c = add_citation(text=t1)
        Institution_Document.create(institution=i1, document=c.document)

    for i in range(2):
        c = add_citation(text=t2)
        Institution_Document.create(institution=i1, document=c.document)

    for i in range(1):
        c = add_citation(text=t3)
        Institution_Document.create(institution=i2, document=c.document)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking(dict(
        institution_id=i1.id
    ))

    assert ranks == {
        str(t1.id): 3,
        str(t2.id): 2,
    }
def test_unfiltered(add_text, add_citation):

    """
    When no filters are provided, return total counts.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    for i in range(3):
        add_citation(text=t1)

    for i in range(2):
        add_citation(text=t2)

    for i in range(1):
        add_citation(text=t3)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking()

    assert ranks == {
        str(t1.id): 3,
        str(t2.id): 2,
        str(t3.id): 1,
    }
def assigned_with(text_id, size=200):

    """
    Given a "seed" text, rank other texts assigned on the same syllabi.

    Args:
        text_id (int): The text id.

    Returns:
        dict: Elasticsearch hits.
    """

    # Get syllabi that assign the text.
    doc_ids = Citation_Index.docs_with_text(text_id)

    # Rank texts assigned by those sylalbi.
    ranks = Citation_Index.compute_ranking(dict(
        document_id=doc_ids
    ))

    # Omit the seed text.
    ranks.pop(str(text_id))

    # Materialize the text metadata.
    texts = Text_Index.materialize_ranking(ranks, size=size)

    return texts
def test_filter_subfield(
    add_text,
    add_citation,
    add_subfield,
    add_subfield_document,
):

    """
    Filter by subfield.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    sf1 = add_subfield()
    sf2 = add_subfield()

    for i in range(3):
        c = add_citation(text=t1)
        add_subfield_document(subfield=sf1, document=c.document)

    for i in range(2):
        c = add_citation(text=t2)
        add_subfield_document(subfield=sf1, document=c.document)

    for i in range(1):
        c = add_citation(text=t3)
        add_subfield_document(subfield=sf2, document=c.document)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking(dict(
        subfield_id=sf1.id
    ))

    assert ranks == {
        str(t1.id): 3,
        str(t2.id): 2,
    }
Beispiel #17
0
def test_filter_subfield(
    add_text,
    add_citation,
    add_subfield,
    add_subfield_document,
):
    """
    Filter by subfield.
    """

    t1 = add_text()
    t2 = add_text()
    t3 = add_text()

    sf1 = add_subfield()
    sf2 = add_subfield()

    for i in range(3):
        c = add_citation(text=t1)
        add_subfield_document(subfield=sf1, document=c.document)

    for i in range(2):
        c = add_citation(text=t2)
        add_subfield_document(subfield=sf1, document=c.document)

    for i in range(1):
        c = add_citation(text=t3)
        add_subfield_document(subfield=sf2, document=c.document)

    Citation_Index.es_insert()

    ranks = Citation_Index.compute_ranking(dict(subfield_id=sf1.id))

    assert ranks == {
        str(t1.id): 3,
        str(t2.id): 2,
    }