Example #1
0
def test_institution_counts(models):

    """
    Document_Institution.institution_counts() should provide syllabus counts
    for each institution id.
    """

    i1 = Institution.create()
    i2 = Institution.create()
    i3 = Institution.create()

    d1 = Document.create(path='d1')
    d2 = Document.create(path='d2')
    d3 = Document.create(path='d3')
    d4 = Document.create(path='d4')
    d5 = Document.create(path='d5')
    d6 = Document.create(path='d6')

    # 1 document for institution 1.
    Document_Institution.create(institution=i1, document=d1)

    # 2 documents for institution 2.
    Document_Institution.create(institution=i2, document=d2)
    Document_Institution.create(institution=i2, document=d3)

    # 3 documents for institution 3.
    Document_Institution.create(institution=i3, document=d4)
    Document_Institution.create(institution=i3, document=d5)
    Document_Institution.create(institution=i3, document=d6)

    # The counts are keyed by institution id, so the expectation must use the
    # institution rows. Keying by document ids would only pass when both
    # tables happen to hand out the same ids.
    assert Document_Institution.institution_counts() == {
        i1.id: 1,
        i2.id: 2,
        i3.id: 3,
    }
Example #2
0
File: inst.py Project: overview/osp
def insert_institutions():

    """
    Populate the institution table.

    Delegates to Institution.insert_institutions(), which does the actual
    row insertion.
    """

    Institution.insert_institutions()
Example #3
0
def cited(out_file):

    """
    Write a CSV of institutions with their citation counts.

    Each row carries the institution id, its citation count, and its name;
    rows are ordered from the highest count to the lowest.

    Args:
        out_file: The handle the CSV is written to.
    """

    writer = csv.DictWriter(out_file, ["id", "count", "name"])
    writer.writeheader()

    # Aggregate: number of citation rows per institution.
    citation_count = fn.Count(HLOM_Citation.id)

    # Citations are reached through the document they share with the
    # document -> institution link table.
    same_document = (
        Document_Institution.document == HLOM_Citation.document
    )

    query = Institution.select(Institution, citation_count)
    query = query.join(Document_Institution)
    query = query.join(HLOM_Citation, on=same_document)
    query = query.group_by(Institution.id)
    query = query.order_by(citation_count.desc())

    for row in query.naive():
        writer.writerow({
            "id": row.id,
            "count": row.count,
            "name": row.metadata["Institution_Name"],
        })
Example #4
0
File: inst.py Project: overview/osp
def queue_geocode():

    """
    Enqueue one geocoding job per institution.

    Every institution row is selected and its id is handed to the `geocode`
    task through the `config.rq` queue.
    """

    for institution in Institution.select():
        config.rq.enqueue(geocode, institution.id)
Example #5
0
def test_state_abbreviations(add_hlom, add_doc):

    """
    HLOM_Citation.index_institutions() should denormalize state
    abbreviations onto the citation rows.
    """

    t1 = add_hlom()
    t2 = add_hlom()
    t3 = add_hlom()

    s1 = add_doc('syllabus1')
    s2 = add_doc('syllabus2')
    s3 = add_doc('syllabus3')

    c1 = HLOM_Citation.create(document=s1, record=t1)
    c2 = HLOM_Citation.create(document=s2, record=t2)
    c3 = HLOM_Citation.create(document=s3, record=t3)

    # Create institutions with states.
    AL = Institution.create(metadata={'Institution_State': 'AL'})
    CT = Institution.create(metadata={'Institution_State': 'CT'})
    CA = Institution.create(metadata={'Institution_State': 'CA'})

    # Link documents -> institutions. The link must reference the syllabus
    # documents the citations point at (s1..s3), not the HLOM records.
    Document_Institution.create(document=s1, institution=AL)
    Document_Institution.create(document=s2, institution=CT)
    Document_Institution.create(document=s3, institution=CA)

    HLOM_Citation.index_institutions()

    # Re-fetch the citations so the assertions see the indexed values.
    c1 = HLOM_Citation.reload(c1)
    c2 = HLOM_Citation.reload(c2)
    c3 = HLOM_Citation.reload(c3)

    assert c1.state == 'AL'
    assert c2.state == 'CT'
    assert c3.state == 'CA'
Example #6
0
def test_institution_ids(add_hlom, add_doc):

    """
    HLOM_Citation.index_institutions() should denormalize institution ids
    onto the citation rows.
    """

    t1 = add_hlom()
    t2 = add_hlom()
    t3 = add_hlom()

    s1 = add_doc('syllabus1')
    s2 = add_doc('syllabus2')
    s3 = add_doc('syllabus3')

    c1 = HLOM_Citation.create(document=s1, record=t1)
    c2 = HLOM_Citation.create(document=s2, record=t2)
    c3 = HLOM_Citation.create(document=s3, record=t3)

    i1 = Institution.create()
    i2 = Institution.create()
    i3 = Institution.create()

    # Link documents -> institutions. The link must reference the syllabus
    # documents the citations point at (s1..s3), not the HLOM records.
    Document_Institution.create(document=s1, institution=i1)
    Document_Institution.create(document=s2, institution=i2)
    Document_Institution.create(document=s3, institution=i3)

    HLOM_Citation.index_institutions()

    # Re-fetch the citations so the assertions see the indexed values.
    c1 = HLOM_Citation.reload(c1)
    c2 = HLOM_Citation.reload(c2)
    c3 = HLOM_Citation.reload(c3)

    assert c1.institution == i1
    assert c2.institution == i2
    assert c3.institution == i3
Example #7
0
def geocode(id):

    """
    Look up coordinates for one institution and store them in its metadata.

    When the geocoder returns no match the institution is left unchanged.

    Args:
        id (int): The institution id.
    """

    geocoder = OpenMapQuest(config['mapquest']['api_key'])
    institution = Institution.get(Institution.id == id)

    match = geocoder.geocode(institution.geocoding_query, timeout=10)

    # Nothing to record when the lookup comes back empty.
    if not match:
        return

    institution.metadata['Latitude'] = match.latitude
    institution.metadata['Longitude'] = match.longitude
    institution.save()
Example #8
0
def lonlats(out_file):

    """
    Write a CSV of institution names with longitude and latitude.

    Only institutions whose metadata contains "Latitude" are exported.

    Args:
        out_file: The handle the CSV is written to.
    """

    fieldnames = ["name", "longitude", "latitude"]
    writer = csv.DictWriter(out_file, fieldnames)
    writer.writeheader()

    # Restrict the export to rows that carry a "Latitude" entry.
    has_latitude = Institution.metadata.contains("Latitude")

    for institution in Institution.select().where(has_latitude):
        meta = institution.metadata
        row = {
            "name": meta["Institution_Name"],
            "longitude": meta["Longitude"],
            "latitude": meta["Latitude"],
        }
        writer.writerow(row)