Exemple #1
0
def scrape_grants_for_fy(year):
    """Yield one dict per grant listed for the financial year starting in *year*.

    Opens ``PAST_GRANTS_URL``, fills in ``Form1`` with the date range
    1 April *year* .. 31 March *year* + 1, submits it and walks every table
    row of the response that does not carry the ``GridHeader`` class.

    Each yielded dict has the keys ``id``, ``title``, ``pi``,
    ``organisation``, ``department`` and ``value``; ``pi``, ``organisation``
    and ``department`` are themselves ``{'id': ..., 'name': ...}`` dicts.

    Raises:
        mechanize._form.ItemNotFoundError: re-raised (after a hint has been
            printed to stderr) when the form cannot be filled in/submitted.
    """
    b.open(PAST_GRANTS_URL)

    try:
        b.select_form(name="Form1")

        # Range start: 1 April of the requested year.
        b["oUcStartDate$ddlDay"] = ["1"]
        b["oUcStartDate$ddlMonth"] = ["4"]
        b["oUcStartDate$ddlYear"] = [str(year)]

        # Range end: 31 March of the following year.
        b["oUcEndDate$ddlDay"] = ["31"]
        b["oUcEndDate$ddlMonth"] = ["3"]
        b["oUcEndDate$ddlYear"] = [str(year + 1)]

        response = b.submit()
    except mechanize._form.ItemNotFoundError:
        message = ("ERROR: could not submit form. This usually means you're "
                   "trying to scrape for a year that doesn't exist "
                   "on the GOTW website.")
        print(message, file=sys.stderr)
        raise

    def linked_entity(link, kind):
        # Build the {'id', 'name'} pair for a link to a Person/Organisation/...
        return {
            'id': util.extract_id(link.attr.href, kind),
            'name': link.text(),
        }

    document = PyQuery(response.read())

    for tr in document("table tr:not(.GridHeader)"):
        row = PyQuery(tr)
        links = row.find('a')
        grant_link = links.eq(0)

        # Anchors appear in a fixed order: grant, PI, organisation, department.
        # The monetary value is read from the title of the row's first <span>.
        yield {
            'id': grant_link.attr.title,
            'title': grant_link.text(),
            'pi': linked_entity(links.eq(1), 'Person'),
            'organisation': linked_entity(links.eq(2), 'Organisation'),
            'department': linked_entity(links.eq(3), 'Department'),
            'value': util.extract_monetary_value(
                row.find('span').eq(0).attr.title
            ),
        }
Exemple #2
0
def _extract_multiple_ids(elem, type):
    """Return an ``{"id", "name"}`` dict for every ``<a>`` found under *elem*.

    The id is obtained by passing the anchor's ``href`` and *type* to
    ``util.extract_id``; the name is the anchor's text. The result list
    follows the order in which ``elem.find("a")`` yields the anchors.
    """
    def describe(anchor):
        link = PyQuery(anchor)
        return {
            "id": util.extract_id(link.attr.href, type),
            "name": link.text(),
        }

    return [describe(anchor) for anchor in elem.find("a")]
Exemple #3
0
def _scrape_pi(g, el):
    """Store the principal investigator's id and name under ``g['pi']``.

    The investigator link is the first anchor that immediately follows
    ``a#hlPrincipalInvestigator`` inside *el*.
    """
    investigator_link = el.find('a#hlPrincipalInvestigator + a').eq(0)

    # Attach the (still empty) record first, then fill it in place.
    record = {}
    g['pi'] = record
    record['id'] = util.extract_id(investigator_link.attr.href, 'Person')
    record['name'] = investigator_link.text()
Exemple #4
0
def _scrape_pi(g, el):
    """Fill ``g["pi"]`` with the id and name of the principal investigator.

    Looks up the first anchor directly after ``a#hlPrincipalInvestigator``
    within *el*; its ``href`` is handed to ``util.extract_id`` and its text
    becomes the name.
    """
    anchor = el.find("a#hlPrincipalInvestigator + a").eq(0)

    # The entry is created up front and populated key by key.
    g["pi"] = {}
    g["pi"]["id"] = util.extract_id(anchor.attr.href, "Person")
    g["pi"]["name"] = anchor.text()
Exemple #5
0
def _scrape_departments(o, el):
    """Collect department links from the details table into ``o["departments"]``.

    ``o["departments"]`` is always (re)set to a fresh list. Every anchor
    matched by ``table#dgDetails tr td a`` under *el* whose ``href`` starts
    with ``NGBOViewDepartment.aspx?DepartmentId=`` contributes one
    ``{"id", "name"}`` dict, where the id comes from ``util.extract_id`` and
    the name is the anchor's text. Other anchors are ignored.
    """
    department_prefix = "NGBOViewDepartment.aspx?DepartmentId="
    o["departments"] = departments = []

    for anchor in el.find("table#dgDetails tr td a"):
        link = PyQuery(anchor)
        href = link.attr.href
        # An <a> without an href attribute yields None here; skip it instead
        # of crashing on None.startswith(...).
        if not href or not href.startswith(department_prefix):
            continue
        departments.append({
            "id": util.extract_id(href, "Department"),
            "name": link.text(),
        })