Python Context.fetch_htmlの例

プログラミング言語: Python

名前空間/パッケージ名: opensanctions.core

クラス/型: Context

メソッド/関数: fetch_html

hotexamples.comのコード掲載数: 3

Python Context.fetch_html - 3件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのopensanctions.core.Context.fetch_htmlの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

make(30)

make_slug(30)

export_resource(30)

fetch_resource(30)

make_id(27)

emit(22)

parse_resource_xml(13)

Context(11)

fetch_json(9)

lookup_value(9)

lookup(6)

normalize(3)

fetch_html(3)

get_resource_path(3)

pprint(2)

export(2)

close(2)

bind(1)

enrich(1)

fetch_response(1)

コード例 #1

ファイルを表示

def crawl_index(context: Context):
    """Return the first ``.xml`` link found on the dataset index page.

    Fetches ``context.dataset.url`` as HTML (sending ``settings.RUN_DATE``
    as the ``_`` query parameter) and scans the anchors inside the
    ``sked-view`` container.

    Returns the ``href`` value exactly as it appears in the page, or
    ``None`` when no anchor points at an ``.xml`` resource.
    """
    params = {"_": settings.RUN_DATE}
    doc = context.fetch_html(context.dataset.url, params=params)
    for link in doc.findall(".//div[@class='sked-view']//a"):
        href = link.get("href")
        # Anchors without an href attribute yield None; skip them instead
        # of failing on None.endswith().
        if href is not None and href.endswith(".xml"):
            return href
    return None

コード例 #2

ファイルを表示

ファイル: eu_cor_members.py プロジェクト: nightsh/opennames

def crawl(context: Context):
    """Crawl the member listing page and process each distinct profile.

    Fetches the HTML page at ``context.dataset.data.url``, collects the
    ``_fullname`` links inside the ``people`` container, strips any query
    string so that each profile URL is visited only once, and passes the
    link text and the cleaned URL to ``crawl_person``.
    """
    base_url = context.dataset.data.url
    doc = context.fetch_html(base_url)

    seen = set()
    for link in doc.findall(
            './/div[@class="people"]//li//a[@class="_fullname"]'):
        url = urljoin(base_url, link.get("href"))
        # partition() is safe when the URL carries no "?"; unpacking the
        # result of split("?", 1) into two names raised ValueError there.
        url, _, _ = url.partition("?")
        if url in seen:
            continue
        seen.add(url)
        crawl_person(context, link.text, url)

コード例 #3

ファイルを表示

ファイル: eu_cor_members.py プロジェクト: nightsh/opennames

def crawl_person(context: Context, name, url):
    """Scrape one member profile page and emit the resulting entities.

    Args:
        context: Crawler context used to fetch the page, build entities,
            log and emit.
        name: Member name as shown in the listing, expected in the form
            ``"Last, First"``.
        url: Profile page URL; its last path segment is used to build the
            person's id.

    Emits a ``Person`` (as target) and, for every "Political group:" row,
    an ``Organization`` plus a ``Membership`` linking the two.

    Raises:
        ValueError: if ``name`` does not contain ``", "``.
    """
    context.log.debug("Crawling member", name=name, url=url)
    doc = context.fetch_html(url)
    _, person_id = url.rsplit("/", 1)
    person = context.make("Person")
    person.id = context.make_slug(person_id)
    person.add("sourceUrl", url)
    person.add("name", name)
    person.add("topics", "role.pep")

    last_name, first_name = name.split(", ", 1)
    person.add("firstName", first_name)
    person.add("lastName", last_name)

    # Address parts are collected across several rows and assembled after
    # the loop; keys are the row labels without the trailing colon.
    address = {}
    details = doc.find('.//div[@class="regular-details"]')
    for row in details.findall('.//ul[@class="no-bullet"]/li'):
        # list(row) replaces the deprecated getchildren(); an empty <li>
        # has no label element to inspect, so skip it.
        children = list(row)
        if not children:
            continue
        title = children[0]
        # The row label is the first child's text; icon-only labels have
        # no text, so fall back to the element's class attribute.
        title_text = collapse_spaces(stringify(title.text_content()))
        title_text = title_text or title.get("class")
        if title_text is None:
            # Neither text nor class: nothing to dispatch on, and the
            # substring checks below would fail on None.
            continue
        # The row value is the text that follows the label element.
        value = collapse_spaces(title.tail)
        if title_text in ("Full name:", "Address:",
                          "Declaration of interests"):
            # ignore these.
            continue
        if title_text == "Emails:":
            emails = [e.text for e in row.findall(".//a")]
            person.add("email", emails)
            continue
        if "glyphicon-phone" in title_text:
            person.add("phone", value.split(","))
            continue
        if "fa-fax" in title_text:
            # TODO: yeah, no
            # person.add("phone", value)
            continue
        if title_text in ("Web sites:", "list-inline"):
            sites = [e.get("href") for e in row.findall(".//a")]
            person.add("website", sites)
            continue
        if title_text == "Represented Country:":
            person.add("country", value)
            continue
        if title_text == "Languages:":
            # TODO: missing in FtM
            # person.add("languages", value.split(','))
            continue
        if "Regions since:" in title_text:
            date = h.parse_date(value, FORMATS)
            person.add("createdAt", date)
            continue
        if "Date of birth:" in title_text:
            person.add("birthDate", h.parse_date(value, FORMATS))
            continue
        if "Commissions:" in title_text:
            for com in row.findall(".//li"):
                text = collapse_spaces(com.text_content())
                # Keep only the part after the "Mandate - " prefix.
                sep = "Mandate - "
                if sep in text:
                    _, text = text.split(sep, 1)
                person.add("sector", text)
            continue
        if "Areas of interest:" in title_text:
            for area in row.findall(".//li"):
                person.add("keywords", area.text_content())
            continue
        if title.tag == "i" and value is None:
            person.add("position", title_text)
            continue
        # Exact match: `in ("Country:")` was a substring test against a
        # plain string (no tuple), so partial labels also matched.
        if title_text == "Country:":
            # No `continue`: the country is also stored as an address part.
            person.add("country", value)
        if title_text in ("Street:", "Postal code:", "City:", "Country:"):
            address[title_text.replace(":", "")] = value
            continue
        if title_text == "Political group:":
            group = context.make("Organization")
            group.add("name", value)
            # Build the group id from the text after the last "(" when
            # the name contains one, otherwise from the whole name.
            slug = value
            if "(" in slug:
                _, slug = slug.rsplit("(", 1)
            slug = slugify(slug, sep="-")
            group.id = f"eu-cor-group-{slug}"
            context.emit(group)
            member = context.make("Membership")
            member.id = context.make_id("Membership", person.id, group.id)
            member.add("member", person)
            member.add("organization", group)
            context.emit(member)
            continue

    address = h.make_address(
        context,
        street=address.get("Street"),
        city=address.get("City"),
        # Key must match the label stored above ("Postal code:" minus the
        # colon); the previous "Posal code" typo always yielded None.
        postal_code=address.get("Postal code"),
        country=address.get("Country"),
    )
    h.apply_address(context, person, address)
    context.emit(person, target=True)