Example #1
0
def parse(context, data):
    """Fetch each configured list URL and pass its SDN entries to ``parse_entry``.

    The URLs are read from ``context.params['urls']``. For every URL the
    response's ``xml`` document is run through ``remove_namespace`` for the
    ``sdnList.xsd`` namespace, the ``Publish_Date`` text is converted with
    ``parse_date``/``stringify``, and every ``sdnEntry`` element is forwarded
    along with the URL and that date.

    ``data`` is accepted but not read by this function.
    """
    for source_url in context.params.get('urls'):
        document = context.http.get(source_url).xml
        remove_namespace(document, 'http://tempuri.org/sdnList.xsd')

        # The date is parsed with a month/day/year format hint.
        publish_date = parse_date(
            document.findtext('.//Publish_Date'),
            format_hint='%m/%d/%Y',
        )
        updated_at = stringify(publish_date)

        for sdn_entry in document.findall('.//sdnEntry'):
            parse_entry(context, sdn_entry, source_url, updated_at)
Example #2
0
def crawl(context: Context):
    """Fetch the dataset's XML source and parse each ``sanctionEntity`` in it.

    The file is fetched as ``source.xml`` from ``context.dataset.data.url``,
    exported as a ``text/xml`` resource, parsed, and run through
    ``h.remove_namespace`` before the entries are handed to ``parse_entry``.
    """
    source_path = context.fetch_resource("source.xml", context.dataset.data.url)
    context.export_resource(source_path, "text/xml", title=context.SOURCE_TITLE)

    tree = h.remove_namespace(context.parse_resource_xml(source_path))
    for sanction_entity in tree.findall(".//sanctionEntity"):
        parse_entry(context, sanction_entity)
Example #3
0
def crawl(context: Context):
    """Fetch the dataset's XML source and parse each ``FinancialSanctionsTarget``.

    The file is fetched as ``source.xml`` from ``context.dataset.data.url``,
    exported with the ``XML`` mime type constant, parsed, and run through
    ``h.remove_namespace``. Each target element is converted with
    ``make_row`` and the result is passed to ``parse_row``.
    """
    source_path = context.fetch_resource("source.xml", context.dataset.data.url)
    context.export_resource(source_path, XML, title=context.SOURCE_TITLE)
    tree = h.remove_namespace(context.parse_resource_xml(source_path))

    targets = tree.findall(".//FinancialSanctionsTarget")
    for target in targets:
        row = make_row(target)
        parse_row(context, row)
Example #4
0
def crawl(context: Context):
    """Fetch the dataset's zip archive and parse every XML member inside it.

    The archive is fetched as ``source.zip`` from ``context.dataset.data.url``
    and exported as an ``application/zip`` resource. Each member whose name
    ends in ``.xml`` is parsed with ``etree.parse``, run through
    ``h.remove_namespace``, and its ``sanctionEntity`` elements are handed to
    ``parse_entry``.
    """
    archive_path = context.fetch_resource("source.zip", context.dataset.data.url)
    context.export_resource(
        archive_path, "application/zip", title=context.SOURCE_TITLE
    )
    with ZipFile(archive_path, "r") as archive:
        for member in archive.namelist():
            # Only XML members are of interest; skip everything else.
            if not member.endswith(".xml"):
                continue
            with archive.open(member) as handle:
                tree = h.remove_namespace(etree.parse(handle))
                for sanction_entity in tree.findall(".//sanctionEntity"):
                    parse_entry(context, sanction_entity)
Example #5
0
def crawl(context: Context):
    """Fetch the travel-ban index and parse every XML file it lists.

    The JSON index is fetched from ``context.dataset.data.url``. Each item
    under ``data.travelBansFiles`` whose ``fileName`` ends in ``.xml`` is
    fetched via ``URL % id``, exported as a ``text/xml`` resource, parsed and
    run through ``h.remove_namespace``. ``sanctionEntity`` elements without a
    ``subjectType`` child go to ``salvage_entity``; all others go to
    ``parse_entry``.

    A missing or null ``data`` / ``travelBansFiles`` / ``fileName`` is treated
    as "nothing to process" rather than raising.
    """
    data = context.fetch_json(context.dataset.data.url)
    # ``or`` also covers keys that are present but null in the JSON payload.
    bans = (data.get("data") or {}).get("travelBansFiles") or []
    for ban in bans:
        file_name = ban.get("fileName") or ""
        if not file_name.endswith(".xml"):
            continue
        data_url = URL % ban.get("id")
        # NOTE(review): every file is stored under the same "source.xml" name;
        # confirm fetch_resource re-downloads instead of reusing an earlier file.
        path = context.fetch_resource("source.xml", data_url)
        context.export_resource(path, "text/xml", title=context.SOURCE_TITLE)
        doc = context.parse_resource_xml(path)
        doc = h.remove_namespace(doc)
        for entry in doc.findall(".//sanctionEntity"):
            subject_type = entry.find("./subjectType")
            if subject_type is None:
                # No subject type to dispatch on: hand over to the salvage path.
                salvage_entity(context, entry)
                continue
            parse_entry(context, entry)
Example #6
0
def crawl(context: Context):
    """Fetch the dataset's XML source and process parties, entries and relations.

    After fetching, exporting and parsing ``source.xml`` (with
    ``h.remove_namespace`` applied), the function loads reference values,
    locations and ID registration documents from the document, builds a
    mapping of party id to parsed party from every ``DistinctParty``, and
    then passes each ``SanctionsEntry`` and each ``ProfileRelationship`` to
    its parser together with that mapping.
    """
    source_path = context.fetch_resource("source.xml", context.dataset.data.url)
    context.export_resource(source_path, "text/xml", title=context.SOURCE_TITLE)
    tree = h.remove_namespace(context.parse_resource_xml(source_path))

    # Lookup data is loaded first, in this order, before any party is parsed.
    context.log.info("Loading reference values...")
    load_ref_values(tree)
    context.log.info("Loading locations...")
    locations = load_locations(context, tree)
    context.log.info("Loading ID reg documents...")
    documents = load_documents(tree)

    parsed_parties = (
        parse_party(context, party_el, locations, documents)
        for party_el in tree.findall(".//DistinctParty")
    )
    parties = {party.id: party for party in parsed_parties}

    for sanctions_entry in tree.findall(".//SanctionsEntry"):
        parse_entry(context, sanctions_entry, parties)

    for relationship in tree.findall(".//ProfileRelationship"):
        parse_relation(context, relationship, parties)
def crawl(context: Context):
    """Emit one ``Organization`` entity per top-level ``entry`` element.

    The source is fetched as ``source.xml`` from ``context.dataset.data.url``,
    exported with the ``XML`` mime type constant, parsed and run through
    ``h.remove_namespace``. For each ``./entry`` the title becomes the name,
    the id and name form the slug, and link, summary, content, published and
    updated fill ``sourceUrl``, ``alias``, ``notes``, ``createdAt`` and
    ``modifiedAt``. Every entity is tagged ``crime.terror`` and emitted as a
    target.

    Entries without a ``link`` or ``summary`` element are still emitted; the
    corresponding property is simply left unset.
    """
    path = context.fetch_resource("source.xml", context.dataset.data.url)
    context.export_resource(path, XML, title=context.SOURCE_TITLE)
    doc = context.parse_resource_xml(path)
    doc = h.remove_namespace(doc)
    for node in doc.findall("./entry"):
        entity = context.make("Organization")
        name = node.findtext("./title")
        entity.id = context.make_slug(node.findtext("./id"), name)
        entity.add("name", name)

        # find() yields None when the entry has no <link>; don't dereference it.
        link = node.find("./link")
        if link is not None:
            entity.add("sourceUrl", link.get("href"))

        # findtext() yields None when <summary> is absent; "N/A" is the
        # source's placeholder for "no aliases".
        aliases = node.findtext("./summary")
        if aliases is not None and aliases != "N/A":
            entity.add("alias", aliases.split(", "))

        entity.add("notes", node.findtext("./content"))
        entity.add("createdAt", node.findtext("./published"))
        entity.add("modifiedAt", node.findtext("./updated"))
        entity.add("topics", "crime.terror")

        context.emit(entity, target=True)