def _serialize(self, obj):
    """Expand an entity search result into a full API response object."""
    entity_id = obj.get("id")
    collection_id = obj.pop("collection_id", None)
    obj["collection"] = self.resolve(
        Collection, collection_id, CollectionSerializer
    )
    proxy = model.get_proxy(obj)
    properties = obj.get("properties", {})
    # Replace entity-typed property values with nested serialized entities,
    # keeping the raw ID when the referenced entity cannot be resolved.
    for prop in proxy.iterprops():
        if prop.type != registry.entity:
            continue
        raw_values = ensure_list(properties.get(prop.name))
        expanded = []
        for raw in raw_values:
            nested = self.resolve(Entity, raw, EntitySerializer)
            expanded.append(nested or raw)
        properties[prop.name] = expanded
    links = {
        "self": url_for("entities_api.view", entity_id=entity_id),
        "references": url_for("entities_api.references", entity_id=entity_id),
        "tags": url_for("entities_api.tags", entity_id=entity_id),
        "ui": entity_url(entity_id),
    }
    # Documents additionally expose download links for the original file
    # and any derived PDF/CSV versions held in the archive.
    if proxy.schema.is_a(Document.SCHEMA):
        content_hash = first(properties.get("contentHash"))
        if content_hash:
            links["file"] = archive_url(
                content_hash,
                file_name=entity_filename(proxy),
                mime_type=first(properties.get("mimeType")),
                expire=request.authz.expire,
            )
        pdf_hash = first(properties.get("pdfHash"))
        if pdf_hash:
            links["pdf"] = archive_url(
                pdf_hash,
                file_name=entity_filename(proxy, extension="pdf"),
                mime_type=PDF,
                expire=request.authz.expire,
            )
        csv_hash = first(properties.get("csvHash"))
        if csv_hash:
            links["csv"] = archive_url(
                csv_hash,
                file_name=entity_filename(proxy, extension="csv"),
                mime_type=CSV,
                expire=request.authz.expire,
            )
    obj["links"] = links
    obj["latinized"] = transliterate_values(proxy)
    obj["writeable"] = check_write_entity(obj, request.authz)
    obj["shallow"] = obj.get("shallow", True)
    return obj
def _serialize(self, obj):
    """Prepare an entity document for the API response."""
    pk = obj.get('id')
    obj['id'] = str(pk)
    authz = request.authz
    collection_id = obj.pop('collection_id', None)
    obj['collection'] = self.resolve(Collection, collection_id,
                                     CollectionSerializer)
    schema = model.get(obj.get('schema'))
    # An entity with an unknown schema cannot be rendered at all.
    if schema is None:
        return None
    obj['schemata'] = schema.names
    properties = obj.get('properties', {})
    # Nest serialized entities in place of entity-typed property values.
    for prop in schema.properties.values():
        if prop.type != registry.entity:
            continue
        raw_values = ensure_list(properties.get(prop.name))
        nested = []
        for key in raw_values:
            nested.append(self.resolve(Entity, key, EntitySerializer))
        properties[prop.name] = nested
    links = {
        'self': url_for('entities_api.view', entity_id=pk),
        'references': url_for('entities_api.references', entity_id=pk),
        'tags': url_for('entities_api.tags', entity_id=pk),
        'ui': entity_url(pk)
    }
    # Documents additionally expose content and archive download links.
    if schema.is_a(Document.SCHEMA):
        links['content'] = url_for('entities_api.content', entity_id=pk)
        file_name = first(properties.get('fileName'))
        content_hash = first(properties.get('contentHash'))
        if content_hash:
            links['file'] = archive_url(
                request.authz.id, content_hash,
                file_name=safe_filename(file_name, default=pk),
                mime_type=first(properties.get('mimeType')))
        pdf_hash = first(properties.get('pdfHash'))
        if pdf_hash:
            links['pdf'] = archive_url(
                request.authz.id, pdf_hash,
                file_name=safe_filename(file_name, default=pk,
                                        extension='.pdf'),
                mime_type=PDF)
        csv_hash = first(properties.get('csvHash'))
        if csv_hash:
            links['csv'] = archive_url(
                request.authz.id, csv_hash,
                file_name=safe_filename(file_name, default=pk,
                                        extension='.csv'),
                mime_type=CSV)
    obj['links'] = links
    obj['writeable'] = authz.can(collection_id, authz.WRITE)
    obj.pop('_index', None)
    return self._clean_response(obj)
def _serialize(self, obj):
    """Prepare an entity for the API response."""
    pk = obj.get('id')
    collection_id = obj.pop('collection_id', None)
    obj['collection'] = self.resolve(Collection, collection_id,
                                     CollectionSerializer)
    proxy = model.get_proxy(obj)
    obj['schemata'] = proxy.schema.names
    properties = obj.get('properties', {})
    # Replace entity-typed property values with nested serialized
    # entities, keeping the raw ID when resolution fails.
    for prop in proxy.iterprops():
        if prop.type != registry.entity:
            continue
        raw_values = ensure_list(properties.get(prop.name))
        expanded = []
        for value in raw_values:
            nested = self.resolve(Entity, value, EntitySerializer)
            expanded.append(nested or value)
        properties[prop.name] = expanded
    links = {
        'self': url_for('entities_api.view', entity_id=pk),
        'references': url_for('entities_api.references', entity_id=pk),
        'tags': url_for('entities_api.tags', entity_id=pk),
        'ui': entity_url(pk)
    }
    # Documents additionally expose content and archive download links.
    if proxy.schema.is_a(Document.SCHEMA):
        links['content'] = url_for('entities_api.content', entity_id=pk)
        content_hash = first(properties.get('contentHash'))
        if content_hash:
            links['file'] = archive_url(
                request.authz.id, content_hash,
                file_name=entity_filename(proxy),
                mime_type=first(properties.get('mimeType')))
        pdf_hash = first(properties.get('pdfHash'))
        if pdf_hash:
            links['pdf'] = archive_url(
                request.authz.id, pdf_hash,
                file_name=entity_filename(proxy, extension='pdf'),
                mime_type=PDF)
        csv_hash = first(properties.get('csvHash'))
        if csv_hash:
            links['csv'] = archive_url(
                request.authz.id, csv_hash,
                file_name=entity_filename(proxy, extension='csv'),
                mime_type=CSV)
    obj['links'] = links
    obj['writeable'] = request.authz.can(collection_id, request.authz.WRITE)
    return obj
def format_proxy(proxy, collection):
    """Apply final denormalisations to the index."""
    # Abstract entities can appear when profile fragments for a missing
    # entity are present; they cannot be indexed.
    if proxy.schema.abstract:
        return None
    data = proxy.to_full_dict()
    data["schemata"] = list(proxy.schema.names)
    data["caption"] = proxy.caption
    names = data.get("names", [])
    fps = {fingerprints.generate(name) for name in names}
    fps.update(names)
    data["fingerprints"] = [fp for fp in fps if fp is not None]
    # Slight hack: a magic property in followthemoney that gets taken out
    # of the properties and added straight to the index text.
    properties = data.get("properties")
    data["text"] = properties.pop("indexText", [])
    # Cast numeric-typed property values so they can be range-queried.
    numeric = {}
    for prop in proxy.iterprops():
        if prop.type in NUMERIC_TYPES:
            numeric[prop.name] = _numeric_values(prop.type, proxy.get(prop))
    # Also cast the grouped date field.
    numeric["dates"] = _numeric_values(registry.date, data.get("dates"))
    data["numeric"] = numeric
    # Context data - from aleph system, not followthemoney.
    data["collection_id"] = collection.id
    data["role_id"] = first(data.get("role_id"))
    data["profile_id"] = first(data.get("profile_id"))
    data["mutable"] = max(ensure_list(data.get("mutable")), default=False)
    data["origin"] = ensure_list(data.get("origin"))
    # Collapse timestamp lists: earliest creation, latest update.
    created_at = ensure_list(data.get("created_at"))
    if created_at:
        data["created_at"] = min(created_at)
    updated_at = ensure_list(data.get("updated_at")) or created_at
    if updated_at:
        data["updated_at"] = max(updated_at)
    entity_id = data.pop("id")
    return {
        "_id": entity_id,
        "_index": entities_write_index(proxy.schema),
        "_source": data,
    }
def make_filename(self, entity):
    """Pick a temporary file name with a plausible extension.

    Some of the file importers actually care about the file extension,
    so try hard to derive one from the entity's metadata.
    """
    # Prefer an original file name that already carries an extension.
    for file_name in entity.get('fileName', quiet=True):
        _, ext = os.path.splitext(file_name)
        if ext:
            return safe_filename(file_name)
    # Otherwise fall back to an explicit extension, then the MIME type,
    # and finally a generic binary suffix.
    ext = first(entity.get('extension', quiet=True))
    if ext is None:
        mime_type = first(entity.get('mimeType', quiet=True))
        if mime_type is not None:
            ext = guess_extension(mime_type)
    return safe_filename('data', extension=ext or 'bin')
def pick(self, values: Sequence[str]) -> Optional[str]:
    """From a set of names, pick the most plausible user-facing one."""
    # Sort first so the result is stable when it's a coin toss:
    candidates = sorted(values)
    if not candidates:
        return None
    # Work in two stages so that name forms with varied casing (e.g.
    # Smith vs. SMITH) are not counted as wildly different, which would
    # lead to implausible median outcomes.
    normalised = []
    by_norm: Dict[str, List[str]] = {}
    for candidate in candidates:
        norm = slugify(candidate, sep=" ")
        if norm is None:
            continue
        normalised.append(norm)
        by_norm.setdefault(norm, []).append(candidate)
    norm = setmedian(normalised)
    if norm is None:
        return None
    # Among the original spellings of the winning form, take the median.
    forms = by_norm.get(norm, [])
    if len(forms) <= 1:
        return first(forms)
    return cast(str, setmedian(forms))
def expand(self, objs, many=False):
    """Attach referenced objects to each result, resolving in bulk."""
    # First pass: gather every (type, key) pair so roles and index
    # entities can be fetched in one round-trip each.
    cache = {}
    for obj in ensure_list(objs):
        for (field, type_, _, _, _) in self.EXPAND:
            type_ = self._type_dispatch(type_)
            for key in self._get_values(obj, field):
                cache[(type_, key)] = None
    self._resolve_roles(cache)
    self._resolve_index(cache)
    # Second pass: swap the raw keys for the serialized objects.
    for obj in ensure_list(objs):
        for (field, type_, target, schema, multi) in self.EXPAND:
            type_ = self._type_dispatch(type_)
            resolved = [cache.get((type_, key))
                        for key in self._get_values(obj, field)]
            if not multi:
                resolved = first(resolved)
            obj.pop(field, None)
            if resolved is not None:
                resolved, _ = schema().dump(resolved, many=multi)
            obj[target] = resolved
def get_table_csv_link(table_id):
    """Return a retrievable URL for the CSV version of a table entity."""
    table = get_entity(table_id)
    csv_hash = first(table.get('properties', {}).get('csvHash'))
    if csv_hash is None:
        raise RuntimeError("Source table doesn't have a CSV version")
    url = archive.generate_url(csv_hash)
    if not url:
        # No hosted URL available; fall back to a local archive path.
        local_path = archive.load_file(csv_hash)
        if local_path is not None:
            url = local_path.as_posix()
    if url is None:
        raise RuntimeError("Could not generate CSV URL for the table")
    return url
def make_mapper(collection, mapping):
    """Build a followthemoney mapper for a mapping's source table."""
    table = get_entity(mapping.table_id)
    csv_hash = first(table.get('properties', {}).get('csvHash'))
    if csv_hash is None:
        raise RuntimeError("Source table doesn't have a CSV version")
    url = archive.generate_url(csv_hash)
    if not url:
        # No hosted URL available; fall back to a local archive path.
        local_path = archive.load_file(csv_hash)
        if local_path is not None:
            url = local_path.as_posix()
    if url is None:
        raise RuntimeError("Could not generate CSV URL for the table")
    data = {'csv_url': url, 'entities': mapping.query}
    return model.make_mapping(data, key_prefix=collection.foreign_id)
def load_locations(context: Context, doc):
    """Parse all Location elements of the document into address entities.

    Returns a mapping of location ID to the emitted address entity.
    """
    locations = {}
    for location in doc.findall("./Locations/Location"):
        location_id = location.get("ID")
        countries = set()
        for area in location.findall("./LocationAreaCode"):
            area_code = ref_get("AreaCode", area.get("AreaCodeID"))
            countries.add(area_code.get("Description"))
        for country_el in location.findall("./LocationCountry"):
            country_ref = ref_get("Country", country_el.get("CountryID"))
            countries.add(country_ref.get("Value"))
        if len(countries) > 1:
            context.log.warn("Multiple countries", countries=countries)
        parts = {}
        for part in location.findall("./LocationPart"):
            part_type = ref_value("LocPartType", part.get("LocPartTypeID"))
            parts[part_type] = part.findtext("./LocationPartValue/Value")
        country = first(countries)
        unknown = parts.get("Unknown")
        # The "Unknown" part sometimes carries a country name rather than
        # a free-form address; prefer it when it parses as a country.
        if registry.country.clean(unknown, fuzzy=True):
            country = unknown
        if country == "undetermined":
            country = unknown = None
        address = h.make_address(
            context,
            full=unknown,
            street=parts.get("ADDRESS1"),
            street2=parts.get("ADDRESS2"),
            street3=parts.get("ADDRESS3"),
            city=parts.get("CITY"),
            postal_code=parts.get("POSTAL CODE"),
            region=parts.get("REGION"),
            state=parts.get("STATE/PROVINCE"),
            country=country,
        )
        if address.id is not None:
            context.emit(address)
        locations[location_id] = address
    return locations
def expand(self, objs, many=False):
    """Attach referenced objects, memoizing lookups across the batch."""
    cache = {}

    def lookup(type_, key):
        # Fetch each (type, key) pair at most once per call.
        if (type_, key) not in cache:
            cache[(type_, key)] = self._get_object(type_, key)
        return cache[(type_, key)]

    for obj in ensure_list(objs):
        for (field, type_, target, schema, multi) in self.EXPAND:
            resolved = [lookup(type_, key)
                        for key in self._get_values(obj, field)]
            if not multi:
                resolved = first(resolved)
            obj.pop(field, None)
            if resolved is not None:
                resolved, _ = schema().dump(resolved, many=multi)
            obj[target] = resolved
def index(collection_id):
    """Returns a list of mappings for the collection and table.
    ---
    get:
      summary: List mappings
      parameters:
      - description: The collection id.
        in: path
        name: collection_id
        required: true
        schema:
          minimum: 1
          type: integer
      - description: The table id.
        in: query
        name: table
        schema:
          type: string
      requestBody:
      responses:
        '200':
          content:
            application/json:
              schema:
                type: object
                allOf:
                - $ref: '#/components/schemas/QueryResponse'
                properties:
                  results:
                    type: array
                    items:
                      $ref: '#/components/schemas/Mapping'
          description: OK
      tags:
      - Collection
      - Mapping
    """
    collection = get_db_collection(collection_id)
    parser = QueryParser(request.args, request.authz)
    # An optional ?table= filter restricts mappings to one source table.
    table_id = first(parser.filters.get("table"))
    query = Mapping.by_collection(collection.id, table_id=table_id)
    return MappingSerializer.jsonify_result(
        DatabaseQueryResult(request, query, parser=parser)
    )
def format_proxy(proxy, collection):
    """Apply final denormalisations to the index.

    Returns an elasticsearch bulk-index action for the given entity
    proxy, enriched with fingerprints, numeric castings and aleph
    context data.
    """
    data = proxy.to_full_dict()
    data['schemata'] = list(proxy.schema.names)
    names = ensure_list(data.get('names'))
    fps = set([fingerprints.generate(name) for name in names])
    fps.update(names)
    data['fingerprints'] = [fp for fp in fps if fp is not None]
    # Slight hack: a magic property in followthemoney that gets taken out
    # of the properties and added straight to the index text.
    properties = data.get('properties')
    text = properties.pop('indexText', [])
    # Use the None-filtered fingerprint list here: fingerprints.generate()
    # can yield None, and extending with the raw set would inject a None
    # entry into the index text.
    text.extend(data['fingerprints'])
    data['text'] = text
    # integer casting for numeric-typed properties
    numeric = {}
    for prop in proxy.iterprops():
        if prop.type in NUMERIC_TYPES:
            values = proxy.get(prop)
            numeric[prop.name] = _numeric_values(prop.type, values)
    # also cast group field for dates
    numeric['dates'] = _numeric_values(registry.date, data.get('dates'))
    data['numeric'] = numeric
    # Context data - from aleph system, not followthemoney.
    now = iso_text(datetime.utcnow())
    data['created_at'] = min(ensure_list(data.get('created_at')), default=now)
    # Fix: collapse to the *latest* update timestamp. Using min() here
    # reported the oldest update time for entities touched repeatedly.
    data['updated_at'] = max(ensure_list(data.get('updated_at')), default=now)
    # FIXME: Can there ever really be multiple role_ids?
    data['role_id'] = first(data.get('role_id'))
    data['mutable'] = max(ensure_list(data.get('mutable')), default=False)
    data['origin'] = ensure_list(data.get('origin'))
    data['collection_id'] = collection.id
    entity_id = data.pop('id')
    return {
        '_id': entity_id,
        '_index': entities_write_index(data.get('schema')),
        '_source': data
    }
def format_proxy(proxy, collection):
    """Apply final denormalisations to the index."""
    data = proxy.to_full_dict()
    data["schemata"] = list(proxy.schema.names)
    names = data.get("names", [])
    fps = {fingerprints.generate(name) for name in names}
    fps.update(names)
    data["fingerprints"] = [fp for fp in fps if fp is not None]
    # Slight hack: a magic property in followthemoney that gets taken out
    # of the properties and added straight to the index text.
    properties = data.get("properties")
    text = properties.pop("indexText", [])
    text.extend(fps)
    data["text"] = text
    # Cast numeric-typed property values so they can be range-queried.
    numeric = {}
    for prop in proxy.iterprops():
        if prop.type in NUMERIC_TYPES:
            numeric[prop.name] = _numeric_values(prop.type, proxy.get(prop))
    # Also cast the grouped date field.
    numeric["dates"] = _numeric_values(registry.date, data.get("dates"))
    data["numeric"] = numeric
    # Context data - from aleph system, not followthemoney.
    # FIXME: Can there ever really be multiple role_ids?
    data["role_id"] = first(data.get("role_id"))
    data["mutable"] = max(ensure_list(data.get("mutable")), default=False)
    data["origin"] = ensure_list(data.get("origin"))
    created_at = data.get("created_at")
    if created_at:
        # Default the update timestamp to the creation timestamp.
        data["updated_at"] = data.get("updated_at", created_at)
    data["collection_id"] = collection.id
    entity_id = data.pop("id")
    return {
        "_id": entity_id,
        "_index": entities_write_index(data.get("schema")),
        "_source": data,
    }
def pick(self, values):
    """Pick the most plausible name from a list of candidate values."""
    cleaned = [sanitize_text(value) for value in ensure_list(values)]
    cleaned = [value for value in cleaned if value is not None]
    if len(cleaned) > 1:
        # Sort for stable output, then take the set median.
        return setmedian(sorted(cleaned))
    return first(cleaned)
def parse_row(context: Context, row):
    # Process one source row (column names such as UKSanctionsListRef and
    # UKStatementOfReasons suggest the UK sanctions list — TODO confirm):
    # emit the listed entity plus a Sanction entity for the listing.
    group_type = row.pop("GroupTypeDescription")
    schema = TYPES.get(group_type)
    if schema is None:
        context.log.error("Unknown group type", group_type=group_type)
        return
    entity = context.make(schema)
    entity.id = context.make_slug(row.pop("GroupID"))
    sanction = h.make_sanction(context, entity)
    sanction.add("program", row.pop("RegimeName"))
    sanction.add("authority", row.pop("ListingType", None))
    listed_date = h.parse_date(row.pop("DateListed"), FORMATS)
    sanction.add("listingDate", listed_date)
    designated_date = h.parse_date(row.pop("DateDesignated"), FORMATS)
    sanction.add("startDate", designated_date)
    # Prefer the listing date as creation time; fall back to designation.
    entity.add("createdAt", listed_date)
    if not entity.has("createdAt"):
        entity.add("createdAt", designated_date)
    sanction.add("authorityId", row.pop("UKSanctionsListRef", None))
    sanction.add("unscId", row.pop("UNRef", None))
    sanction.add("status", row.pop("GroupStatus", None))
    sanction.add("reason", row.pop("UKStatementOfReasons", None))
    last_updated = h.parse_date(row.pop("LastUpdated"), FORMATS)
    sanction.add("modifiedAt", last_updated)
    entity.add("modifiedAt", last_updated)
    # TODO: derive topics and schema from this??
    entity_type = row.pop("Entity_Type", None)
    entity.add_cast("LegalEntity", "legalForm", entity_type)
    reg_number = row.pop("Entity_BusinessRegNumber", None)
    entity.add_cast("LegalEntity", "registrationNumber", reg_number)
    row.pop("Ship_Length", None)
    # Vessel-specific columns; add_cast presumably applies them only when
    # the entity can be narrowed to a Vessel — verify against helper.
    entity.add_cast("Vessel", "flag", row.pop("Ship_Flag", None))
    flags = split_new(row.pop("Ship_PreviousFlags", None))
    entity.add_cast("Vessel", "pastFlags", flags)
    entity.add_cast("Vessel", "type", row.pop("Ship_Type", None))
    entity.add_cast("Vessel", "tonnage", row.pop("Ship_Tonnage", None))
    entity.add_cast("Vessel", "buildDate", row.pop("Ship_YearBuilt", None))
    entity.add_cast("Vessel", "imoNumber", row.pop("Ship_IMONumber", None))
    # A ship owner becomes a separate entity linked via an Ownership.
    ship_owner = row.pop("Ship_CurrentOwners", None)
    if ship_owner is not None:
        owner = context.make("LegalEntity")
        owner.id = context.make_slug("named", ship_owner)
        owner.add("name", ship_owner)
        context.emit(owner)
        ownership = context.make("Ownership")
        ownership.id = context.make_id(entity.id, "owns", owner.id)
        ownership.add("owner", owner)
        ownership.add("asset", entity)
        context.emit(ownership)
    countries = parse_countries(row.pop("Country", None))
    entity.add("country", countries)
    title = split_items(row.pop("Title", None))
    entity.add("title", title, quiet=True)
    # Person-specific columns.
    pobs = split_items(row.pop("Individual_TownOfBirth", None))
    entity.add_cast("Person", "birthPlace", pobs)
    dob = h.parse_date(row.pop("Individual_DateOfBirth", None), FORMATS)
    entity.add_cast("Person", "birthDate", dob)
    cob = parse_countries(row.pop("Individual_CountryOfBirth", None))
    entity.add_cast("Person", "country", cob)
    nationalities = parse_countries(row.pop("Individual_Nationality", None))
    entity.add_cast("Person", "nationality", nationalities)
    positions = split_items(row.pop("Individual_Position", None))
    entity.add_cast("Person", "position", positions)
    entity.add_cast("Person", "gender", row.pop("Individual_Gender", None))
    # Each row carries one name form; bail out (before emitting) on name
    # types or qualities we have no mapping for, rather than guessing.
    name_type = row.pop("AliasType", None)
    name_prop = NAME_TYPES.get(name_type)
    if name_prop is None:
        context.log.warning("Unknown name type", type=name_type)
        return
    name_quality = row.pop("AliasQuality", None)
    is_weak = WEAK_QUALITY.get(name_quality)
    if is_weak is None:
        context.log.warning("Unknown name quality", quality=name_quality)
        return
    h.apply_name(
        entity,
        name1=row.pop("name1", None),
        name2=row.pop("name2", None),
        name3=row.pop("name3", None),
        name4=row.pop("name4", None),
        name5=row.pop("name5", None),
        tail_name=row.pop("Name6", None),
        name_prop=name_prop,
        is_weak=is_weak,
        quiet=True,
    )
    entity.add("alias", row.pop("NameNonLatinScript", None))
    full_address = join_text(
        row.pop("Address1", None),
        row.pop("Address2", None),
        row.pop("Address3", None),
        row.pop("Address4", None),
        row.pop("Address5", None),
        row.pop("Address6", None),
        sep=", ",
    )
    address = h.make_address(
        context,
        full=full_address,
        postal_code=row.pop("PostCode", None),
        country=first(countries),
    )
    h.apply_address(context, entity, address)
    passport_number = row.pop("Individual_PassportNumber", None)
    passport_numbers = split_items(passport_number)
    entity.add_cast("Person", "passportNumber", passport_numbers)
    passport_detail = row.pop("Individual_PassportDetails", None)
    # passport_details = split_items(passport_detail)
    # TODO: where do I stuff this?
    ni_number = row.pop("Individual_NINumber", None)
    ni_numbers = split_items(ni_number)
    entity.add_cast("Person", "idNumber", ni_numbers)
    ni_detail = row.pop("Individual_NIDetails", None)
    # ni_details = split_items(ni_detail)
    # TODO: where do I stuff this?
    for phone in split_new(row.pop("PhoneNumber", None)):
        entity.add_cast("LegalEntity", "phone", phone)
    for email in split_new(row.pop("EmailAddress", None)):
        entity.add_cast("LegalEntity", "email", email)
    for website in split_new(row.pop("Website", None)):
        entity.add_cast("LegalEntity", "website", website)
    # Parent companies own this entity ...
    for name in parse_companies(context, row.pop("Entity_ParentCompany", None)):
        parent = context.make("Organization")
        parent.id = context.make_slug("named", name)
        parent.add("name", name)
        context.emit(parent)
        ownership = context.make("Ownership")
        ownership.id = context.make_id(entity.id, "owns", parent.id)
        ownership.add("owner", parent)
        ownership.add("asset", entity)
        context.emit(ownership)
    # ... while subsidiaries are owned by it.
    for name in parse_companies(context, row.pop("Entity_Subsidiaries", None)):
        subsidiary = context.make("Company")
        subsidiary.id = context.make_slug("named", name)
        subsidiary.add("name", name)
        context.emit(subsidiary)
        ownership = context.make("Ownership")
        ownership.id = context.make_id(entity.id, "owns", subsidiary.id)
        ownership.add("owner", entity)
        ownership.add("asset", subsidiary)
        context.emit(ownership)
    grp_status = row.pop("GrpStatus", None)
    if grp_status != "A":
        context.log.warning("Unknown GrpStatus", value=grp_status)
    entity.add("notes", h.clean_note(row.pop("OtherInformation", None)))
    # Warn about any source columns we have not consumed.
    h.audit_data(row, ignore=["NonLatinScriptLanguage", "NonLatinScriptType"])
    entity.add("topics", "sanction")
    context.emit(entity, target=True)
    context.emit(sanction)
def parse_row(context, row):
    # Process one source row (FCOId / UKStatementOfReasons columns suggest
    # the UK FCO sanctions list — TODO confirm): emit the listed entity
    # and a Sanction entity for the listing.
    group_type = row.pop("GroupTypeDescription")
    org_type = row.pop("OrgType", None)
    # Derive the schema: a vessel column beats the generic entity type.
    if group_type == "Individual":
        base_schema = "Person"
    elif row.get("TypeOfVessel") is not None:
        base_schema = "Vessel"
    elif group_type == "Entity":
        base_schema = context.lookup_value("org_type", org_type, "Organization")
    else:
        context.log.error("Unknown entity type", group_type=group_type)
        return
    entity = context.make(base_schema)
    entity.id = context.make_slug(row.pop("GroupID"))
    if org_type is not None:
        org_types = split_items(org_type)
        entity.add_cast("LegalEntity", "legalForm", org_types)
    sanction = h.make_sanction(context, entity)
    # entity.add("position", row.pop("Position"), quiet=True)
    entity.add("notes", row.pop("OtherInformation", None), quiet=True)
    entity.add("notes", row.pop("FurtherIdentifiyingInformation", None), quiet=True)
    sanction.add("program", row.pop("RegimeName"))
    sanction.add("authority", row.pop("ListingType", None))
    sanction.add("startDate", h.parse_date(row.pop("DateListed"), FORMATS))
    sanction.add("recordId", row.pop("FCOId", None))
    sanction.add("status", row.pop("GroupStatus", None))
    sanction.add("reason", row.pop("UKStatementOfReasons", None))
    last_updated = h.parse_date(row.pop("LastUpdated"), FORMATS)
    if last_updated is not None:
        sanction.add("modifiedAt", last_updated)
        sanction.context["updated_at"] = last_updated
        entity.add("modifiedAt", last_updated)
        entity.context["updated_at"] = last_updated
    # DoB is sometimes a year only, so assemble the date from parts.
    row.pop("DateOfBirth", None)
    dob = parse_parts(
        row.pop("YearOfBirth", 0),
        row.pop("MonthOfBirth", 0),
        row.pop("DayOfBirth", 0),
    )
    entity.add_cast("Person", "birthDate", dob)
    gender = h.clean_gender(row.pop("Gender", None))
    entity.add_cast("Person", "gender", gender)
    id_number = row.pop("NationalIdNumber", None)
    entity.add_cast("LegalEntity", "idNumber", split_items(id_number))
    passport = row.pop("PassportDetails", None)
    entity.add_cast("Person", "passportNumber", split_items(passport))
    # Vessel-specific columns; add_cast presumably applies them only when
    # the entity can be narrowed to the given schema — verify in helper.
    flag = row.pop("FlagOfVessel", None)
    entity.add_cast("Vessel", "flag", flag)
    prev_flag = row.pop("PreviousFlags", None)
    entity.add_cast("Vessel", "pastFlags", prev_flag)
    year = row.pop("YearBuilt", None)
    entity.add_cast("Vehicle", "buildDate", year)
    type_ = row.pop("TypeOfVessel", None)
    entity.add_cast("Vehicle", "type", type_)
    imo = row.pop("IMONumber", None)
    entity.add_cast("Vessel", "imoNumber", imo)
    tonnage = row.pop("TonnageOfVessel", None)
    entity.add_cast("Vessel", "tonnage", tonnage)
    row.pop("LengthOfVessel", None)
    # entity.add("legalForm", org_type)
    # Name parts: names 1-5 plus Name6 as last name; FullName falls back
    # to Name6 when absent.
    title = split_items(row.pop("NameTitle", None))
    entity.add("title", title, quiet=True)
    entity.add("firstName", row.pop("name1", None), quiet=True)
    entity.add("secondName", row.pop("name2", None), quiet=True)
    entity.add("middleName", row.pop("name3", None), quiet=True)
    entity.add("middleName", row.pop("name4", None), quiet=True)
    entity.add("middleName", row.pop("name5", None), quiet=True)
    name6 = row.pop("Name6", None)
    entity.add("lastName", name6, quiet=True)
    full_name = row.pop("FullName", name6)
    row.pop("AliasTypeName")
    # AKA rows carry an alias, any other alias type is the primary name.
    if row.pop("AliasType") == "AKA":
        entity.add("alias", full_name)
    else:
        entity.add("name", full_name)
    nationalities = parse_countries(row.pop("Nationality", None))
    entity.add("nationality", nationalities, quiet=True)
    position = split_items(row.pop("Position", None))
    entity.add("position", position, quiet=True)
    birth_countries = parse_countries(row.pop("CountryOfBirth", None))
    entity.add("country", birth_countries, quiet=True)
    countries = parse_countries(row.pop("Country", None))
    entity.add("country", countries)
    pob = split_items(row.pop("TownOfBirth", None))
    entity.add("birthPlace", pob, quiet=True)
    address = h.make_address(
        context,
        full=row.pop("FullAddress", None),
        street=row.pop("address1", None),
        street2=row.pop("address2", None),
        street3=row.pop("address3", None),
        city=row.pop("address4", None),
        place=row.pop("address5", None),
        region=row.pop("address6", None),
        postal_code=row.pop("PostCode", None),
        country=first(countries),
    )
    h.apply_address(context, entity, address)
    reg_number = row.pop("BusinessRegNumber", None)
    entity.add_cast("LegalEntity", "registrationNumber", reg_number)
    phones = split_items(row.pop("PhoneNumber", None), comma=True)
    phones = h.clean_phones(phones)
    entity.add_cast("LegalEntity", "phone", phones)
    website = split_items(row.pop("Website", None), comma=True)
    entity.add_cast("LegalEntity", "website", website)
    emails = split_items(row.pop("EmailAddress", None), comma=True)
    emails = h.clean_emails(emails)
    entity.add_cast("LegalEntity", "email", emails)
    # TODO: graph
    # Discard columns that are not (yet) mapped to properties.
    row.pop("Subsidiaries", None)
    row.pop("ParentCompany", None)
    row.pop("CurrentOwners", None)
    row.pop("DateListedDay", None)
    row.pop("DateListedMonth", None)
    row.pop("DateListedYear", None)
    row.pop("LastUpdatedDay", None)
    row.pop("LastUpdatedMonth", None)
    row.pop("LastUpdatedYear", None)
    row.pop("GrpStatus", None)
    row.pop("ID", None)
    row.pop("DateOfBirthId", None)
    row.pop("DateListedDay", None)
    # Surface any remaining, unconsumed columns for inspection.
    if len(row):
        pprint(row)
    entity.add("topics", "sanction")
    context.emit(entity, target=True, unique=True)
    context.emit(sanction)