def handle_places(self, data):
    '''
    Record the places and addresses listed under `data['locations']`.

    For every location record:

    - a truthy `location` name is parsed with `parse_location_name`, turned
      into place data by the helper, and appended to `data['places']`; its
      optional `location_note` / `location_date` are appended to
      `data['referred_to_by']` as a note / bibliography statement.
    - a truthy `address` is appended to `data['contact_point']` as an
      identifier; its optional `address_note` / `address_date` are appended to
      `data['referred_to_by']` in the same way.

    `data` is modified in place; nothing is returned.
    '''
    helper = self.helper
    base_uri = helper.make_proj_uri('PLACE', '')

    def refer(statement):
        # Looked up lazily so the list is only required when something is added.
        data['referred_to_by'].append(statement)

    for loc in data.get('locations', []):
        l = loc.get('location')
        if l:
            parsed_name = parse_location_name(l, uri_base=helper.proj_prefix)
            place = helper.make_place(parsed_name, base_uri=base_uri)
            data['places'].append(place)

            location_note = loc.get('location_note')
            if location_note:
                refer(vocab.Note(ident='', content=location_note))

            date = loc.get('location_date')
            if date:
                refer(vocab.BibliographyStatement(
                    ident='', content=f'Residence in {l} ({date})'))

        address = loc.get('address')
        if not address:
            continue

        identifier = model.Identifier(ident='', content=address)
        contact_point = add_crom_data(data={}, what=identifier)
        data['contact_point'].append(contact_point)

        address_note = loc.get('address_note')
        if address_note:
            refer(vocab.Note(ident='', content=address_note))

        date = loc.get('address_date')
        if date:
            # The statement names the location (`l`), not the street address.
            refer(vocab.BibliographyStatement(
                ident='', content=f'Address at {l} ({date})'))
def handle_statements(self, data):
    '''
    Append statements built from textual fields of `data` to
    `data['referred_to_by']`.

    Fields read (each is skipped when missing or empty):

    - `text`: added as a `vocab.BiographyStatement`
    - `source`: added as a `vocab.BibliographyStatement`
    - `project`: added as a `vocab.SourceStatement`
    - `medal_received`: a semicolon-separated string; one `vocab.Note` is
      added per distinct, non-blank entry, in order of first appearance

    `data` is modified in place; nothing is returned. The `referred_to_by`
    list is created if a statement has to be added and it does not exist yet.
    '''
    def refer(statement):
        data.setdefault('referred_to_by', []).append(statement)

    text_content = data.get('text')
    if text_content:
        refer(vocab.BiographyStatement(ident='', content=text_content))

    source_content = data.get('source')
    if source_content:
        refer(vocab.BibliographyStatement(ident='', content=source_content))

    project = data.get('project')
    if project:
        refer(vocab.SourceStatement(ident='', content=project))

    # `or ''` also covers a key that is present but holds None.
    raw_awards = data.get('medal_received') or ''
    # dict.fromkeys de-duplicates while keeping input order, so the emitted
    # notes are in the same order on every run (a set would not guarantee that).
    awards = dict.fromkeys(entry.strip() for entry in raw_awards.split(';'))
    awards.pop('', None)
    for award in awards:
        # TODO: add proper classification for an Awards Statement
        refer(vocab.Note(ident='', content=award))
def extract_monetary_amount(data, add_citations=False, currency_mapping=CURRENCY_MAPPING, source_mapping=None, truncate_label_digits=2):
    '''
    Build a monetary amount object from the price-related keys of `data`.

    The kind of object depends on which keys are present; the first matching
    group below wins. Within each field, the first key that is present in
    `data` supplies the value.

    `MonetaryAmount` (when `price`, `price_amount` or `amount` is present):
    - amount: `price_amount`, `price`, `amount`
    - currency: `currency`, `price_currency`, `price_curr`
    - note: `price_note`, `price_desc`, `note`
    - bibliographic statement: `price_citation`, `citation`

    `EstimatedPrice` (when `est_price` or `est_price_amount` is present),
    `StartingPrice` (`start_price` / `start_price_amount`) and
    `AskingPrice` (`ask_price` / `ask_price_amount`), where `<p>` is
    `est_price`, `start_price` or `ask_price` respectively:
    - amount: `<p>_amount`, `<p>`
    - currency: `<p>_currency`, `<p>_curr`, `currency`
    - note: `<p>_note`, `<p>_desc`, `note`
    - bibliographic statement: `<p>_citation`, `citation`

    Parameters:
    - `add_citations`: attach the bibliographic statement only when true.
    - `currency_mapping`: maps lower-cased currency strings to either a
      resource object or a key of `vocab.instances`.
    - `source_mapping`: accepted but not used by this function.
    - `truncate_label_digits`: number of decimal places used when formatting
      a numeric amount for the label.

    Returns `None` if none of the trigger keys is present, or if both the
    amount and the currency are empty.
    '''
    def lookup(*keys):
        # Value of the first key present in `data` (even if falsy), else None.
        for key in keys:
            if key in data:
                return data[key]
        return None

    if any(key in data for key in ('price', 'price_amount', 'amount')):
        amnt = model.MonetaryAmount(ident='')
        price_amount = lookup('price_amount', 'price', 'amount')
        price_currency = lookup('currency', 'price_currency', 'price_curr')
        note = lookup('price_note', 'price_desc', 'note')
        cite = lookup('price_citation', 'citation')
    elif any(key in data for key in ('est_price', 'est_price_amount')):
        amnt = vocab.EstimatedPrice(ident='')
        price_amount = lookup('est_price_amount', 'est_price')
        price_currency = lookup('est_price_currency', 'est_price_curr', 'currency')
        note = lookup('est_price_note', 'est_price_desc', 'note')
        cite = lookup('est_price_citation', 'citation')
    elif any(key in data for key in ('start_price', 'start_price_amount')):
        amnt = vocab.StartingPrice(ident='')
        price_amount = lookup('start_price_amount', 'start_price')
        price_currency = lookup('start_price_currency', 'start_price_curr', 'currency')
        note = lookup('start_price_note', 'start_price_desc', 'note')
        cite = lookup('start_price_citation', 'citation')
    elif any(key in data for key in ('ask_price', 'ask_price_amount')):
        amnt = vocab.AskingPrice(ident='')
        price_amount = lookup('ask_price_amount', 'ask_price')
        price_currency = lookup('ask_price_currency', 'ask_price_curr', 'currency')
        note = lookup('ask_price_note', 'ask_price_desc', 'note')
        cite = lookup('ask_price_citation', 'citation')
    else:
        return None

    if not (price_amount or price_currency):
        return None

    if add_citations and cite:
        amnt.referred_to_by = vocab.BibliographyStatement(ident='', content=cite)
    if note:
        amnt.referred_to_by = vocab.Note(ident='', content=note)

    # Falls back to the raw text when the amount cannot be parsed as a number.
    price_amount_label = price_amount
    if price_amount:
        try:
            cleaned = price_amount.replace('[?]', '').replace('?', '').strip()
            # Commas are only dropped when one is followed by three digits.
            if re.search(r',\d\d\d', cleaned):
                cleaned = cleaned.replace(',', '')
            value = float(cleaned)
            price_amount_label = ('{:,.%df}' % truncate_label_digits).format(value)
            amnt.value = value
        except ValueError:
            # Not numeric: keep the original text as the label and as a name.
            amnt._label = price_amount
            amnt.identified_by = model.Name(ident='', content=price_amount)

    if price_currency:
        try:
            currency = currency_mapping[price_currency.lower()]
        except KeyError:
            currency = price_currency
        if isinstance(currency, model.BaseResource):
            amnt.currency = currency
        elif currency in vocab.instances:
            amnt.currency = vocab.instances[currency]
        else:
            warnings.warn('*** No currency instance defined for %s' % (currency, ))

    if price_amount_label and price_currency:
        amnt._label = '%s %s' % (price_amount_label, price_currency)
    elif price_amount:
        amnt._label = '%s' % (price_amount, )
    return amnt
def add_props(self, data: dict, role=None, **kwargs):
    '''
    Enrich a person/group record in place.

    - `data['nationality']` (a string or list of strings) is replaced by a
      list of the matching `vocab.instances` entries (looked up as
      '<nationality> nationality'); a warning is issued for each one that has
      no instance.
    - If `clamped_timespan_args` returns a truthy value for the record, a
      professional activity is appended to `data['events']`.
    - `notes`, `brief_notes` and `working_notes` are split on ';' and each
      non-blank part is added to `data['referred_to_by']` as a note;
      `name_cite` and `bibliography` are each added once as a bibliography
      statement.
    - If `data['auth_name']` names an anonymous group, `data['label']` is
      rewritten from the matched nationality / century / period, and for
      dated groups a professional activity is appended to `data['events']`.

    `role` defaults to 'person' and is used when building group labels.
    Extra keyword arguments are accepted and ignored. Requires `data['label']`
    (and `data['uri']` whenever a professional activity is created).
    '''
    role = role if role else 'person'
    auth_name = data.get('auth_name', '')
    period_match = self.anon_period_re.match(auth_name)

    nationalities = []
    if 'nationality' in data:
        nationality = data['nationality']
        if isinstance(nationality, str):
            nationalities.append(nationality.lower())
        elif isinstance(nationality, list):
            nationalities += [n.lower() for n in nationality]
    data['nationality'] = []
    data.setdefault('referred_to_by', [])

    name = data['label']
    active = self.clamped_timespan_args(data, name)
    if active:
        pact_uri = data['uri'] + '-ProfAct-active'
        a = self.professional_activity(name, ident=pact_uri, **active)
        # The list may not exist yet for records outside an anonymous group.
        data.setdefault('events', []).append(a)

    for key in ('notes', 'brief_notes', 'working_notes'):
        if key in data:
            for content in (n.strip() for n in data[key].split(';')):
                if content:
                    note = vocab.Note(ident='', content=content)
                    data['referred_to_by'].append(note)

    # Each citation field is added exactly once.
    for key in ('name_cite', 'bibliography'):
        if data.get(key):
            cite = vocab.BibliographyStatement(ident='', content=data[key])
            data['referred_to_by'].append(cite)

    if self.is_anonymous_group(auth_name):
        nationality_match = self.anon_nationality_re.match(auth_name)
        dated_nationality_match = self.anon_dated_nationality_re.match(auth_name)
        dated_match = self.anon_dated_re.match(auth_name)
        data.setdefault('events', [])
        if nationality_match:
            with suppress(ValueError):
                nationality = nationality_match.group(1).lower()
                nationalities.append(nationality)
                group_label = self.anonymous_group_label(
                    role, nationality=nationality)
                data['label'] = group_label
        elif dated_nationality_match:
            # A ValueError (e.g. from int()) abandons the rest of this branch.
            with suppress(ValueError):
                nationality = dated_nationality_match.group(1).lower()
                nationalities.append(nationality)
                century = int(dated_nationality_match.group(2))
                group_label = self.anonymous_group_label(
                    role, century=century, nationality=nationality)
                data['label'] = group_label
                pact_uri = data['uri'] + '-ProfAct-dated-natl'
                a = self.professional_activity(
                    group_label, ident=pact_uri, century=century, narrow=True)
                data['events'].append(a)
        elif dated_match:
            with suppress(ValueError):
                century = int(dated_match.group(1))
                group_label = self.anonymous_group_label(role, century=century)
                data['label'] = group_label
                pact_uri = data['uri'] + '-ProfAct-dated'
                a = self.professional_activity(
                    group_label, ident=pact_uri, century=century, narrow=True)
                data['events'].append(a)
        elif period_match:
            period = period_match.group(1).lower()
            data['label'] = f'anonymous {period} {role}s'

    for nationality in nationalities:
        key = f'{nationality.lower()} nationality'
        n = vocab.instances.get(key)
        if n:
            data['nationality'].append(n)
        else:
            warnings.warn(
                f'No nationality instance found in crom for: {key!r}')
def extract_monetary_amount(data):
    '''
    Returns a `MonetaryAmount`, `StartingPrice`, or `EstimatedPrice` object
    based on properties of the supplied `data` dict. If no amount or currency
    data is found, returns `None`.

    For `EstimatedPrice` (used when `est_price` or `est_price_amount` is
    present), values will be accessed from these keys:
        - amount: `est_price_amount` or `est_price`
        - currency: `est_price_currency` or `est_price_curr`
        - note: `est_price_note` or `est_price_desc`
        - bibliographic statement: `est_price_citation`

    For `StartingPrice` (used when `start_price` or `start_price_amount` is
    present), values will be accessed from these keys:
        - amount: `start_price_amount` or `start_price`
        - currency: `start_price_currency` or `start_price_curr`
        - note: `start_price_note` or `start_price_desc`
        - bibliographic statement: `start_price_citation`

    For `MonetaryAmount` prices (all other cases), values will be accessed
    from these keys:
        - amount: `price_amount` or `price`
        - currency: `price_currency` or `price_curr`
        - note: `price_note` or `price_desc`
        - bibliographic statement: `price_citation`

    The currency string is looked up case-insensitively in `CURRENCY_MAPPING`
    and then resolved through `vocab.instances`; a warning is issued when no
    instance exists. A non-numeric amount is kept as the object's label and
    name instead of its value.
    '''
    # Either documented amount key selects the price type.
    if 'est_price' in data or 'est_price_amount' in data:
        amnt = vocab.EstimatedPrice()
        price_amount = data.get('est_price_amount', data.get('est_price'))
        price_currency = data.get('est_price_currency', data.get('est_price_curr'))
        note = data.get('est_price_note', data.get('est_price_desc'))
        cite = data.get('est_price_citation')
    elif 'start_price' in data or 'start_price_amount' in data:
        amnt = vocab.StartingPrice()
        price_amount = data.get('start_price_amount', data.get('start_price'))
        price_currency = data.get('start_price_currency', data.get('start_price_curr'))
        note = data.get('start_price_note', data.get('start_price_desc'))
        cite = data.get('start_price_citation')
    else:
        amnt = model.MonetaryAmount()
        price_amount = data.get('price_amount', data.get('price'))
        price_currency = data.get('price_currency', data.get('price_curr'))
        note = data.get('price_note', data.get('price_desc'))
        cite = data.get('price_citation')

    if not (price_amount or price_currency):
        return None

    if cite:
        amnt.referred_to_by = vocab.BibliographyStatement(content=cite)
    if note:
        amnt.referred_to_by = vocab.Note(content=note)

    # Remember that an amount was supplied before it is converted: a parsed
    # value of 0.0 is falsy but must still be labelled.
    has_amount = bool(price_amount)
    if has_amount:
        try:
            value = price_amount.replace('[?]', '').replace('?', '').strip()
            price_amount = float(value)
            amnt.value = price_amount
        except ValueError:
            # Not numeric: keep the original text as the label and as a name.
            amnt._label = price_amount
            amnt.identified_by = model.Name(content=price_amount)

    if price_currency:
        # The mapping is consulted with the lower-cased string only, so
        # 'Francs' and 'francs' resolve identically.
        try:
            price_currency = CURRENCY_MAPPING[price_currency.lower()]
        except KeyError:
            pass
        if price_currency in vocab.instances:
            amnt.currency = vocab.instances[price_currency]
        else:
            warnings.warn('*** No currency instance defined for %s' % (price_currency,))

    if has_amount and price_currency:
        amnt._label = '%s %s' % (price_amount, price_currency)
    elif has_amount:
        amnt._label = '%s' % (price_amount,)
    return amnt
def __call__(self, data, non_auctions, event_properties, problematic_records, transaction_types):
    '''
    Add modeling data for the auction of a lot of objects.

    This is a generator. It returns without yielding anything when the record
    asserts an asking price (`data['ask_price']['ask_price']` is truthy);
    otherwise it yields the same `data` dict after updating it in place.

    Parameters, as used below:
    - `data`: the lot record; must contain `_object` and `auction_of_lot`.
    - `non_auctions`: maps a catalog number to a sale type; catalog numbers
      that are absent are treated as 'Auction'.
    - `event_properties`: provides `auction_houses`, `auction_locations` and
      `auction_dates`, the first and last of which are looked up by catalog
      number here.
    - `problematic_records`: its optional `lots` entry is iterated as
      (key, message) pairs.
    - `transaction_types`: provides the `sold` and `withdrawn` collections
      that `data['transaction']` is tested against.

    Keys written to `data`: `lot_object_id`, `_prov_entry_data`,
    `_event_causing_prov_entry`, and (for non-auction sales with a price)
    `ask_price` in place of `price`.
    '''
    self.helper.copy_source_information(data['_object'], data)
    auction_houses_data = event_properties['auction_houses']
    auction_locations = event_properties['auction_locations']
    auction_data = data['auction_of_lot']
    try:
        lot_object_key = object_key(auction_data)
    except Exception as e:
        # Report the offending record (non-empty fields only) on stderr, then
        # let the original exception propagate.
        warnings.warn(
            f'Failed to compute lot object key from data {auction_data} ({e})'
        )
        pprint.pprint({k: v for k, v in data.items() if v != ''}, stream=sys.stderr)
        raise
    # The key unpacks into catalog number, lot number and date.
    cno, lno, date = lot_object_key
    sale_type = non_auctions.get(cno, 'Auction')
    ask_price = data.get('ask_price', {}).get('ask_price')
    if ask_price:
        # if there is an asking price/currency, it's a direct sale, not an auction;
        # filter these out from subsequent modeling of auction lots.
        warnings.warn(
            f'Skipping {cno} {lno} because it asserts an asking price')
        return
    if sale_type != 'Auction':
        # the records in this sales catalog do not represent auction sales, so the
        # price data should not be asserted as a sale price, but instead as an
        # asking price.
        with suppress(KeyError):
            prices = data['price']
            del data['price']
            if prices:
                # Only the first price entry is considered.
                price_data = prices[0]
                price = get_crom_object(price_data)
                if price:
                    ma = vocab.add_classification(price, vocab.AskingPrice)
                    data['ask_price'] = add_crom_data(price_data, ma)
    # NOTE(review): `shared_lot_number` is not read again in this method;
    # confirm whether the call is still needed.
    shared_lot_number = self.helper.shared_lot_number_from_lno(lno)
    uid, uri = self.helper.shared_lot_number_ids(cno, lno, date)
    sale_data = {'uid': uid, 'uri': uri}
    lot = self.helper.sale_for_sale_type(sale_type, lot_object_key)
    data['lot_object_id'] = f'{cno} {lno} ({date})'
    if 'link_to_pdf' in auction_data:
        # The PDF URL serves as both identifier and label of the web page.
        url = auction_data['link_to_pdf']
        page = vocab.WebPage(ident=url, label=url)
        lot.referred_to_by = page
    for problem_key, problem in problematic_records.get('lots', []):
        # TODO: this is inefficient, but will probably be OK so long as the number
        #       of problematic records is small. We do it this way because we can't
        #       represent a tuple directly as a JSON dict key, and we don't want to
        #       have to do post-processing on the services JSON files after loading.
        if tuple(problem_key) == lot_object_key:
            # Attach the problem text to the lot as a note classified as a
            # 'Problematic Record'.
            note = model.LinguisticObject(ident='', content=problem)
            problem_classification = model.Type(
                ident=self.helper.problematic_record_uri,
                label='Problematic Record')
            problem_classification.classified_as = vocab.instances[
                "brief text"]
            note.classified_as = problem_classification
            lot.referred_to_by = note
    # Combine whichever of the two transaction source fields are non-empty
    # into a single citation on the lot.
    cite_content = []
    if data.get('transaction_so'):
        cite_content.append(data['transaction_so'])
    if data.get('transaction_cite'):
        cite_content.append(data['transaction_cite'])
    if cite_content:
        content = ', '.join(cite_content)
        cite = vocab.BibliographyStatement(
            ident='', content=content, label='Source of transaction type')
        cite.identified_by = model.Name(
            ident='', content='Source of transaction type')
        lot.referred_to_by = cite
    transaction = data.get('transaction')
    SOLD = transaction_types['sold']
    WITHDRAWN = transaction_types['withdrawn']
    self.set_lot_objects(lot, cno, lno, sale_data['uri'], data, sale_type)
    auction, _, _ = self.helper.sale_event_for_catalog_number(
        cno, sale_type)
    if transaction not in WITHDRAWN:
        lot.part_of = auction
        event_dates = event_properties['auction_dates'].get(cno)
        # Each auction-house record is copied before being passed to the
        # helper, so the shared `event_properties` entries are not handed over
        # directly.
        auction_houses = [
            get_crom_object(self.helper.add_auction_house_data(h.copy()))
            for h in auction_houses_data.get(cno, [])
        ]
        self.set_lot_auction_houses(lot, cno, auction_houses)
        self.set_lot_location(lot, cno, auction_locations)
        self.set_lot_date(lot, auction_data, event_dates)
        self.set_lot_notes(lot, auction_data, sale_type)
        # Build the provenance entry that this lot sale causes.
        tx_uri = self.helper.transaction_uri_for_lot(auction_data, data)
        lots = self.helper.lots_in_transaction(auction_data, data)
        tx = vocab.ProvenanceEntry(ident=tx_uri)
        tx_label = prov_entry_label(self.helper, sale_type, transaction,
                                    'of', cno, lots, date)
        tx._label = tx_label
        tx.identified_by = model.Name(ident='', content=tx_label)
        tx.caused_by = lot
        tx_data = {'uri': tx_uri}
        if transaction in SOLD:
            if sale_type == 'Auction':
                # the records in this sales catalog represent auction sales, so the
                # price data for a sale should be asserted as a hammer price.
                with suppress(KeyError):
                    prices = data['price']
                    if prices:
                        # Only the first price entry is classified.
                        price_data = prices[0]
                        price = get_crom_object(price_data)
                        if price:
                            vocab.add_classification(
                                price, vocab.HammerPrice)
            multi = self.helper.transaction_contains_multiple_lots(
                auction_data, data)
            if multi:
                tx_data['multi_lot_tx'] = lots
        # `lot.timespan` may be unset; in that case no '_date' is recorded.
        with suppress(AttributeError):
            tx_data['_date'] = lot.timespan
        data['_prov_entry_data'] = add_crom_data(data=tx_data, what=tx)
        data['_event_causing_prov_entry'] = add_crom_data(data=sale_data,
                                                          what=lot)
    yield data
def add_props(self, data: dict, role=None, **kwargs):
    '''
    Enrich a person/group record in place.

    - `data['nationality']` (a ';'-separated string or a list of strings) is
      replaced by a list of the matching `vocab.instances` entries (looked up
      as '<nationality> nationality'); a warning is issued for each one that
      has no instance.
    - `brief_notes`, `text`, `internal_notes` and `working_notes` are split on
      ';' and each non-blank part is added to `data['referred_to_by']` as a
      statement with the classification pair listed below; `name_cite` and
      `bibliography` are each added once as a bibliography statement.
    - If `data['auth_name']` names an anonymous group, `data['label']` is
      rewritten from the matched nationality / century / period, and for
      dated groups a professional activity is appended to `data['events']`.

    `role` defaults to 'person' and is used when building group labels.
    Extra keyword arguments are accepted and ignored.
    '''
    role = role if role else 'person'
    auth_name = data.get('auth_name', '')
    period_match = self.anon_period_re.match(auth_name)

    nationalities = []
    if 'nationality' in data:
        nationality = data['nationality']
        if isinstance(nationality, str):
            # Blank segments (e.g. from a trailing ';') are not nationalities.
            nationalities += [
                n.lower().strip() for n in nationality.split(';') if n.strip()
            ]
        elif isinstance(nationality, list):
            nationalities += [n.lower() for n in nationality]
    data['nationality'] = []
    data.setdefault('referred_to_by', [])

    # NOTE: no professional activity is created here from the record's active
    # dates; that step was commented out in an earlier revision of this code.

    notes_field_classification = {
        'brief_notes': (vocab.BiographyStatement, vocab.External),
        'text': (vocab.BiographyStatement, vocab.Internal),
        'internal_notes': (vocab.BiographyStatement, vocab.Internal),
        'working_notes': (vocab.ResearchStatement, vocab.Internal),
    }
    for key, note_classification in notes_field_classification.items():
        if key in data:
            for content in (n.strip() for n in data[key].split(';')):
                # Skip blank segments so no empty statements are emitted.
                if content:
                    cite = vocab.make_multitype_obj(
                        *note_classification, ident='', content=content)
                    data['referred_to_by'].append(cite)

    # Each citation field is added exactly once.
    for key in ('name_cite', 'bibliography'):
        if data.get(key):
            cite = vocab.BibliographyStatement(ident='', content=data[key])
            data['referred_to_by'].append(cite)

    if self.is_anonymous_group(auth_name):
        nationality_match = self.anon_nationality_re.match(auth_name)
        dated_nationality_match = self.anon_dated_nationality_re.match(auth_name)
        dated_match = self.anon_dated_re.match(auth_name)
        data.setdefault('events', [])
        if nationality_match:
            with suppress(ValueError):
                nationality = nationality_match.group(1).lower()
                nationalities.append(nationality)
                group_label = self.anonymous_group_label(
                    role, nationality=nationality)
                data['label'] = group_label
        elif dated_nationality_match:
            # A ValueError (e.g. from int()) abandons the rest of this branch.
            with suppress(ValueError):
                nationality = dated_nationality_match.group(1).lower()
                nationalities.append(nationality)
                century = int(dated_nationality_match.group(2))
                group_label = self.anonymous_group_label(
                    role, century=century, nationality=nationality)
                data['label'] = group_label
                pact_uri = data['uri'] + '-ProfAct-dated-natl'
                a = self.professional_activity(
                    group_label,
                    classified_as=[vocab.ActiveOccupation],
                    ident=pact_uri,
                    century=century,
                    narrow=True)
                data['events'].append(a)
        elif dated_match:
            with suppress(ValueError):
                century = int(dated_match.group(1))
                group_label = self.anonymous_group_label(role, century=century)
                data['label'] = group_label
                pact_uri = data['uri'] + '-ProfAct-dated'
                a = self.professional_activity(
                    group_label,
                    classified_as=[vocab.ActiveOccupation],
                    ident=pact_uri,
                    century=century,
                    narrow=True)
                data['events'].append(a)
        elif period_match:
            period = period_match.group(1).lower()
            data['label'] = f'anonymous {period} {role}s'

    for nationality in nationalities:
        key = f'{nationality.lower()} nationality'
        n = vocab.instances.get(key)
        if n:
            data['nationality'].append(n)
        else:
            warnings.warn(
                f'No nationality instance found in crom for: {key!r}')