def __call__(self, data: dict, non_auctions):
    '''Add modeling for the entry describing a physical auction catalog in the PSCP dataset.'''
    catalog_no = data['catalog_number']
    owner_code = data['owner_code']
    copy_no = data['copy_number']
    star_no = data['star_record_no']
    # A catalog may describe a non-auction event; prefer the service data,
    # then the record's own flag, and finally default to 'Auction'.
    sale_type = non_auctions.get(catalog_no, data.get('non_auction_flag', 'Auction'))

    # Only non-empty key components participate in the URI.
    uri_keys = [k for k in (catalog_no, owner_code, copy_no) if k]
    record_uri = self.helper.make_proj_uri('ENTRY', 'PHYS-CAT', *uri_keys)
    catalog_label = self.helper.physical_catalog_label(catalog_no, sale_type, owner_code, copy_no)
    row_name = f'STAR Entry for Physical {catalog_label}'

    row = vocab.EntryTextForm(ident=record_uri, content=data['star_csv_data'], label=row_name)
    row.part_of = self.helper.static_instances.get_instance('LinguisticObject', 'db-sales_catalogs')

    # Attribute the creation of the entry to the GPI group.
    creation = model.Creation(ident='')
    creation.carried_out_by = self.helper.static_instances.get_instance('Group', 'gpi')
    row.created_by = creation

    row.identified_by = self.helper.gpi_number_id(star_no, vocab.StarNumber)
    row.identified_by = vocab.PrimaryName(ident='', content=row_name)

    data['_catalog_record'] = add_crom_data({'uri': record_uri}, row)
    yield data
def write_obj1(self, id):
    '''Writes a Person model object with a label and a PrimaryName'''
    # Build a minimal Person record identified by a urn: URI and a primary name.
    p1 = vocab.Person(ident=f'urn:{id}', label='Greg')
    p1.identified_by = vocab.PrimaryName(content='Gregory Williams')
    # NOTE(review): `w` is constructed but never used; the actual write below
    # goes through self.writer. Confirm whether `w` should replace self.writer
    # here or whether this line can be removed.
    w = MergingFileWriter(directory=self.path, model='test-model')
    # Serialize the model object to a dict and hand it to the writer under test.
    d = self.obj_to_dict(p1, id)
    self.writer(d)
def setup_static_instances(self):
    '''
    These are instances that are used statically in the code. For example,
    when we provide attribution of an identifier to Getty, or use a Lugt
    number, we need to serialize the related Group or Person record for that
    attribution, even if it does not appear in the source data.
    '''
    def _ulan_ref(ulan):
        # Exact-match link to the corresponding Getty ULAN authority record.
        return model.BaseResource(ident=f'http://vocab.getty.edu/ulan/{ulan}')

    def _named(cls, uri, name):
        # Entity of type `cls` carrying both a label and a PrimaryName.
        entity = cls(ident=uri, label=name)
        entity.identified_by = vocab.PrimaryName(ident='', content=name)
        return entity

    gci = _named(
        model.Group,
        self.helper.make_shared_uri('STATIC', 'ORGANIZATION', 'Getty Conservation Institute'),
        'Getty Conservation Institute')
    gci.exact_match = _ulan_ref(500115991)

    gri = _named(
        model.Group,
        self.helper.make_proj_uri('ORGANIZATION', 'LOCATION-CODE', 'JPGM'),
        'Getty Research Institute')
    gri.exact_match = _ulan_ref(500115990)

    lugt = _named(
        model.Person,
        self.helper.make_proj_uri('PERSON', 'ULAN', 500321736),
        'Frits Lugt')
    lugt.exact_match = _ulan_ref(500321736)

    instances = defaultdict(dict)
    instances.update({
        'Group': {'gci': gci, 'gri': gri},
        'Person': {'lugt': lugt},
    })
    return instances
def static_db_instance(self, *keys, **kwargs):
    '''
    Return a `vocab.Database` for the STAR database identified by `keys`.

    Accepts optional keyword arguments:
      - `name`: display name (defaults to 'STAR <keys> Database')
      - `creator`: group/person to attribute the database's creation to
    '''
    ident = self.helper.make_shared_uri('DB', *keys)
    default_name = f"STAR {' '.join(keys)} Database"
    db_name = kwargs.get('name', default_name)

    db = vocab.Database(ident=ident, label=db_name)
    db.identified_by = vocab.PrimaryName(ident='', content=db_name)

    maker = kwargs.get('creator')
    if maker:
        # Attach a creation event attributing the database to `creator`.
        activity = model.Creation(ident='')
        activity.carried_out_by = maker
        db.created_by = activity
    return db
def add_title(self, data):
    '''
    Special handling is given to modeling of the title (PrimaryName) of the article.
    If we can detect that it is written in a language that matches one of the
    languages asserted for either the document or summaries, then we assert that
    as the language of the title Linguistic Object.
    '''
    title = data.get('title')
    if not title:
        return
    # Restrict language detection to the languages already declared on the record.
    allowed = data['_declared_languages']
    detected = self.helper.validated_string_language(title, allowed)
    primary = vocab.PrimaryName(ident='', content=title)
    if detected:
        primary.language = detected
    data['identifiers'].append(primary)
def model_series_group(self, record, data):
    '''
    Model the series-level data for a journal `record`: titles (primary,
    translated, variant), document languages, publishing frequency, start and
    cease years, and ISSN/CODEN identifiers.

    Mutates `record` in place; returns None.
    '''
    if not data:
        return
    record.setdefault('identifiers', [])
    record.setdefault('referred_to_by', [])
    record.setdefault('language', [])

    title = data.get('title')
    title_translated = data.get('title_translated')
    variant_titles = _as_list(data.get('variant_title'))
    # NOTE(review): related_titles is extracted but never modeled below —
    # confirm whether related-title modeling is still TODO.
    related_titles = _as_list(data.get('related_title'))
    lang_docs = _as_list(data['lang_doc'])
    frequency = data.get('frequency')
    start_year = data.get('start_year')
    cease_year = data.get('cease_year')
    issn = data.get('issn')
    coden = data.get('coden')

    if title:
        record['label'] = title
        record['identifiers'].append(vocab.PrimaryName(ident='', content=title))
    if title_translated:
        # BUG FIX: previously this appended the untranslated `title` as the
        # TranslatedTitle content (cf. the correct handling in model_issue_group).
        record['identifiers'].append(
            vocab.TranslatedTitle(ident='', content=title_translated))
    for vtitle in variant_titles:
        record['identifiers'].append(vocab.Title(ident='', content=vtitle))

    for lang in lang_docs:
        lang_obj = self.helper.language_object_from_code(lang)
        if lang_obj:
            record['language'].append(lang_obj)

    if frequency:
        record['referred_to_by'].append(vocab.Note(ident='', content=frequency))
    if start_year:
        record['_publishing_start_year'] = start_year
    if cease_year:
        record['_publishing_cease_year'] = cease_year
    if issn:
        record['identifiers'].append(vocab.IssnIdentifier(ident='', content=issn))
    if coden:
        record['identifiers'].append(vocab.CodenIdentifier(ident='', content=coden))
def __call__(self, data: dict):
    '''Add modeling for the entry describing a person/group in the PSCP PEOPLE dataset.'''
    star_no = data['star_record_no']
    auth_name = data.get('auth_name')
    entry_uri = self.helper.make_proj_uri('ENTRY', 'PEOPLE', star_no)

    entry = vocab.EntryTextForm(
        ident=entry_uri,
        label=f'Entry recorded in PSCP PEOPLE dataset for {auth_name}',
        content=data['star_csv_data'])

    # Attribute the creation of the entry to the GPI group.
    made_by = model.Creation(ident='')
    made_by.carried_out_by = self.helper.static_instances.get_instance('Group', 'gpi')
    entry.created_by = made_by

    entry.identified_by = self.helper.gpi_number_id(star_no, vocab.StarNumber)
    entry.identified_by = vocab.PrimaryName(
        ident='', content=f'STAR Person Authority Entry {star_no}')
    entry.part_of = self.helper.static_instances.get_instance('LinguisticObject', 'db-people')

    data['_entry_record'] = add_crom_data({'uri': entry_uri}, entry)
    yield data
def __call__(self, data: dict, post_sale_map, unique_catalogs, subject_genre, destruction_types_map):
    '''Add modeling for an object described by a sales record'''
    hmo = get_crom_object(data)
    parent = data['parent_data']
    auction_data = parent.get('auction_of_lot')
    if auction_data:
        # Record the lot number as an identifier even when it is empty,
        # but warn so the empty identifier is visible in the logs.
        lno = str(auction_data['lot_number'])
        data.setdefault('identifiers', [])
        if not lno:
            warnings.warn(f'Setting empty identifier on {hmo.id}')
        data['identifiers'].append(vocab.LotNumber(ident='', content=lno))
    else:
        warnings.warn(f'***** NO AUCTION DATA FOUND IN populate_object')
    # NOTE(review): if auction_data is missing, the warning above does not
    # stop execution and the subscripts below will raise — confirm whether
    # records without auction_of_lot can actually reach this transformer.
    cno = auction_data['catalog_number']
    lno = auction_data['lot_number']
    date = implode_date(auction_data, 'lot_sale_')
    lot = self.helper.shared_lot_number_from_lno(lno)
    # the current key for this object; may be associated later with prev and post object keys
    now_key = (cno, lno, date)
    data['_locations'] = []
    data['_events'] = []
    # Populate the various facets of the object via dedicated helpers.
    record = self._populate_object_catalog_record(data, parent, lot, cno, parent['pi_record_no'])
    self._populate_object_visual_item(data, subject_genre)
    self._populate_object_destruction(data, parent, destruction_types_map)
    self.populate_object_statements(data)
    self._populate_object_present_location(data, now_key, destruction_types_map)
    self._populate_object_notes(data, parent, unique_catalogs)
    self._populate_object_prev_post_sales(data, now_key, post_sale_map)
    # Portal URLs are modeled as web pages referring to the object.
    for p in data.get('portal', []):
        url = p['portal_url']
        hmo.referred_to_by = vocab.WebPage(ident=url, label=url)
    if 'title' in data:
        title = data['title']
        if not hasattr(hmo, '_label'):
            typestring = data.get('object_type', 'Object')
            hmo._label = f'{typestring}: “{title}”'
        # The title is re-modeled below as a PrimaryName, so drop the raw key.
        del data['title']
        # Long titles are truncated for the name; the full text is preserved
        # as a Description referring back to the catalog record.
        shorter = truncate_with_ellipsis(title, 100)
        if shorter:
            description = vocab.Description(ident='', content=title)
            description.referred_to_by = record
            hmo.referred_to_by = description
            title = shorter
        t = vocab.PrimaryName(ident='', content=title)
        t.classified_as = model.Type(ident='http://vocab.getty.edu/aat/300417193', label='Title')
        t.referred_to_by = record
        data['identifiers'].append(t)
    # Additional titles become plain (non-primary) Names.
    for d in data.get('other_titles', []):
        title = d['title']
        t = vocab.Name(ident='', content=title)
        data['identifiers'].append(t)
    return data
def make_place(self, data: dict, base_uri=None):
    '''
    Given a dictionary representing data about a place, construct a model.Place object,
    assign it as the crom data in the dictionary, and return the dictionary.

    The dictionary keys used to construct the place object are:

    - name
    - type (one of: 'City', 'State', 'Province', 'Country', or 'Sovereign')
    - part_of (a recursive place dictionary)

    If the name matches a known unique location (derived from the
    unique_locations service data), the normal recursive handling of part_of
    data is bypassed, using the static instance instead and re-threading its
    part_of hierarchy into the data dictionary.
    '''
    # unique_locations = self.unique_locations
    canonical_location_names = self.canonical_location_names
    # Map the incoming type string to a vocab place-type instance.
    # Note that 'state' is deliberately modeled as a province.
    TYPES = {
        'city': vocab.instances['city'],
        'county': vocab.instances['county'],
        'province': vocab.instances['province'],
        'state': vocab.instances['province'],
        'country': vocab.instances['nation'],
        'sovereign': vocab.instances['sovereign'],
    }
    if data is None:
        return None
    type_name = data.get('type', 'place').lower()
    name = data.get('name')
    si = self.static_instances
    names = data.get('names', [])
    label = name
    parent_data = data.get('part_of')
    place_type = TYPES.get(type_name)
    parent = None
    if name.casefold() in canonical_location_names:
        # Canonical names short-circuit the recursive part_of handling.
        name = canonical_location_names.get(name.casefold(), name)
        label = name
    elif parent_data:
        # Recursively model the parent place and qualify the label with it.
        parent_data = self.make_place(parent_data, base_uri=base_uri)
        parent = get_crom_object(parent_data)
        if label:
            label = f'{label}, {parent._label}'
    placeargs = {}
    p = None
    if si:
        # Prefer a static instance keyed by name, falling back to the label.
        p = si.get_instance('Place', name)
        if not p:
            p = si.get_instance('Place', label)
        if p:
            # this is a static instance. we need to re-thread the part_of relationship
            # in the data dictionary, because the serialization depends on the dictionary
            # data, not the properties of the modeled object
            # from cromulent.model import factory
            # print(f'PLACE: {name} => {factory.toString(p, False)}')
            add_crom_data(data=data, what=p)
            # Breadth-first walk up the part_of chain, mirroring each modeled
            # parent into the corresponding 'part_of' dictionary entry.
            queue = [data]
            while queue:
                place_data = queue.pop(0)
                place = get_crom_object(place_data)
                parents = getattr(place, 'part_of', []) or []
                if parents:
                    for parent in parents:
                        if parent:
                            if 'part_of' not in place_data:
                                parent_data = add_crom_data(data={}, what=parent)
                                place_data['part_of'] = parent_data
                            else:
                                parent_data = add_crom_data(data=place_data['part_of'], what=parent)
                            queue.append(parent_data)
                elif 'part_of' in place_data:
                    parent_data = self.make_place(place_data['part_of'], base_uri=base_uri)
                    queue.append(parent_data)
    if p:
        # Static-instance case: the dictionary has been re-threaded above.
        return data
    if label:
        placeargs['label'] = label
    if data.get('uri'):
        placeargs['ident'] = data['uri']
    # elif label.casefold() in canonical_location_names:
    # 	label = canonical_location_names[label.casefold()]
    # 	data['uri'] = self.make_shared_uri('PLACE', label)
    # 	placeargs['ident'] = data['uri']
    elif base_uri:
        # Derive a URI from the base and the (URL-quoted) qualified label.
        data['uri'] = base_uri + urllib.parse.quote(label)
        placeargs['ident'] = data['uri']
    if not p:
        p = model.Place(**placeargs)
    if place_type:
        p.classified_as = place_type
    if name:
        p.identified_by = vocab.PrimaryName(ident='', content=name)
    else:
        warnings.warn(f'Place with missing name on {p.id}')
    for name in names:
        if name:
            p.identified_by = model.Name(ident='', content=name)
    if parent:
        p.part_of = parent
        data['part_of'] = parent_data
    return add_crom_data(data=data, what=p)
def setup_static_instances(self):
    '''
    These are instances that are used statically in the code. For example,
    when we provide attribution of an identifier to Getty, or use a Lugt
    number, we need to serialize the related Group or Person record for that
    attribution, even if it does not appear in the source data.
    '''
    def _ulan_ref(ulan):
        # Exact-match link to the corresponding Getty ULAN authority record.
        return model.BaseResource(ident=f'http://vocab.getty.edu/ulan/{ulan}')

    def _named(cls, uri, name):
        # Entity of type `cls` carrying both a label and a PrimaryName.
        entity = cls(ident=uri, label=name)
        entity.identified_by = vocab.PrimaryName(ident='', content=name)
        return entity

    gci = _named(
        model.Group,
        self.helper.make_shared_uri('STATIC', 'ORGANIZATION', 'Getty Conservation Institute'),
        'Getty Conservation Institute')
    gci.exact_match = _ulan_ref(500115991)

    gri = _named(
        model.Group,
        self.helper.make_proj_uri('ORGANIZATION', 'LOCATION-CODE', 'JPGM'),
        'Getty Research Institute')
    gri.exact_match = _ulan_ref(500115990)

    lugt = _named(
        model.Person,
        self.helper.make_proj_uri('PERSON', 'ULAN', 500321736),
        'Frits Lugt')
    lugt.exact_match = _ulan_ref(500321736)

    knoedler_ulan = 500304270
    knoedler = _named(
        model.Group,
        self.helper.make_shared_uri('ORGANIZATION', 'ULAN', str(knoedler_ulan)),
        'M. Knoedler & Co.')
    knoedler.exact_match = _ulan_ref(knoedler_ulan)

    # Material instances come from the 'materials' service data (AAT id -> label).
    materials = {
        aat: model.Material(ident=f'http://vocab.getty.edu/aat/{aat}', label=label)
        for aat, label in self.services['materials'].items()
    }

    instances = defaultdict(dict)
    instances.update({
        'Group': {'gci': gci, 'gri': gri, 'knoedler': knoedler},
        'Person': {'lugt': lugt},
        'Material': materials,
        'Place': self._static_place_instances(),
    })
    return instances
def model_issue_group(self, record, data, seq):
    '''
    Model a journal-issue group for the journal `record`: construct an issue
    linguistic object with its label, title identifiers, publication date
    (as a Publishing activity with a timespan), and notes, and append it to
    the record's `^part` list.

    Mutates `record` in place; returns None.
    '''
    record.setdefault('^part', [])
    issue_id = data['issue_id']
    title = data.get('title')
    title_translated = data.get('title_translated')
    date = data.get('date')  # issue_group/date/display_date, issue_group/date/sort_year
    volume = data.get('volume')
    number = data.get('number')
    note = data.get('note')

    journal_label = record['label']
    issue_label = f'Issue of {journal_label}'
    if title:
        issue_label = f'{journal_label}: “{title}”'
        # BUG FIX: this volume/number qualification belongs inside the titled
        # case; previously both branches tested the identical condition,
        # making the elif unreachable.
        if volume and number:
            issue_label = f'{issue_label} (v. {volume}, n. {number})'
    elif volume and number:
        issue_label = f'{journal_label} (v. {volume}, n. {number})'

    jid = record['record_desc_group']['record_id']
    issue = {
        'uri': self.helper.issue_uri(jid, issue_id),
        'label': issue_label,
        'object_type': vocab.IssueText,
        'identifiers': [self.helper.gci_number_id(issue_id)],
        'referred_to_by': [],
        'used_for': [],
    }
    if title:
        issue['identifiers'].append(vocab.PrimaryName(ident='', content=title))
    if title_translated:
        issue['identifiers'].append(
            vocab.TranslatedTitle(ident='', content=title_translated))

    if date:
        display_date = date.get('display_date')
        sort_year = date.get('sort_year')
        if display_date or sort_year:
            # Model publication as a Publishing activity with a timespan.
            a_uri = issue['uri'] + '-pub'
            a = vocab.Publishing(ident=a_uri, label=f'Publishing of {issue_label}')
            ts = model.TimeSpan(ident='')
            if display_date:
                ts._label = display_date
                ts.identified_by = vocab.DisplayName(ident='', content=display_date)
            if sort_year:
                try:
                    year = int(sort_year)
                    # BUG FIX: use the ISO 8601 'T' separator between the date
                    # and time components (was '-01-01:00:00:00Z').
                    ts.begin_of_the_begin = '%04d-01-01T00:00:00Z' % (year, )
                    ts.end_of_the_end = '%04d-01-01T00:00:00Z' % (year + 1, )
                except (ValueError, TypeError):
                    # Non-numeric sort_year: leave the timespan unbounded.
                    pass
            a.timespan = ts
            issue['used_for'].append(a)

    # TODO:
    # 	volume
    # 	number

    if note:
        issue['referred_to_by'].append(vocab.Note(ident='', content=note))

    mlalo = MakeLinkedArtLinguisticObject()
    mlalo(issue)

    i = get_crom_object(issue)
    for a in issue.get('used_for', []):
        i.used_for = a

    record['^part'].append(issue)
def setup_static_instances(self):
    '''
    These are instances that are used statically in the code. For example,
    when we provide attribution of an identifier to Getty, or use a Lugt
    number, we need to serialize the related Group or Person record for that
    attribution, even if it does not appear in the source data.
    '''
    def _ulan_ref(ulan):
        # Exact-match link to the corresponding Getty ULAN authority record.
        return model.BaseResource(ident=f'http://vocab.getty.edu/ulan/{ulan}')

    def _named(cls, uri, name):
        # Entity of type `cls` carrying both a label and a PrimaryName.
        entity = cls(ident=uri, label=name)
        entity.identified_by = vocab.PrimaryName(ident='', content=name)
        return entity

    gci = _named(
        model.Group,
        self.helper.make_shared_uri('STATIC', 'ORGANIZATION', 'Getty Conservation Institute'),
        'Getty Conservation Institute')
    gci.exact_match = _ulan_ref(500115991)

    gri = _named(
        model.Group,
        self.helper.make_proj_uri('ORGANIZATION', 'LOCATION-CODE', 'JPGM'),
        'Getty Research Institute')
    gri.exact_match = _ulan_ref(500115990)

    # GPI and PSCP have no ULAN records; they carry only names.
    gpi = _named(
        model.Group,
        self.helper.make_shared_uri('STATIC', 'ORGANIZATION', 'Getty Provenance Index'),
        'Getty Provenance Index')
    pscp = _named(
        model.Group,
        self.helper.make_shared_uri('STATIC', 'ORGANIZATION', 'Project for the Study of Collecting and Provenance'),
        'Project for the Study of Collecting and Provenance')

    lugt = _named(
        model.Person,
        self.helper.make_proj_uri('PERSON', 'ULAN', 500321736),
        'Frits Lugt')
    lugt.exact_match = _ulan_ref(500321736)

    knoedler_ulan = 500304270
    knoedler = _named(
        model.Group,
        self.helper.make_shared_uri('ORGANIZATION', 'ULAN', str(knoedler_ulan)),
        'M. Knoedler & Co.')
    knoedler.exact_match = _ulan_ref(knoedler_ulan)

    newyork = _named(
        model.Place,
        self.helper.make_shared_uri('PLACE', 'USA', 'NY', 'New York'),
        'New York, NY')

    # Material instances come from the 'materials' service data, when present.
    materials = {}
    if 'materials' in self.services:
        materials.update({
            aat: model.Material(ident=f'http://vocab.getty.edu/aat/{aat}', label=label)
            for aat, label in self.services['materials'].items()
        })

    places = self._static_place_instances()
    places.update({'newyork': newyork})

    # The STAR source databases, each attributed to GPI.
    db_people = self.static_db_instance(
        'PEOPLE', name='STAR Person Authority Database', creator=gpi)
    db_knoedler = self.static_db_instance(
        'Knoedler', name='STAR Knoedler Database', creator=gpi)
    db_sales_events = self.static_db_instance(
        'Sales', 'Descriptions', name='STAR Sales Catalogue Database', creator=gpi)
    db_sales_catalogs = self.static_db_instance(
        'Sales', 'Catalogue', name='STAR Physical Sales Catalogue Database', creator=gpi)
    db_sales_contents = self.static_db_instance(
        'Sales', 'Contents', name='STAR Sales Contents Database', creator=gpi)

    instances = defaultdict(dict)
    instances.update({
        'LinguisticObject': {
            'db-people': db_people,
            'db-knoedler': db_knoedler,
            'db-sales_events': db_sales_events,
            'db-sales_catalogs': db_sales_catalogs,
            'db-sales_contents': db_sales_contents,
        },
        'Group': {
            'gci': gci,
            'pscp': pscp,
            'gri': gri,
            'gpi': gpi,
            'knoedler': knoedler,
        },
        'Person': {'lugt': lugt},
        'Material': materials,
        'Place': places,
    })
    return instances
def add_names(self, data: dict, referrer=None, role=None, group=False, **kwargs):
    '''
    Based on the presence of `auth_name` and/or `name` fields in `data`,
    sets the `label`, `names`, and `identifier` keys to appropriate
    strings/`model.Identifier` values.

    If the `role` string is given (e.g. 'artist'), also sets the `role_label`
    key to a value (e.g. 'artist “RUBENS, PETER PAUL”').
    '''
    data.setdefault('identifiers', [])
    auth_name = data.get('auth_name', '')
    disp_name = data.get('auth_display_name')
    name_types = [vocab.PrimaryName]
    # Corporate vs personal name type depends on whether this is a group record.
    personalNameType = vocab.CorporateName if group else vocab.PersonalName
    if disp_name:
        # A display name takes over as the value modeled below; the authority
        # name (if any) is still recorded as a PrimaryName and as the label.
        if auth_name:
            data['identifiers'].append(vocab.PrimaryName(ident='', content=auth_name))
            data['label'] = auth_name
        auth_name = disp_name
        name_types = [personalNameType, vocab.DisplayName]
    role_label = None
    if self.acceptable_person_auth_name(auth_name):
        if role:
            role_label = f'{role} “{auth_name}”'
        data.setdefault('label', auth_name)
        pname = vocab.make_multitype_obj(*name_types, ident='', content=auth_name)  # NOTE: most of these are also vocab.SortName, but not 100%, so witholding that assertion for now
        if referrer:
            pname.referred_to_by = referrer
        data['identifiers'].append(pname)
    data.setdefault('names', [])
    names = []
    name = data.get('name')
    if name:
        del data['name']  # this will be captured in the 'names' array, so remove it here so the output isn't duplicated
        names.append(name)
    # Variant names arrive as a single semicolon-separated string.
    variant_names = data.get('variant_names')
    if variant_names:
        names += [n.strip() for n in variant_names.split(';')]
    for name in names:
        # Fall back to the first plain name for the role label if the
        # authority name didn't produce one.
        if role and not role_label:
            role_label = f'{role} “{name}”'
        name_kwargs = {}
        # name_kwargs['classified_as'] = model.Type(ident='http://vocab.getty.edu/aat/300266386', label='Personal Name')
        name_kwargs['classified_as'] = personalNameType
        if referrer:
            name_kwargs['referred_to_by'] = [referrer]
        data['names'].append((name, name_kwargs))
        data.setdefault('label', name)
    # Final fallbacks when no usable name was found at all.
    data.setdefault('label', '(Anonymous)')
    if role and not role_label:
        role_label = f'anonymous {role}'
    if role:
        data['role_label'] = role_label