def test_init_params(self):
    """Check that constructor keyword arguments end up on the new instance.

    Each case builds one resource from a single keyword argument and then
    compares one attribute of the result with the expected value. The cases
    cover ``ident`` (a URN is expected back unchanged, a schema.org URI in
    its ``schema:`` prefixed form), ``content``, and ``value`` -- which is
    expected to be readable as ``value`` on a ``MonetaryAmount`` but as
    ``content`` on a ``Name`` or ``Identifier``.
    """
    # (class to instantiate, constructor kwargs, attribute to read, expected value)
    cases = (
        (model.Person, {"ident": "urn:uuid:1234"}, "id", "urn:uuid:1234"),
        (model.Person, {"ident": "http://schema.org/Foo"}, "id", "schema:Foo"),
        (model.Name, {"content": "Test"}, "content", "Test"),
        (model.MonetaryAmount, {"value": 10}, "value", 10),
        (model.Name, {"value": "Rob"}, "content", "Rob"),
        (model.Identifier, {"content": "xyz123"}, "content", "xyz123"),
        (model.Identifier, {"value": "abc"}, "content", "abc"),
    )
    for resource_class, kwargs, attribute, expected in cases:
        instance = resource_class(**kwargs)
        self.assertEqual(getattr(instance, attribute), expected)
def handle_places(self, data):
    """Model the places and addresses listed under ``data['locations']``.

    Every entry of ``data['locations']`` is a dict that may carry:

    - ``location``, optionally with ``location_note`` and ``location_date``.
      The name is parsed with ``parse_location_name`` and the record built by
      ``self.helper.make_place`` is appended to ``data['places']``.
    - ``address``, optionally with ``address_note`` and ``address_date``.
      The address is wrapped in a ``model.Identifier`` and appended (as a
      crom data dict) to ``data['contact_point']``.

    Notes become ``vocab.Note`` objects and dates become
    ``vocab.BibliographyStatement`` objects; both are appended to
    ``data['referred_to_by']``.

    ``data`` is modified in place and nothing is returned. The ``places``,
    ``contact_point`` and ``referred_to_by`` lists are indexed directly, so
    they are expected to exist already whenever there is something to add.
    """
    base_uri = self.helper.make_proj_uri('PLACE', '')
    for loc in data.get('locations', []):
        location = loc.get('location')
        if location:
            current = parse_location_name(location, uri_base=self.helper.proj_prefix)
            place_data = self.helper.make_place(current, base_uri=base_uri)
            data['places'].append(place_data)

            location_note = loc.get('location_note')
            if location_note:
                data['referred_to_by'].append(vocab.Note(ident='', content=location_note))

            location_date = loc.get('location_date')
            if location_date:
                statement = vocab.BibliographyStatement(
                    ident='', content=f'Residence in {location} ({location_date})')
                data['referred_to_by'].append(statement)

        address = loc.get('address')
        if address:
            contact = model.Identifier(ident='', content=address)
            contact_data = add_crom_data(data={}, what=contact)
            data['contact_point'].append(contact_data)

            address_note = loc.get('address_note')
            if address_note:
                data['referred_to_by'].append(vocab.Note(ident='', content=address_note))

            address_date = loc.get('address_date')
            if address_date:
                # The statement names the location the address belongs to. An
                # entry may have an address but no location; fall back to the
                # address text then, so the statement never reads
                # "Address at None (...)".
                where = location or address
                statement = vocab.BibliographyStatement(
                    ident='', content=f'Address at {where} ({address_date})')
                data['referred_to_by'].append(statement)
def handle_places(self, data):
    """Model the semicolon-separated ``location`` and ``address`` fields of ``data``.

    ``data['location']`` and ``data['address']`` are each split on ``;``;
    the pieces are stripped, empty pieces are dropped and duplicates are
    removed. Every remaining location is parsed with ``parse_location_name``
    and the record built by ``self.helper.make_place`` is appended to
    ``data['places']``. Every remaining address is wrapped in a
    ``model.Identifier`` and appended (as a crom data dict) to
    ``data['contact_point']``.

    ``data`` is modified in place and nothing is returned. A missing or
    ``None`` field is treated like an empty string.
    """
    def split_unique(raw):
        # dict.fromkeys removes duplicates while keeping first-seen order, so
        # records are appended in the order they appear in the field.
        # Iterating a set of strings would give an order that can change
        # from one interpreter run to the next.
        pieces = (piece.strip() for piece in (raw or '').split(';'))
        return list(dict.fromkeys(piece for piece in pieces if piece))

    locations = split_unique(data.get('location'))
    base_uri = self.helper.make_proj_uri('PLACE', '')
    for location in locations:
        current = parse_location_name(location, uri_base=self.helper.proj_prefix)
        place_data = self.helper.make_place(current, base_uri=base_uri)
        data['places'].append(place_data)

    for address in split_unique(data.get('address')):
        contact = model.Identifier(ident='', content=address)
        contact_data = add_crom_data(data={}, what=contact)
        data['contact_point'].append(contact_data)
def __call__(self, data: dict, location_codes, unique_catalogs):
    '''Add information about the ownership of a physical copy of an auction catalog

    `data` must contain `catalog_number` and `owner_code`; `copy_number` is
    optional and defaults to the empty string.

    When `owner_code` is a key of `location_codes`, a `model.Group` for the
    owning organization is created, described in `data['_owner']`, and set as
    the `current_owner` of the crom object attached to `data`. When the code
    is unknown, no owner is modeled.

    In both cases the URI of this copy is added to `unique_catalogs` under a
    key that stands for every copy of the catalog held by the same owner.

    Returns `data`.
    '''
    cno = data['catalog_number']
    owner_code = data['owner_code']
    copy_number = data.get('copy_number', '')
    entry_record = get_crom_object(data.get('_catalog'))

    # Only the lookup of the owner code is expected to fail. Keeping the
    # KeyError handling this narrow means a KeyError raised while building
    # the owner is not silently swallowed, which would leave `data['_owner']`
    # half-populated.
    try:
        owner_name = location_codes[owner_code]
    except KeyError:
        pass
    else:
        owner_uri = self.helper.make_proj_uri('ORGANIZATION', 'LOCATION-CODE', owner_code)
        data['_owner'] = {
            'label': owner_name,
            'uri': owner_uri,
            'referred_to_by': [entry_record],
            'identifiers': [
                model.Name(ident='', content=owner_name),
                model.Identifier(ident='', content=str(owner_code)),
            ],
        }
        owner = model.Group(ident=owner_uri)
        owner.referred_to_by = entry_record
        if not owner_code:
            warnings.warn(f'Setting empty identifier on {owner.id}')
        # NOTE(review): this used to be called twice in a row with the same
        # arguments; a single call is kept on the assumption that
        # add_crom_data simply attaches `owner` to the dict -- confirm.
        add_crom_data(data=data['_owner'], what=owner)

        catalog = get_crom_object(data)
        catalog.current_owner = owner

    # Add the URI of this physical catalog to `unique_catalogs`. This data will be used
    # later to figure out which catalogs can be uniquely identified by a catalog number
    # and owner code (e.g. for owners who do not have multiple copies of a catalog).
    # The copy number is None for the key because we want a key that will stand in
    # for all the copies belonging to a single owner.
    owner_key_uri = self.helper.physical_catalog_uri(cno, owner_code, None)
    copy_uri = self.helper.physical_catalog_uri(cno, owner_code, copy_number)
    unique_catalogs[owner_key_uri].add(copy_uri)
    return data
def set_properties(self, data, who):
    """Set person-specific properties on ``who`` from the values in ``data``.

    After delegating to the parent class, the following keys are handled:

    - ``label``: stored (as a string) in ``who._label``.
    - ``aat_nationality_1`` .. ``aat_nationality_3`` (with matching
      ``*_label`` keys): each becomes a ``vocab.Nationality`` classification.
      Processing stops at the first empty slot or at one of the two excluded
      AAT ids.
    - ``nationality``: entries that are ``model.BaseResource`` instances are
      added as classifications; other entries are ignored.
    - ``active_early`` / ``active_late``: years bounding a ``vocab.Active``
      activity. Either one may be missing.
    - ``events``: each entry is assigned to ``who.carried_out``.
    - ``birth`` / ``death`` (labels) with optional ``birth_clean`` /
      ``death_clean`` (a pair of begin/end datetimes): a ``model.Birth`` /
      ``model.Death`` with a time span.
    - ``contact_point``: ``model.Identifier`` instances are used directly,
      dicts are resolved with ``get_crom_object``, anything else is wrapped
      in a new ``model.Identifier``.
    """
    super().set_properties(data, who)
    with suppress(KeyError):
        who._label = str(data['label'])

    # add nationality
    for ns in ('aat_nationality_1', 'aat_nationality_2', 'aat_nationality_3'):
        n = data.get(ns)
        # XXX Strip out antique / modern anonymous as a nationality
        if not n or int(n) in (300310546, 300264736):
            break
        natl = vocab.Nationality(ident="http://vocab.getty.edu/aat/%s" % n)
        who.classified_as = natl
        natl._label = str(data[ns + '_label'])

    for n in data.get('nationality', []):
        if isinstance(n, model.BaseResource):
            who.classified_as = n
        # nationality field can contain other information, but not useful.
        # XXX Intentionally ignored but validate with GRI

    # Read both years with .get(): the guard below accepts a record carrying
    # only one of the two keys, so indexing the other one directly would
    # raise KeyError.
    active_early = data.get('active_early')
    active_late = data.get('active_late')
    if active_early or active_late:
        act = vocab.Active()
        ts = model.TimeSpan(ident='')
        # Date and time are separated by "T", matching the
        # "%Y-%m-%dT%H:%M:%SZ" format used for birth and death below.
        year_start = "%s-01-01T00:00:00Z"
        if active_early:
            ts.begin_of_the_begin = year_start % (active_early,)
            ts.end_of_the_begin = year_start % (active_early + 1,)
        if active_late:
            ts.begin_of_the_end = year_start % (active_late,)
            ts.end_of_the_end = year_start % (active_late + 1,)
        ts._label = "%s-%s" % (active_early, active_late)
        act.timespan = ts
        who.carried_out = act

    for event in data.get('events', []):
        who.carried_out = event

    def life_timespan(label, clean):
        # Build the time span shared by birth and death: `clean`, when given,
        # is a (begin, end) pair of datetimes, either of which may be empty.
        span = model.TimeSpan(ident='')
        if clean:
            if clean[0]:
                span.begin_of_the_begin = clean[0].strftime("%Y-%m-%dT%H:%M:%SZ")
            if clean[1]:
                span.end_of_the_end = clean[1].strftime("%Y-%m-%dT%H:%M:%SZ")
        span._label = label
        return span

    if data.get('birth'):
        b = model.Birth()
        b.timespan = life_timespan(data['birth'], data.get('birth_clean'))
        b._label = "Birth of %s" % who._label
        who.born = b

    if data.get('death'):
        d = model.Death()
        d.timespan = life_timespan(data['death'], data.get('death_clean'))
        d._label = "Death of %s" % who._label
        who.died = d

    for p in data.get('contact_point', []):
        if isinstance(p, model.Identifier):
            pl = p
        elif isinstance(p, dict):
            pl = get_crom_object(p)
        else:
            pl = model.Identifier(ident='', content=p)
        who.contact_point = pl
def set_properties(self, data, thing):
    '''
    Set notes, identifiers, a label and names on `thing` from the values in `data`.

    The following keys in `data` are handled to set properties on `thing`:

    `referred_to_by` - Each entry is assigned to `thing.referred_to_by`. An entry is one of:
        - A `(content, type)` tuple. If `type` is a class, it is instantiated with the
          content. If `type` is an instance, its `content` is set and the instance itself
          is used. If `type` is `None`, a plain `vocab.Note` is created.
        - A string, which is wrapped in a `vocab.Note`.
        - Anything else, which is used as-is.

    `identifiers` - Each entry is assigned to `thing.identified_by`. `(content, type)`
        tuples are handled as for `referred_to_by`, with `model.Identifier` used when
        `type` is `None`; a warning is issued when the content of a tuple is empty.
        Non-tuple entries are used as-is.

    `label` - Stored in `thing._label`, unless `thing` already has a `_label`.

    `names` - Each entry is either a bare name, or a tuple whose first element is the
        name and whose remaining elements are `dict`s describing the name. (Only
        tuples are unpacked; any other value, including a list, is treated as the
        name itself.) A name is set on `thing` with `set_la_name`, and the `dict`s are
        then passed on to `self.set_lo_properties`:
        - An array associated with the key `'referred_to_by'` will be used to assert
          that the `LinguisticObject`s (or `dict`s representing a `LinguisticObject`)
          refer to the name.
        - A value associated with the key `'classified_as'` (either a `model.Type` or a
          cromulent vocab class) will be asserted as the classification of the
          `model.Name`. This key is removed from the `dict` in `data` once it is read.

    Example data:

    {
        'names': [
            'J. Paul Getty',
            (
                'Getty',
                {
                    'classified_as': model.Type(ident='http://vocab.getty.edu/aat/300404670', label='Primary Name'),
                    # or: 'classified_as': vocab.PrimaryName,
                    'referred_to_by': [
                        {'uri': 'tag:getty.edu,2019:digital:pipeline:REPLACE-WITH-UUID:knoedler#K-ROW-1-2-3'},
                        model.LinguisticObject(ident='tag:getty.edu,2019:digital:pipeline:REPLACE-WITH-UUID:knoedler#K-ROW-1-7-10'),
                    ]
                }
            )
        ]
    }
    '''
    for notedata in data.get('referred_to_by', []):
        if isinstance(notedata, tuple):
            content, itype = notedata
            if itype is None:
                # Without this branch `note` was left unbound on the first
                # entry, or still pointed at the previous entry's note.
                note = vocab.Note(content=content)
            elif isinstance(itype, type):
                note = itype(content=content)
            else:
                # Any non-class value is taken to be a ready-made instance.
                note = itype
                note.content = content
        elif isinstance(notedata, str):
            note = vocab.Note(content=notedata)
        else:
            note = notedata
        thing.referred_to_by = note

    for identifier in data.get('identifiers', []):
        if isinstance(identifier, tuple):
            content, itype = identifier
            if not content:
                warnings.warn(f'Setting empty identifier on {thing.id}')
            if itype is None:
                # Same unbound/stale-variable hazard as for notes above.
                ident = model.Identifier(ident='', content=content)
            elif isinstance(itype, type):
                ident = itype(ident='', content=content)
            else:
                ident = itype
                ident.content = content
        else:
            ident = identifier
        thing.identified_by = ident

    if not hasattr(thing, '_label') and 'label' in data:
        thing._label = data['label']

    for namedata in data.get('names', []):
        # namedata should take the form of:
        # "A. Name"
        # ("A. Name", {'referred_to_by': [{'uri': 'URI-OF-LINGUISTIC_OBJECT'}, model.LinguisticObject()]})
        if isinstance(namedata, tuple):
            name, *properties = namedata
        else:
            name = namedata
            properties = []

        name_kwargs = {}
        for props in properties:
            if 'classified_as' in props:
                # The classification is handed to set_la_name as the name's
                # type, and removed so set_lo_properties does not see it.
                name_kwargs['title_type'] = props.pop('classified_as')

        n = set_la_name(thing, name, **name_kwargs)
        self.set_lo_properties(n, *properties)
def set_properties(self, data, thing):
    """Set linguistic-object properties on ``thing`` from the values in ``data``.

    After delegating to the parent class, the following keys are handled:

    - ``label``: set as a name of ``thing`` (and as its label) through
      ``set_la_name``.
    - ``created_by``, ``used_for``, ``about``, ``classified_as``,
      ``dimensions``: every entry is assigned to the same-named property
      (``dimension`` for ``dimensions``).
    - ``translations``: every entry becomes another name; when a ``label``
      name exists, the new name is marked as a translation of it.
    - ``qualified_identifiers``: ``(content, type, notes)`` triples; ``type``
      is called with the content and the notes are attached to the result.
    - ``classifications`` / ``indexing``: ``model.Type`` instances (for
      ``indexing``: any non-tuple value) are used as-is; ``(code, label)``
      pairs are turned into a new ``model.Type`` carrying a name and an
      identifier. The result is assigned to ``thing.about``.
    - ``part_of``, ``part``, ``carried_by``: every entry is resolved with
      ``get_crom_object`` and assigned to the same-named property.

    A warning is issued whenever an empty identifier or name is set.
    """
    super().set_properties(data, thing)

    # TODO: this whole title_type thing isn't right. most of the identifiers below aren't titles
    title_type = model.Type(ident='http://vocab.getty.edu/aat/300417193', label='Title')

    primary_name = None
    if 'label' in data:
        primary_name = set_la_name(thing, data['label'], title_type, set_label=True)

    # Entries under these keys are assigned to the same-named property unchanged.
    for prop in ('created_by', 'used_for', 'about', 'classified_as'):
        for value in data.get(prop, []):
            setattr(thing, prop, value)

    for translation in data.get('translations', []):
        translated_name = set_la_name(thing, translation, title_type)
        if primary_name is not None:
            translated_name.translation_of = primary_name

    for content, itype, notes in data.get('qualified_identifiers', []):
        ident = itype(content=content)
        if not content:
            warnings.warn(f'Setting empty identifier on {thing.id}')
        thing.identified_by = ident
        for note in notes:
            ident.referred_to_by = note

    code_type = None  # TODO: is there a model.Type value for this sort of code?

    for entry in data.get('classifications', []):
        if isinstance(entry, model.Type):
            classification = entry
        else:
            cid, label = entry
            label_name = model.Name()
            label_name.classified_as = title_type
            label_name.content = label

            classification = model.Type(label=label)
            if not label:
                warnings.warn(f'Setting empty name on {classification.id}')
            classification.identified_by = label_name

            code = model.Identifier()
            code.classified_as = code_type
            if not cid:
                warnings.warn(f'Setting empty identifier on {code.id}')
            code.content = cid
            classification.identified_by = code
        thing.about = classification

    for entry in data.get('indexing', []):
        if not isinstance(entry, tuple):
            indexing = entry
        else:
            cid, label = entry
            label_name = model.Name()
            label_name.classified_as = title_type
            label_name.content = label

            indexing = model.Type(label=label)
            if not label:
                warnings.warn(f'Setting empty name on {indexing.id}')
            indexing.identified_by = label_name

            code = model.Identifier()
            code.classified_as = code_type
            code.content = cid
            if not cid:
                warnings.warn(f'Setting empty identifier on {code.id}')
            indexing.identified_by = code
        thing.about = indexing

    # Entries under these keys are data dicts; the crom object attached to
    # each one is what gets assigned.
    for prop in ('part_of', 'part', 'carried_by'):
        for related in data.get(prop, []):
            setattr(thing, prop, get_crom_object(related))

    for dimension in data.get('dimensions', []):
        thing.dimension = dimension
def model_imprint_group(self, record, data):
    """Model the imprint information in ``data`` onto the article ``record``.

    Does nothing when ``data`` is empty or ``None``. Otherwise the list-valued
    keys ``referred_to_by``, ``used_for``, ``part_of``, ``_activities``,
    ``_groups``, ``_places`` and ``identifiers`` are created on ``record`` if
    missing, and the following keys of ``data`` are handled:

    - ``edition``, ``series_number``, ``website_address``, ``thesis_degree``:
      appended to ``record['referred_to_by']`` as statements/notes.
    - ``doi``, ``coden``, ``technical_report_number``: appended to
      ``record['identifiers']``.
    - ``publisher`` / ``distributor`` (one entry or several, normalized with
      ``_as_list``): one ``vocab.Publishing`` / ``vocab.Distributing``
      activity per entry is appended to ``record['used_for']``. The group
      carrying it out and the place it took place at are recorded in
      ``record['_groups']`` / ``record['_places']`` when their ids are given.
    - ``journal_info``: the issue it identifies is appended to
      ``record['part_of']``.

    ``record`` must contain ``label`` and ``uri``. It is modified in place and
    nothing is returned.
    """
    if not data:
        return

    for key in ('referred_to_by', 'used_for', 'part_of', '_activities',
                '_groups', '_places', 'identifiers'):
        record.setdefault(key, [])

    edition = data.get('edition')
    series_number = data.get('series_number')
    doi = data.get('doi')
    coden = data.get('coden')
    website = data.get('website_address')
    publishers = _as_list(data.get('publisher'))
    distributors = _as_list(data.get('distributor'))
    # imprint_group/journal_info/aata_journal_id
    # imprint_group/journal_info/aata_issue_id
    journal = data.get('journal_info')
    degree = data.get('thesis_degree')
    tr = data.get('technical_report_number')

    if edition:
        record['referred_to_by'].append(vocab.EditionStatement(ident='', content=edition))
    if series_number:
        # TODO: classify this Note
        record['referred_to_by'].append(vocab.Note(ident='', content=series_number))
    if doi:
        record['identifiers'].append(vocab.DoiIdentifier(ident='', content=doi))
    if coden:
        record['identifiers'].append(vocab.CodenIdentifier(ident='', content=coden))
    if website:
        record['referred_to_by'].append(vocab.Note(ident='', content=website))

    article_label = record['label']

    def add_activities(entries, activity_class, location_key, uri_part, label_prefix):
        # Shared modelling of publishers and distributors: one activity per
        # entry, linked to its corporate body and place when ids are present.
        for i, entry in enumerate(entries):
            corp_id = entry.get('gaia_corp_id')
            # `or {}` also covers a location key that is present but None.
            geog_id = (entry.get(location_key) or {}).get('gaia_geog_id')
            a_uri = record['uri'] + f'-{uri_part}-{i}'
            activity = activity_class(ident=a_uri, label=f'{label_prefix} of {article_label}')
            if corp_id:
                group = model.Group(ident=self.helper.corporate_body_uri(corp_id))
                activity.carried_out_by = group
                record['_groups'].append(add_crom_data({}, group))
            if geog_id:
                place = model.Place(ident=self.helper.place_uri(geog_id))
                activity.took_place_at = place
                record['_places'].append(add_crom_data({}, place))
            # The activity is only attached through `used_for`;
            # `record['_activities']` is initialised above but not filled here.
            record['used_for'].append(activity)

    add_activities(publishers, vocab.Publishing, 'publisher_location', 'pub', 'Publishing')
    add_activities(distributors, vocab.Distributing, 'distributor_location', 'dist', 'Distribution')

    if journal:
        journal_id = journal.get('aata_journal_id')
        issue_id = journal.get('aata_issue_id')
        issue_uri = self.helper.issue_uri(journal_id, issue_id)
        issue = vocab.IssueText(ident=issue_uri)
        record['part_of'].append(add_crom_data({'uri': issue_uri}, issue))

    if degree:
        record['referred_to_by'].append(vocab.Note(ident='', content=degree))
    if tr:
        # TODO: classify this Identifier
        record['identifiers'].append(model.Identifier(ident='', content=tr))