def marc21_to_contribution(self, key, value):
    """Collect agent data from a MARC21 100/700/710/711 field.

    Fields whose character at index 4 of ``key`` is '2', and tags other
    than 100, 700, 710 and 711, are ignored.

    NOTE(review): the visible body fills ``agent`` but never returns it, so
    every call yields ``None``; the definition looks truncated -- confirm
    against the complete source.
    """
    tag = key[:3]
    if key[4] == '2' or tag not in ['100', '700', '710', '711']:
        return None

    agent = {'type': 'bf:Person'}
    if value.get('a'):
        name = utils.force_list(value.get('a'))[0]
        agent['preferred_name'] = remove_trailing_punctuation(name)

    # 100|700 Person
    if tag in ['100', '700']:
        if value.get('b'):
            numeration = utils.force_list(value.get('b'))[0]
            agent['numeration'] = remove_trailing_punctuation(numeration)
        if value.get('c'):
            qualifier = utils.force_list(value.get('c'))[0]
            agent['qualifier'] = remove_trailing_punctuation(qualifier)
        if value.get('d'):
            raw_date = utils.force_list(value.get('d'))[0].rstrip(',')
            # "birth-death": either side may be empty or missing.
            dates = remove_trailing_punctuation(raw_date).split('-')
            date_of_birth = dates[0].strip()
            if date_of_birth:
                agent['date_of_birth'] = date_of_birth
            if len(dates) > 1:
                date_of_death = dates[1].strip()
                if date_of_death:
                    agent['date_of_death'] = date_of_death
def marc21_to_author(self, key, value):
    """Get author.

    authors: loop:
    authors.name: 100$a [+ 100$b if it exists] or
        [700$a (+$b if it exists) repetitive] or
        [710$a repetitive (+$b if it exists, repetitive)]
    authors.date: 100 $d or 700 $d (optional)
    authors.qualifier: 100 $c or 700 $c (optional)
    authors.type: if 100 or 700 then person, if 710 then organisation

    Returns None for 700/710 fields whose character at index 4 of ``key``
    is '2'.
    """
    tag = key[:3]
    if key[4] == '2' and tag in ('710', '700'):
        return None

    author = {
        'type': 'person',
        'name': remove_trailing_punctuation(value.get('a')),
    }
    # Every $b is appended to the name, separated by a single space.
    for author_sub in utils.force_list(value.get('b')) or ():
        author['name'] += ' ' + remove_trailing_punctuation(author_sub)

    if tag == '710':
        author['type'] = 'organisation'
        return author

    # Qualifier and date are only kept for non-710 fields.
    if value.get('c'):
        author['qualifier'] = remove_trailing_punctuation(value.get('c'))
    if value.get('d'):
        author['date'] = remove_trailing_punctuation(value.get('d'))
    return author
def build_place_or_agent_data(code, label, index, link, add_country):
    """Build a place ($a) or agent ($b) statement entry.

    The entry always carries the cleaned ``label``. When ``add_country`` is
    truthy, the canton and country held by ``marc21tojson`` are attached if
    set. A second label taken from the alternate graphic data registered
    under '264' for ``link`` is added on a best-effort basis: any failure
    while looking it up is ignored.
    """
    statement_types = {'a': 'bf:Place', 'b': 'bf:Agent'}
    entry = {
        'type': statement_types[code],
        'label': [{'value': remove_trailing_punctuation(label)}],
    }

    if add_country:
        if marc21tojson.cantons:
            entry['canton'] = marc21tojson.cantons
        if marc21tojson.country:
            entry['country'] = marc21tojson.country

    try:
        alt_gr = marc21tojson.alternate_graphic['264'][link]
        subfield = marc21tojson.get_subfields(alt_gr['field'])[index]
        alternate_label = {
            'value': remove_trailing_punctuation(subfield),
            'language': '-'.join(
                (marc21tojson.lang_from_008, alt_gr['script'])),
        }
    except Exception:
        # No usable alternate graphic representation: keep the main label.
        pass
    else:
        entry['label'].append(alternate_label)
    return entry
def marc21_to_title(self, key, value):
    """Get title.

    title: 245$a without the trailing punctuation. If there is a $b, the
    result is "245$a : $b", the $b being cleaned of its trailing
    punctuation as well.
    """
    title = remove_trailing_punctuation(value.get('a'))
    sub_title = value.get('b')
    if not sub_title:
        return title
    sub_titles = utils.force_list(remove_trailing_punctuation(sub_title))
    return title + ' : ' + ' : '.join(sub_titles)
def build_edition_data(code, label, index, link):
    """Build the list of values for one edition subfield.

    The first item is the cleaned ``label``. A second item, taken from the
    alternate graphic data registered under '250' for ``link``, is added on
    a best-effort basis: any failure while looking it up is ignored.
    """
    values = [{'value': remove_trailing_punctuation(label)}]
    try:
        alt_gr = marc21.alternate_graphic['250'][link]
        subfield = marc21.get_subfields(alt_gr['field'])[index]
        alternate_value = {
            'value': remove_trailing_punctuation(subfield),
            'language': get_language_script(alt_gr['script']),
        }
    except Exception:
        # No usable alternate graphic representation: keep the main value.
        pass
    else:
        values.append(alternate_value)
    return values
def edition_format_text(edition):
    """Format edition for _text.

    Designations and responsibilities are paired by language (entries
    without a language use 'default'; a later entry for the same language
    replaces an earlier one). One "designation / responsibility" text is
    produced per designation language.
    """
    designation_by_language = {
        entry.get('language', 'default'): entry.get('value', '')
        for entry in edition.get('editionDesignation', [])
    }
    responsibility_by_language = {
        entry.get('language', 'default'): entry.get('value', '')
        for entry in edition.get('responsibility', [])
    }

    edition_text = []
    for language, designation in designation_by_language.items():
        responsibility = responsibility_by_language.get(language, '')
        text = remove_trailing_punctuation(
            f'{designation} / {responsibility}')
        item = {'value': text, 'language': language}
        if display_alternate_graphic_first(language):
            edition_text.insert(0, item)
        else:
            edition_text.append(item)
    return edition_text
def marc21_to_description(self, key, value):
    """Get extent.

    extent: 300$a (the first one if many). An extent already stored on
    ``self`` is never overwritten. Always returns None.
    """
    extents = value.get('a')
    if extents and not self.get('extent', None):
        first_extent = utils.force_list(extents)[0]
        self['extent'] = remove_trailing_punctuation(first_extent)
    return None
def build_place_or_agent_data(code, label, index):
    """Build a place ($a) or agent ($c) entry holding the cleaned label.

    ``index`` is accepted but not used by this variant.
    """
    statement_types = {'a': 'bf:Place', 'c': 'bf:Agent'}
    cleaned_label = {'value': remove_trailing_punctuation(label)}
    return {'type': statement_types[code], 'label': [cleaned_label]}
def marc21_to_edition_statement(self, key, value):
    """Get edition statement data.

    editionDesignation: 250 [$a non repetitive] (without trailing
        punctuation)
    responsibility: 250 [$b non repetitive]

    NOTE(review): the visible body only builds ``editionDesignation`` and
    never returns ``edition_data``; the definition looks truncated --
    confirm against the complete source.
    """
    edition_data = {}
    subfields_a = utils.force_list(value.get('a'))
    if subfields_a:
        designation = remove_trailing_punctuation(subfields_a[0])
        edition_data['editionDesignation'] = [{'value': designation}]
def marc21_to_author(self, key, value):
    """Get author.

    authors: loop:
    authors.name: 100$a [+ 100$b if it exists] or
        [700$a (+$b if it exists) repetitive] or
        [710$a repetitive (+$b if it exists, repetitive)]
    authors.date: 100 $d or 700 $d (optional)
    authors.qualifier: 100 $c or 700 $c (optional)
    authors.type: if 100 or 700 then person, if 710 then organisation

    When a $0 subfield resolves to a person link, the author only carries
    its type and that ``$ref``. Returns None when the character at index 4
    of ``key`` is '2'.
    """
    if key[4] == '2':
        return None

    author = {'type': 'person'}
    if value.get('0'):
        for identifier in utils.force_list(value.get('0')):
            link = get_person_link(marc21.bib_id, identifier, key, value)
            if link:
                # The last resolvable identifier wins.
                author['$ref'] = link
    if author.get('$ref'):
        return author

    # We do not have a $ref: describe the author from the field itself.
    author['name'] = ''
    if value.get('a'):
        name = not_repetitive(marc21.bib_id, key, value, 'a')
        author['name'] = remove_trailing_punctuation(name)
    for author_sub in utils.force_list(value.get('b')) or ():
        author['name'] += ' ' + remove_trailing_punctuation(author_sub)

    if key[:3] == '710':
        author['type'] = 'organisation'
        return author

    if value.get('c'):
        qualifier = not_repetitive(marc21.bib_id, key, value, 'c')
        author['qualifier'] = remove_trailing_punctuation(qualifier)
    if value.get('d'):
        date = not_repetitive(marc21.bib_id, key, value, 'd')
        author['date'] = remove_trailing_punctuation(date)
    return author
def build_place_or_agent_data(code, label):
    """Build a place ($a) or agent ($b) entry from a label.

    Returns None when nothing is left of the label once its trailing
    punctuation has been removed.
    """
    cleaned = remove_trailing_punctuation(label)
    if not cleaned:
        return None
    statement_types = {'a': 'bf:Place', 'b': 'bf:Agent'}
    return {'type': statement_types[code], 'label': [{'value': cleaned}]}
def marc21_to_description(self, key, value):
    """Get extent, otherMaterialCharacteristics, formats.

    extent: 300$a (the first one if many)
    otherMaterialCharacteristics: 300$b (the first one if many)
    formats: 300 [$c repetitive]

    Extent and other material characteristics are written to ``self`` and
    never overwritten once set. The return value is the list of formats:
    the one already stored on ``self`` if any, otherwise the $c values.
    Returns None when the field has no $c.
    """
    if value.get('a') and not self.get('extent', None):
        self['extent'] = remove_trailing_punctuation(
            utils.force_list(value.get('a'))[0])

    if value.get('b') and self.get('otherMaterialCharacteristics', []) == []:
        self['otherMaterialCharacteristics'] = remove_trailing_punctuation(
            utils.force_list(value.get('b'))[0])

    if not value.get('c'):
        return None
    formats = self.get('formats', None)
    if not formats:
        formats = list(utils.force_list(value.get('c')))
    return formats
def build_agent_data(code, label, index, link):
    """Build a place ($a) or agent ($b) statement entry.

    The entry always carries the cleaned ``label``. A second label, taken
    from the alternate graphic data registered under '264' for ``link``, is
    added on a best-effort basis: any failure while looking it up is
    ignored.
    """
    statement_types = {'a': 'bf:Place', 'b': 'bf:Agent'}
    entry = {
        'type': statement_types[code],
        'label': [{'value': remove_trailing_punctuation(label)}],
    }
    try:
        alt_gr = marc21.alternate_graphic['264'][link]
        subfield = marc21.get_subfields(alt_gr['field'])[index]
        alternate_label = {
            'value': remove_trailing_punctuation(subfield),
            'language': marc21.get_language_script(alt_gr['script']),
        }
    except Exception:
        # No usable alternate graphic representation: keep the main label.
        pass
    else:
        entry['label'].append(alternate_label)
    return entry
def build_place_or_agent_data(code, label, add_country):
    """Build a place ($a) or agent ($b) entry from a label.

    Returns None when nothing is left of the label once its trailing
    punctuation has been removed. When ``add_country`` is truthy and
    ``marc21.country`` is set, that country is attached to the entry.
    """
    cleaned = remove_trailing_punctuation(label)
    if not cleaned:
        return None
    statement_types = {'a': 'bf:Place', 'b': 'bf:Agent'}
    entry = {'type': statement_types[code], 'label': [{'value': cleaned}]}
    if add_country and marc21.country:
        entry['country'] = marc21.country
    return entry
def build_place_or_agent_data(code, label, index, add_country):
    """Build a place ($a) or agent ($c) entry holding the cleaned label.

    When ``add_country`` is truthy, the first $a of the first 102 field,
    lower-cased, is attached as the country if there is one. ``index`` is
    accepted but not used by this variant.
    """
    statement_types = {'a': 'bf:Place', 'c': 'bf:Agent'}
    entry = {
        'type': statement_types[code],
        'label': [{'value': remove_trailing_punctuation(label)}],
    }
    if not add_country:
        return entry

    # country from 102
    fields_102 = unimarctojson.get_fields(tag='102')
    if fields_102:
        country_codes = unimarctojson.get_subfields(fields_102[0], 'a')
        if country_codes:
            entry['country'] = country_codes[0].lower()
    return entry
def publication_statement_text(provision_activity):
    """Create publication statement from place, agent and date values.

    One text is built per label language (labels without a language go to
    'default', which is always present in the result). Separators between
    consecutive values depend on the statement types:

    * same type as the previous statement: ' ; ' for places and agents,
      ', ' for dates;
    * otherwise ' ; ' before a place, ', ' before a date and ' : ' before
      anything else.
    """
    same_type_separator = {
        'bf:Place': ' ; ',
        'bf:Agent': ' ; ',
        'Date': ', '
    }
    text_by_language = {'default': ''}
    previous_type = None

    for statement in provision_activity.get('statement', []):
        for label in statement['label']:
            language = label.get('language', 'default')
            text = text_by_language.setdefault(language, '')
            # A separator is only needed once the text has some content.
            if text:
                current_type = statement['type']
                if previous_type == current_type:
                    separator = same_type_separator[previous_type]
                elif current_type == 'bf:Place':
                    separator = ' ; '
                elif current_type == 'Date':
                    separator = ', '
                else:
                    separator = ' : '
                text += separator
            text_by_language[language] = text + label['value']
        previous_type = statement['type']

    # Clean every text and put alternate graphic versions first.
    statement_text = []
    for language, text in text_by_language.items():
        item = {
            'value': remove_trailing_punctuation(text),
            'language': language,
        }
        if display_alternate_graphic_first(language):
            statement_text.insert(0, item)
        else:
            statement_text.append(item)
    return statement_text
def unimarc_to_contribution(self, key, value):
    """Get contribution.

    contribution: loop:
    700 Personal name - primary responsibility
    701 Personal name - alternative primary responsibility
    702 Personal name - secondary responsibility
    710 Corporate body name - primary responsibility
    711 Corporate body name - alternative primary responsibility
    712 Corporate body name - secondary responsibility

    Returns a dict with the ``agent`` description and its ``role`` list.
    Roles come from the $4 codes found in the conversion table below,
    de-duplicated in order of first appearance; ``['aut']`` is used when
    no $4 code can be converted.
    """
    tag = key[:3]
    agent = {
        'preferred_name': ', '.join(utils.force_list(value.get('a', ''))),
        'type': 'bf:Person',
    }
    if agent['preferred_name'] and value.get('b'):
        agent['preferred_name'] += \
            ', ' + ', '.join(utils.force_list(value.get('b')))

    if tag in ('700', '701', '702', '703'):
        if value.get('d'):
            agent['numeration'] = value.get('d')
        if value.get('c'):
            agent['qualifier'] = value.get('c')
        if value.get('f'):
            date = utils.force_list(value.get('f'))[0]
            # "birth-death"; an unknown death year is written "-....".
            dates = date.replace('-....', '-').split('-')
            date_of_birth = dates[0].strip()
            if date_of_birth:
                agent['date_of_birth'] = date_of_birth
            if len(dates) > 1:
                date_of_death = dates[1].strip()
                if date_of_death:
                    agent['date_of_death'] = date_of_death

    if tag in ('710', '711', '712'):
        agent['type'] = 'bf:Organisation'
        agent['conference'] = key[3] == '1'
        if agent['preferred_name'] and value.get('c'):
            agent['preferred_name'] += \
                ', ' + ', '.join(utils.force_list(value.get('c')))
        conference_properties = (
            ('d', 'conference_number'),
            ('e', 'conference_place'),
            ('f', 'conference_date'),
        )
        for code, property_name in conference_properties:
            if value.get(code):
                first_value = utils.force_list(value.get(code))[0]
                # Drop the trailing punctuation and enclosing parentheses.
                agent[property_name] = remove_trailing_punctuation(
                    first_value
                ).lstrip('(').rstrip(')')

    IDREF_ROLE_CONV = {
        "070": "aut", "230": "cmp", "205": "ctb", "340": "edt",
        "420": "hnr", "440": "ill", "600": "pht", "590": "prf",
        "730": "trl", "080": "aui", "160": "bsl", "220": "com",
        "300": "drt", "430": "ilu", "651": "pbd", "350": "egr",
        "630": "pro", "510": "ltg", "365": "exp", "727": "dgs",
        "180": "ctg", "210": "cmm", "200": "chr", "110": "bnd",
        "720": "ato", "030": "arr", "020": "ann", "632": "adi",
        "005": "act", "390": "fmo", "545": "mus"
    }
    roles = []
    if value.get('4'):
        converted = (
            IDREF_ROLE_CONV.get(role)
            for role in utils.force_list(value.get('4'))
        )
        # dict.fromkeys de-duplicates while keeping the $4 order, so the
        # result no longer depends on set iteration order.
        roles = list(dict.fromkeys(role for role in converted if role))
    if not roles:
        roles = ['aut']

    return {
        'agent': agent,
        'role': roles
    }
def marc21_to_work_access_point(self, key, value):
    """Get work access point."""
    # Properties filled from 130/730 subfields:
    # * "date_of_work": "[130$f|730$f]"
    # * "miscellaneous_information": "[130$g|130$s|730$g|730$s]"
    # * "language": "[130$l|730$l]"
    # * "form_subdivision": ["[130$k|730$k]"]
    # * "medium_of_performance_for_music": ["[130$m|730$m]"]
    # * "arranged_statement_for_music": "[130$o|730$o]"
    # * "key_for_music": "[130$r|730$r]"
    # [1] Clean the string: remove the trailing punctuation "/:;.,=",
    #     in particular the string ". - "
    work = {}
    tag_key = key[:3]
    part_list = TitlePartList(part_number_code='n', part_name_code='p')
    part_codes = {'n', 'p'}
    items = get_field_items(value)

    if tag_key in ['130', '730']:
        repeatable_codes = {'k', 'm'}
        single_codes = {'a', 'f', 'l', 'o', 'r'}
        miscellaneous_codes = {'g', 's'}
        miscellaneous_parts = []
        for code, content in items:
            if code in repeatable_codes:
                # Repeatable subfields accumulate into a list.
                work.setdefault(
                    _WORK_ACCESS_POINT.get(code), []).append(content)
            elif code in single_codes:
                work[_WORK_ACCESS_POINT.get(code)] = content
            if code in miscellaneous_codes:
                miscellaneous_parts.append(content)
            if code in part_codes:
                part_list.update_part(content, code, content)
        if miscellaneous_parts:
            work['miscellaneous_information'] = '. '.join(
                miscellaneous_parts)
    elif tag_key == '240':
        for code, content in items:
            if code == 'a':
                work[_WORK_ACCESS_POINT.get(code)] = content
            if code in part_codes:
                part_list.update_part(content, code, content)

        # For a 240 the agent is described from the first 100 field.
        field_100 = marc21.get_fields('100')
        if field_100:
            plain_properties = {
                'a': 'preferred_name',
                'b': 'numeration',
                'c': 'qualifier',
            }
            agent = {}
            for code, content in field_100[0].get('subfields').items():
                agent['type'] = 'bf:Person'
                if code in plain_properties:
                    agent[plain_properties[code]] = \
                        remove_trailing_punctuation(content)
                elif code == 'd':
                    # "birth-death": either side may be empty or missing.
                    dates = remove_trailing_punctuation(
                        content.rstrip(',')).split('-')
                    date_of_birth = dates[0].strip()
                    if date_of_birth:
                        agent['date_of_birth'] = date_of_birth
                    if len(dates) > 1:
                        date_of_death = dates[1].strip()
                        if date_of_death:
                            agent['date_of_death'] = date_of_death
                elif code == 'q':
                    agent['fuller_form_of_name'] = \
                        remove_trailing_punctuation(
                            content).lstrip('(').rstrip(')')
            work['agent'] = agent

    the_part_list = part_list.get_part_list()
    if the_part_list:
        work['part'] = the_part_list

    if work:
        work_access_points = self.get('work_access_point', [])
        work_access_points.append(work)
        self['work_access_point'] = work_access_points
def build_place_or_agent_data(code, label): type_per_code = {'a': 'bf:Place', 'b': 'bf:Agent'} return {'type': type_per_code[code], 'label': [{'value': value}]} \ if (value := remove_trailing_punctuation(label)) else None
def marc21_to_contribution(self, key, value):
    """Get contribution.

    Handles fields 100, 700, 710 and 711 whose character at index 4 of
    ``key`` is not '2'; returns None for anything else.

    The agent is a ``$ref`` when a $0 subfield resolves to a person link.
    Otherwise, if the field has a $a, it is described from the subfields:
    person data for 100/700, organisation/conference data for 710/711.

    Roles come from $4 (truncated to three characters, lower-cased, 'sce'
    mapped to 'aus', unknown codes mapped to 'ctb'); without $4 the role
    defaults to 'cre' (100), 'aut' (711) or 'ctb'. Roles are de-duplicated
    in order of first appearance. Returns None when no agent was built.
    """
    tag = key[:3]
    if key[4] == '2' or tag not in ('100', '700', '710', '711'):
        return None

    def single(code):
        """Return the value of a subfield expected to be non repetitive."""
        return not_repetitive(
            marc21.bib_id, marc21.rero_id, key, value, code)

    def unwrapped(code):
        """Return a cleaned subfield without enclosing parentheses."""
        return remove_trailing_punctuation(
            single(code)).lstrip('(').rstrip(')')

    agent = {}
    if value.get('0'):
        for identifier in utils.force_list(value.get('0')):
            ref = get_person_link(
                marc21.bib_id, marc21.rero_id, identifier, key, value)
            if ref:
                # The last resolvable identifier wins.
                agent['$ref'] = ref

    # we do not have a $ref
    if not agent.get('$ref') and value.get('a'):
        agent = {'type': 'bf:Person'}
        name = single('a').rstrip('.')
        if name:
            agent['preferred_name'] = name

        # 100|700 Person
        if tag in ('100', '700'):
            if value.get('b'):
                numeration = remove_trailing_punctuation(single('b'))
                if numeration:
                    agent['numeration'] = numeration
            if value.get('c'):
                agent['qualifier'] = remove_trailing_punctuation(
                    single('c'))
            if value.get('d'):
                # "birth-death": either side may be empty or missing.
                dates = remove_trailing_punctuation(
                    single('d').rstrip(',')).split('-')
                date_of_birth = dates[0].strip()
                if date_of_birth:
                    agent['date_of_birth'] = date_of_birth
                if len(dates) > 1:
                    date_of_death = dates[1].strip()
                    if date_of_death:
                        agent['date_of_death'] = date_of_death
            if value.get('q'):
                fuller_form_of_name = unwrapped('q')
                if fuller_form_of_name:
                    agent['fuller_form_of_name'] = fuller_form_of_name
        # 710|711 Organisation
        elif tag in ('710', '711'):
            agent['type'] = 'bf:Organisation'
            agent['conference'] = tag == '711'
            if value.get('b'):
                agent['subordinate_unit'] = [
                    unit.rstrip('.')
                    for unit in utils.force_list(value.get('b'))
                ]
            if value.get('e'):
                # $e units are appended after the $b ones.
                subordinate_units = agent.get('subordinate_unit', [])
                subordinate_units.extend(
                    unit.rstrip('.')
                    for unit in utils.force_list(value.get('e'))
                )
                agent['subordinate_unit'] = subordinate_units
            conference_properties = (
                ('n', 'conference_number'),
                ('d', 'conference_date'),
                ('c', 'conference_place'),
            )
            for code, property_name in conference_properties:
                if value.get(code):
                    text = unwrapped(code)
                    if text:
                        agent[property_name] = text

    if value.get('4'):
        roles = []
        for role in utils.force_list(value.get('4')):
            if len(role) != 3:
                error_print('WARNING CONTRIBUTION ROLE LENGTH:',
                            marc21.bib_id, marc21.rero_id, role)
                role = role[:3]
            if role == 'sce':
                error_print('WARNING CONTRIBUTION ROLE SCE:',
                            marc21.bib_id, marc21.rero_id, 'sce --> aus')
                role = 'aus'
            role = role.lower()
            if role not in _CONTRIBUTION_ROLE:
                error_print('WARNING CONTRIBUTION ROLE DEFINITION:',
                            marc21.bib_id, marc21.rero_id, role)
                role = 'ctb'
            roles.append(role)
    elif tag == '100':
        roles = ['cre']
    elif tag == '711':
        roles = ['aut']
    else:
        roles = ['ctb']

    if agent:
        # dict.fromkeys de-duplicates while keeping the $4 order, so the
        # result no longer depends on set iteration order.
        return {'agent': agent, 'role': list(dict.fromkeys(roles))}
    return None
def marc21_to_subjects(self, key, value):
    """Get subjects.

    Creates an object stored on ``self`` under:
    genreForm : for the field 655
    subjects : for 6xx with $2 rero
    subjects_imported : for 6xx with $2 rerovoc or having indicator 2
        '0' or '2'

    Nothing is returned; tags missing from the tables below raise
    ``KeyError``.
    """
    type_per_tag = {
        '600': 'bf:Person',
        '610': 'bf:Organization',
        '611': 'bf:Organization',
        '600t': 'bf:Work',
        '610t': 'bf:Work',
        '611t': 'bf:Work',
        '630': 'bf:Work',
        '650': 'bf:Topic',  # or bf:Temporal, changed by code
        '651': 'bf:Place',
        '655': 'bf:Topic'
    }
    field_data_per_tag = {
        '600': 'preferred_name',
        '610': 'preferred_name',
        '611': 'preferred_name',
        '600t': 'title',
        '610t': 'title',
        '611t': 'title',
        '630': 'title',
        '650': 'term',
        '651': 'preferred_name',
        '655': 'term'
    }
    subfield_code_per_tag = {
        '600': 'abcd',
        '610': 'ab',
        '611': 'acden',
        '600t': 'tpn',
        '610t': 'tpn',
        '611t': 't',
        '630': 'apn',
        '650': 'a',
        '651': 'a',
        '655': 'a'
    }
    conference_per_tag = {'610': False, '611': True}
    source_per_indicator_2 = {'0': 'LCSH', '2': 'MeSH'}
    # Types that may be replaced by a link to a contribution. Both
    # spellings are accepted: the table above emits 'bf:Organization',
    # while the check used to test only 'bf:Organisation', so 610/611
    # subjects could never be linked.
    linkable_types = ('bf:Person', 'bf:Organization', 'bf:Organisation')

    indicator_2 = key[4]
    tag_key = key[:3]
    subfields_2 = utils.force_list(value.get('2'))
    subfield_2 = subfields_2[0] if subfields_2 else None
    subfields_a = utils.force_list(value.get('a', []))

    if subfield_2 == 'rero':
        if tag_key in ('600', '610', '611') and value.get('t'):
            tag_key += 't'
        data_type = type_per_tag[tag_key]

        # A 650 with a $a starting with a digit is a temporal subject.
        if tag_key == '650' and any(
                re.match(r'\d', subfield_a) for subfield_a in subfields_a):
            data_type = 'bf:Temporal'

        subject = {
            'type': data_type,
        }
        string_build = build_string_from_subfields(
            value, subfield_code_per_tag[tag_key])
        if tag_key == '655':
            # remove the square brackets
            string_build = re.sub(r'^\[(.*)\]$', r'\1', string_build)
        subject[field_data_per_tag[tag_key]] = string_build

        if tag_key in ('610', '611'):
            subject['conference'] = conference_per_tag[tag_key]

        if tag_key in ('600t', '610t', '611t'):
            creator_tag_key = tag_key[:3]  # to keep only tag: 600, 610, 611
            subject['creator'] = remove_trailing_punctuation(
                build_string_from_subfields(
                    value, subfield_code_per_tag[creator_tag_key]),
                '.', '.')

        field_key = 'genreForm' if tag_key == '655' else 'subjects'

        subfields_0 = utils.force_list(value.get('0'))
        if data_type in linkable_types and subfields_0:
            ref = get_contribution_link(
                marc21.bib_id, marc21.rero_id, subfields_0[0], key)
            if ref:
                # A linked subject only keeps its reference and type.
                subject = {
                    '$ref': ref,
                    'type': data_type,
                }
        if not subject.get('$ref'):
            identifier = build_identifier(value)
            if identifier:
                subject['identifiedBy'] = identifier

        if subject.get('$ref') or subject.get(field_data_per_tag[tag_key]):
            subjects = self.get(field_key, [])
            subjects.append(subject)
            self[field_key] = subjects

    elif subfield_2 == 'rerovoc' or indicator_2 in ['0', '2']:
        term_string = build_string_from_subfields(
            value, 'abcdefghijklmnopqrstuvwxyz', ' - ')
        if term_string:
            if subfield_2 == 'rerovoc':
                source = 'rerovoc'
            else:
                source = source_per_indicator_2[indicator_2]
            subject_imported = {
                'type': type_per_tag[tag_key],
                'source': source
            }
            subject_imported[field_data_per_tag[tag_key]] = term_string
            if tag_key in ('610', '611'):
                subject_imported['conference'] = conference_per_tag[tag_key]
            subjects_imported = self.get('subjects_imported', [])
            subjects_imported.append(subject_imported)
            self['subjects_imported'] = subjects_imported
if value.get('c'): qualifier = utils.force_list(value.get('c'))[0] agent['qualifier'] = remove_trailing_punctuation(qualifier) if value.get('d'): date = utils.force_list(value.get('d'))[0] date = date.rstrip(',') dates = remove_trailing_punctuation(date).split('-') with contextlib.suppress(Exception): if date_of_birth := dates[0].strip(): agent['date_of_birth'] = date_of_birth with contextlib.suppress(Exception): if date_of_death := dates[1].strip(): agent['date_of_death'] = date_of_death if value.get('q'): fuller_form_of_name = utils.force_list(value.get('q'))[0] agent['fuller_form_of_name'] = remove_trailing_punctuation( fuller_form_of_name).lstrip('(').rstrip(')') elif key[:3] in ['710', '711']: agent['type'] = 'bf:Organisation' agent['conference'] = key[:3] == '711' if value.get('e'): subordinate_units = [ subordinate_unit.rstrip('.') for subordinate_unit in utils.force_list(value.get('e')) ] agent['subordinate_unit'] = subordinate_units if value.get('n'): numbering = utils.force_list(value.get('n'))[0] agent['numbering'] = remove_trailing_punctuation(numbering).lstrip( '(').rstrip(')')
def marc21_to_contribution(self, key, value):
    """Get contribution.

    Handles fields 100, 700, 710 and 711 whose character at index 4 of
    ``key`` is not '2'; returns None for anything else. The agent is
    described from the first value of each subfield: person data for
    100/700, organisation/conference data for 710/711. Roles are the $4
    values as they are; without $4 the role defaults to 'cre' (100),
    'aut' (711) or 'ctb'.
    """
    tag = key[:3]
    if key[4] == '2' or tag not in ('100', '700', '710', '711'):
        return None

    def first(code):
        """Return the first value of a subfield."""
        return utils.force_list(value.get(code))[0]

    def unwrapped(code):
        """Return a cleaned first value without enclosing parentheses."""
        return remove_trailing_punctuation(
            first(code)).lstrip('(').rstrip(')')

    agent = {'type': 'bf:Person'}
    if value.get('a'):
        agent['preferred_name'] = remove_trailing_punctuation(first('a'))

    # 100|700 Person
    if tag in ('100', '700'):
        if value.get('b'):
            agent['numeration'] = remove_trailing_punctuation(first('b'))
        if value.get('c'):
            agent['qualifier'] = remove_trailing_punctuation(first('c'))
        if value.get('d'):
            # "birth-death": either side may be empty or missing.
            dates = remove_trailing_punctuation(
                first('d').rstrip(',')).split('-')
            date_of_birth = dates[0].strip()
            if date_of_birth:
                agent['date_of_birth'] = date_of_birth
            if len(dates) > 1:
                date_of_death = dates[1].strip()
                if date_of_death:
                    agent['date_of_death'] = date_of_death
        if value.get('q'):
            agent['fuller_form_of_name'] = unwrapped('q')
    # 710|711 Organisation
    else:
        agent['type'] = 'bf:Organisation'
        agent['conference'] = tag == '711'
        if value.get('e'):
            agent['subordinate_unit'] = [
                unit.rstrip('.')
                for unit in utils.force_list(value.get('e'))
            ]
        conference_properties = (
            ('n', 'conference_number'),
            ('d', 'conference_date'),
            ('c', 'conference_place'),
        )
        for code, property_name in conference_properties:
            if value.get(code):
                agent[property_name] = unwrapped(code)

    if value.get('4'):
        roles = list(utils.force_list(value.get('4')))
    elif tag == '100':
        roles = ['cre']
    elif tag == '711':
        roles = ['aut']
    else:
        roles = ['ctb']
    return {'agent': agent, 'role': roles}
def marc21_to_subjects_6XX(self, key, value):
    """Get subjects.

    Creates an object stored on ``self`` under:
    genreForm : for the field 655
    subjects : for 6xx with $2 rero, gnd or idref
    subjects_imported : for 6xx having indicator 2 '0' or '2'

    Nothing is returned; tags missing from the tables below raise
    ``KeyError``.
    """

    def perform_subdivisions(field):
        """Perform subject subdivisions from MARC field."""
        subdivisions = {
            'v': 'genreForm_subdivisions',
            'x': 'topic_subdivisions',
            'y': 'temporal_subdivisions',
            'z': 'place_subdivisions'
        }
        for code, subdivision in subdivisions.items():
            for subfield_value in utils.force_list(value.get(code, [])):
                field.setdefault(subdivision, []).append(subfield_value)

    type_per_tag = {
        '600': DocumentSubjectType.PERSON,
        '610': DocumentSubjectType.ORGANISATION,
        '611': DocumentSubjectType.ORGANISATION,
        '600t': DocumentSubjectType.WORK,
        '610t': DocumentSubjectType.WORK,
        '611t': DocumentSubjectType.WORK,
        '630': DocumentSubjectType.WORK,
        '650': DocumentSubjectType.TOPIC,  # or bf:Temporal, changed by code
        '651': DocumentSubjectType.PLACE,
        '655': DocumentSubjectType.TOPIC
    }
    field_data_per_tag = {
        '600': 'preferred_name',
        '610': 'preferred_name',
        '611': 'preferred_name',
        '600t': 'title',
        '610t': 'title',
        '611t': 'title',
        '630': 'title',
        '650': 'term',
        '651': 'preferred_name',
        '655': 'term'
    }
    subfield_code_per_tag = {
        '600': 'abcd',
        '610': 'ab',
        '611': 'acden',
        '600t': 'tpn',
        '610t': 'tpn',
        '611t': 't',
        '630': 'apn',
        '650': 'a',
        '651': 'a',
        '655': 'a'
    }
    conference_per_tag = {
        '610': False,
        '611': True
    }
    source_per_indicator_2 = {
        '0': 'LCSH',
        '2': 'MeSH'
    }

    indicator_2 = key[4]
    tag_key = key[:3]
    subfields_2 = utils.force_list(value.get('2'))
    subfield_2 = subfields_2[0] if subfields_2 else None
    subfields_a = utils.force_list(value.get('a', []))

    if subfield_2 in ['rero', 'gnd', 'idref']:
        if tag_key in ['600', '610', '611'] and value.get('t'):
            tag_key += 't'
        data_type = type_per_tag[tag_key]

        # `data_type` is Temporal if tag is 650 and a $a starts with a
        # digit. Slicing ([:1]) instead of indexing ([0]) keeps an empty
        # $a from raising IndexError.
        if tag_key == '650' and any(
                subfield_a[:1].isdigit() for subfield_a in subfields_a):
            data_type = 'bf:Temporal'

        subject = {
            'type': data_type,
        }

        string_build = build_string_from_subfields(
            value, subfield_code_per_tag[tag_key])
        if tag_key == '655':
            # remove the square brackets
            string_build = re.sub(r'^\[(.*)\]$', r'\1', string_build)
        subject[field_data_per_tag[tag_key]] = string_build

        if tag_key in ['610', '611']:
            subject['conference'] = conference_per_tag[tag_key]

        if tag_key in ['600t', '610t', '611t']:
            creator_tag_key = tag_key[:3]  # to keep only tag: 600, 610, 611
            subject['creator'] = remove_trailing_punctuation(
                build_string_from_subfields(
                    value, subfield_code_per_tag[creator_tag_key]),
                '.', '.')

        field_key = 'genreForm' if tag_key == '655' else 'subjects'

        subfields_0 = utils.force_list(value.get('0'))
        linkable_types = [
            DocumentSubjectType.PERSON,
            DocumentSubjectType.ORGANISATION,
        ]
        if data_type in linkable_types and subfields_0:
            ref = get_contribution_link(
                marc21.bib_id, marc21.rero_id, subfields_0[0], key)
            if ref:
                # A linked subject only keeps its reference and type.
                subject = {
                    '$ref': ref,
                    'type': data_type,
                }
        if not subject.get('$ref'):
            identifier = build_identifier(value)
            if identifier:
                subject['identifiedBy'] = identifier
            perform_subdivisions(subject)

        if subject.get('$ref') or subject.get(field_data_per_tag[tag_key]):
            subjects = self.get(field_key, [])
            subjects.append(subject)
            self[field_key] = subjects

    elif indicator_2 in ['0', '2']:
        term_string = build_string_from_subfields(
            value, 'abcdefghijklmnopqrstuw', ' - ')
        if term_string:
            subject_imported = {
                'type': type_per_tag[tag_key],
                'source': source_per_indicator_2[indicator_2],
                field_data_per_tag[tag_key]: term_string.rstrip('.')
            }
            perform_subdivisions(subject_imported)
            if tag_key in ['610', '611']:
                subject_imported['conference'] = conference_per_tag[tag_key]
            subjects_imported = self.get('subjects_imported', [])
            subjects_imported.append(subject_imported)
            self['subjects_imported'] = subjects_imported