def marc21_to_subjects_imported(self, key, value):
    """Get subject and genreForm_imported imported from 919 (L53, L54).

    At most one entry is built from the field and appended to
    ``self['subjects_imported']`` or ``self['genreForm_imported']``:

    - no ``$2``: the entry has no ``source`` and is stored only when the
      string built from the subfields is not empty;
    - ``$2`` is ``chrero``: the entry is stored only when the first ``$v``
      contains ``[carte postale]``, ``[affiche]`` or
      ``[document photographique]``; it goes to ``genreForm_imported``
      when the first ``$9`` contains ``655``;
    - ``$2`` is ``gatbegr`` or ``gnd-content``: stored in
      ``genreForm_imported``;
    - ``$2`` is another vocabulary accepted by the regular expression
      below: stored in ``subjects_imported``.

    :param key: the MARC field key (not read by this function).
    :param value: the subfields of the MARC field.
    """
    all_codes = 'abcdefghijklmnopqrstuvwxyz'
    specific_contains_regexp = re.compile(
        r'\[(carte postale|affiche|document photographique)\]')
    contains_specific_voc_regexp = re.compile(
        r'^(chrero|rerovoc|ram|rameau|gnd|rerovoc|gatbegr|gnd-content)$')

    subfields_2 = utils.force_list(value.get('2'))
    data_imported = None
    field_key = 'subjects_imported'
    if subfields_2:
        subfield_2 = subfields_2[0]
        # NOTE(review): a $2 outside the accepted vocabularies stores
        # nothing at all - confirm this is the intended nesting.
        if contains_specific_voc_regexp.search(subfield_2):
            add_data_imported = False
            if subfield_2 == 'chrero':
                subfields_v = utils.force_list(value.get('v'))
                if subfields_v and \
                        specific_contains_regexp.search(subfields_v[0]):
                    add_data_imported = True
                    # $9 is only needed to tell genre/form from subject,
                    # so it is read here and a missing $9 no longer
                    # raises: the entry simply stays a subject.
                    subfields_9 = utils.force_list(value.get('9'))
                    if subfields_9 and '655' in subfields_9[0]:
                        field_key = 'genreForm_imported'
            else:
                add_data_imported = True
                if subfield_2 in ('gatbegr', 'gnd-content'):
                    field_key = 'genreForm_imported'
            if add_data_imported:
                # Unlike the branch without $2, the entry is stored even
                # when the built term is empty.
                data_imported = {
                    'type': 'bf:Topic',
                    'source': subfield_2,
                    'term': build_string_from_subfields(
                        value, all_codes, ' - ')
                }
    else:
        term_string = build_string_from_subfields(value, all_codes, ' - ')
        if term_string:
            data_imported = {'type': 'bf:Topic', 'term': term_string}
    if data_imported:
        imported = self.get(field_key, [])
        imported.append(data_imported)
        self[field_key] = imported
def marc21_to_subjects_imported(self, key, value):
    """Get imported subjects from a 6XX field.

    An entry is appended to ``self['subjects_imported']`` when ``$2`` is
    ``lcsh`` or the second indicator is ``0``, ``2`` or ``7``, provided the
    string built from the subfields is not empty. The entry holds the
    type and data field name selected by the tag, the source, the
    ``$v``/``$x``/``$y``/``$z`` subdivisions and, for 610/611, the
    ``conference`` flag.

    :param key: the field key; ``key[:3]`` is read as the tag and
        ``key[4]`` as the second indicator.
    :param value: the subfields of the MARC field.
    :raises KeyError: if the tag has no entry in ``type_per_tag``.
    """
    def perform_subdivisions(field):
        """Perform subject subdivisions from MARC field."""
        subdivisions = {
            'v': 'genreForm_subdivisions',
            'x': 'topic_subdivisions',
            'y': 'temporal_subdivisions',
            'z': 'place_subdivisions'
        }
        for code, subdivision in subdivisions.items():
            for subfield_value in utils.force_list(value.get(code, [])):
                field.setdefault(subdivision, []).append(subfield_value)

    type_per_tag = {
        '600': 'bf:Person',
        '610': 'bf:Organisation',
        '611': 'bf:Organisation',
        '600t': 'bf:Work',
        '610t': 'bf:Work',
        '611t': 'bf:Work',
        '630': 'bf:Work',
        '650': 'bf:Topic',  # or bf:Temporal, changed by code
        '651': 'bf:Place',
        '655': 'bf:Topic'
    }
    field_data_per_tag = {
        '600': 'preferred_name',
        '610': 'preferred_name',
        '611': 'preferred_name',
        '600t': 'title',
        '610t': 'title',
        '611t': 'title',
        '630': 'title',
        '650': 'term',
        '651': 'preferred_name',
        '655': 'term'
    }
    conference_per_tag = {'610': False, '611': True}
    # Every indicator accepted below needs a source: '0' was missing and
    # raised KeyError for fields with indicator 0 and no `$2 lcsh`.
    source_per_indicator_2 = {'0': 'LCSH', '2': 'MeSH', '7': 'LCSH'}

    indicator_2 = key[4]
    tag_key = key[:3]
    subfields_2 = utils.force_list(value.get('2'))
    subfield_2 = subfields_2[0] if subfields_2 else None

    if subfield_2 != 'lcsh' and indicator_2 not in source_per_indicator_2:
        return
    term_string = build_string_from_subfields(
        value, 'abcdefghijklmnopqrstuw', ' - ')
    if not term_string:
        return

    if subfield_2 == 'lcsh':
        source = 'LCSH'
    else:
        source = source_per_indicator_2[indicator_2]
    subject_imported = {
        'type': type_per_tag[tag_key],
        'source': source,
        field_data_per_tag[tag_key]: term_string.rstrip('.')
    }
    perform_subdivisions(subject_imported)
    if tag_key in conference_per_tag:
        subject_imported['conference'] = conference_per_tag[tag_key]
    subjects_imported = self.get('subjects_imported', [])
    subjects_imported.append(subject_imported)
    self['subjects_imported'] = subjects_imported
def marc21_to_subjects(self, key, value):
    """Get subjects.

    - create an object :
        genreForm : for the field 655 with $2 rero
        subjects : for the other 6xx with $2 rero
        subjects_imported : for 6xx with $2 rerovoc or having
                            indicator 2 '0' or '2'

    :param key: the field key; ``key[:3]`` is read as the tag and
        ``key[4]`` as the second indicator.
    :param value: the subfields of the MARC field.
    :raises KeyError: if the tag has no entry in the per-tag tables.
    """
    type_per_tag = {
        '600': 'bf:Person',
        '610': 'bf:Organization',
        '611': 'bf:Organization',
        '600t': 'bf:Work',
        '610t': 'bf:Work',
        '611t': 'bf:Work',
        '630': 'bf:Work',
        '650': 'bf:Topic',  # or bf:Temporal, changed by code
        '651': 'bf:Place',
        '655': 'bf:Topic'
    }
    field_data_per_tag = {
        '600': 'preferred_name',
        '610': 'preferred_name',
        '611': 'preferred_name',
        '600t': 'title',
        '610t': 'title',
        '611t': 'title',
        '630': 'title',
        '650': 'term',
        '651': 'preferred_name',
        '655': 'term'
    }
    subfield_code_per_tag = {
        '600': 'abcd',
        '610': 'ab',
        '611': 'acden',
        '600t': 'tpn',
        '610t': 'tpn',
        '611t': 't',
        '630': 'apn',
        '650': 'a',
        '651': 'a',
        '655': 'a'
    }
    conference_per_tag = {'610': False, '611': True}
    source_per_indicator_2 = {'0': 'LCSH', '2': 'MeSH'}
    # Types whose $0 may be resolved to a contribution link. They are taken
    # from `type_per_tag` because the previous literal 'bf:Organisation'
    # never equals the 'bf:Organization' assigned above, so organisations
    # were never linked.
    # NOTE(review): confirm which spelling the target schema expects.
    linkable_types = (type_per_tag['600'], type_per_tag['610'])
    start_with_digit_regexp = re.compile(r'^\d')

    indicator_2 = key[4]
    tag_key = key[:3]
    subfields_2 = utils.force_list(value.get('2'))
    subfield_2 = subfields_2[0] if subfields_2 else None

    if subfield_2 == 'rero':
        if tag_key in ('600', '610', '611') and value.get('t'):
            # A $t turns the agent subject into a work subject.
            tag_key += 't'
        data_type = type_per_tag[tag_key]
        if tag_key == '650':
            # A topic with a $a starting with a digit is a temporal term.
            subfields_a = utils.force_list(value.get('a', []))
            if any(start_with_digit_regexp.search(subfield_a)
                   for subfield_a in subfields_a):
                data_type = 'bf:Temporal'

        subject = {'type': data_type}
        string_build = build_string_from_subfields(
            value, subfield_code_per_tag[tag_key])
        if tag_key == '655':
            # remove the square brackets
            string_build = re.sub(r'^\[(.*)\]$', r'\1', string_build)
        subject[field_data_per_tag[tag_key]] = string_build

        if tag_key in ('610', '611'):
            subject['conference'] = conference_per_tag[tag_key]

        if tag_key in ('600t', '610t', '611t'):
            creator_tag_key = tag_key[:3]  # to keep only tag: 600, 610, 611
            subject['creator'] = remove_trailing_punctuation(
                build_string_from_subfields(
                    value, subfield_code_per_tag[creator_tag_key]),
                '.', '.')

        field_key = 'genreForm' if tag_key == '655' else 'subjects'

        subfields_0 = utils.force_list(value.get('0'))
        if data_type in linkable_types and subfields_0:
            ref = get_contribution_link(
                marc21.bib_id, marc21.rero_id, subfields_0[0], key)
            if ref:
                # A linked subject keeps only the reference and its type.
                subject = {
                    '$ref': ref,
                    'type': data_type,
                }
        if not subject.get('$ref'):
            identifier = build_identifier(value)
            if identifier:
                subject['identifiedBy'] = identifier

        if subject.get('$ref') or subject.get(field_data_per_tag[tag_key]):
            subjects = self.get(field_key, [])
            subjects.append(subject)
            self[field_key] = subjects

    elif subfield_2 == 'rerovoc' or indicator_2 in source_per_indicator_2:
        term_string = build_string_from_subfields(
            value, 'abcdefghijklmnopqrstuvwxyz', ' - ')
        if term_string:
            if subfield_2 == 'rerovoc':
                source = 'rerovoc'
            else:
                source = source_per_indicator_2[indicator_2]
            subject_imported = {
                'type': type_per_tag[tag_key],
                'source': source,
                field_data_per_tag[tag_key]: term_string
            }
            if tag_key in ('610', '611'):
                subject_imported['conference'] = conference_per_tag[tag_key]
            subjects_imported = self.get('subjects_imported', [])
            subjects_imported.append(subject_imported)
            self['subjects_imported'] = subjects_imported
def marc21_to_subjects_imported(self, key, value):
    """Get imported subjects from a 6XX field.

    An entry is appended to ``self['subjects_imported']`` when ``$2`` is
    ``lcsh`` or the second indicator is ``0``, ``2`` or ``7``, provided the
    string built from the subfields is not empty. The entry holds the
    type and data field name selected by the tag, the source and, for
    610/611, the ``conference`` flag.

    :param key: the field key; ``key[:3]`` is read as the tag and
        ``key[4]`` as the second indicator.
    :param value: the subfields of the MARC field.
    :raises KeyError: if the tag has no entry in ``type_per_tag``.
    """
    type_per_tag = {
        '600': 'bf:Person',
        '610': 'bf:Organization',
        '611': 'bf:Organization',
        '600t': 'bf:Work',
        '610t': 'bf:Work',
        '611t': 'bf:Work',
        '630': 'bf:Work',
        '650': 'bf:Topic',  # or bf:Temporal, changed by code
        '651': 'bf:Place',
        '655': 'bf:Topic'
    }
    field_data_per_tag = {
        '600': 'preferred_name',
        '610': 'preferred_name',
        '611': 'preferred_name',
        '600t': 'title',
        '610t': 'title',
        '611t': 'title',
        '630': 'title',
        '650': 'term',
        '651': 'preferred_name',
        '655': 'term'
    }
    conference_per_tag = {'610': False, '611': True}
    # Every indicator accepted below needs a source: '0' was missing and
    # raised KeyError for fields with indicator 0 and no `$2 lcsh`.
    source_per_indicator_2 = {'0': 'LCSH', '2': 'MeSH', '7': 'LCSH'}

    indicator_2 = key[4]
    tag_key = key[:3]
    subfields_2 = utils.force_list(value.get('2'))
    subfield_2 = subfields_2[0] if subfields_2 else None

    if subfield_2 != 'lcsh' and indicator_2 not in source_per_indicator_2:
        return
    term_string = build_string_from_subfields(
        value, 'abcdefghijklmnopqrstuvwxyz', ' - ')
    if not term_string:
        return

    if subfield_2 == 'lcsh':
        source = 'LCSH'
    else:
        source = source_per_indicator_2[indicator_2]
    subject_imported = {
        'type': type_per_tag[tag_key],
        'source': source,
        field_data_per_tag[tag_key]: term_string.rstrip('.')
    }
    if tag_key in ('610', '611'):
        subject_imported['conference'] = conference_per_tag[tag_key]
    subjects_imported = self.get('subjects_imported', [])
    subjects_imported.append(subject_imported)
    self['subjects_imported'] = subjects_imported
def marc21_to_series(self, key, value):
    """Get series.

    A 490 field is handed over to
    ``marc21.extract_series_statement_from_marc_field``. Any other field
    is read as a 440 series statement and, when it has a $a, appended to
    ``self['seriesStatement']``:

    440$a = seriesTitle
    1. if $p:
        440$n$p = subseriesTitle
        440$v = subseriesEnumeration
    2. if $v and $n but no $p:
        440$n,$v = seriesEnumeration
    3. if $n but no $p and no $v:
        440$n = subseriesTitle
    4. if no $p and no $n:
        440$v = seriesEnumeration

    The title and the strings built from $n are passed through
    ``remove_punctuation(..., with_dot=True)``.
    """
    def tidy(text):
        """Strip punctuation (dot included) and trailing blanks."""
        return remove_punctuation(text, with_dot=True).rstrip()

    def joined_enumeration():
        """Join every cleaned $v with a slash."""
        return '/'.join(
            [remove_punctuation(subfield_v)
             for subfield_v in utils.force_list(value.get('v'))])

    if key[:3] == '490':
        marc21.extract_series_statement_from_marc_field(key, value, self)
        return None

    # we have a 440 series statement; without $a there is nothing to add
    if not value.get('a'):
        return None

    title = remove_punctuation(
        utils.force_list(value.get('a'))[0], with_dot=True)
    series = {'seriesTitle': [{'value': title}]}

    if value.get('p'):
        # case 1: $n$p is the subseries title, $v its enumeration
        subseries = {
            'subseriesTitle': [{
                'value': tidy(build_string_from_subfields(value, 'np', ' '))
            }]
        }
        if value.get('v'):
            subseries['subseriesEnumeration'] = [{
                'value': joined_enumeration()
            }]
        series['subseriesStatement'] = [subseries]
    elif value.get('n'):
        if value.get('v'):
            # case 2: $n and $v together are the series enumeration
            series['seriesEnumeration'] = [{
                'value': tidy(build_string_from_subfields(value, 'nv'))
            }]
        else:
            # case 3: $n alone is the subseries title
            series['subseriesStatement'] = [{
                'subseriesTitle': [{
                    'value': ''.join(utils.force_list(value.get('n')))
                }]
            }]
    elif value.get('v'):
        # case 4: $v alone is the series enumeration
        series['seriesEnumeration'] = [{'value': joined_enumeration()}]

    statements = self.get('seriesStatement', [])
    self['seriesStatement'] = statements
    statements.append(series)
    return None
def marc21_to_subjects_6XX(self, key, value):
    """Get subjects.

    - create an object :
        genreForm : for the field 655 with $2 rero, gnd or idref
        subjects : for the other 6xx with $2 rero, gnd or idref
        subjects_imported : for 6xx having indicator 2 '0' or '2'

    :param key: the field key; ``key[:3]`` is read as the tag and
        ``key[4]`` as the second indicator.
    :param value: the subfields of the MARC field.
    :raises KeyError: if the tag has no entry in the per-tag tables.
    """
    def perform_subdivisions(field):
        """Perform subject subdivisions from MARC field."""
        subdivisions = {
            'v': 'genreForm_subdivisions',
            'x': 'topic_subdivisions',
            'y': 'temporal_subdivisions',
            'z': 'place_subdivisions'
        }
        for code, subdivision in subdivisions.items():
            for subfield_value in utils.force_list(value.get(code, [])):
                field.setdefault(subdivision, []).append(subfield_value)

    type_per_tag = {
        '600': DocumentSubjectType.PERSON,
        '610': DocumentSubjectType.ORGANISATION,
        '611': DocumentSubjectType.ORGANISATION,
        '600t': DocumentSubjectType.WORK,
        '610t': DocumentSubjectType.WORK,
        '611t': DocumentSubjectType.WORK,
        '630': DocumentSubjectType.WORK,
        '650': DocumentSubjectType.TOPIC,  # or bf:Temporal, changed by code
        '651': DocumentSubjectType.PLACE,
        '655': DocumentSubjectType.TOPIC
    }
    field_data_per_tag = {
        '600': 'preferred_name',
        '610': 'preferred_name',
        '611': 'preferred_name',
        '600t': 'title',
        '610t': 'title',
        '611t': 'title',
        '630': 'title',
        '650': 'term',
        '651': 'preferred_name',
        '655': 'term'
    }
    subfield_code_per_tag = {
        '600': 'abcd',
        '610': 'ab',
        '611': 'acden',
        '600t': 'tpn',
        '610t': 'tpn',
        '611t': 't',
        '630': 'apn',
        '650': 'a',
        '651': 'a',
        '655': 'a'
    }
    conference_per_tag = {
        '610': False,
        '611': True
    }
    source_per_indicator_2 = {
        '0': 'LCSH',
        '2': 'MeSH'
    }

    indicator_2 = key[4]
    tag_key = key[:3]
    subfields_2 = utils.force_list(value.get('2'))
    subfield_2 = subfields_2[0] if subfields_2 else None
    subfields_a = utils.force_list(value.get('a', []))

    if subfield_2 in ['rero', 'gnd', 'idref']:
        if tag_key in ['600', '610', '611'] and value.get('t'):
            # A $t turns the agent subject into a work subject.
            tag_key += 't'
        data_type = type_per_tag[tag_key]

        # `data_type` is Temporal if tag is 650 and a $a start with digit.
        # The truthiness guard keeps an empty $a from raising on `[0]`.
        if tag_key == '650' and any(
                subfield_a and subfield_a[0].isdigit()
                for subfield_a in subfields_a):
            data_type = 'bf:Temporal'

        subject = {
            'type': data_type,
        }

        string_build = build_string_from_subfields(
            value, subfield_code_per_tag[tag_key])
        if tag_key == '655':
            # remove the square brackets
            string_build = re.sub(r'^\[(.*)\]$', r'\1', string_build)
        subject[field_data_per_tag[tag_key]] = string_build

        if tag_key in ['610', '611']:
            subject['conference'] = conference_per_tag[tag_key]

        if tag_key in ['600t', '610t', '611t']:
            creator_tag_key = tag_key[:3]  # to keep only tag: 600, 610, 611
            subject['creator'] = remove_trailing_punctuation(
                build_string_from_subfields(
                    value, subfield_code_per_tag[creator_tag_key]),
                '.', '.')

        field_key = 'genreForm' if tag_key == '655' else 'subjects'

        subfields_0 = utils.force_list(value.get('0'))
        if data_type in [DocumentSubjectType.PERSON,
                         DocumentSubjectType.ORGANISATION] and subfields_0:
            ref = get_contribution_link(
                marc21.bib_id, marc21.rero_id, subfields_0[0], key)
            if ref:
                # A linked subject keeps only the reference and its type.
                subject = {
                    '$ref': ref,
                    'type': data_type,
                }
        if not subject.get('$ref'):
            identifier = build_identifier(value)
            if identifier:
                subject['identifiedBy'] = identifier
            # NOTE(review): subdivisions are added to unlinked subjects
            # only - confirm linked ($ref) subjects must not carry them.
            perform_subdivisions(subject)

        if subject.get('$ref') or subject.get(field_data_per_tag[tag_key]):
            subjects = self.get(field_key, [])
            subjects.append(subject)
            self[field_key] = subjects

    elif indicator_2 in ['0', '2']:
        term_string = build_string_from_subfields(
            value, 'abcdefghijklmnopqrstuw', ' - ')
        if term_string:
            subject_imported = {
                'type': type_per_tag[tag_key],
                'source': source_per_indicator_2[indicator_2],
                field_data_per_tag[tag_key]: term_string.rstrip('.')
            }
            perform_subdivisions(subject_imported)
            if tag_key in ['610', '611']:
                subject_imported['conference'] = conference_per_tag[tag_key]
            subjects_imported = self.get('subjects_imported', [])
            subjects_imported.append(subject_imported)
            self['subjects_imported'] = subjects_imported
def marc21_to_tableOfContents(self, key, value):
    """Get tableOfContents from repetitive field 464.

    The string built from the $t subfields is appended to
    ``self['tableOfContents']``; an empty string is skipped.
    """
    entry = build_string_from_subfields(value, 't')
    if not entry:
        return
    contents = self.setdefault('tableOfContents', [])
    contents.append(entry)