def unimarc_title(self, key, value):
    """Get title data.

    All the title fields of the record are processed in a single pass;
    `key` and `value` are not used.

    field 200: non repetitive
        $a : repetitive
        $e : repetitive
        $f : repetitive
        $g : repetitive
        $h : repetitive
        $i : repetitive
    field 510,512,514,515,516,517,518,519,532: repetitive
        $a : non repetitive
        $e : repetitive
        $h : repetitive
        $i : repetitive

    Side effect: the values built from the $f and $g subfields of field 200
    are appended to ``self['responsibilityStatement']``.

    :returns: the list of title data, or None when the title is already
        built or when no title field is found.
    """
    # This function is called for each field 200, but every title field is
    # processed during the first call: once the title is built there is
    # nothing left to do.
    if self.get('title', []):
        return None

    title_tags = (
        '200', '510', '512', '514', '515', '516', '517', '518', '519', '532'
    )
    # any tag that is not listed here produces a variant title
    title_types = {'200': 'bf:Title', '510': 'bf:ParallelTitle'}

    title_list = []
    responsibilities = []
    for tag in title_tags:
        title_type = title_types.get(tag, 'bf:VariantTitle')
        for field in unimarc.get_alt_graphic_fields(tag=tag):
            title_data = {}
            part_list = TitlePartList(
                part_number_code='h', part_name_code='i')
            subfields_6 = unimarc.get_subfields(field, '6')
            subfields_7 = unimarc.get_subfields(field, '7')
            if subfields_7:
                # NOTE(review): the returned language-script code is not
                # used in this function; the call is kept only in case it
                # reports invalid script codes - confirm and drop it.
                unimarc.get_language_script(subfields_7[0])
            link = subfields_6[0] if subfields_6 else ''

            # build title parts; `index` counts the subfields of the field
            index = 1
            for blob_key, blob_value in get_field_items(field['subfields']):
                # responsibilities are only taken from the field 200
                is_responsibility = tag == '200' and blob_key in ('f', 'g')
                if blob_key in ('a', 'e') or is_responsibility:
                    value_data = unimarc.build_value_with_alternate_graphic(
                        tag, blob_key, blob_value, index, link,
                        ',.', ':;/-=')
                    if blob_key == 'a':
                        title_data['mainTitle'] = value_data
                    elif blob_key == 'e':
                        title_data['subtitle'] = value_data
                    else:
                        responsibilities.append(value_data)
                elif blob_key in ('h', 'i'):
                    part_list.update_part(
                        [dict(value=blob_value)], blob_key, blob_value)
                if blob_key != '__order__':
                    index += 1

            title_data['type'] = title_type
            the_part_list = part_list.get_part_list()
            if the_part_list:
                title_data['part'] = the_part_list
            # title_data always holds at least its type at this point
            title_list.append(title_data)

    # extract responsibilities
    if responsibilities:
        new_responsibility = self.get('responsibilityStatement', [])
        new_responsibility.extend(responsibilities)
        self['responsibilityStatement'] = new_responsibility
    return title_list or None
field_245_a_end_with_colon = re.search(r'\s*:\s*$', subfield_245_a) field_245_a_end_with_semicolon = re.search(r'\s*;\s*$', subfield_245_a) field_245_b_contains_equal = re.search(r'=', subfield_245_b) fields_246 = marc21.get_fields(tag='246') subfield_246_a = '' if fields_246: if subfields_246_a := marc21.get_subfields(fields_246[0], 'a'): subfield_246_a = subfields_246_a[0] tag_link, link = get_field_link_data(value) items = get_field_items(value) index = 1 title_list = [] title_data = {} part_list = TitlePartList(part_number_code='n', part_name_code='p') parallel_titles = [] pararalel_title_data_list = [] pararalel_title_string_set = set() responsibility = {} subfield_selection = {'a', 'b', 'c', 'n', 'p'} for blob_key, blob_value in items: if blob_key in subfield_selection: value_data = marc21.build_value_with_alternate_graphic( '245', blob_key, blob_value, index, link, ',.', ':;/-=') if blob_key in {'a', 'b', 'c'}: subfield_selection.remove(blob_key) if blob_key == 'a': title_data['mainTitle'] = value_data elif blob_key == 'b':
def marc21_to_title(self, key, value):
    """Get title data.

    The title data are extracted from the following fields:
    field 245:
        $a : non repetitive
        $b : non repetitive
        $c : non repetitive
        $n : repetitive
        $p : repetitive
        $6 : non repetitive
    field 246:
        $a : non repetitive
        $n : repetitive
        $p : repetitive
        $6 : non repetitive

    The 245 $b is split into a subtitle and parallel titles only when the
    record has a 246 $a; otherwise its value is used as the subtitle.

    Side effect: the responsibility data built from the $c subfield are
    appended to ``self['responsibilityStatement']``.

    :param key: not used.
    :param value: the subfields of the field being converted; they are
        handled as the subfields of a field 245.
    :returns: the list of title data (main title first, then the parallel
        titles, then the variant titles), or None if that list is empty.
    """
    # The first 245 $a is only inspected to know whether it ends with '=';
    # that information is handed over to the $b parser below.
    subfield_245_a = ''
    fields_245 = marc21.get_fields(tag='245')
    if fields_245:
        subfields_245_a = marc21.get_subfields(fields_245[0], 'a')
        if subfields_245_a:
            subfield_245_a = subfields_245_a[0]
    field_245_a_end_with_equal = re.search(r'\s*=\s*$', subfield_245_a)

    # The first 246 $a decides how the 245 $b is interpreted.
    subfield_246_a = ''
    fields_246 = marc21.get_fields(tag='246')
    if fields_246:
        subfields_246_a = marc21.get_subfields(fields_246[0], 'a')
        if subfields_246_a:
            subfield_246_a = subfields_246_a[0]

    _, link = get_field_link_data(value)
    title_data = {}
    part_list = TitlePartList(part_number_code='n', part_name_code='p')
    parallel_titles = []
    parallel_title_strings = set()
    responsibility = []

    # parse field 245 subfields for extracting:
    # main title, subtitle, parallel titles and the title parts
    subfield_selection = {'a', 'b', 'c', 'n', 'p'}
    # `index` counts the subfields of the field
    index = 1
    for blob_key, blob_value in get_field_items(value):
        if blob_key in subfield_selection:
            value_data = marc21.build_value_with_alternate_graphic(
                '245', blob_key, blob_value, index, link, ',.', ':;/-=')
            if blob_key in {'a', 'b', 'c'}:
                # non repetitive subfields: only the first one is used
                subfield_selection.remove(blob_key)
            if blob_key == 'a':
                if value_data:
                    title_data['mainTitle'] = value_data
            elif blob_key == 'b':
                if subfield_246_a:
                    subtitle, parallel_titles, parallel_title_strings = \
                        extract_subtitle_and_parallel_titles_from_field_245_b(
                            value_data, field_245_a_end_with_equal)
                    if subtitle:
                        title_data['subtitle'] = subtitle
                elif value_data:
                    title_data['subtitle'] = value_data
            elif blob_key == 'c':
                responsibility = build_responsibility_data(value_data)
            elif blob_key in ('n', 'p'):
                part_list.update_part(value_data, blob_key, blob_value)
        if blob_key != '__order__':
            index += 1

    title_data['type'] = 'bf:Title'
    the_part_list = part_list.get_part_list()
    if the_part_list:
        title_data['part'] = the_part_list

    # title_data always holds at least its type at this point
    title_list = [title_data]
    title_list.extend(parallel_titles)
    # extract variant titles
    title_list.extend(marc21.build_variant_title_data(parallel_title_strings))

    # extract responsibilities
    if responsibility:
        new_responsibility = self.get('responsibilityStatement', [])
        new_responsibility.extend(responsibility)
        self['responsibilityStatement'] = new_responsibility
    return title_list or None
def marc21_to_work_access_point(self, key, value):
    """Get work access point.

    Builds one work access point from the given field and appends it to
    ``self['work_access_point']``; nothing is returned.

    Fields 130 and 730:

    * "date_of_work": "[130$f|730$f]"
    * "miscellaneous_information": "[130$g|130$s|730$g|730$s]"
    * "language": "[130$l|730$l]"
    * "form_subdivision": ["[130$k|730$k]"]
    * "medium_of_performance_for_music": ["[130$m|730$m]"]
    * "arranged_statement_for_music": "[130$o|730$o]"
    * "key_for_music": "[130$r|730$r]"

    [1] Clean the string: remove the trailing punctuation "/:;.,=",
    in particular remove the string ". - "

    Field 240: only $a is mapped, and an agent is built from the
    subfields $a, $b, $c, $d and $q of the first field 100 of the record.

    For every handled field the $n and $p subfields feed the part list.

    :param key: the field key; its first three characters are the tag.
    :param value: the subfields of the field.
    """
    work = {}
    tag_key = key[:3]
    part_list = TitlePartList(part_number_code='n', part_name_code='p')
    part_selection = {'n', 'p'}
    items = get_field_items(value)

    if tag_key in ('130', '730'):
        work_selection = {'a', 'f', 'k', 'l', 'm', 'o', 'r'}
        # subfields whose values are collected in a list
        list_selection = {'k', 'm'}
        miscellaneous_selection = {'g', 's'}
        miscellaneous_parts = []
        for blob_key, blob_value in items:
            if blob_key in work_selection:
                work_key = _WORK_ACCESS_POINT.get(blob_key)
                if blob_key in list_selection:
                    work.setdefault(work_key, []).append(blob_value)
                else:
                    work[work_key] = blob_value
            elif blob_key in miscellaneous_selection:
                miscellaneous_parts.append(blob_value)
            elif blob_key in part_selection:
                part_list.update_part(blob_value, blob_key, blob_value)
        if miscellaneous_parts:
            work['miscellaneous_information'] = '. '.join(miscellaneous_parts)
    elif tag_key == '240':
        for blob_key, blob_value in items:
            if blob_key == 'a':
                work[_WORK_ACCESS_POINT.get(blob_key)] = blob_value
            elif blob_key in part_selection:
                part_list.update_part(blob_value, blob_key, blob_value)

        field_100 = marc21.get_fields('100')
        if field_100:
            agent = {}
            for blob_key, blob_value in field_100[0].get('subfields').items():
                # set inside the loop so that a field 100 without any
                # subfield still yields an empty agent
                agent['type'] = 'bf:Person'
                if blob_key == 'a':
                    agent['preferred_name'] = remove_trailing_punctuation(
                        blob_value)
                elif blob_key == 'b':
                    agent['numeration'] = remove_trailing_punctuation(
                        blob_value)
                elif blob_key == 'c':
                    agent['qualifier'] = remove_trailing_punctuation(
                        blob_value)
                elif blob_key == 'd':
                    # "birth-death"; each side may be missing or empty
                    date = blob_value.rstrip(',')
                    dates = remove_trailing_punctuation(date).split('-')
                    date_of_birth = dates[0].strip()
                    if date_of_birth:
                        agent['date_of_birth'] = date_of_birth
                    if len(dates) > 1:
                        date_of_death = dates[1].strip()
                        if date_of_death:
                            agent['date_of_death'] = date_of_death
                elif blob_key == 'q':
                    # drop the parentheses surrounding the fuller form
                    agent['fuller_form_of_name'] = \
                        remove_trailing_punctuation(
                            blob_value).lstrip('(').rstrip(')')
            work['agent'] = agent

    the_part_list = part_list.get_part_list()
    if the_part_list:
        work['part'] = the_part_list

    if work:
        work_access_points = self.get('work_access_point', [])
        work_access_points.append(work)
        self['work_access_point'] = work_access_points