Ejemplo n.º 1
0
def marc21_to_contribution(self, key, value):
    """Build the agent description of a contribution field.

    Only tags 100, 700, 710 and 711 whose second indicator is not ``2``
    are processed; any other key yields ``None`` straight away.

    :param self: record being built (not used in the visible body).
    :param key: field key; the first three characters are the tag and the
        fifth character is the second indicator.
    :param value: mapping of subfield codes to their value(s).
    :returns: ``None`` on every path visible here.

    NOTE(review): the visible body fills ``agent`` but never returns it,
    so this definition looks truncated -- confirm against the full file.
    """
    tag = key[:3]
    if key[4] == '2' or tag not in ['100', '700', '710', '711']:
        return None

    agent = {'type': 'bf:Person'}
    names = value.get('a')
    if names:
        agent['preferred_name'] = remove_trailing_punctuation(
            utils.force_list(names)[0])

    # 100|700 Person
    if tag in ['100', '700']:
        for code, field in (('b', 'numeration'), ('c', 'qualifier')):
            raw = value.get(code)
            if raw:
                agent[field] = remove_trailing_punctuation(
                    utils.force_list(raw)[0])
        raw_dates = value.get('d')
        if raw_dates:
            first = utils.force_list(raw_dates)[0].rstrip(',')
            parts = remove_trailing_punctuation(first).split('-')
            # "birth-death": a missing or empty part is simply not stored.
            date_fields = ('date_of_birth', 'date_of_death')
            for position, field in enumerate(date_fields):
                with contextlib.suppress(Exception):
                    text = parts[position].strip()
                    if text:
                        agent[field] = text
Ejemplo n.º 2
0
def marc21_to_author(self, key, value):
    """Build an author entry from a 100, 700 or 710 field.

    authors.name: $a followed by every $b, separated by single spaces
    authors.type: ``organisation`` for tag 710, ``person`` otherwise
    authors.qualifier: $c (optional, never set for tag 710)
    authors.date: $d (optional, never set for tag 710)

    :param self: record being built (not used here).
    :param key: field key; the first three characters are the tag and the
        fifth character is the second indicator.
    :param value: mapping of subfield codes to their value(s).
    :returns: the author dict, or ``None`` for a 700/710 field whose
        second indicator is ``2``.
    """
    tag = key[:3]
    if key[4] == '2' and tag in ('700', '710'):
        return None

    name = remove_trailing_punctuation(value.get('a'))
    for unit in utils.force_list(value.get('b')) or ():
        name += ' ' + remove_trailing_punctuation(unit)
    author = {'type': 'person', 'name': name}

    if tag == '710':
        author['type'] = 'organisation'
        return author

    for code, field in (('c', 'qualifier'), ('d', 'date')):
        raw = value.get(code)
        if raw:
            author[field] = remove_trailing_punctuation(raw)
    return author
Ejemplo n.º 3
0
        def build_place_or_agent_data(code, label, index, link, add_country):
            """Describe a place ($a) or an agent ($b) of a 264 field.

            :param code: subfield code, ``'a'`` or ``'b'``.
            :param label: raw subfield text.
            :param index: position used to pick the matching subfield in
                the linked alternate graphic field.
            :param link: key of the alternate graphic entry for tag 264.
            :param add_country: when true, copy the canton and country
                known to ``marc21tojson`` into the result.
            :returns: dict with ``type``, ``label`` and, when requested
                and available, ``canton`` / ``country``.
            """
            entry_type = {'a': 'bf:Place', 'b': 'bf:Agent'}[code]
            labels = [{'value': remove_trailing_punctuation(label)}]
            place_or_agent_data = {'type': entry_type, 'label': labels}

            if add_country:
                cantons = marc21tojson.cantons
                if cantons:
                    place_or_agent_data['canton'] = cantons
                country = marc21tojson.country
                if country:
                    place_or_agent_data['country'] = country

            # Best effort: the alternate graphic label is added only when
            # every lookup below succeeds.
            try:
                alternate = marc21tojson.alternate_graphic['264'][link]
                alt_label = marc21tojson.get_subfields(
                    alternate['field'])[index]
                alt_value = remove_trailing_punctuation(alt_label)
                language = '-'.join(
                    (marc21tojson.lang_from_008, alternate['script']))
            except Exception:
                return place_or_agent_data
            labels.append({'value': alt_value, 'language': language})
            return place_or_agent_data
Ejemplo n.º 4
0
def marc21_to_title(self, key, value):
    """Get title.

    title: 245$a without its trailing punctuation. When a $b exists the
    result is ``$a : $b``, the $b being cleaned the same way.

    :param self: record being built (not used here).
    :param key: field key (not used here).
    :param value: mapping of subfield codes to their value(s).
    :returns: the assembled title.
    """
    title = remove_trailing_punctuation(value.get('a'))
    subtitle = value.get('b')
    if not subtitle:
        return title
    subtitle_parts = utils.force_list(remove_trailing_punctuation(subtitle))
    return title + ' : ' + ' : '.join(subtitle_parts)
Ejemplo n.º 5
0
 def build_edition_data(code, label, index, link):
     """Return the label entries of a 250 subfield.

     The first entry holds the cleaned ``label``. A second entry, carrying
     a ``language``, is added when the linked alternate graphic subfield
     can be read.

     :param code: subfield code (not used here).
     :param label: raw subfield text.
     :param index: position of the matching subfield in the linked field.
     :param link: key of the alternate graphic entry for tag 250.
     :returns: list of ``{'value': ...}`` dicts.
     """
     data = [{'value': remove_trailing_punctuation(label)}]
     # Best effort: any failure while reading the alternate graphic data
     # leaves the plain label as the only entry.
     try:
         alternate = marc21.alternate_graphic['250'][link]
         alt_label = marc21.get_subfields(alternate['field'])[index]
         alt_entry = {
             'value': remove_trailing_punctuation(alt_label),
             'language': get_language_script(alternate['script']),
         }
     except Exception:
         return data
     data.append(alt_entry)
     return data
Ejemplo n.º 6
0
def edition_format_text(edition):
    """Format edition for _text.

    Designations and responsibilities are paired per language (entries
    without a language use ``'default'``) and rendered as
    ``designation / responsibility`` with trailing punctuation removed.

    :param edition: dict that may hold ``editionDesignation`` and
        ``responsibility`` lists of ``{'value', 'language'}`` dicts.
    :returns: list of ``{'value', 'language'}`` dicts, one per designation
        language; languages for which
        ``display_alternate_graphic_first`` is true are put in front.
    """
    designation_by_language = {
        item.get('language', 'default'): item.get('value', '')
        for item in edition.get('editionDesignation', [])
    }
    responsibility_by_language = {
        item.get('language', 'default'): item.get('value', '')
        for item in edition.get('responsibility', [])
    }

    edition_text = []
    for language, designation in designation_by_language.items():
        responsibility = responsibility_by_language.get(language, '')
        entry = {
            'value': remove_trailing_punctuation(
                f'{designation} / {responsibility}'),
            'language': language,
        }
        if display_alternate_graphic_first(language):
            edition_text.insert(0, entry)
        else:
            edition_text.append(entry)
    return edition_text
Ejemplo n.º 7
0
def marc21_to_description(self, key, value):
    """Get extent.

    extent: 300$a (the first one if many); an extent already stored on
    the record is never replaced.

    :param self: record being built; ``extent`` is written to it.
    :param key: field key (not used here).
    :param value: mapping of subfield codes to their value(s).
    :returns: always ``None``.
    """
    raw_extent = value.get('a')
    if not raw_extent or self.get('extent', None):
        return None
    first_extent = utils.force_list(raw_extent)[0]
    self['extent'] = remove_trailing_punctuation(first_extent)
    return None
Ejemplo n.º 8
0
 def build_place_or_agent_data(code, label, index):
     """Describe a place ($a) or an agent ($c).

     :param code: subfield code, ``'a'`` or ``'c'``.
     :param label: raw subfield text.
     :param index: subfield position (not used here).
     :returns: dict with the ``type`` and the cleaned ``label``.
     """
     type_per_code = {'a': 'bf:Place', 'c': 'bf:Agent'}
     entry_type = type_per_code[code]
     entry_label = {'value': remove_trailing_punctuation(label)}
     return {'type': entry_type, 'label': [entry_label]}
Ejemplo n.º 9
0
def marc21_to_edition_statement(self, key, value):
    """Get edition statement data.

    editionDesignation: 250 [$a non repetitive] (without trailing
        punctuation)
    responsibility: 250 [$b non repetitive]

    NOTE(review): the visible body only prepares ``editionDesignation``
    and neither stores nor returns ``edition_data``; the definition looks
    truncated -- confirm against the full file.
    """
    edition_data = {}
    subfields_a = utils.force_list(value.get('a'))
    if subfields_a:
        designation = remove_trailing_punctuation(subfields_a[0])
        edition_data['editionDesignation'] = [{'value': designation}]
Ejemplo n.º 10
0
def marc21_to_author(self, key, value):
    """Get author.

    A field whose second indicator is ``2`` is ignored. When one of the
    $0 identifiers resolves to a person link, the author is reduced to
    ``type`` and ``$ref``; otherwise it is described from the subfields:

    authors.name: $a followed by every $b, separated by single spaces
    authors.type: ``organisation`` for tag 710, ``person`` otherwise
    authors.qualifier: $c (optional, never set for tag 710)
    authors.date: $d (optional, never set for tag 710)

    :param self: record being built (not used here).
    :param key: field key; the first three characters are the tag and the
        fifth character is the second indicator.
    :param value: mapping of subfield codes to their value(s).
    :returns: the author dict, or ``None`` for an ignored field.
    """
    if key[4] == '2':
        return None

    author = {'type': 'person'}
    identifiers = value.get('0')
    if identifiers:
        # Every identifier is tried; the last one that resolves wins.
        for identifier in utils.force_list(identifiers):
            link = get_person_link(marc21.bib_id, identifier, key, value)
            if link:
                author['$ref'] = link
    if author.get('$ref'):
        return author

    # we do not have a $ref
    author['name'] = ''
    if value.get('a'):
        author['name'] = remove_trailing_punctuation(
            not_repetitive(marc21.bib_id, key, value, 'a'))
    for unit in utils.force_list(value.get('b')) or ():
        author['name'] += ' ' + remove_trailing_punctuation(unit)

    if key[:3] == '710':
        author['type'] = 'organisation'
        return author

    for code, field in (('c', 'qualifier'), ('d', 'date')):
        if value.get(code):
            author[field] = remove_trailing_punctuation(
                not_repetitive(marc21.bib_id, key, value, code))
    return author
Ejemplo n.º 11
0
 def build_place_or_agent_data(code, label):
     """Describe a place ($a) or an agent ($b).

     :param code: subfield code, ``'a'`` or ``'b'``.
     :param label: raw subfield text.
     :returns: dict with ``type`` and ``label``, or ``None`` when nothing
         is left of the label once its trailing punctuation is removed.
     """
     type_per_code = {'a': 'bf:Place', 'b': 'bf:Agent'}
     cleaned = remove_trailing_punctuation(label)
     if not cleaned:
         return None
     return {'type': type_per_code[code], 'label': [{'value': cleaned}]}
Ejemplo n.º 12
0
def marc21_to_description(self, key, value):
    """Get extent, otherMaterialCharacteristics, formats.

    extent: 300$a (the first one if many), stored on the record unless an
        extent is already present
    otherMaterialCharacteristics: 300$b (the first one if many), stored on
        the record unless a value is already present
    formats: 300 [$c repetitive]

    :param self: record being built.
    :param key: field key (not used here).
    :param value: mapping of subfield codes to their value(s).
    :returns: the formats already on the record, else the $c values as a
        list; ``None`` when the field has no $c.
    """
    raw_extent = value.get('a')
    if raw_extent and not self.get('extent', None):
        self['extent'] = remove_trailing_punctuation(
            utils.force_list(raw_extent)[0])

    raw_other = value.get('b')
    if raw_other and self.get('otherMaterialCharacteristics', []) == []:
        self['otherMaterialCharacteristics'] = remove_trailing_punctuation(
            utils.force_list(raw_other)[0])

    raw_formats = value.get('c')
    if not raw_formats:
        return None
    return self.get('formats', None) or list(utils.force_list(raw_formats))
Ejemplo n.º 13
0
 def build_agent_data(code, label, index, link):
     """Describe a place ($a) or an agent ($b) of a 264 field.

     :param code: subfield code, ``'a'`` or ``'b'``.
     :param label: raw subfield text.
     :param index: position of the matching subfield in the linked field.
     :param link: key of the alternate graphic entry for tag 264.
     :returns: dict with ``type`` and ``label``; the label list gets a
         second, language-tagged entry when the alternate graphic
         subfield can be read.
     """
     entry_type = {'a': 'bf:Place', 'b': 'bf:Agent'}[code]
     labels = [{'value': remove_trailing_punctuation(label)}]
     agent_data = {'type': entry_type, 'label': labels}
     # Best effort: any failure while reading the alternate graphic data
     # leaves the plain label as the only entry.
     try:
         alternate = marc21.alternate_graphic['264'][link]
         alt_label = marc21.get_subfields(alternate['field'])[index]
         alt_entry = {
             'value': remove_trailing_punctuation(alt_label),
             'language': marc21.get_language_script(alternate['script']),
         }
     except Exception:
         return agent_data
     labels.append(alt_entry)
     return agent_data
Ejemplo n.º 14
0
 def build_place_or_agent_data(code, label, add_country):
     """Describe a place ($a) or an agent ($b).

     :param code: subfield code, ``'a'`` or ``'b'``.
     :param label: raw subfield text.
     :param add_country: when true and ``marc21.country`` is set, the
         country is copied into the result.
     :returns: dict with ``type``, ``label`` and optionally ``country``,
         or ``None`` when nothing is left of the label once its trailing
         punctuation is removed.
     """
     type_per_code = {'a': 'bf:Place', 'b': 'bf:Agent'}
     value = remove_trailing_punctuation(label)
     if not value:
         # No description was built, so there is nothing to attach a
         # country to; assigning into ``None`` would raise TypeError.
         return None
     place_or_agent_data = {
         'type': type_per_code[code],
         'label': [{
             'value': value
         }]
     }
     if add_country and marc21.country:
         place_or_agent_data['country'] = marc21.country
     return place_or_agent_data
Ejemplo n.º 15
0
 def build_place_or_agent_data(code, label, index, add_country):
     """Describe a place ($a) or an agent ($c).

     :param code: subfield code, ``'a'`` or ``'c'``.
     :param label: raw subfield text.
     :param index: subfield position (not used here).
     :param add_country: when true, the lower-cased first $a of the first
         102 field is stored as ``country``, if there is one.
     :returns: dict with ``type``, ``label`` and optionally ``country``.
     """
     type_per_code = {'a': 'bf:Place', 'c': 'bf:Agent'}
     place_or_agent_data = {
         'type': type_per_code[code],
         'label': [{'value': remove_trailing_punctuation(label)}],
     }
     if not add_country:
         return place_or_agent_data

     # country from 102
     fields_102 = unimarctojson.get_fields(tag='102')
     if not fields_102:
         return place_or_agent_data
     country_codes = unimarctojson.get_subfields(fields_102[0], 'a')
     if country_codes:
         place_or_agent_data['country'] = country_codes[0].lower()
     return place_or_agent_data
Ejemplo n.º 16
0
def publication_statement_text(provision_activity):
    """Create publication statement from place, agent and date values.

    One text is assembled per label language (labels without a language
    go to ``'default'``). Separators between two successive pieces:
    same statement type as the previous statement -> ``' ; '`` for
    places and agents, ``', '`` for dates; otherwise ``' ; '`` before a
    place, ``', '`` before a date and ``' : '`` before anything else.

    :param provision_activity: dict whose optional ``statement`` list
        holds dicts with a ``type`` and a list of ``label`` dicts.
    :returns: list of ``{'value', 'language'}`` dicts; languages for which
        ``display_alternate_graphic_first`` is true are put in front.
    """
    same_type_separator = {
        'bf:Place': ' ; ',
        'bf:Agent': ' ; ',
        'Date': ', '
    }
    text_per_language = {'default': ''}
    previous_type = None
    for statement in provision_activity.get('statement', []):
        for label in statement['label']:
            language = label.get('language', 'default')
            text = text_per_language.setdefault(language, '')
            if text:
                # A separator is only needed once the text has content.
                current_type = statement['type']
                if current_type == previous_type:
                    text += same_type_separator[previous_type]
                elif current_type == 'bf:Place':
                    text += ' ; '
                elif current_type == 'Date':
                    text += ', '
                else:
                    text += ' : '
            text_per_language[language] = text + label['value']
        previous_type = statement['type']

    statement_text = []
    for language, text in text_per_language.items():
        entry = {
            'value': remove_trailing_punctuation(text),
            'language': language,
        }
        if display_alternate_graphic_first(language):
            statement_text.insert(0, entry)
        else:
            statement_text.append(entry)
    return statement_text
Ejemplo n.º 17
0
def unimarc_to_contribution(self, key, value):
    """Get contribution.

    Handled UNIMARC fields:
    700 Personal name - primary responsibility
    701 Personal name - alternative responsibility
    702 Personal name - secondary responsibility
    710 Corporate body name - primary responsibility
    711 Corporate body name - alternative responsibility
    712 Corporate body name - secondary responsibility

    :param self: record being built (not used here).
    :param key: field key; the first three characters are the tag and the
        fourth character is the first indicator.
    :param value: mapping of subfield codes to their value(s).
    :returns: ``{'agent': ..., 'role': [...]}``. The roles are the
        converted $4 codes, without duplicates and in order of first
        appearance, or ``['aut']`` when no $4 code can be converted.
    """
    # Conversion table from the numeric $4 function codes to role codes.
    IDREF_ROLE_CONV = {
        "070": "aut",
        "230": "cmp",
        "205": "ctb",
        "340": "edt",
        "420": "hnr",
        "440": "ill",
        "600": "pht",
        "590": "prf",
        "730": "trl",
        "080": "aui",
        "160": "bsl",
        "220": "com",
        "300": "drt",
        "430": "ilu",
        "651": "pbd",
        "350": "egr",
        "630": "pro",
        "510": "ltg",
        "365": "exp",
        "727": "dgs",
        "180": "ctg",
        "210": "cmm",
        "200": "chr",
        "110": "bnd",
        "720": "ato",
        "030": "arr",
        "020": "ann",
        "632": "adi",
        "005": "act",
        "390": "fmo",
        "545": "mus"
    }

    tag = key[:3]
    agent = {
        'preferred_name': ', '.join(utils.force_list(value.get('a', ''))),
        'type': 'bf:Person',
    }
    if agent['preferred_name'] and value.get('b'):
        agent['preferred_name'] += \
            ', ' + ', '.join(utils.force_list(value.get('b')))

    if tag in ('700', '701', '702', '703'):
        if value.get('d'):
            agent['numeration'] = value.get('d')

        if value.get('c'):
            agent['qualifier'] = value.get('c')

        if value.get('f'):
            date = utils.force_list(value.get('f'))[0]
            # "1900-...." stands for an open-ended life span.
            dates = date.replace('-....', '-').split('-')
            date_of_birth = dates[0].strip()
            if date_of_birth:
                agent['date_of_birth'] = date_of_birth
            # Without a "-" separator there is no second part to read.
            if len(dates) > 1:
                date_of_death = dates[1].strip()
                if date_of_death:
                    agent['date_of_death'] = date_of_death

    elif tag in ('710', '711', '712'):
        agent['type'] = 'bf:Organisation'
        agent['conference'] = key[3] == '1'
        if agent['preferred_name'] and value.get('c'):
            agent['preferred_name'] += \
                ', ' + ', '.join(utils.force_list(value.get('c')))
        conference_fields = (
            ('d', 'conference_number'),
            ('e', 'conference_place'),
            ('f', 'conference_date'),
        )
        for code, field in conference_fields:
            if value.get(code):
                first = utils.force_list(value.get(code))[0]
                # Drop the trailing punctuation, then the parentheses.
                agent[field] = remove_trailing_punctuation(
                    first
                ).lstrip('(').rstrip(')')

    roles = []
    if value.get('4'):
        for role in utils.force_list(value.get('4')):
            role_conv = IDREF_ROLE_CONV.get(role)
            # Keep the first occurrence only, so the order is stable.
            if role_conv and role_conv not in roles:
                roles.append(role_conv)
    if not roles:
        roles = ['aut']

    return {
        'agent': agent,
        'role': roles
    }
Ejemplo n.º 18
0
def marc21_to_work_access_point(self, key, value):
    """Get work access point.

    Builds one work description from a 130, 730 or 240 field and appends
    it to ``self['work_access_point']`` when it is not empty.

    :param self: record being built; ``work_access_point`` is updated.
    :param key: field key; the first three characters are the tag.
    :param value: field content, read through ``get_field_items``.
    :returns: ``None``; the result is stored on ``self``.
    """
    """
    * "date_of_work": "[130$f|730$f]"
    * "miscellaneous_information": "[130$g|130$s|730$g|730$s]"
    * "language": "[130$l|730$l]"
    * "form_subdivision": ["[130$k|730$k]"]
    * "medium_of_performance_for_music": ["[130$m|730$m]"]
    * "arranged_statement_for_music": "[130$o|730$o]"
    * "key_for_music": "[130$r|730$r]"

    [1] Nettoyer la chaîne: supprimer la ponctuation finale "/:;.,=",
    supprimer en particulier la chaine ". - "
    """

    work = {}
    tag_key = key[:3]
    # $n / $p are collected separately and turned into work['part'].
    part_list = TitlePartList(part_number_code='n', part_name_code='p')
    part_selection = {'n', 'p'}
    items = get_field_items(value)
    if tag_key in ['130', '730']:
        # Subfields stored under the name given by _WORK_ACCESS_POINT.
        work_selection = {'a', 'f', 'k', 'l', 'm', 'o', 'r'}

        # Subfields merged into 'miscellaneous_information'.
        miscellaneous_selection = {'g', 's'}
        miscellaneous_parts = []
        # _WORK_ACCESS_POINT.get(subfield_code)
        for blob_key, blob_value in items:
            if blob_key in work_selection:

                if blob_key in {'k', 'm'}:
                    # $k and $m accumulate into a list.
                    datas = work.get(_WORK_ACCESS_POINT.get(blob_key), [])
                    datas.append(blob_value)
                    work[_WORK_ACCESS_POINT.get(blob_key)] = datas
                else:
                    # Other subfields keep the last value seen.
                    work[_WORK_ACCESS_POINT.get(blob_key)] = blob_value
            if blob_key in miscellaneous_selection:
                miscellaneous_parts.append(blob_value)
            if blob_key in part_selection:
                part_list.update_part(blob_value, blob_key, blob_value)
        if miscellaneous_parts:
            work['miscellaneous_information'] = '. '.join(miscellaneous_parts)
    if tag_key == '240':
        for blob_key, blob_value in items:
            if blob_key in {'a'}:
                # work[_WORK_ACCESS_POINT.get('a')] = value.get('a')
                work[_WORK_ACCESS_POINT.get(blob_key)] = blob_value

            if blob_key in part_selection:
                part_list.update_part(blob_value, blob_key, blob_value)

        # For a 240 field the agent is read from the first 100 field of
        # the record.
        field_100 = marc21.get_fields('100')
        if field_100:
            agent = {}
            for blob_key, blob_value in field_100[0].get('subfields').items():
                # Set on every iteration; stays absent if there are no
                # subfields at all.
                agent['type'] = 'bf:Person'
                if blob_key == 'a':
                    # numeration = not_repetitive(
                    # marc21.bib_id, marc21.bib_id, blob_key, blob_value, 'b')
                    agent['preferred_name'] = remove_trailing_punctuation(
                        blob_value)
                if blob_key == 'b':
                    # numeration = not_repetitive(
                    # marc21.bib_id, marc21.bib_id, blob_key, blob_value, 'b')
                    agent['numeration'] = remove_trailing_punctuation(
                        blob_value)
                if blob_key == 'c':
                    # qualifier = not_repetitive(
                    # marc21.bib_id, marc21.bib_id, blob_key, blob_value, 'c')
                    agent['qualifier'] = remove_trailing_punctuation(
                        blob_value)
                if blob_key == 'd':
                    # date = not_repetitive(
                    # marc21.bib_id, marc21.bib_id, blob_key, blob_value, 'd')
                    # $d is split on '-' into birth and death parts.
                    date = blob_value.rstrip(',')
                    dates = remove_trailing_punctuation(date).split('-')
                    try:
                        date_of_birth = dates[0].strip()
                        if date_of_birth:
                            agent['date_of_birth'] = date_of_birth
                    except Exception:
                        pass
                    # A missing second part is silently ignored.
                    try:
                        date_of_death = dates[1].strip()
                        if date_of_death:
                            agent['date_of_death'] = date_of_death
                    except Exception:
                        pass
                if blob_key == 'q':
                    # fuller_form_of_name = not_repetitive(
                    # marc21.bib_id, marc21.bib_id, blob_key, blob_value, 'q')
                    # Strip the surrounding parentheses as well.
                    agent['fuller_form_of_name'] = remove_trailing_punctuation(
                        blob_value).lstrip('(').rstrip(')')
            work['agent'] = agent

    the_part_list = part_list.get_part_list()
    if the_part_list:
        work['part'] = the_part_list

    # Nothing is stored when no subfield produced any data.
    if work:
        work_access_points = self.get('work_access_point', [])
        work_access_points.append(work)
        self['work_access_point'] = work_access_points
Ejemplo n.º 19
0
 def build_place_or_agent_data(code, label):
     """Describe a place ($a) or an agent ($b).

     :param code: subfield code, ``'a'`` or ``'b'``.
     :param label: raw subfield text.
     :returns: dict with ``type`` and ``label``, or ``None`` when nothing
         is left of the label once its trailing punctuation is removed.
     """
     type_per_code = {'a': 'bf:Place', 'b': 'bf:Agent'}
     value = remove_trailing_punctuation(label)
     if value:
         return {'type': type_per_code[code], 'label': [{'value': value}]}
     return None
Ejemplo n.º 20
0
def marc21_to_contribution(self, key, value):
    """Get contribution.

    Only tags 100, 700, 710 and 711 whose second indicator is not ``2``
    are processed.

    The agent is either a link (``$ref``) resolved from a $0 identifier
    or, when no link is found and a $a exists, a description built from
    the subfields (person for 100/700, organisation for 710/711).

    Roles come from $4; without $4 the default is ``cre`` for tag 100,
    ``aut`` for tag 711 and ``ctb`` otherwise.

    :param self: record being built (not used here).
    :param key: field key; the first three characters are the tag and the
        fifth character is the second indicator.
    :param value: mapping of subfield codes to their value(s).
    :returns: ``{'agent': ..., 'role': [...]}`` when an agent could be
        built, else ``None``.
    """
    if not key[4] == '2' and key[:3] in ['100', '700', '710', '711']:
        agent = {}
        if value.get('0'):
            refs = utils.force_list(value.get('0'))
            # Every identifier is tried; the last one that resolves wins.
            for ref in refs:
                ref = get_person_link(marc21.bib_id, marc21.rero_id, ref, key,
                                      value)
                if ref:
                    agent['$ref'] = ref
        # we do not have a $ref
        if not agent.get('$ref') and value.get('a'):
            agent = {'type': 'bf:Person'}
            if value.get('a'):
                name = not_repetitive(marc21.bib_id, marc21.rero_id, key,
                                      value, 'a').rstrip('.')
                if name:
                    agent['preferred_name'] = name

            # 100|700 Person
            if key[:3] in ['100', '700']:
                if value.get('b'):
                    numeration = not_repetitive(marc21.bib_id, marc21.rero_id,
                                                key, value, 'b')
                    numeration = remove_trailing_punctuation(numeration)
                    if numeration:
                        agent['numeration'] = numeration
                if value.get('c'):
                    qualifier = not_repetitive(marc21.bib_id, marc21.rero_id,
                                               key, value, 'c')
                    agent['qualifier'] = remove_trailing_punctuation(qualifier)
                if value.get('d'):
                    date = not_repetitive(marc21.bib_id, marc21.rero_id, key,
                                          value, 'd')
                    # $d is split on '-' into birth and death parts.
                    date = date.rstrip(',')
                    dates = remove_trailing_punctuation(date).split('-')
                    try:
                        date_of_birth = dates[0].strip()
                        if date_of_birth:
                            agent['date_of_birth'] = date_of_birth
                    except Exception:
                        pass
                    # A missing second part is silently ignored.
                    try:
                        date_of_death = dates[1].strip()
                        if date_of_death:
                            agent['date_of_death'] = date_of_death
                    except Exception:
                        pass
                if value.get('q'):
                    fuller_form_of_name = not_repetitive(
                        marc21.bib_id, marc21.rero_id, key, value, 'q')
                    # Strip the surrounding parentheses as well.
                    fuller_form_of_name = remove_trailing_punctuation(
                        fuller_form_of_name).lstrip('(').rstrip(')')
                    if fuller_form_of_name:
                        agent['fuller_form_of_name'] = fuller_form_of_name

            # 710|711 Organisation
            elif key[:3] in ['710', '711']:
                agent['type'] = 'bf:Organisation'
                if key[:3] == '711':
                    agent['conference'] = True
                else:
                    agent['conference'] = False
                # $b and $e both feed 'subordinate_unit', $b first.
                if value.get('b'):
                    subordinate_units = []
                    for subordinate_unit in utils.force_list(value.get('b')):
                        subordinate_units.append(subordinate_unit.rstrip('.'))
                    agent['subordinate_unit'] = subordinate_units
                if value.get('e'):
                    subordinate_units = agent.get('subordinate_unit', [])
                    for subordinate_unit in utils.force_list(value.get('e')):
                        subordinate_units.append(subordinate_unit.rstrip('.'))
                    agent['subordinate_unit'] = subordinate_units
                if value.get('n'):
                    conference_number = not_repetitive(marc21.bib_id,
                                                       marc21.rero_id, key,
                                                       value, 'n')
                    conference_number = remove_trailing_punctuation(
                        conference_number).lstrip('(').rstrip(')')
                    if conference_number:
                        agent['conference_number'] = conference_number
                if value.get('d'):
                    conference_date = not_repetitive(marc21.bib_id,
                                                     marc21.rero_id, key,
                                                     value, 'd')
                    conference_date = remove_trailing_punctuation(
                        conference_date).lstrip('(').rstrip(')')
                    if conference_date:
                        agent['conference_date'] = conference_date
                if value.get('c'):
                    conference_place = not_repetitive(marc21.bib_id,
                                                      marc21.rero_id, key,
                                                      value, 'c')
                    conference_place = remove_trailing_punctuation(
                        conference_place).lstrip('(').rstrip(')')
                    if conference_place:
                        agent['conference_place'] = conference_place

        if value.get('4'):
            roles = []
            for role in utils.force_list(value.get('4')):
                # Role codes are expected to be three characters long;
                # anything else is reported and cut to three characters.
                if len(role) != 3:
                    error_print('WARNING CONTRIBUTION ROLE LENGTH:',
                                marc21.bib_id, marc21.rero_id, role)
                    role = role[:3]
                # NOTE(review): this comparison runs before lower(), so an
                # upper-case 'SCE' is not remapped -- confirm intent.
                if role == 'sce':
                    error_print('WARNING CONTRIBUTION ROLE SCE:',
                                marc21.bib_id, marc21.rero_id, 'sce --> aus')
                    role = 'aus'
                role = role.lower()
                # Unknown codes are reported and replaced by 'ctb'.
                if role not in _CONTRIBUTION_ROLE:
                    error_print('WARNING CONTRIBUTION ROLE DEFINITION:',
                                marc21.bib_id, marc21.rero_id, role)
                    role = 'ctb'
                roles.append(role)
        else:
            # No $4: the default role depends on the tag.
            if key[:3] == '100':
                roles = ['cre']
            elif key[:3] == '711':
                roles = ['aut']
            else:
                roles = ['ctb']
        if agent:
            # Duplicate roles are removed; the resulting order is the
            # set's iteration order.
            return {'agent': agent, 'role': list(set(roles))}
    return None
Ejemplo n.º 21
0
def marc21_to_subjects(self, key, value):
    """Get subjects.

    Depending on the field, one of these record entries is extended:
        genreForm: field 655 with $2 rero
        subjects: any other 6xx field with $2 rero
        subjects_imported: 6xx fields with $2 rerovoc, or whose second
            indicator is '0' (LCSH) or '2' (MeSH)

    :param self: record being built; the lists above are updated on it.
    :param key: field key; the first three characters are the tag and the
        fifth character is the second indicator.
    :param value: mapping of subfield codes to their value(s).
    :returns: ``None``; the result is stored on ``self``.
    """
    type_per_tag = {
        '600': 'bf:Person',
        '610': 'bf:Organization',
        '611': 'bf:Organization',
        '600t': 'bf:Work',
        '610t': 'bf:Work',
        '611t': 'bf:Work',
        '630': 'bf:Work',
        '650': 'bf:Topic',  # or bf:Temporal, changed by code
        '651': 'bf:Place',
        '655': 'bf:Topic'
    }

    # NOTE(review): this table is not read anywhere in this function.
    ref_link_per_tag = {
        '600': 'IdRef agent',
        '610': 'IdRef agent',
        '611': 'IdRef agent',
        '600t': 'IdRef work',
        '610t': 'IdRef work',
        '611t': 'IdRef work',
        '630': 'IdRef work',
        '650': 'RERO RAMEAU concept',
        '651': 'Idref place',
        '655': 'RERO RAMEAU concept'
    }

    field_data_per_tag = {
        '600': 'preferred_name',
        '610': 'preferred_name',
        '611': 'preferred_name',
        '600t': 'title',
        '610t': 'title',
        '611t': 'title',
        '630': 'title',
        '650': 'term',
        '651': 'preferred_name',
        '655': 'term'
    }

    subfield_code_per_tag = {
        '600': 'abcd',
        '610': 'ab',
        '611': 'acden',
        '600t': 'tpn',
        '610t': 'tpn',
        '611t': 't',
        '630': 'apn',
        '650': 'a',
        '651': 'a',
        '655': 'a'
    }

    conference_per_tag = {'610': False, '611': True}
    source_per_indicator_2 = {'0': 'LCSH', '2': 'MeSH'}
    # type_per_tag spells the organisation type with a "z". The link test
    # used to look for 'bf:Organisation' only, which never matched, so no
    # link was ever resolved for 610/611. Both spellings are accepted.
    linkable_types = ('bf:Person', 'bf:Organisation', 'bf:Organization')

    indicator_2 = key[4]
    tag_key = key[:3]
    subfields_2 = utils.force_list(value.get('2'))
    subfield_2 = subfields_2[0] if subfields_2 else None
    subfields_a = utils.force_list(value.get('a', []))

    if subfield_2 == 'rero':
        # A $t turns a name heading into a name/title (work) heading.
        if tag_key in ('600', '610', '611') and value.get('t'):
            tag_key += 't'
        data_type = type_per_tag[tag_key]

        if tag_key == '650':
            # A topic with a $a starting with a digit is a temporal one.
            start_with_digit_regexp = re.compile(r'^\d')
            if any(start_with_digit_regexp.search(subfield_a)
                   for subfield_a in subfields_a):
                data_type = 'bf:Temporal'

        subject = {
            'type': data_type,
        }

        string_build = build_string_from_subfields(
            value, subfield_code_per_tag[tag_key])
        if tag_key == '655':
            # remove the square brackets
            string_build = re.sub(r'^\[(.*)\]$', r'\1', string_build)
        subject[field_data_per_tag[tag_key]] = string_build

        if tag_key in ('610', '611'):
            subject['conference'] = conference_per_tag[tag_key]

        if tag_key in ('600t', '610t', '611t'):
            creator_tag_key = tag_key[:3]  # to keep only tag:  600, 610, 611
            subject['creator'] = remove_trailing_punctuation(
                build_string_from_subfields(
                    value, subfield_code_per_tag[creator_tag_key]), '.', '.')
        field_key = 'genreForm' if tag_key == '655' else 'subjects'

        subfields_0 = utils.force_list(value.get('0'))
        if data_type in linkable_types and subfields_0:
            ref = get_contribution_link(marc21.bib_id, marc21.rero_id,
                                        subfields_0[0], key)
            if ref:
                # A resolved link replaces the textual description.
                subject = {
                    '$ref': ref,
                    'type': data_type,
                }
        if not subject.get('$ref'):
            identifier = build_identifier(value)
            if identifier:
                subject['identifiedBy'] = identifier

        # Store the subject only when it carries a link or some text.
        if subject.get('$ref') or subject.get(field_data_per_tag[tag_key]):
            subjects = self.get(field_key, [])
            subjects.append(subject)
            self[field_key] = subjects
    elif subfield_2 == 'rerovoc' or indicator_2 in ['0', '2']:
        term_string = build_string_from_subfields(
            value, 'abcdefghijklmnopqrstuvwxyz', ' - ')
        if term_string:
            if subfield_2 == 'rerovoc':
                source = 'rerovoc'
            else:
                source = source_per_indicator_2[indicator_2]
            subject_imported = {
                'type': type_per_tag[tag_key],
                'source': source
            }
            subject_imported[field_data_per_tag[tag_key]] = term_string
            if tag_key in ('610', '611'):
                subject_imported['conference'] = conference_per_tag[tag_key]
            subjects_imported = self.get('subjects_imported', [])
            subjects_imported.append(subject_imported)
            self['subjects_imported'] = subjects_imported
Ejemplo n.º 22
0
        if value.get('c'):
            qualifier = utils.force_list(value.get('c'))[0]
            agent['qualifier'] = remove_trailing_punctuation(qualifier)
        if value.get('d'):
            date = utils.force_list(value.get('d'))[0]
            date = date.rstrip(',')
            dates = remove_trailing_punctuation(date).split('-')
            with contextlib.suppress(Exception):
                if date_of_birth := dates[0].strip():
                    agent['date_of_birth'] = date_of_birth
            with contextlib.suppress(Exception):
                if date_of_death := dates[1].strip():
                    agent['date_of_death'] = date_of_death
        if value.get('q'):
            fuller_form_of_name = utils.force_list(value.get('q'))[0]
            agent['fuller_form_of_name'] = remove_trailing_punctuation(
                fuller_form_of_name).lstrip('(').rstrip(')')

    elif key[:3] in ['710', '711']:
        agent['type'] = 'bf:Organisation'
        agent['conference'] = key[:3] == '711'
        if value.get('e'):
            subordinate_units = [
                subordinate_unit.rstrip('.')
                for subordinate_unit in utils.force_list(value.get('e'))
            ]

            agent['subordinate_unit'] = subordinate_units
        if value.get('n'):
            numbering = utils.force_list(value.get('n'))[0]
            agent['numbering'] = remove_trailing_punctuation(numbering).lstrip(
                '(').rstrip(')')
Ejemplo n.º 23
0
def marc21_to_contribution(self, key, value):
    """Get contribution.

    Build a contribution (an agent plus its roles) from a MARC21 100, 700,
    710 or 711 field.

    :param key: MARC field key; ``key[:3]`` is read as the tag and
        ``key[4]`` as the second indicator.
    :param value: mapping of subfield codes to a value or a list of values.
    :returns: ``{'agent': <dict>, 'role': <list>}``, or ``None`` when the
        second indicator is '2' or the tag is not 100, 700, 710 or 711.
    """
    tag = key[:3]
    if key[4] == '2' or tag not in ('100', '700', '710', '711'):
        return None

    def first_subfield(code):
        """Return the first occurrence of subfield `code`."""
        return utils.force_list(value.get(code))[0]

    def strip_parentheses(text):
        """Drop trailing punctuation, then leading '(' and trailing ')'."""
        return remove_trailing_punctuation(text).lstrip('(').rstrip(')')

    agent = {'type': 'bf:Person'}
    if value.get('a'):
        agent['preferred_name'] = remove_trailing_punctuation(
            first_subfield('a'))

    if tag in ('100', '700'):
        # 100|700 Person
        if value.get('b'):
            agent['numeration'] = remove_trailing_punctuation(
                first_subfield('b'))
        if value.get('c'):
            agent['qualifier'] = remove_trailing_punctuation(
                first_subfield('c'))
        if value.get('d'):
            date = first_subfield('d').rstrip(',')
            dates = remove_trailing_punctuation(date).split('-')
            # $d is split on '-': the first part is the birth date, the
            # second (when present) the death date. `zip` stops at the
            # shorter sequence, so a missing death part is simply skipped
            # instead of relying on a swallowed IndexError.
            date_fields = ('date_of_birth', 'date_of_death')
            for date_field, date_part in zip(date_fields, dates):
                date_part = date_part.strip()
                if date_part:
                    agent[date_field] = date_part
        if value.get('q'):
            agent['fuller_form_of_name'] = strip_parentheses(
                first_subfield('q'))
    else:
        # 710|711 Organisation (the guard above leaves only these tags)
        agent['type'] = 'bf:Organisation'
        agent['conference'] = tag == '711'
        if value.get('e'):
            agent['subordinate_unit'] = [
                subordinate_unit.rstrip('.')
                for subordinate_unit in utils.force_list(value.get('e'))
            ]
        if value.get('n'):
            agent['conference_number'] = strip_parentheses(
                first_subfield('n'))
        if value.get('d'):
            agent['conference_date'] = strip_parentheses(
                first_subfield('d'))
        if value.get('c'):
            agent['conference_place'] = strip_parentheses(
                first_subfield('c'))

    # Explicit $4 role codes win; otherwise the role is derived from the tag.
    if value.get('4'):
        roles = list(utils.force_list(value.get('4')))
    elif tag == '100':
        roles = ['cre']
    elif tag == '711':
        roles = ['aut']
    else:
        roles = ['ctb']
    return {'agent': agent, 'role': roles}
Ejemplo n.º 24
0
def marc21_to_subjects_6XX(self, key, value):
    """Get subjects.

    - create an object :
        genreForm : for the field 655
        subjects :  for 6xx with $2 rero, gnd or idref
        subjects_imported : for 6xx having indicator 2 '0' or '2'

    The result is stored on ``self`` (under ``genreForm``, ``subjects`` or
    ``subjects_imported``); the visible code has no explicit return value.

    :param key: MARC field key; ``key[:3]`` is the tag and ``key[4]`` the
        second indicator.
    :param value: mapping of subfield codes to a value or a list of values.
    """

    def perform_subdivisions(field):
        """Copy the $v, $x, $y and $z subdivisions of `value` into `field`."""
        subdivisions = {
            'v': 'genreForm_subdivisions',
            'x': 'topic_subdivisions',
            'y': 'temporal_subdivisions',
            'z': 'place_subdivisions'
        }
        for code, subdivision in subdivisions.items():
            for subfield_value in utils.force_list(value.get(code, [])):
                field.setdefault(subdivision, []).append(subfield_value)

    # Lookup tables keyed by tag; the 't' suffix marks a 600/610/611 field
    # that carries a $t (title) subfield and therefore describes a work.
    type_per_tag = {
        '600': DocumentSubjectType.PERSON,
        '610': DocumentSubjectType.ORGANISATION,
        '611': DocumentSubjectType.ORGANISATION,
        '600t': DocumentSubjectType.WORK,
        '610t': DocumentSubjectType.WORK,
        '611t': DocumentSubjectType.WORK,
        '630': DocumentSubjectType.WORK,
        '650': DocumentSubjectType.TOPIC,  # or bf:Temporal, changed by code
        '651': DocumentSubjectType.PLACE,
        '655': DocumentSubjectType.TOPIC
    }

    # Name of the output key that receives the built string.
    field_data_per_tag = {
        '600': 'preferred_name',
        '610': 'preferred_name',
        '611': 'preferred_name',
        '600t': 'title',
        '610t': 'title',
        '611t': 'title',
        '630': 'title',
        '650': 'term',
        '651': 'preferred_name',
        '655': 'term'
    }

    # Subfield codes passed to `build_string_from_subfields` for each tag.
    subfield_code_per_tag = {
        '600': 'abcd',
        '610': 'ab',
        '611': 'acden',
        '600t': 'tpn',
        '610t': 'tpn',
        '611t': 't',
        '630': 'apn',
        '650': 'a',
        '651': 'a',
        '655': 'a'
    }

    conference_per_tag = {
        '610': False,
        '611': True
    }
    source_per_indicator_2 = {
        '0': 'LCSH',
        '2': 'MeSH'
    }

    indicator_2 = key[4]
    tag_key = key[:3]
    subfields_2 = utils.force_list(value.get('2'))
    subfield_2 = subfields_2[0] if subfields_2 else None
    subfields_a = utils.force_list(value.get('a', []))

    if subfield_2 in ['rero', 'gnd', 'idref']:
        if tag_key in ['600', '610', '611'] and value.get('t'):
            tag_key += 't'
        data_type = type_per_tag[tag_key]

        # `data_type` is Temporal if tag is 650 and a $a start with digit.
        # Slicing with [:1] (instead of indexing [0]) keeps an empty $a
        # from raising IndexError; it simply does not count as a digit.
        if tag_key == '650' and any(
                subfield_a[:1].isdigit() for subfield_a in subfields_a):
            data_type = 'bf:Temporal'

        subject = {
            'type': data_type,
        }

        string_build = build_string_from_subfields(
            value, subfield_code_per_tag[tag_key])
        if tag_key == '655':
            # remove the square brackets
            string_build = re.sub(r'^\[(.*)\]$', r'\1', string_build)
        subject[field_data_per_tag[tag_key]] = string_build

        if tag_key in ['610', '611']:
            subject['conference'] = conference_per_tag[tag_key]

        if tag_key in ['600t', '610t', '611t']:
            creator_tag_key = tag_key[:3]  # to keep only tag:  600, 610, 611
            subject['creator'] = remove_trailing_punctuation(
                build_string_from_subfields(
                    value, subfield_code_per_tag[creator_tag_key]), '.', '.')
        field_key = 'genreForm' if tag_key == '655' else 'subjects'
        subfields_0 = utils.force_list(value.get('0'))
        if data_type in [DocumentSubjectType.PERSON,
                         DocumentSubjectType.ORGANISATION] and subfields_0:
            ref = get_contribution_link(marc21.bib_id, marc21.rero_id,
                                        subfields_0[0], key)
            if ref:
                # A resolved link replaces the locally built subject.
                subject = {
                    '$ref': ref,
                    'type': data_type,
                }
        if not subject.get('$ref'):
            identifier = build_identifier(value)
            if identifier:
                subject['identifiedBy'] = identifier
            perform_subdivisions(subject)

        # Keep the subject only if it is a link or has a non-empty label.
        if subject.get('$ref') or subject.get(field_data_per_tag[tag_key]):
            subjects = self.get(field_key, [])
            subjects.append(subject)
            self[field_key] = subjects
    elif indicator_2 in ['0', '2']:
        term_string = build_string_from_subfields(
            value, 'abcdefghijklmnopqrstuw', ' - ')
        if term_string:
            subject_imported = {
                'type': type_per_tag[tag_key],
                'source': source_per_indicator_2[indicator_2],
                field_data_per_tag[tag_key]: term_string.rstrip('.')
            }
            perform_subdivisions(subject_imported)
            if tag_key in ['610', '611']:
                subject_imported['conference'] = conference_per_tag[tag_key]
            # `subject_imported` always holds at least its three initial
            # keys here, so it is appended unconditionally.
            subjects_imported = self.get('subjects_imported', [])
            subjects_imported.append(subject_imported)
            self['subjects_imported'] = subjects_imported