Esempio n. 1
0
def get_contribution_link(bibid, reroid, id, key, value):
    """Return the production MEF IdRef URL for a contribution field.

    The IdRef pid is extracted from ``id`` with ``IDREF_REF_REGEX``. For the
    supported tags the record is requested from the MEF host configured
    through the ``RERO_ILS_MEF_HOST`` environment variable.

    :param bibid: bibliographic record id, only used in log messages.
    :param reroid: RERO record id, only used in log messages.
    :param id: identifier string searched for an IdRef pid.
    :param key: field key; its first three characters are the MARC tag.
    :param value: subfields mapping, only used to build the warning message.
    :returns: the record URL with the configured host replaced by the
        production host when the request answers OK, otherwise ``None``.
    """
    # https://mef.test.rero.ch/api/mef/?q=rero.rero_pid:A012327677
    prod_host = 'mef.rero.ch'
    test_host = os.environ.get('RERO_ILS_MEF_HOST', 'mef.rero.ch')
    mef_url = 'https://{host}/api/'.format(host=test_host)

    found = IDREF_REF_REGEX.search(id)
    if not found:
        return None
    pid = found.group(1)
    contribution_tags = ('100', '600', '610', '611', '700', '710', '711')
    if key[:3] not in contribution_tags:
        return None

    url = "{mef}idref/{pid}".format(mef=mef_url, pid=pid)
    try:
        response = requests.get(url=url)
    except requests.exceptions.RequestException as err:
        error_print('ERROR MEF ACCESS:', bibid, reroid, url, err)
        return None
    if response.status_code == requests.codes.ok:
        # Links are always published against the production host.
        return url.replace(test_host, prod_host)

    # Not found: log the field rebuilt from its subfields, leaving out the
    # '__order__' bookkeeping entry.
    subfields = ' '.join(
        '${v} {k}'.format(v=code, k=text)
        for code, text in value.items()
        if code != '__order__'
    )
    field = '{key} {subfiels}'.format(key=key, subfiels=subfields)
    error_print('WARNING MEF CONTRIBUTION IDREF NOT FOUND:', bibid,
                reroid, field, url, response.status_code)
    return None
Esempio n. 2
0
def get_language_script(script):
    """Build the language script code.

    The code has the form <lang_code>-<script_code>, for example chi-hani.
    The <script_code> is the ``script`` parameter; the <lang_code> is the
    first language of the record (008, then 041$a, then 041$h) that is
    accepted for this script. When the script is unknown, or no record
    language fits it, 'und' is used as the language code.
    """
    languages_scripts = {
        'arab': ('ara', 'per'),
        'cyrl': ('bel', 'chu', 'mac', 'rus', 'srp', 'ukr'),
        'grek': ('grc', 'gre'),
        'hani': ('chi', 'jpn'),
        'hebr': ('heb', 'lad', 'yid'),
        'jpan': ('jpn', ),
        'kore': ('kor', ),
        'zyyy': ('chi', )
    }
    accepted = languages_scripts.get(script)
    if accepted is not None:
        record_languages = ([marc21.lang_from_008] +
                            marc21.langs_from_041_a +
                            marc21.langs_from_041_h)
        matching = next(
            (code for code in record_languages if code in accepted), None)
        if matching is not None:
            return '-'.join((matching, script))
        # Known script, but none of the record languages is written in it.
        error_print('WARNING LANGUAGE SCRIPTS:', marc21.bib_id, script, '008:',
                    marc21.lang_from_008, '041$a:', marc21.langs_from_041_a,
                    '041$h:', marc21.langs_from_041_h)
    return '-'.join(('und', script))
Esempio n. 3
0
def marc21_to_electronicLocator_from_field_856(self, key, value):
    """Get electronicLocator from field 856.

    One locator is built per $u. Its type comes from the second indicator
    (``key[4]``), falling back to 'noInfo'. The first $3 is stored as
    ``content`` when it is one of the known content values; otherwise it is
    added to ``publicNote`` together with every $z.

    :param key: field key; the character at index 4 is the second indicator.
    :param value: subfields of the 856 field.
    :returns: the locators already stored on ``self`` extended with the new
        ones, or ``None`` when there is no $u or the resulting list is empty.
    """
    if not value.get('u'):
        return None

    electronic_locator_type = {
        '0': 'resource',
        '1': 'versionOfResource',
        '2': 'relatedResource',
        '8': 'hiddenUrl'
    }
    # A missing comma used to merge two adjacent 'summarization' literals
    # into 'summarizationsummarization', so the real value never matched.
    electronic_locator_content = [
        'poster', 'audio', 'postcard', 'addition', 'debriefing',
        'exhibitionDocumentation', 'erratum', 'bookplate', 'extract',
        'educationalSheet', 'illustrations', 'coverImage',
        'deliveryInformation', 'biographicalInformation',
        'introductionPreface', 'classReading', 'teachersKit',
        'publishersNote', 'noteOnContent', 'titlePage', 'photography',
        'summarization', 'onlineResourceViaRERODOC', 'pressReview',
        'webSite', 'tableOfContents', 'fullText', 'video'
    ]
    electronic_locators = self.get('electronicLocator', [])
    indicator2 = key[4]

    content = None
    if value.get('3'):
        content = utils.force_list(value.get('3'))[0]
    known_content = bool(content) and content in electronic_locator_content

    public_note = []
    if content and not known_content:
        # Unknown content values are kept as a free-text note.
        public_note.append(content)
    if value.get('z'):
        public_note.extend(utils.force_list(value.get('z')))

    for url in utils.force_list(value.get('u')):
        electronic_locator = {
            'url': url,
            'type': electronic_locator_type.get(indicator2, 'noInfo')
        }
        if known_content:
            electronic_locator['content'] = content
        if public_note:
            electronic_locator['publicNote'] = public_note
        # URLs shorter than 7 characters are logged and skipped.
        if len(electronic_locator['url']) >= 7:
            electronic_locators.append(electronic_locator)
        else:
            error_print('WARNING ELECTRONICLOCATOR:', marc21.bib_id,
                        marc21.rero_id, electronic_locator['url'])
    return electronic_locators or None
Esempio n. 4
0
def get_person_link(bibid, reroid, id, key, value):
    """Return the production MEF link of a person identifier.

    ``id`` is expected in the form ``(ORG)PID``. The MEF API on the host
    configured by ``RERO_ILS_MEF_HOST`` is searched for ``org.pid:PID`` and
    the first hit is used, preferring its idref source, then gnd, then rero.

    :param bibid: bibliographic record id, only used in log messages.
    :param reroid: RERO record id, only used in log messages.
    :param id: identifier string, e.g. ``(RERO)A012327677``.
    :param key: field key, only used in log messages.
    :param value: field value, only used in log messages.
    :returns: the link on the production host, or ``None`` when nothing was
        found or any error occurred (errors are logged, never raised).
    """
    # https://mef.test.rero.ch/api/mef/?q=rero.rero_pid:A012327677
    prod_host = 'mef.rero.ch'
    test_host = os.environ.get('RERO_ILS_MEF_HOST', 'mef.rero.ch')
    mef_url = 'https://{host}/api/'.format(host=test_host)
    mef_link = None
    try:
        # Drop the leading '(' and split the organisation from the pid.
        parts = id[1:].split(')')
        url = "{mef}mef/?q={org}.pid:{pid}".format(mef=mef_url,
                                                   org=parts[0].lower(),
                                                   pid=parts[1])
        response = requests.get(url=url)
        if response.status_code != requests.codes.ok:
            error_print('ERROR MEF REQUEST:', bibid, reroid, url,
                        response.status_code)
        else:
            pid = None
            hits = response.json().get('hits', {}).get('hits')
            if hits:
                metadata = hits[0].get('metadata', {})
                # The first source present wins, in this order.
                for pid_type in ('idref', 'gnd', 'rero'):
                    source = metadata.get(pid_type)
                    if source:
                        pid = source['pid']
                        break
            if pid:
                link = "{url}{pid_type}/{pid}".format(url=mef_url,
                                                      pid_type=pid_type,
                                                      pid=pid)
                mef_link = link.replace(test_host, prod_host)
    except Exception as err:
        # Best effort: an unparsable id or a failed lookup yields no link.
        error_print('WARNING NOT MEF REF:', bibid, id, key, value, err)
    return mef_link
Esempio n. 5
0
def marc21_to_language(self, key, value):
    """Get languages.

    languages: 008 and 041 [$a, repetitive]

    As a side effect, when the record has no 264 field with a second
    indicator in 0-3, a default ``provisionActivity`` is stored on ``self``
    using the dates of field 008.

    :returns: the language list already stored on ``self`` extended with the
        008 language and the distinct 041$a languages, or ``None`` if empty.
    """
    lang_codes = []
    language = self.get('language', [])
    if marc21.lang_from_008:
        language.append({'value': marc21.lang_from_008, 'type': 'bf:Language'})
        lang_codes.append(marc21.lang_from_008)
    for lang_value in marc21.langs_from_041_a:
        # Strip before comparing so a padded code cannot duplicate one that
        # is already recorded.
        lang_value = lang_value.strip()
        if lang_value not in lang_codes:
            language.append({'value': lang_value, 'type': 'bf:Language'})
            lang_codes.append(lang_value)

    # default provisionActivity if we have no 264
    fields_264 = marc21.get_fields(tag='264')
    valid_264 = any(
        field_264['ind2'] in ['0', '1', '2', '3'] for field_264 in fields_264)
    if not valid_264:
        if fields_264:
            error_print('WARNING INVALID 264', marc21.bib_id, marc21.rero_id,
                        fields_264)
        provision_activity = {'type': 'bf:Publication'}
        self['provisionActivity'] = [provision_activity]
        if marc21.date_type_from_008 in ('q', 'n'):
            provision_activity['note'] = 'Date(s) uncertain or unknown'
        start_date = make_year(marc21.date1_from_008)
        if not start_date or start_date > 2050:
            error_print('WARNING START DATE 008:', marc21.bib_id,
                        marc21.rero_id, marc21.date1_from_008)
            start_date = 2050
            # Overrides the "uncertain" note: the date was replaced.
            provision_activity['note'] = \
                'Date not available and automatically set to 2050'
        provision_activity['startDate'] = start_date
        end_date = make_year(marc21.date2_from_008)
        if end_date:
            if end_date > 2050:
                # Log the offending end date (date 2), not the start date.
                error_print('WARNING END DATE 008:', marc21.bib_id,
                            marc21.rero_id, marc21.date2_from_008)
            else:
                provision_activity['endDate'] = end_date

    # if not language:
    #     error_print('ERROR LANGUAGE:', marc21.bib_id, 'set to "und"')
    #     language = [{'value': 'und', 'type': 'bf:Language'}]
    return language or None
Esempio n. 6
0
def marc21_to_contribution(self, key, value):
    """Get contribution.

    Handles fields 100, 700, 710 and 711 whose second indicator
    (``key[4]``) is not '2'. The agent is a MEF ``$ref`` when one of the $0
    identifiers resolves; otherwise it is described from the subfields
    (person for 100/700, organisation for 710/711). Roles come from $4, or
    from a default depending on the tag.

    :returns: ``{'agent': ..., 'role': [...]}``, or ``None`` when the field
        is not handled or no agent could be built.
    """
    if key[4] == '2' or key[:3] not in ('100', '700', '710', '711'):
        return None
    tag = key[:3]

    def subfield(code):
        """Return the single value of subfield ``code``."""
        return not_repetitive(marc21.bib_id, marc21.rero_id, key, value, code)

    def unwrapped(text):
        """Drop trailing punctuation and the surrounding parentheses."""
        return remove_trailing_punctuation(text).lstrip('(').rstrip(')')

    agent = {}
    if value.get('0'):
        for identifier in utils.force_list(value.get('0')):
            link = get_person_link(marc21.bib_id, marc21.rero_id, identifier,
                                   key, value)
            if link:
                # The last identifier that resolves wins.
                agent['$ref'] = link

    # we do not have a $ref
    if not agent.get('$ref') and value.get('a'):
        agent = {'type': 'bf:Person'}
        name = subfield('a').rstrip('.')
        if name:
            agent['preferred_name'] = name

        if tag in ('100', '700'):
            # 100|700 Person
            if value.get('b'):
                numeration = remove_trailing_punctuation(subfield('b'))
                if numeration:
                    agent['numeration'] = numeration
            if value.get('c'):
                agent['qualifier'] = remove_trailing_punctuation(
                    subfield('c'))
            if value.get('d'):
                # "birth-death": the death part may be absent.
                dates = remove_trailing_punctuation(
                    subfield('d').rstrip(',')).split('-')
                date_of_birth = dates[0].strip()
                if date_of_birth:
                    agent['date_of_birth'] = date_of_birth
                if len(dates) > 1:
                    date_of_death = dates[1].strip()
                    if date_of_death:
                        agent['date_of_death'] = date_of_death
            if value.get('q'):
                fuller_form_of_name = unwrapped(subfield('q'))
                if fuller_form_of_name:
                    agent['fuller_form_of_name'] = fuller_form_of_name
        elif tag in ('710', '711'):
            # 710|711 Organisation
            agent['type'] = 'bf:Organisation'
            agent['conference'] = tag == '711'
            # $b then $e both feed the subordinate units, in that order.
            for code in ('b', 'e'):
                if value.get(code):
                    units = agent.get('subordinate_unit', [])
                    for unit in utils.force_list(value.get(code)):
                        units.append(unit.rstrip('.'))
                    agent['subordinate_unit'] = units
            conference_subfields = (
                ('n', 'conference_number'),
                ('d', 'conference_date'),
                ('c', 'conference_place'),
            )
            for code, attribute in conference_subfields:
                if value.get(code):
                    text = unwrapped(subfield(code))
                    if text:
                        agent[attribute] = text

    if value.get('4'):
        roles = []
        for role in utils.force_list(value.get('4')):
            if len(role) != 3:
                error_print('WARNING CONTRIBUTION ROLE LENGTH:',
                            marc21.bib_id, marc21.rero_id, role)
                role = role[:3]
            if role == 'sce':
                error_print('WARNING CONTRIBUTION ROLE SCE:',
                            marc21.bib_id, marc21.rero_id, 'sce --> aus')
                role = 'aus'
            role = role.lower()
            if role not in _CONTRIBUTION_ROLE:
                error_print('WARNING CONTRIBUTION ROLE DEFINITION:',
                            marc21.bib_id, marc21.rero_id, role)
                role = 'ctb'
            roles.append(role)
    else:
        # Default role when the field carries no $4.
        roles = [{'100': 'cre', '711': 'aut'}.get(tag, 'ctb')]

    if not agent:
        return None
    return {'agent': agent, 'role': list(set(roles))}
Esempio n. 7
0
def marc21_to_type_and_issuance(self, key, value):
    """
    Set the document type and the mode of issuance on ``self``.

    Document type, from the leader:
    Books: LDR/6-7: am
    Journals: LDR/6-7: as
    Articles: LDR/6-7: aa
    Scores: LDR/6: c|d
    Videos: LDR/6: g + 007/0: m|v
    Sounds: LDR/6: i|j
    E-books (imported from Cantook)

    Anything else is typed 'other'. The issuance main type and subtype are
    derived from the bibliographic level and the serial type; a subtype that
    is not allowed for its main type is replaced by that main type's default
    and a warning is logged. Nothing is returned.
    """
    # get the document type
    record_type = marc21.record_type
    if record_type == 'a':
        level_to_type = {'m': 'book', 's': 'journal', 'a': 'article'}
        doc_type = level_to_type.get(marc21.bib_level, 'other')
    elif record_type in ('c', 'd'):
        doc_type = 'score'
    elif record_type in ('i', 'j'):
        doc_type = 'sound'
    elif record_type == 'g':
        doc_type = 'video'
        # Todo 007
    else:
        doc_type = 'other'
    self['type'] = doc_type

    # get the mode of issuance
    self['issuance'] = {}
    bib_level = marc21.bib_level
    main_type = _ISSUANCE_MAIN_TYPE_PER_BIB_LEVEL.get(bib_level, 'rdami:1001')
    sub_type = 'NOT_DEFINED'
    if bib_level == 'm':
        if marc21.is_top_level_record:
            main_type, sub_type = 'rdami:1002', 'set'
        else:
            sub_type = 'materialUnit'
    elif bib_level in _ISSUANCE_SUBTYPE_PER_BIB_LEVEL:
        sub_type = _ISSUANCE_SUBTYPE_PER_BIB_LEVEL[bib_level]
    elif marc21.serial_type in _ISSUANCE_SUBTYPE_PER_SERIAL_TYPE:
        sub_type = _ISSUANCE_SUBTYPE_PER_SERIAL_TYPE[marc21.serial_type]

    # (main type, allowed subtypes, subtype used when the found one is not
    # allowed)
    subtype_rules = (
        ('rdami:1001',
         ('article', 'materialUnit', 'privateFile', 'privateSubfile'),
         'materialUnit'),
        ('rdami:1002',
         ('set', 'partIndependentTitle', 'partDependantTitle'),
         'set'),
        ('rdami:1003',
         ('serialInSerial', 'monographicSeries', 'periodical'),
         'periodical'),
        ('rdami:1004',
         ('updatingWebsite', 'updatingLoose-leaf'),
         'updatingWebsite'),
    )
    error = False
    for rule_main_type, allowed, fallback in subtype_rules:
        if main_type == rule_main_type:
            if sub_type not in allowed:
                error = True
                sub_type = fallback
            break
    if error:
        error_print('WARNING ISSUANCE:', marc21.bib_id, marc21.rero_id,
                    main_type, sub_type, bib_level, marc21.serial_type)
    self['issuance'] = {'main_type': main_type, 'subtype': sub_type}