Esempio n. 1
0
        def _get_ids(value):
            """Collect the identifiers found in subfields ``i``, ``j`` and ``w``.

            Returns a list of ``{"type": ..., "value": ...}`` dicts: first the
            ``i`` values, then the recognised ``j`` values, then the ``w``
            values. ``j`` values matching none of the known formats are dropped.
            """
            def _normalize_j(raw_id):
                # The checks run in this order and the first match wins.
                # Only the ``CCID-`` prefix is matched case-sensitively.
                if raw_id.upper().startswith("JACOW-"):
                    return {"type": "JACOW", "value": "JACoW-" + raw_id[6:]}
                if raw_id.upper().startswith("ORCID:") and len(raw_id) > 6:
                    return {"type": "ORCID", "value": raw_id[6:]}
                if ORCID.match(raw_id):
                    return {"type": "ORCID", "value": raw_id}
                if raw_id.startswith("CCID-"):
                    return {"type": "CERN", "value": "CERN-" + raw_id[5:]}
                return None

            ids = [
                {"type": "INSPIRE ID", "value": inspire_id}
                for inspire_id in force_force_list(value.get("i"))
            ]

            for raw_id in force_force_list(value.get("j")):
                normalized = _normalize_j(raw_id)
                if normalized is not None:
                    ids.append(normalized)

            ids.extend(
                {"type": "INSPIRE BAI", "value": bai}
                for bai in force_force_list(value.get("w"))
            )

            return ids
Esempio n. 2
0
def persistent_identifiers(self, key, value):
    """Split identifier fields into DOIs and other persistent identifiers.

    Entries whose ``2`` subfield is ``doi`` (case-insensitive) are stored
    under ``self['dois']``. All other entries are returned, appended to any
    ``persistent_identifiers`` already present on ``self``.
    """
    fields = force_force_list(value)

    dois = self.get('dois', [])
    other_ids = self.get('persistent_identifiers', [])

    for field in fields:
        if not field:
            continue

        id_values = force_force_list(field.get('a'))
        id_type = force_single_element(field.get('2'))

        if id_type and id_type.lower() == 'doi':
            for id_value in id_values:
                dois.append({
                    'value': id_value,
                    'source': field.get('9'),
                })
            continue

        for id_value in id_values:
            # Non-DOI entries keep the raw ``2`` subfield as their type.
            other_ids.append({
                'value': id_value,
                'source': field.get('9'),
                'type': field.get('2'),
            })

    self['dois'] = dois
    return other_ids
Esempio n. 3
0
def acronym(self, key, value):
    """Fill conference dates, CNUM, titles and addresses; return subfield ``e``.

    Side effects on ``self``: ``date``, ``opening_date``, ``closing_date`` and
    ``cnum`` are always overwritten (possibly with None). ``titles`` and
    ``address`` are extended only when ``a`` / ``c`` are present.
    """
    for target, subfield in (
            ('date', 'd'),
            ('opening_date', 'x'),
            ('closing_date', 'y'),
            ('cnum', 'g')):
        self[target] = value.get(subfield)

    if value.get('a'):
        self.setdefault('titles', [])
        # Every title of the field shares the same subtitle and source.
        self['titles'].extend([
            {
                'title': raw_title,
                'subtitle': value.get('b'),
                'source': value.get('9'),
            }
            for raw_title in force_force_list(value.get('a'))
        ])

    if value.get('c'):
        self.setdefault('address', [])
        for raw_address in force_force_list(value.get('c')):
            self['address'].append(parse_conference_address(raw_address))

    return value.get('e')
Esempio n. 4
0
    def _get_acc_exp_json(acc_exp_data):
        """Yield one accelerator/experiment dict per ``e`` value.

        When the ``0`` subfield holds exactly as many integer recids as there
        are experiment names, each dict also carries a ``record`` reference
        and is flagged as curated. Otherwise the recids are ignored and the
        dicts are flagged as not curated.
        """
        experiment_recids = []
        if '0' in acc_exp_data:
            try:
                experiment_recids = list(
                    map(int, force_force_list(acc_exp_data.get('0'))))
            except (TypeError, ValueError, AttributeError):
                # A single unparsable recid discards all of them.
                pass

        names = force_force_list(acc_exp_data.get('e'))

        # Recids and names are paired only when their counts agree, otherwise
        # a name could end up linked to the wrong record.
        if len(experiment_recids) != len(names):
            for name in names:
                yield {
                    'accelerator': acc_exp_data.get('a'),
                    'experiment': name,
                    'curated_relation': False,
                }
            return

        for recid, name in zip(experiment_recids, names):
            yield {
                'record': get_record_ref(recid, 'experiments'),
                'accelerator': acc_exp_data.get('a'),
                'experiment': name,
                'curated_relation': True,
            }
Esempio n. 5
0
def persistent_identifiers(self, key, value):
    """Sort identifier fields into ``dois`` and ``persistent_identifiers``.

    A field counts as a DOI when its ``2`` subfield, reduced to a single
    element, equals ``doi`` ignoring case. DOIs are written to
    ``self['dois']``; everything else is returned together with the
    ``persistent_identifiers`` already stored on ``self``.
    """
    fields = force_force_list(value)

    doi_entries = self.get('dois', [])
    pid_entries = self.get('persistent_identifiers', [])

    # Falsy fields carry no data and are skipped.
    for field in (candidate for candidate in fields if candidate):
        ids = force_force_list(field.get('a'))
        kind = force_single_element(field.get('2'))

        if kind and kind.lower() == 'doi':
            doi_entries.extend(
                {'value': id_, 'source': field.get('9')}
                for id_ in ids
            )
        else:
            pid_entries.extend(
                {
                    'value': id_,
                    'source': field.get('9'),
                    'type': field.get('2'),
                }
                for id_ in ids
            )

    self['dois'] = doi_entries
    return pid_entries
Esempio n. 6
0
def persistent_identifiers(self, key, value):
    """Split identifier fields into DOIs and other persistent identifiers.

    For every non-empty field, the ``a`` values are the identifiers, the
    ``2`` subfield (reduced to a single element) is their type, and the
    source is taken from the ``9`` values that are not ``CURATOR``
    (case-insensitive), reduced to a single element.

    DOIs (type ``DOI``, case-insensitive) are stored in ``self['dois']``.
    All other identifiers are returned, appended to the
    ``persistent_identifiers`` already present on ``self``.
    """
    def _first_non_curator_source(sources):
        # Build a real list rather than using ``filter``: on Python 3
        # ``filter`` returns a lazy iterator, which would be handed to
        # ``force_single_element`` instead of the filtered values.
        without_curator = [
            el for el in force_force_list(sources)
            if el.upper() != 'CURATOR'
        ]

        return force_single_element(without_curator)

    def _is_doi(type_):
        return type_ and type_.upper() == 'DOI'

    dois = self.get('dois', [])
    persistent_identifiers = self.get('persistent_identifiers', [])

    # ``field`` instead of re-using ``value`` so the parameter is not
    # shadowed while iterating.
    for field in force_force_list(value):
        if not field:
            continue

        ids = force_force_list(field.get('a'))
        type_ = force_single_element(field.get('2'))
        source = _first_non_curator_source(field.get('9'))

        if _is_doi(type_):
            dois.extend([{
                'source': source,
                'value': id_,
            } for id_ in ids])
        else:
            persistent_identifiers.extend([{
                'source': source,
                'type': type_,
                'value': id_,
            } for id_ in ids])

    self['dois'] = dois
    return persistent_identifiers
Esempio n. 7
0
    def _get_affiliations(value):
        """Build affiliation dicts from the ``u`` names and ``z`` recids.

        Names and recids are paired only when there are equally many of
        them. A paired entry gets a ``record`` reference (None when the recid
        cannot be converted to ``int``) and is curated exactly when that
        reference is not None. Unpaired entries carry only the name and are
        flagged as not curated.
        """
        names = force_force_list(value.get('u'))
        recids = force_force_list(value.get('z'))

        # With differing counts a name could be matched to the wrong recid,
        # so the recids are ignored altogether.
        if len(names) != len(recids):
            return [
                {'curated_relation': False, 'value': name}
                for name in names
            ]

        affiliations = []
        for name, recid in zip(names, recids):
            try:
                record = get_record_ref(int(recid), 'institutions')
            except (TypeError, ValueError):
                record = None

            affiliations.append({
                'curated_relation': record is not None,
                'record': record,
                'value': name,
            })

        return affiliations
Esempio n. 8
0
def institutions(self, key, value):
    """Append institutions built from the ``a`` names and ``z`` recids.

    Starts from the ``institutions`` already on ``self`` and returns the
    extended list. Names are linked to records only when there are as many
    ``z`` values as ``a`` values.
    """
    result = self.get('institutions', [])

    names = force_force_list(value.get('a'))
    recids = force_force_list(value.get('z'))

    # Pairing is only safe when the counts match; otherwise a name might
    # be linked to the wrong recid.
    if len(names) == len(recids):
        for name, recid in zip(names, recids):
            record = get_record_ref(recid, 'institutions')
            result.append({
                'curated_relation': record is not None,
                'name': name,
                'record': record,
            })
    else:
        result.extend(
            {'curated_relation': False, 'name': name}
            for name in names
        )

    return result
Esempio n. 9
0
def acronym(self, key, value):
    """Store conference metadata on ``self`` and return the ``e`` subfield.

    ``date``, ``opening_date``, ``closing_date`` and ``cnum`` are set from
    subfields ``d``, ``x``, ``y`` and ``g``. Titles (``a``) and parsed
    addresses (``c``) are appended to ``self['titles']`` and
    ``self['address']`` when present.
    """
    self['date'] = value.get('d')
    self['opening_date'] = value.get('x')
    self['closing_date'] = value.get('y')
    self['cnum'] = value.get('g')

    title_values = value.get('a')
    if title_values:
        self.setdefault('titles', [])
        for title_text in force_force_list(title_values):
            self['titles'].append({
                'title': title_text,
                'subtitle': value.get('b'),
                'source': value.get('9'),
            })

    address_values = value.get('c')
    if address_values:
        self.setdefault('address', [])
        for address_text in force_force_list(address_values):
            parsed = parse_conference_address(address_text)
            self['address'].append(parsed)

    return value.get('e')
Esempio n. 10
0
    def _get_acc_exp_json(acc_exp_data):
        """Generate accelerator/experiment entries from one field.

        One dict is yielded per ``e`` value. If the ``0`` subfield converts
        cleanly to integers and has the same number of items as ``e``, each
        entry gets a ``record`` reference and ``curated_relation`` True.
        Otherwise there is no ``record`` and ``curated_relation`` is False.
        """
        recids = []
        if '0' in acc_exp_data:
            try:
                raw_recids = force_force_list(acc_exp_data.get('0'))
                recids = [int(raw_recid) for raw_recid in raw_recids]
            except (TypeError, ValueError, AttributeError):
                # Conversion is all-or-nothing: fall back to no recids.
                recids = []

        names = force_force_list(acc_exp_data.get('e'))

        # Only pair recids with names when the counts agree, to avoid
        # attaching a name to the wrong recid.
        if len(recids) == len(names):
            for position, name in enumerate(names):
                yield {
                    'record': get_record_ref(recids[position], 'experiments'),
                    'accelerator': acc_exp_data.get('a'),
                    'experiment': name,
                    'curated_relation': True,
                }
        else:
            for name in names:
                yield {
                    'accelerator': acc_exp_data.get('a'),
                    'experiment': name,
                    'curated_relation': False,
                }
Esempio n. 11
0
def institutions(self, key, value):
    """Return the institutions on ``self`` extended with this field's entries.

    Each ``a`` value becomes one entry. When the number of ``z`` values
    equals the number of ``a`` values, each entry also carries the record
    reference for its ``z`` value and is curated when that reference is
    not None.
    """
    collected = self.get('institutions', [])

    a_values = force_force_list(value.get('a'))
    z_values = force_force_list(value.get('z'))

    if len(a_values) != len(z_values):
        # Counts differ: pairing could attach the wrong recid, so the
        # entries are left unlinked.
        for institution_name in a_values:
            collected.append({
                'curated_relation': False,
                'name': institution_name,
            })
        return collected

    for institution_name, recid in zip(a_values, z_values):
        reference = get_record_ref(recid, 'institutions')
        collected.append({
            'curated_relation': reference is not None,
            'name': institution_name,
            'record': reference,
        })

    return collected
Esempio n. 12
0
def parse_institution_address(address, city, state_province,
                              country, postal_code, country_code):
    """Build a normalized address dict for an institution.

    Returns a dict with the keys ``original_address``, ``city``, ``state``,
    ``country``, ``postal_code`` and ``country_code``.

    * ``state`` is the result of ``match_us_state`` when that is truthy,
      otherwise the given ``state_province``.
    * ``city``, ``country`` and ``postal_code`` are joined with ``', '``
      when they are lists or tuples; duplicate countries are dropped.
    * ``country_code`` comes from ``match_country_code``, then from the
      country name, and finally defaults to ``'US'`` when the state starts
      with ``'US-'``.
    """
    # Computed once and reused for the result (the original computed it
    # twice and left the first value unused).
    original_address = force_force_list(address)
    state_province = match_us_state(state_province) or state_province

    postal_code = force_force_list(postal_code)
    country = force_force_list(country)
    country_code = match_country_code(country_code)

    if isinstance(postal_code, (tuple, list)):
        postal_code = ', '.join(postal_code)

    if isinstance(city, (tuple, list)):
        city = ', '.join(city)

    if isinstance(country, (tuple, list)):
        # De-duplicate keeping first-seen order, so the joined string is
        # deterministic (iterating a ``set`` gives an arbitrary order).
        unique_countries = []
        for country_name in country:
            if country_name not in unique_countries:
                unique_countries.append(country_name)
        country = ', '.join(unique_countries)

    if not country_code and country:
        country_code = match_country_name_to_its_code(country)

    if not country_code and state_province and state_province.startswith('US-'):
        country_code = 'US'

    return {
        'original_address': original_address,
        'city': city,
        'state': state_province,
        'country': country,
        'postal_code': postal_code,
        'country_code': country_code,
    }
Esempio n. 13
0
    def _get_affiliations(value):
        """Return affiliation dicts for the ``u`` values of a field.

        If there are as many ``z`` recids as ``u`` names, each affiliation
        is paired with its recid: ``record`` is the institutions reference
        (or None if the recid is not convertible to ``int``) and
        ``curated_relation`` tells whether a reference was obtained.
        Otherwise only ``value`` is set and ``curated_relation`` is False.
        """
        def _resolve(recid):
            # A recid that is not a valid integer yields no reference.
            try:
                return get_record_ref(int(recid), 'institutions')
            except (TypeError, ValueError):
                return None

        institution_names = force_force_list(value.get('u'))
        institution_recids = force_force_list(value.get('z'))
        affiliations = []

        # Pair names and recids only when the counts agree, to avoid
        # matching a name with the wrong recid.
        if len(institution_names) == len(institution_recids):
            for name, recid in zip(institution_names, institution_recids):
                reference = _resolve(recid)
                affiliations.append({
                    'curated_relation': reference is not None,
                    'record': reference,
                    'value': name,
                })
        else:
            for name in institution_names:
                affiliations.append({
                    'curated_relation': False,
                    'value': name,
                })

        return affiliations
Esempio n. 14
0
def parse_institution_address(address, city, state_province,
                              country, postal_code, country_code):
    """Parse an institution address into a flat dict.

    The result has the keys ``original_address`` (the address as a list),
    ``city``, ``state``, ``country``, ``postal_code`` and ``country_code``.
    List or tuple values for city, country and postal code are joined with
    ``', '``; repeated countries appear once. The country code is resolved
    from ``country_code`` first, then from the country name, and falls back
    to ``'US'`` when the matched state starts with ``'US-'``.
    """
    # Single conversion, reused in the result; previously the value was
    # computed twice and the first result was never read.
    original_address = force_force_list(address)
    state_province = match_us_state(state_province) or state_province

    postal_code = force_force_list(postal_code)
    country = force_force_list(country)
    country_code = match_country_code(country_code)

    if isinstance(postal_code, (tuple, list)):
        postal_code = ', '.join(postal_code)

    if isinstance(city, (tuple, list)):
        city = ', '.join(city)

    if isinstance(country, (tuple, list)):
        # Drop duplicates but keep the input order: joining a ``set``
        # would produce the names in an arbitrary order.
        seen_countries = []
        for country_name in country:
            if country_name not in seen_countries:
                seen_countries.append(country_name)
        country = ', '.join(seen_countries)

    if not country_code and country:
        country_code = match_country_name_to_its_code(country)

    if not country_code and state_province and state_province.startswith('US-'):
        country_code = 'US'

    return {
        'original_address': original_address,
        'city': city,
        'state': state_province,
        'country': country,
        'postal_code': postal_code,
        'country_code': country_code,
    }
Esempio n. 15
0
def _collection_in_record(record, collection):
    """Return True if ``collection`` is one of the record's ``980__`` ``a`` values.

    The ``a`` values are lower-cased before the comparison; ``collection``
    itself is compared as given.
    """
    for field in force_force_list(record.get("980__", [])):
        lowered_names = [
            name.lower() for name in force_force_list(field.get('a', []))
        ]
        if collection in lowered_names:
            return True

    return False
Esempio n. 16
0
def _collection_in_record(record, collection):
    """Tell whether any ``980__`` field lists ``collection`` in subfield ``a``.

    Subfield values are lower-cased before matching, so ``collection`` only
    matches when it is given in lower case.
    """
    fields = force_force_list(record.get("980__", []))
    return any(
        collection in [
            name.lower() for name in force_force_list(field.get('a', []))
        ]
        for field in fields
    )
Esempio n. 17
0
def name_variants(self, key, value):
    """Append this field's name variants and return the full list.

    When subfield ``g`` is present its values are also added to
    ``self["extra_words"]``.
    """
    extra_words = value.get("g")
    if extra_words:
        self.setdefault("extra_words", []).extend(force_force_list(extra_words))

    variants = self.get("name_variants", [])
    source = value.get("9")
    names = force_force_list(value.get("a", []))
    variants.append({"source": source, "value": names})

    return variants
Esempio n. 18
0
def ranks(self, key, value):
    """Record raw ranks in ``_ranks`` and classified ranks in ``ranks``.

    Both lists live on ``self`` and are created when missing. The function
    has no explicit return value.
    """
    raw_ranks = self.setdefault('_ranks', [])
    classified_ranks = self.setdefault('ranks', [])

    for field in force_force_list(value):
        for raw_rank in force_force_list(field.get('a')):
            # The two lists stay index-aligned: one raw and one classified
            # entry per rank.
            raw_ranks.append(raw_rank)
            classified_ranks.append(classify_rank(raw_rank))
Esempio n. 19
0
        def _get_ids(value):
            """Return the typed identifiers carried by subfields ``i``, ``j``, ``w``.

            ``i`` values become ``INSPIRE ID`` entries and ``w`` values
            ``INSPIRE BAI`` entries. ``j`` values are classified as JACOW,
            ORCID or CERN identifiers; values matching none of these are
            skipped.
            """
            def _classify(j_value):
                """Return the id dict for ``j_value``, or None if unrecognised."""
                upper = j_value.upper()

                if upper.startswith('JACOW-'):
                    # Normalize the prefix capitalization to ``JACoW-``.
                    return {
                        'type': 'JACOW',
                        'value': 'JACoW-' + j_value[6:],
                    }
                if upper.startswith('ORCID:') and len(j_value) > 6:
                    # Strip the ``ORCID:`` prefix.
                    return {
                        'type': 'ORCID',
                        'value': j_value[6:],
                    }
                if ORCID.match(j_value):
                    return {
                        'type': 'ORCID',
                        'value': j_value,
                    }
                if j_value.startswith('CCID-'):
                    # ``CCID-`` (case-sensitive) is rewritten to ``CERN-``.
                    return {
                        'type': 'CERN',
                        'value': 'CERN-' + j_value[5:],
                    }
                return None

            found = []

            for inspire_id in force_force_list(value.get('i')):
                found.append({'type': 'INSPIRE ID', 'value': inspire_id})

            for j_value in force_force_list(value.get('j')):
                classified = _classify(j_value)
                if classified is not None:
                    found.append(classified)

            for bai in force_force_list(value.get('w')):
                found.append({'type': 'INSPIRE BAI', 'value': bai})

            return found
Esempio n. 20
0
def collaboration(self, key, value):
    """Return the shortest collaboration name; store the rest as alternatives.

    The candidates are the ``collaboration`` and
    ``collaboration_alternative_names`` already on ``self`` plus the ``g``
    subfield of every incoming field. They are sorted by length: the
    shortest is returned and the others are written to
    ``self['collaboration_alternative_names']``. Returns None when there
    are no candidates.
    """
    values = force_force_list(self.get('collaboration'))
    values.extend(self.get('collaboration_alternative_names', []))
    # Skip fields without a ``g`` subfield: a None entry would make the
    # ``key=len`` sort below raise TypeError.
    values.extend(
        el.get('g') for el in force_force_list(value)
        if el.get('g') is not None
    )

    collaborations = sorted(values, key=len)
    if len(collaborations) > 1:
        self['collaboration_alternative_names'] = collaborations[1:]
    if collaborations:
        return collaborations[0]
    return None
Esempio n. 21
0
def ranks(self, key, value):
    """Append each ``a`` rank to ``_ranks`` and its classification to ``ranks``.

    Works purely through side effects on ``self``; nothing is returned.
    """
    self.setdefault('_ranks', [])
    self.setdefault('ranks', [])

    # Lazily flatten the ``a`` values of every field into one stream.
    all_ranks = (
        rank
        for field in force_force_list(value)
        for rank in force_force_list(field.get('a'))
    )

    for rank in all_ranks:
        self['_ranks'].append(rank)
        self['ranks'].append(classify_rank(rank))
Esempio n. 22
0
        def _get_ids(value):
            """Build the list of identifier dicts for one field.

            Order of the result: ``INSPIRE ID`` entries from ``i``, then the
            recognised entries from ``j`` (JACOW, ORCID, CERN), then
            ``INSPIRE BAI`` entries from ``w``. Unrecognised ``j`` values are
            left out.
            """
            ids = [
                {'type': 'INSPIRE ID', 'value': inspire_id}
                for inspire_id in force_force_list(value.get('i'))
            ]

            for external_id in force_force_list(value.get('j')):
                if external_id.upper().startswith('JACOW-'):
                    id_type = 'JACOW'
                    id_value = 'JACoW-' + external_id[6:]
                elif (external_id.upper().startswith('ORCID:') and
                        len(external_id) > 6):
                    # Prefixed ORCID: keep only what follows ``ORCID:``.
                    id_type = 'ORCID'
                    id_value = external_id[6:]
                elif ORCID.match(external_id):
                    # Bare ORCID: kept unchanged.
                    id_type = 'ORCID'
                    id_value = external_id
                elif external_id.startswith('CCID-'):
                    id_type = 'CERN'
                    id_value = 'CERN-' + external_id[5:]
                else:
                    continue

                ids.append({'type': id_type, 'value': id_value})

            ids.extend(
                {'type': 'INSPIRE BAI', 'value': bai}
                for bai in force_force_list(value.get('w'))
            )

            return ids
Esempio n. 23
0
def name_variants(self, key, value):
    """Add one name-variant entry for this field and return all entries.

    The entry holds the ``9`` subfield as ``source`` and the ``a`` values
    as a list under ``value``. Any ``g`` values are appended to
    ``self['extra_words']``.
    """
    g_values = value.get('g')
    if g_values:
        words = self.setdefault('extra_words', [])
        words.extend(force_force_list(g_values))

    variant = {
        'source': value.get('9'),
        'value': force_force_list(value.get('a', [])),
    }

    all_variants = self.get('name_variants', [])
    all_variants.append(variant)
    return all_variants
Esempio n. 24
0
def name_variants(self, key, value):
    """Return ``self``'s name variants with this field's variant appended.

    Side effect: when subfield ``g`` is truthy, its values are added to the
    ``extra_words`` list on ``self`` (created if missing).
    """
    if value.get('g'):
        self.setdefault('extra_words', [])
        for word in force_force_list(value.get('g')):
            self['extra_words'].append(word)

    known_variants = self.get('name_variants', [])
    known_variants.append(dict(
        source=value.get('9'),
        value=force_force_list(value.get('a', [])),
    ))

    return known_variants
Esempio n. 25
0
def add_book_info(record, blob):
    """Link a book chapter to its book record.

    Does nothing unless ``record`` has a ``collections`` entry whose
    lower-cased ``primary`` is ``bookchapter``. In that case every ``773__``
    field of ``blob`` with a ``0`` subfield sets ``record['book']`` to a
    literature reference built from the first ``0`` value, so the last such
    field wins.
    """
    if 'collections' not in record:
        return

    primaries = [
        coll.get('primary').lower()
        for coll in record.get('collections', '')
        if coll.get('primary', '')
    ]
    if 'bookchapter' not in primaries:
        return

    for pubinfo in force_force_list(blob.get("773__", [])):
        book_recids = pubinfo.get('0')
        if not book_recids:
            continue

        first_recid = int(force_force_list(book_recids)[0])
        record['book'] = {
            'record': get_record_ref(first_recid, 'literature')
        }
Esempio n. 26
0
def report_numbers(self, key, value):
    """Split 037 fields into report numbers and arXiv eprints.

    A field whose ``9`` subfield is ``arXiv`` and which has a ``c`` subfield
    is stored in ``self['arxiv_eprints']``. Every other field is returned as
    a report number, appended to the ``report_numbers`` already on ``self``.
    """
    def _is_arxiv(element):
        return element.get('9') == 'arXiv' and 'c' in element

    numbers = self.get('report_numbers', [])
    eprints = self.get('arxiv_eprints', [])

    for element in force_force_list(value):
        if _is_arxiv(element):
            eprints.append({
                'value': element.get('a'),
                'categories': force_force_list(element.get('c')),
            })
        else:
            numbers.append({
                'source': element.get('9'),
                # Fall back to ``z`` when there is no ``a`` subfield.
                'value': element.get('a', element.get('z')),
            })

    self['arxiv_eprints'] = eprints
    return numbers
Esempio n. 27
0
        def _get_affiliations(value):
            """Return affiliation dicts built from subfields ``u`` and ``z``.

            When both subfields hold the same number of items, each ``u``
            value is paired with the institutions reference for its ``z``
            value. Otherwise the dicts contain only the ``u`` value.
            """
            names = force_force_list(value.get("u"))
            recids = force_force_list(value.get("z"))

            # Pairing with unequal counts could attach the wrong recid.
            if len(names) != len(recids):
                return [{"value": name} for name in names]

            return [
                {"record": get_record_ref(recid, "institutions"), "value": name}
                for name, recid in zip(names, recids)
            ]
Esempio n. 28
0
        def _get_full_name(value):
            """Return the first ``a`` value, or None when there is none.

            A warning is logged when the field carries more than one ``a``
            value; only the first one is used.
            """
            names = force_force_list(value.get("a"))
            if not names:
                return None

            first_name = names[0]
            if len(names) > 1:
                logger.warning(
                    "Record with mashed up authors list. Taking first author: %s",
                    first_name)

            return first_name
Esempio n. 29
0
def field_categories(self, key, value):
    """Append one category dict per ``a`` term to ``self['field_categories']``.

    Each dict keeps the raw term (``_term``) and raw scheme (``_scheme``,
    first element when ``2`` is a list or tuple) next to the classified
    ``term``. ``scheme`` is ``'INSPIRE'`` when classification produced a
    truthy term, else None. A source containing ``'automatically'`` is
    replaced with ``'INSPIRE'``. Nothing is returned explicitly.
    """
    self.setdefault('field_categories', [])

    raw_terms = force_force_list(value.get('a'))
    if not raw_terms:
        return

    for raw_term in raw_terms:
        term = classify_field(raw_term)

        raw_scheme = value.get('2')
        if isinstance(raw_scheme, (list, tuple)):
            raw_scheme = raw_scheme[0]

        source = value.get('9')
        if source and 'automatically' in source:
            source = 'INSPIRE'

        self['field_categories'].append({
            'source': source,
            '_scheme': raw_scheme,
            'scheme': 'INSPIRE' if term else None,
            '_term': raw_term,
            'term': term,
        })
Esempio n. 30
0
def report_numbers(self, key, value):
    """Return report numbers from 037 and store arXiv eprints on ``self``.

    Fields with ``9 == 'arXiv'`` and a ``c`` subfield become arXiv eprints
    (``value`` from ``a``, ``categories`` from ``c``). All other fields
    become report numbers (``source`` from ``9``, ``value`` from ``a`` or,
    failing that, ``z``).
    """
    def _as_report_number(field):
        return {
            'source': field.get('9'),
            'value': field.get('a', field.get('z')),
        }

    def _as_arxiv_eprint(field):
        return {
            'value': field.get('a'),
            'categories': force_force_list(field.get('c')),
        }

    report_number_list = self.get('report_numbers', [])
    arxiv_eprint_list = self.get('arxiv_eprints', [])

    for field in force_force_list(value):
        from_arxiv = field.get('9') == 'arXiv'
        if from_arxiv and 'c' in field:
            arxiv_eprint_list.append(_as_arxiv_eprint(field))
            continue
        report_number_list.append(_as_report_number(field))

    self['arxiv_eprints'] = arxiv_eprint_list
    return report_number_list
Esempio n. 31
0
def regions(self, key, value):
    """Map the region names in subfield ``a`` to their canonical names.

    Each ``a`` value is split with ``COMMA_OR_SLASH`` and every piece is
    looked up in the table below. Pieces that are not in the table produce
    a None entry in the returned list.
    """
    # Keys include abbreviations and misspelled variants on purpose: they
    # are matched verbatim against the input.
    canonical_names = {
        'AF': 'Africa',
        'Africa': 'Africa',
        'Asia': 'Asia',
        'Australia': 'Australasia',
        'Australasia': 'Australasia',
        'eu': 'Europe',
        'Europe': 'Europe',
        'Middle East': 'Middle East',
        'na': 'North America',
        'United States': 'North America',
        'Noth America': 'North America',
        'North America': 'North America',
        'North Americsa': 'North America',
        'South America': 'South America',
    }

    return [
        canonical_names.get(region)
        for el in force_force_list(value.get('a'))
        for region in COMMA_OR_SLASH.split(el)
    ]
Esempio n. 32
0
def get_subject(record):
    """Return the first truthy ``term`` among the record's INSPIRE categories.

    Returns None when no category has a truthy ``term``.
    """
    categories = force_force_list(get_value(record, 'inspire_categories'))
    subjects = [
        category['term'] for category in categories if category.get('term')
    ]

    if not subjects:
        return None
    return subjects[0]
Esempio n. 33
0
def name(self, key, value):
    """Build the name dict from the first field in ``value``.

    Only the first field is used. Its subfields are mapped as follows:
    ``a`` -> ``value``, ``b`` -> ``numeration``, ``c`` -> ``title``,
    ``g`` -> ``status``, ``q`` -> ``preferred_name``. In addition
    ``breadcrumb_title`` (from ``a``) and ``dates`` (from ``d``) are set on
    ``self`` unless they are already present.

    Values documented as admissible (this function does not validate them):

    `100__g`:
    + active
    + departed
    + retired
    + deceased

    `100__c`:
    + Sir

    `100__b`:
    + Jr.
    + Sr.
    + roman numbers (like VII)
    """
    first = force_force_list(value)[0]

    self.setdefault("breadcrumb_title", first.get("a"))
    self.setdefault("dates", first.get("d"))

    name_value = first.get("a")
    numeration = first.get("b")
    title = first.get("c")
    status = first.get("g")
    preferred_name = first.get("q")

    return {
        "value": name_value,
        "numeration": numeration,
        "title": title,
        "status": status,
        "preferred_name": preferred_name,
    }
Esempio n. 34
0
def split_page_artid(page_artid):
    """Split ``page_artid`` into ``(page_start, page_end, artid)``.

    ``page_artid`` may be a single value or several; falsy items are
    ignored. Each item is classified as follows, later items overriding
    earlier ones where noted:

    * contains ``-`` and splits into exactly two parts: a page range,
      overriding ``page_start`` and ``page_end``;
    * contains ``-`` otherwise, matches ``_RE_2_CHARS``, or has five or
      more characters: an article ID, overriding ``artid``;
    * anything else fills ``artid`` and ``page_start``, but only where
      they are still None.

    Returns ``(None, None, None)`` for falsy input.
    """
    if not page_artid:
        return None, None, None

    page_start = None
    page_end = None
    artid = None

    for item in force_force_list(page_artid):
        if not item:
            continue

        if '-' in item:
            parts = item.split('-')
            if len(parts) == 2:
                # Exactly one dash: a page range.
                page_start, page_end = parts
            else:
                artid = item
        elif _RE_2_CHARS.search(item) or len(item) >= 5:
            # Per the original notes, ``_RE_2_CHARS`` flags values with two
            # or more letters; those and long values are article IDs.
            artid = item
        else:
            # Short value: use it for whatever has not been set yet.
            if artid is None:
                artid = item
            if page_start is None:
                page_start = item

    return page_start, page_end, artid
Esempio n. 35
0
 def get_int_value(val):
     """Return the first element of ``val`` as an int, or None.

     None is returned when ``val`` is falsy or when its first element is
     not made up of digits only.
     """
     if not val:
         return None

     first = force_force_list(val)[0]
     return int(first) if first.isdigit() else None
Esempio n. 36
0
def split_page_artid(page_artid):
    """Return ``(page_start, page_end, artid)`` extracted from ``page_artid``.

    Falsy input gives ``(None, None, None)``. Otherwise every non-empty item
    is examined in order:

    * ``start-end`` (exactly one dash) sets ``page_start`` and ``page_end``;
    * any other dashed value, a value matching ``_RE_2_CHARS``, or a value
      of five or more characters sets ``artid``;
    * remaining values fill ``artid`` and ``page_start`` when those are
      still unset.
    """
    page_start = None
    page_end = None
    artid = None

    if not page_artid:
        return None, None, None

    for candidate in force_force_list(page_artid):
        if not candidate:
            continue

        if '-' in candidate:
            pieces = candidate.split('-')
            if len(pieces) == 2:
                page_start, page_end = pieces
            else:
                # More than one dash: treat the whole value as an ID.
                artid = candidate
            continue

        if _RE_2_CHARS.search(candidate):
            # Original note: two or more letters mean an article ID.
            artid = candidate
            continue

        if len(candidate) >= 5:
            # Five or more characters also count as an article ID.
            artid = candidate
            continue

        if artid is None:
            artid = candidate
        if page_start is None:
            page_start = candidate

    return page_start, page_end, artid
Esempio n. 37
0
def phd_advisors(self, key, value):
    """Return an advisor dict with ``id``, ``name`` and ``degree_type``.

    ``degree_type`` comes from the first ``g`` value: ``phd`` and ``master``
    (case-insensitive) become ``PhD`` and ``Master``, any other value is
    kept as is. It is None when ``g`` is missing or falsy.
    """
    degree_type = None

    raw_degrees = value.get("g")
    if raw_degrees:
        first_degree = force_force_list(raw_degrees)[0]
        known_degrees = {"phd": "PhD", "master": "Master"}
        degree_type = known_degrees.get(first_degree.lower(), first_degree)

    return {
        "id": value.get("i"),
        "name": value.get("a"),
        "degree_type": degree_type,
    }
Esempio n. 38
0
    def _get_source(value):
        """Return a single source from ``a``, ignoring ``inspire:uid:`` values."""
        candidates = []
        for source in force_force_list(value.get('a')):
            if source.startswith('inspire:uid:'):
                continue
            candidates.append(source)

        return force_single_element(candidates)
Esempio n. 39
0
def titles(self, key, value):
    """Add the titles in ``value`` to ``self['titles']`` and return that list.

    The key prefix decides what happens to each title:

    * ``245``: inserted at the front of the list;
    * ``242``: treated as a translation. If it has an ``a`` text and
      ``langdetect`` reports a language, it is appended to
      ``self['title_translations']`` with a ``language`` key, and it is
      never added to ``titles``;
    * anything else: appended to the end of the list.
    """
    all_titles = self.setdefault('titles', [])

    for field in force_force_list(value):
        title_obj = {
            'title': field.get('a'),
            'subtitle': force_single_element(field.get('b')),  # FIXME: #1484
            'source': field.get('9'),
        }

        if key.startswith('245'):
            all_titles.insert(0, title_obj)
            continue

        if not key.startswith('242'):
            all_titles.append(title_obj)
            continue

        # Translated title: needs a text and a detected language.
        text = field.get('a')
        if not text:
            continue

        lang = langdetect.detect(text)
        if lang:
            title_obj['language'] = lang
            self.setdefault('title_translations', []).append(title_obj)

    return all_titles
Esempio n. 40
0
 def get_int_value(val):
     """Convert the first element of ``val`` to int when it is all digits.

     Returns None for falsy ``val`` or a non-digit first element.
     """
     if val:
         candidate = force_force_list(val)[0]
         if candidate.isdigit():
             return int(candidate)

     return None
Esempio n. 41
0
def authors2marc(self, key, value):
    """Main Entry-Personal Name.

    The first author is returned as the value for this field; when there
    are further authors, ``self["700"]`` is reset to a fresh list holding
    their converted entries.
    """
    authors = force_force_list(value)

    def _to_marc(author):
        """Map one author object onto its MARC subfield dictionary."""
        affiliation_values = []
        for affiliation in author.get('affiliations', []):
            affiliation_values.append(affiliation.get('value'))

        return {
            'a': author.get('full_name'),
            'e': author.get('role'),
            'q': author.get('alternative_names'),
            'i': author.get('inspire_id'),
            'j': author.get('orcid'),
            'm': author.get('emails'),
            'u': affiliation_values,
            'x': get_recid_from_ref(author.get('record')),
            'y': author.get('curated_relation')
        }

    if len(authors) > 1:
        # Any previous content of "700" is discarded on purpose.
        additional = self["700"] = []
        additional.extend(_to_marc(author) for author in authors[1:])

    return _to_marc(authors[0])
Esempio n. 42
0
def inspire_categories(self, key, value):
    """Append classified INSPIRE categories to ``self['inspire_categories']``.

    The source in subfield ``9`` is kept when the ``elements/inspire_field``
    schema lists it; otherwise two legacy values are remapped and anything
    else becomes ``undefined``. Terms from subfield ``a`` that
    ``classify_field`` does not recognise are skipped. Nothing is returned.
    """
    allowed_sources = load_schema(
        'elements/inspire_field')['properties']['source']['enum']

    raw_terms = force_force_list(value.get('a'))
    source = value.get('9')

    if source not in allowed_sources:
        if source == 'automatically added based on DCC, PPF, DK':
            source = 'curator'
        else:
            source = 'user' if source == 'submitter' else 'undefined'

    categories = self.setdefault('inspire_categories', [])
    for raw_term in raw_terms or ():
        term = classify_field(raw_term)
        if not term:
            continue
        categories.append({
            'term': term,
            'source': source,
        })
Esempio n. 43
0
def references(self, key, value):
    """Produce list of references.

    Every incoming field is converted with a fresh ``ReferenceBuilder`` and
    appended to the list already stored under ``references`` (or to a new
    list), which is then returned.
    """
    fields = force_force_list(value)

    def _build_reference(subfields):
        """Feed one field's subfields to a ``ReferenceBuilder``."""
        # Retrieve fields as described here:
        # https://twiki.cern.ch/twiki/bin/view/Inspire/DevelopmentRecordMarkup.
        builder = ReferenceBuilder()
        # Subfields are processed in exactly this order.
        handlers = (
            ('o', builder.set_number),
            ('m', builder.add_misc),
            ('x', partial(builder.add_raw_reference, source='dojson')),
            ('1', builder.set_texkey),
            ('u', builder.add_url),
            ('r', builder.add_report_number),
            ('s', builder.set_pubnote),
            ('p', builder.set_publisher),
            ('y', builder.set_year),
            ('i', builder.add_uid),
            ('b', builder.add_uid),
            ('a', builder.add_uid),
            ('c', builder.add_collaboration),
            ('q', builder.add_title),
            ('t', builder.add_title),
            ('h', builder.add_refextract_authors_str),
            ('e', partial(builder.add_author, role='ed.')),
        )

        for code, handler in handlers:
            for item in force_force_list(subfields.get(code)):
                if not item:
                    continue
                handler(item)

        if '0' in subfields:
            record_id = get_int_value(subfields, '0')
            builder.set_record(get_record_ref(record_id, 'literature'))

        return builder.obj

    collected = self.get('references', [])
    for field in fields:
        collected.append(_build_reference(field))
    return collected
Esempio n. 44
0
def experiments2marc(self, key, values):
    """Convert experiment objects into MARC ``693`` entries.

    New entries are appended to the list already stored under ``693`` (or
    to a fresh list), which is returned. Falsy experiments are skipped.

    FIXME: use the flatten decorator once DoJSON 1.3.0 is released.
    """
    def _to_marc(experiment):
        """Map one experiment object onto its MARC subfields."""
        subfields = {
            'e': experiment.get('name'),
            's': experiment.get('start_year'),
            'd': experiment.get('end_year'),
        }
        # 'z' is only emitted for experiments flagged as current.
        if experiment.get('current'):
            subfields['z'] = 'current'
        record_id = get_recid_from_ref(experiment.get('record', None))
        if record_id:
            subfields['0'] = record_id
        return subfields

    existing = self.get('693', [])
    existing.extend(
        _to_marc(experiment)
        for experiment in force_force_list(values)
        if experiment
    )

    return existing
Esempio n. 45
0
def authors2marc(self, key, value):
    """Main Entry-Personal Name.

    Returns the MARC subfields of the first author. Remaining authors, if
    any, are converted the same way and stored in a new list assigned to
    ``self["700"]``.
    """
    entries = force_force_list(value)

    def _marc_author(entry):
        """Translate a single author object into MARC subfields."""
        institutions = [
            item.get('value') for item in entry.get('affiliations', [])
        ]
        marc = {
            'a': entry.get('full_name'),
            'e': entry.get('role'),
            'q': entry.get('alternative_name'),
            'i': entry.get('inspire_id'),
            'j': entry.get('orcid'),
            'm': entry.get('email'),
            'u': institutions,
            'x': get_recid_from_ref(entry.get('record')),
            'y': entry.get('curated_relation')
        }
        return marc

    first, others = entries[0:1], entries[1:]
    if others:
        # Replaces whatever "700" held before.
        self["700"] = []
        for other in others:
            self["700"].append(_marc_author(other))

    # Indexing (not unpacking ``first``) keeps the IndexError on empty input.
    return _marc_author(entries[0])
Esempio n. 46
0
def name(self, key, value):
    """Name information.

    Only the first field in ``value`` is used. Its ``d`` subfield is stored
    under ``dates`` unless that key is already set, and the ``a``, ``b``,
    ``c``, ``g`` and ``q`` subfields are returned as ``value``,
    ``numeration``, ``title``, ``status`` and ``preferred_name``.

    Note carried over from the original documentation: the MARC name and
    date fields are meant to be split further in JSON (last/first name,
    birth/death year); this function itself performs no such split.

    Admissible string values for `100__g`:
    + active
    + departed
    + retired
    + deceased

    The only accepted value in the `100__c` field is:
    + Sir

    Values accepted for `100__b`:
    + Jr.
    + Sr.
    + Roman numerals (like VII)
    """
    first_field = force_force_list(value)[0]

    self.setdefault('dates', first_field.get('d'))

    result = {
        'value': first_field.get('a'),
        'numeration': first_field.get('b'),
        'title': first_field.get('c'),
        'status': first_field.get('g'),
        'preferred_name': first_field.get('q'),
    }
    return result
Esempio n. 47
0
def regions(self, key, value):
    """Translate region names from subfield ``a`` into canonical names.

    Each ``a`` entry is split with ``COMMA_OR_SLASH`` and every piece is
    looked up in the table below. Pieces that are not in the table yield
    ``None`` in the returned list.
    """
    # Known spellings (including historical typos) -> canonical region.
    canonical_names = {
        'AF': 'Africa',
        'Africa': 'Africa',
        'Asia': 'Asia',
        'Australia': 'Australasia',
        'Australasia': 'Australasia',
        'eu': 'Europe',
        'Europe': 'Europe',
        'Middle East': 'Middle East',
        'na': 'North America',
        'United States': 'North America',
        'Noth America': 'North America',
        'North America': 'North America',
        'North Americsa': 'North America',
        'South America': 'South America',
    }

    return [
        canonical_names.get(piece)
        for entry in force_force_list(value.get('a'))
        for piece in COMMA_OR_SLASH.split(entry)
    ]
Esempio n. 48
0
def name(self, key, value):
    """Name information.

    Reads the first field of ``value`` only: subfield ``d`` becomes the
    default for ``dates`` (an existing ``dates`` entry is left alone), and
    a dictionary built from subfields ``a``, ``b``, ``c``, ``g`` and ``q``
    is returned.

    Note carried over from the original documentation: in JSON the name is
    meant to be split into `last_name` and `first_name`, and the dates into
    `birth_year` and `death_year`; that split is not done here.

    Admissible string values for `100__g`:
    + active
    + departed
    + retired
    + deceased

    The only accepted value in the `100__c` field is:
    + Sir

    Values accepted for `100__b`:
    + Jr.
    + Sr.
    + Roman numerals (like VII)
    """
    fields = force_force_list(value)
    primary = fields[0]
    self.setdefault('dates', primary.get('d'))

    # JSON key -> MARC subfield code, in output order.
    layout = (
        ('value', 'a'),
        ('numeration', 'b'),
        ('title', 'c'),
        ('status', 'g'),
        ('preferred_name', 'q'),
    )
    name_info = {}
    for json_key, code in layout:
        name_info[json_key] = primary.get(code)
    return name_info
Esempio n. 49
0
    def get_value(value):
        """Build one reference object from a field's subfields.

        Each subfield code is routed to a ``ReferenceBuilder`` method in the
        fixed order given by the dispatch table; falsy entries are ignored.
        When subfield ``0`` is present, the referenced literature record is
        attached as well.
        """
        # Retrieve fields as described here:
        # https://twiki.cern.ch/twiki/bin/view/Inspire/DevelopmentRecordMarkup.
        builder = ReferenceBuilder()
        add_raw_reference = partial(builder.add_raw_reference, source='dojson')
        add_editor = partial(builder.add_author, role='ed.')

        dispatch = (
            ('o', builder.set_number),
            ('m', builder.add_misc),
            ('x', add_raw_reference),
            ('1', builder.set_texkey),
            ('u', builder.add_url),
            ('r', builder.add_report_number),
            ('s', builder.set_pubnote),
            ('p', builder.set_publisher),
            ('y', builder.set_year),
            ('i', builder.add_uid),
            ('b', builder.add_uid),
            ('a', builder.add_uid),
            ('c', builder.add_collaboration),
            ('q', builder.add_title),
            ('t', builder.add_title),
            ('h', builder.add_refextract_authors_str),
            ('e', add_editor),
        )

        for subfield, handle in dispatch:
            entries = force_force_list(value.get(subfield))
            for entry in entries:
                if entry:
                    handle(entry)

        if '0' in value:
            builder.set_record(
                get_record_ref(get_int_value(value, '0'), 'literature'))

        return builder.obj
Esempio n. 50
0
def field_categories(self, key, value):
    """Append one category entry per ``a`` term to ``self['field_categories']``.

    Each entry records the raw term and raw scheme (``_term``, ``_scheme``)
    next to the classified ``term``; ``scheme`` is ``INSPIRE`` only when
    classification succeeded. A source containing ``automatically`` is
    replaced by ``INSPIRE``. Nothing is returned.
    """
    categories = self.setdefault('field_categories', [])

    for raw_term in force_force_list(value.get('a')) or ():
        classified = classify_field(raw_term)

        raw_scheme = value.get('2')
        if isinstance(raw_scheme, (list, tuple)):
            # Repeated subfield: only the first scheme is kept.
            raw_scheme = raw_scheme[0]

        source = value.get('9')
        if source and 'automatically' in source:
            source = 'INSPIRE'

        entry = {
            'source': source,
            '_scheme': raw_scheme,
            'scheme': 'INSPIRE' if classified else None,
            '_term': raw_term,
            'term': classified,
        }
        categories.append(entry)
Esempio n. 51
0
def experiments2marc(self, key, values):
    """Information about experiments.

    Extends the list stored under ``693`` (a new list when absent) with
    one MARC dictionary per truthy experiment and returns it.

    FIXME: use the flatten decorator once DoJSON 1.3.0 is released.
    """
    def _convert(source):
        """Return the MARC subfields for a single experiment object."""
        converted = {}
        converted['e'] = source.get('name')
        converted['s'] = source.get('start_year')
        converted['d'] = source.get('end_year')

        is_current = source.get('current')
        if is_current:
            converted['z'] = 'current'

        linked_recid = get_recid_from_ref(source.get('record', None))
        if linked_recid:
            converted['0'] = linked_recid

        return converted

    result = self.get('693', [])
    for item in force_force_list(values):
        if not item:
            continue
        result.append(_convert(item))

    return result
Esempio n. 52
0
def titles(self, key, value):
    """Fill ``self['titles']`` from title fields and return the list.

    A ``245*`` key marks the main title, which is placed first. A ``242*``
    key marks a translated title, which is stored under
    ``title_translations`` instead, provided it has text and a language
    is detected for it. Every other key appends to the end of ``titles``.
    """
    MAIN_PREFIX = '245'
    TRANSLATED_PREFIX = '242'

    stored = self.setdefault('titles', [])
    for subfields in force_force_list(value):
        title_text = subfields.get('a')
        candidate = {
            'title': title_text,
            'subtitle': force_single_element(subfields.get('b')),  # FIXME: #1484
            'source': subfields.get('9'),
        }

        if key.startswith(MAIN_PREFIX):
            stored.insert(0, candidate)
        elif not key.startswith(TRANSLATED_PREFIX):
            stored.append(candidate)
        elif title_text:
            language = langdetect.detect(title_text)
            if language:
                candidate['language'] = language
                translations = self.setdefault('title_translations', [])
                translations.append(candidate)

    return stored
Esempio n. 53
0
def spires_sysnos2marc(self, key, value):
    """970 SPIRES number and new recid.

    Resolves every reference in ``value`` with ``get_recid_from_ref`` and
    appends a ``{'d': recid}`` entry for each truthy result to the list
    stored under ``970`` (a new list when absent), which is returned.
    """
    refs = force_force_list(value)
    marc_fields = self.get('970', [])

    # Resolve everything first, then append, so a failing lookup cannot
    # leave the stored list half-updated.
    resolved = list(map(get_recid_from_ref, refs))
    for recid in filter(None, resolved):
        marc_fields.append({'d': recid})

    return marc_fields
Esempio n. 54
0
def name_variants(self, key, value):
    """Variants of the name.

    Fields whose source (subfield ``9``) is set but is not one of the
    accepted sources are ignored, and the current ``name_variants`` list
    is returned unchanged. Otherwise any ``g`` entries are added to
    ``extra_words`` and a new variant built from ``9`` and ``a`` is
    appended.
    """
    accepted_sources = ("DESY_AFF", "ADS", "INSPIRE")

    source = value.get('9')
    if source and source not in accepted_sources:
        return self.get('name_variants', [])

    extra = value.get('g')
    if extra:
        self.setdefault('extra_words', []).extend(force_force_list(extra))

    variants = self.get('name_variants', [])
    variant = {
        'source': source,
        'value': force_force_list(value.get('a', [])),
    }
    variants.append(variant)

    return variants
Esempio n. 55
0
    def _was_not_published(json):
        """Tell whether no ``publication_info`` entry has a page or article id.

        Returns ``True`` when every entry lacks both ``page_start`` and
        ``artid``, which includes the case of no entries at all.
        """
        entries = force_force_list(get_value(json, 'publication_info'))

        return all(
            not ('page_start' in entry or 'artid' in entry)
            for entry in entries
        )
Esempio n. 56
0
def collaboration(self, key, value):
    """Collaboration of experiment.

    Collects the ``g`` subfields of all fields and orders them by length.
    The shortest is returned; the others, if any, are stored under
    ``collaboration_alternative_names``. Returns ``None`` when no field
    has a ``g`` subfield.
    """
    names = [field["g"] for field in force_force_list(value) if 'g' in field]
    names.sort(key=len)

    if not names:
        return None

    alternatives = names[1:]
    if alternatives:
        self['collaboration_alternative_names'] = alternatives
    return names[0]
Esempio n. 57
0
def other_names(self, key, value):
    """Other variations of the name.

    Usually a different way of writing the primary name. The ``a``
    subfield entries are added to the list already stored under
    ``other_names`` (a new list when absent), which is returned.
    """
    names = self.get('other_names', [])
    for variant in force_force_list(value.get('a')):
        names.append(variant)

    return names
Esempio n. 58
0
        def _get_full_name(value):
            a_values = force_force_list(value.get('a'))
            if a_values:
                if len(a_values) > 1:
                    logger.warning(
                        'Record with mashed up authors list. '
                        'Taking first author: %s', a_values[0])

                return a_values[0]