def build_ils_contributor(value):
    """Create the contributors for books.

    :param value: mapping of subfield codes to values. The keys read here
        are ``a``, ``e``, ``q``, ``u`` and an optional ``name``.
    :returns: an empty list when subfield ``a`` is missing or falsy,
        otherwise a contributor dict without any ``None``-valued keys.
    """
    if not value.get("a"):
        return []

    # "author" is the role used when subfield "e" is absent.
    role = _get_correct_ils_contributor_role("e", value.get("e", "author"))

    contributor = {
        "identifiers": _extract_json_ils_ids(value, "scheme") or None,
        "full_name": value.get("name") or clean_val("a", value, str),
        "alternative_names": [],
        "type": "PERSON",
    }

    if role:
        contributor["roles"] = [role]

    subfield_q = clean_val("q", value, str)
    if subfield_q:
        contributor["alternative_names"] = [subfield_q]

    value_u = value.get("u")
    if value_u:
        # Filter out *every* "et al." marker; list.remove() only dropped the
        # first occurrence, letting duplicates through as affiliations.
        ignored = ("et al.", "et al")
        contributor["affiliations"] = [
            {"name": name}
            for name in force_list(value_u)
            if name not in ignored
        ]

    # Keys whose value is None (e.g. "identifiers") are omitted entirely.
    return {k: v for k, v in contributor.items() if v is not None}
def publication_additional(self, key, value):
    """Translates additional publication info & other related_records field.

    For each entry of ``value`` the pages (subfield ``k``) are merged into
    the publication info at the same position, or appended as a new entry
    when no such position exists. A related record id (subfield ``b``) adds
    an "is chapter of" link to the record's ``urls``.

    :param self: record being built; ``publication_info`` and ``urls`` are
        read and ``urls`` is written back.
    :param key: unused.
    :param value: one field value or a collection of them.
    :returns: the updated publication info list.
    """
    publication_info = self.get("publication_info", [])
    urls = self.get("urls", [])
    had_entries = bool(publication_info)
    spa_host = current_app.config["SPA_HOST"]

    for index, field in enumerate(force_list(value)):
        entry = {}

        pages = clean_val("k", field, str)
        if pages:
            entry["pages"] = pages

        related_recid = clean_val("b", field, str)
        if related_recid:
            link = {
                "value": f"{spa_host}/legacy/{related_recid}",
                "description": "is chapter of",
            }
            urls.append(link)

        # Merge into the entry at the same position when there is one,
        # otherwise grow the list.
        if had_entries and index < len(publication_info):
            publication_info[index].update(entry)
        else:
            publication_info.append(entry)

    self["urls"] = urls
    return publication_info
def serial(self, key, value):
    """Translate serial.

    :param self: record being built (not used here).
    :param key: unused.
    :param value: field value; subfields ``x`` (ISSN), ``v`` (volume) and
        ``a`` (title, required) are read.
    :returns: dict with ``title``, ``identifiers`` (a one-element ISSN list
        or ``None``) and ``volume`` (first run of digits in subfield ``v``
        or ``None``).
    """
    identifiers = None
    subfield_x = clean_val("x", value, str)
    if subfield_x:
        # NOTE(review): assumes rreplace() replaces counting from the right,
        # i.e. this strips one ";" from the ISSN - confirm.
        issn_value = rreplace(subfield_x, ";", "", 1).strip()
        if issn_value:
            identifiers = [{"scheme": "ISSN", "value": issn_value}]

    volume = clean_val("v", value, str)
    volume_numbers = re.findall(r"\d+", volume) if volume else []

    serial_title = (
        clean_val("a", value, str, req=True).rstrip(",").rstrip(";").strip()
    )

    for word in ("ser.", "Ser."):
        # Literal suffix test. The previous regex f"{word}$" treated the "."
        # as a wildcard, so a title merely ending in e.g. "sers" could
        # trigger the replacement of an earlier "ser." in the title.
        if serial_title.endswith(word):
            serial_title = rreplace(serial_title, word, "series", 1)

    # remove excess white spaces
    serial_title = " ".join(serial_title.split())

    return {
        "title": serial_title.strip(),
        "identifiers": identifiers,
        "volume": volume_numbers[0] if volume_numbers else None,
    }
def identifiers(self, key, value):
    """Translate identifiers.

    Every entry of ``value`` with a non-empty subfield ``a`` becomes an ISBN
    identifier with material ``DIGITAL``; entries already present in the
    record's ``identifiers`` list are not added twice.

    :param self: record being built; its ``identifiers`` list is read.
    :param key: unused.
    :param value: one field value or a collection of them.
    :returns: the (possibly extended) identifiers list.
    """
    known_isbns = self.get("identifiers", [])

    for field in force_list(value):
        # NOTE(review): subfield "u" is read but its value is never used;
        # the material below is always "DIGITAL".
        _unused_material = clean_val("u", field, str)

        isbn_value = clean_val("a", field, str)
        if not isbn_value:
            continue

        entry = {"value": isbn_value, "scheme": "ISBN", "material": "DIGITAL"}
        if entry not in known_isbns:
            known_isbns.append(entry)

    return known_isbns
def imprint(self, key, value):
    """Translate imprint field.

    Stores the publication year (subfield ``c`` without trailing dots) on
    the record and returns the imprint itself.

    :param self: record being built; ``publication_year`` is read and set.
    :param key: unused.
    :param value: field value; subfields ``c`` and ``a`` are read.
    :returns: dict with ``place`` (subfield ``a`` without trailing colons)
        and the fixed publisher ``Springer``.
    :raises UnexpectedValue: when the record already has a publication year.
    """
    if self.get("publication_year"):
        raise UnexpectedValue(subfield="e", message="doubled publication year")

    year = clean_val("c", value, str)
    self["publication_year"] = year.rstrip(".")

    place = clean_val("a", value, str)
    return {"place": place.rstrip(":"), "publisher": "Springer"}
def title(self, key, value):
    """Translates title.

    When subfield ``b`` is present it is recorded as a ``SUBTITLE`` entry in
    the record's ``alternative_titles``.

    :param self: record being built.
    :param key: unused.
    :param value: field value; subfield ``a`` is required.
    :returns: the content of subfield ``a``.
    :raises UnexpectedValue: when the record already has a title.
    """
    if "title" in self:
        raise UnexpectedValue(message="Ambiguous title")

    if "b" in value:
        alt_titles = self.get("alternative_titles", [])
        subtitle = {"value": clean_val("b", value, str), "type": "SUBTITLE"}
        alt_titles.append(subtitle)
        self["alternative_titles"] = alt_titles

    main_title = clean_val("a", value, str, req=True)
    return main_title
def identifiers(self, key, value):
    """Translate identifiers.

    Every entry of ``value`` must declare the scheme ``doi`` (case
    insensitive) in subfield ``2``; it is then added as a DOI identifier
    unless an equal entry already exists.

    :param self: record being built; its ``identifiers`` list is read.
    :param key: unused.
    :param value: one field value or a collection of them. Subfield ``a``
        holds the DOI and ``u`` the material (``DIGITAL`` when empty).
    :returns: the (possibly extended) identifiers list.
    :raises ManualImportRequired: when subfield ``2`` is missing or is not
        ``doi``.
    """
    _identifiers = self.get("identifiers", [])

    for v in force_list(value):
        material = clean_val("u", v, str) or "DIGITAL"
        sub_a = clean_val("a", v, str)
        sub_2 = clean_val("2", v, str)

        # A missing subfield "2" used to crash on ``.lower()``; treat it
        # like any other non-DOI scheme instead.
        if not sub_2 or sub_2.lower() != "doi":
            raise ManualImportRequired("wrong DOI marc")

        doi = {"value": sub_a, "scheme": "DOI", "material": material}
        if doi not in _identifiers:
            _identifiers.append(doi)

    return _identifiers
def authors(self, key, value):
    """Translates authors.

    Appends one ``PERSON`` author to the record's ``authors`` list.

    :param self: record being built; its ``authors`` list is read.
    :param key: unused.
    :param value: field value; subfield ``a`` (required) is the name and
        ``e`` is passed to the role lookup.
    :returns: the extended authors list.
    """
    collected = self.get("authors", [])

    # Trailing commas are stripped from the name.
    full_name = clean_val("a", value, str, req=True).rstrip(",")
    role = _get_correct_ils_contributor_role("e", clean_val("e", value, str))

    collected.append(
        {
            "full_name": full_name,
            "roles": [role],
            "type": "PERSON",
        }
    )
    return collected
def subjects_dewey(self, key, value):
    """Translates subject classification.

    Adds subfield ``a`` as a ``DEWEY`` subject unless an equal subject is
    already present.

    :param self: record being built; its ``subjects`` list is read.
    :param key: unused.
    :param value: field value.
    :returns: the (possibly extended) subjects list.
    """
    subjects = self.get("subjects", [])
    dewey = {"scheme": "DEWEY", "value": clean_val("a", value, str)}

    if dewey in subjects:
        return subjects

    subjects.append(dewey)
    return subjects
def eitem_int_note(self, key, value):
    """Translate eitem internal note.

    :param self: record being built; its ``_eitem`` mapping is read.
    :param key: unused.
    :param value: field value; subfield ``a`` holds the note.
    :returns: the ``_eitem`` mapping with ``internal_notes`` set.
    """
    eitem_data = self.get("_eitem", {})
    eitem_data["internal_notes"] = clean_val("a", value, str)
    return eitem_data
def title(self, key, value):
    """Translates title.

    When subfield ``b`` is present it is stored, without trailing slashes,
    as a ``SUBTITLE`` entry in the record's ``alternative_titles``.

    :param self: record being built.
    :param key: unused.
    :param value: field value; subfield ``a`` is required.
    :returns: subfield ``a`` without trailing slashes and with runs of
        whitespace collapsed to single spaces.
    :raises UnexpectedValue: when the record already has a title.
    """
    if "title" in self:
        raise UnexpectedValue(message="Ambiguous title")

    if "b" in value:
        alt_titles = self.get("alternative_titles", [])
        subtitle_text = clean_val("b", value, str).rstrip("/")
        alt_titles.append({"value": subtitle_text, "type": "SUBTITLE"})
        self["alternative_titles"] = alt_titles

    raw_title = clean_val("a", value, str, req=True).rstrip("/")
    # split()/join collapses any run of whitespace into one space
    return " ".join(raw_title.split())
def languages(self, key, value):
    """Translates languages fields.

    :param self: record being built (not used here).
    :param key: unused.
    :param value: field value; subfield ``a`` holds the language.
    :returns: the upper-cased ``alpha_3`` code of the language found by
        ``pycountry.languages.lookup``.
    :raises UnexpectedValue: when subfield ``a`` is empty or the lookup
        fails.
    """
    lang = clean_val("a", value, str)
    if not lang:
        # Previously an empty value crashed with AttributeError on .lower().
        raise UnexpectedValue(subfield="a")

    try:
        return pycountry.languages.lookup(lang.lower()).alpha_3.upper()
    except (KeyError, AttributeError, LookupError):
        raise UnexpectedValue(subfield="a")
def edition(self, key, value):
    """Translate edition field.

    Removes the words "ed." and "edition", trailing dots and any four-digit
    number from subfield ``a``.

    :param self: record being built (not used here).
    :param key: unused.
    :param value: field value.
    :returns: the cleaned, whitespace-stripped edition string.
    """
    raw = clean_val("a", value, str)
    without_words = raw.replace("ed.", "").replace("edition", "")
    without_dots = without_words.rstrip(".")
    without_year = re.sub(r"\d{4}", "", without_dots)
    return without_year.strip()
def open_access(self, key, value):
    """Translate open access.

    Flags the record's ``_eitem`` as open access when subfield ``a`` reads
    "open access" (case insensitive). The function never returns a value.

    :param self: record being built; ``_eitem`` is read and, when flagged,
        written back.
    :param key: unused.
    :param value: field value.
    :raises IgnoreKey: always, with key ``open_access``.
    """
    access_note = clean_val("a", value, str)
    eitem_data = self.get("_eitem", {})

    is_open_access = access_note.lower() == "open access"
    if is_open_access:
        eitem_data["open_access"] = True
        self["_eitem"] = eitem_data

    raise IgnoreKey("open_access")
def test_clean_val(subfield, value, var_type, req, default, manual, output):
    """Test if clean value works properly.

    Calls ``clean_val`` with the given arguments and compares the result
    with ``output``.
    """
    cleaned = clean_val(
        subfield,
        value,
        var_type,
        req=req,
        default=default,
        manual=manual,
    )
    assert cleaned == output
def id_isbns(self, key, value):
    """Translate identifiers isbn.

    Adds an ISBN identifier with material ``PRINT_VERSION`` to the record's
    ``identifiers`` list unless an equal entry is already there.

    :param self: record being built; its ``identifiers`` list is read.
    :param key: unused.
    :param value: field value; subfields ``z``, ``a`` and ``u`` are read.
    :returns: the (possibly extended) identifiers list.
    """
    _identifiers = self.get("identifiers", [])

    # Subfield "z" keeps precedence (it was the only value ever stored) and
    # "a" is the fallback. Before, a field carrying only "a" passed the
    # guard below but produced an identifier whose value was None.
    # NOTE(review): confirm that an ISBN taken from "a" should also be
    # tagged PRINT_VERSION.
    sub_a = clean_val("a", value, str)
    isbn_value = clean_val("z", value, str) or sub_a

    # NOTE(review): subfield "u" was read into an unused local; the call is
    # kept in case clean_val validates the subfield - confirm and drop.
    clean_val("u", value, str)

    if isbn_value:
        isbn = {
            "scheme": "ISBN",
            "value": isbn_value,
            "material": "PRINT_VERSION",
        }
        if isbn not in _identifiers:
            _identifiers.append(isbn)

    return _identifiers
def document_type(self, key, value):
    """Translate document type.

    :param self: record being built (not used here).
    :param key: unused.
    :param value: field value; subfield ``a`` holds the document type.
    :returns: subfield ``a`` when it is one of ``Document.DOCUMENT_TYPES``.
    :raises ManualImportRequired: for any other document type.
    """
    doc_type = clean_val("a", value, str)

    if doc_type not in Document.DOCUMENT_TYPES:
        raise ManualImportRequired(
            subfield="a",
            message="Document type {} is not allowed.".format(doc_type),
        )

    return doc_type
def print_identifiers(self, key, value):
    """Translate identifiers.

    Adds the required subfield ``z`` as an ISBN with material
    ``PRINT_VERSION`` unless an equal identifier is already present.

    :param self: record being built; its ``identifiers`` list is read.
    :param key: unused.
    :param value: field value.
    :returns: the (possibly extended) identifiers list.
    """
    known = self.get("identifiers", [])
    print_isbn = {
        "scheme": "ISBN",
        "value": clean_val("z", value, str, req=True),
        "material": "PRINT_VERSION",
    }

    if print_isbn in known:
        return known

    known.append(print_isbn)
    return known
def subject_classification(self, key, value):
    """Translates subject classification field.

    :param self: record being built; its ``subjects`` list is read.
    :param key: unused.
    :param value: field value; subfield ``c`` is required.
    :returns: an ``ICS`` subject dict built from subfield ``c``.
    :raises IgnoreKey: when an equal subject is already in ``subjects``.
    """
    known_subjects = self.get("subjects", [])
    candidate = {
        "value": clean_val("c", value, str, req=True),
        "scheme": "ICS",
    }

    if candidate in known_subjects:
        raise IgnoreKey("subjects")

    return candidate
def authors(self, key, value):
    """Translates authors.

    Appends one ``PERSON`` author to the record's ``authors`` list. An ORCID
    identifier is attached when subfield ``0`` is present.

    :param self: record being built; its ``authors`` list is read.
    :param key: unused.
    :param value: field value; subfield ``a`` (required) is the name and
        ``e`` is passed to the role lookup.
    :returns: the extended authors list.
    """
    collected = self.get("authors", [])

    orcid = clean_val("0", value, str)
    identifiers = [{"scheme": "ORCID", "value": orcid}] if orcid else None

    # Trailing dots are stripped from the name.
    full_name = clean_val("a", value, str, req=True).rstrip(".")
    role = _get_correct_ils_contributor_role("e", clean_val("e", value, str))

    collected.append(
        {
            "full_name": full_name,
            "identifiers": identifiers,
            "roles": [role],
            "type": "PERSON",
        }
    )
    return collected
def eitem(self, key, value):
    """Translate included eitems.

    Builds one "e-book" url entry from subfield ``u`` of every entry of
    ``value`` and stores the list under ``urls``, replacing any previous
    ``urls`` entry of the ``_eitem`` mapping.

    :param self: record being built; its ``_eitem`` mapping is read.
    :param key: unused.
    :param value: one field value or a collection of them.
    :returns: the updated ``_eitem`` mapping.
    """
    eitem_data = self.get("_eitem", {})
    eitem_data["urls"] = [
        {"description": "e-book", "value": clean_val("u", field, str)}
        for field in force_list(value)
    ]
    return eitem_data
def alternative_identifiers(self, key, value):
    """Translate alternative identifiers.

    A subfield ``a`` containing "(Au-PeEL)" is turned into an ``EBL``
    identifier after removing the "(Au-PeEL)" and "EBL" markers; any other
    value leaves the list untouched.

    :param self: record being built; its ``alternative_identifiers`` list is
        read.
    :param key: unused.
    :param value: field value.
    :returns: the (possibly extended) alternative identifiers list.
    """
    alt_ids = self.get("alternative_identifiers", [])

    if "a" not in value:
        return alt_ids

    raw_id = clean_val("a", value, str, req=True)
    if "(Au-PeEL)" not in raw_id:
        return alt_ids

    stripped_id = raw_id.replace("(Au-PeEL)", "").replace("EBL", "")
    ebl_identifier = {"scheme": "EBL", "value": stripped_id}
    if ebl_identifier not in alt_ids:
        alt_ids.append(ebl_identifier)

    return alt_ids
def languages(self, key, value):
    """Translates languages fields.

    :param self: record being built; its ``languages`` list is read.
    :param key: unused.
    :param value: field value; subfield ``b`` holds the language.
    :returns: the upper-cased ``alpha_3`` code of the language found by
        ``pycountry.languages.lookup``.
    :raises UnexpectedValue: when subfield ``b`` is empty or the lookup
        fails.
    :raises IgnoreKey: when the language is already in ``languages``.
    """
    lang = clean_val("b", value, str)
    if not lang:
        # Previously an empty value crashed with AttributeError on .lower().
        raise UnexpectedValue(subfield="b")

    _languages = self.get("languages", [])

    try:
        new_lang = pycountry.languages.lookup(lang.lower()).alpha_3.upper()
    except (KeyError, AttributeError, LookupError):
        # The value comes from subfield "b"; the error used to name "a".
        raise UnexpectedValue(subfield="b")

    # Raised outside the try block so the handler above can never
    # intercept it.
    if new_lang in _languages:
        raise IgnoreKey("languages")

    return new_lang
def digital_identifiers(self, key, value):
    """Translate identifiers.

    Adds the required subfield ``a`` as an ISBN with material ``DIGITAL``
    unless an equal identifier is already present.

    :param self: record being built; its ``identifiers`` list is read.
    :param key: unused.
    :param value: field value.
    :returns: the (possibly extended) identifiers list.
    """
    known = self.get("identifiers", [])
    digital_isbn = {
        "scheme": "ISBN",
        "value": clean_val("a", value, str, req=True),
        "material": "DIGITAL",
    }

    if digital_isbn in known:
        return known

    known.append(digital_isbn)
    return known
def title_translations(self, key, value):
    """Translates title translations.

    Subfield ``a`` becomes a ``TRANSLATED_TITLE`` and subfield ``b`` a
    ``TRANSLATED_SUBTITLE``; both are tagged with language ``FRA``.

    :param self: record being built; its ``alternative_titles`` list is
        read.
    :param key: unused.
    :param value: field value.
    :returns: the (possibly extended) alternative titles list.
    """
    alt_titles = self.get("alternative_titles", [])

    subfield_types = (
        ("a", "TRANSLATED_TITLE"),
        ("b", "TRANSLATED_SUBTITLE"),
    )
    for subfield, title_type in subfield_types:
        if subfield not in value:
            continue
        alt_titles.append(
            {
                "value": clean_val(subfield, value, str, req=True),
                "type": title_type,
                "language": "FRA",
            }
        )

    return alt_titles
def keywords(self, key, value):
    """Translate keywords.

    Adds subfield ``a`` (required, trailing dots removed) as a keyword with
    source ``SPR`` unless an equal keyword is already present.

    :param self: record being built; its ``keywords`` list is read.
    :param key: unused.
    :param value: field value.
    :returns: the (possibly extended) keywords list.
    """
    known = self.get("keywords", [])
    keyword_text = clean_val("a", value, str, req=True).rstrip(".")
    entry = {"source": "SPR", "value": keyword_text}

    if entry in known:
        return known

    known.append(entry)
    return known
def number_of_pages(self, key, value):
    """Translates number_of_pages fields.

    Subfield ``a`` is split by ``extract_parts``; a physical description
    found there is stored on the record as a side effect.

    :param self: record being built; ``physical_description`` may be set.
    :param key: unused.
    :param value: field value.
    :returns: the ``number_of_pages`` part of subfield ``a``.
    :raises IgnoreKey: when ``is_excluded`` accepts the value.
    :raises UnexpectedValue: when the value has extra content or no number
        of pages.
    """
    raw = clean_val("a", value, str)
    if is_excluded(raw):
        raise IgnoreKey("number_of_pages")

    parts = extract_parts(raw)
    if parts["has_extra"]:
        raise UnexpectedValue(subfield="a")

    description = parts["physical_description"]
    if description:
        self["physical_description"] = description

    pages = parts["number_of_pages"]
    if not pages:
        raise UnexpectedValue(subfield="a")
    return pages
def test_clean_val_regexp(
    subfield, value, var_type, req, default, manual, regexp, output
):
    """Check ``clean_val`` against ``output`` when a regex format is given."""
    cleaned = clean_val(
        subfield,
        value,
        var_type,
        req=req,
        default=default,
        manual=manual,
        regex_format=regexp,
    )
    assert cleaned == output
def abstract(self, key, value):
    """Translate abstract.

    :param self: record being built (not used here).
    :param key: unused.
    :param value: field value.
    :returns: the content of subfield ``a`` as produced by ``clean_val``.
    """
    abstract_text = clean_val("a", value, str)
    return abstract_text
def number_of_pages(self, key, value):
    """Translate number of pages.

    :param self: record being built (not used here).
    :param key: unused.
    :param value: field value; subfield ``a`` is searched for digits.
    :returns: the first run of digits in subfield ``a``, as a string.
    :raises IndexError: when subfield ``a`` contains no digits.
    """
    raw_pages = clean_val("a", value, str)
    digit_runs = re.findall(r"\d+", raw_pages)
    first_number = digit_runs[0]
    return first_number