def barcode(self, key, value):
    """Translate barcode fields into volume barcodes or report numbers.

    Each field in ``value`` is handled in one of two ways:

    * subfield ``a`` or ``9`` present: the field is stored as a
      ``report_number`` identifier on the record (flagged ``hidden`` when
      it came from subfield ``9``) and processing stops with ``IgnoreKey``;
    * otherwise subfields ``n`` (volume number) and ``x`` (barcode) are
      both required and the barcode is attached to the matching volume
      through ``_insert_volume``.

    :raises UnexpectedValue: a report number is mixed with ``n``/``x``, or
        both ``a`` and ``9`` are set.
    :raises MissingRequiredField: ``x`` or ``n`` is missing, or the volume
        number cannot be parsed.
    :raises IgnoreKey: always, once the fields have been processed; the
        results are written to ``self`` rather than returned.
    """
    migration_data = self["_migration"]

    for field in force_list(value):
        sub_a, sub_n, sub_x, sub_9 = (
            clean_val(code, field, str) for code in ("a", "n", "x", "9")
        )

        if sub_a or sub_9:
            # A report number may not carry volume data, and subfields
            # "a" and "9" exclude each other.
            conflicting = sub_n or sub_x or (sub_a and sub_9)
            if conflicting:
                raise UnexpectedValue()

            report_number = {
                "scheme": "report_number",
                "value": sub_a or sub_9,
            }
            if sub_9:
                report_number["hidden"] = True

            known_identifiers = self.get("identifiers", [])
            known_identifiers.append(report_number)
            self["identifiers"] = known_identifiers
            raise IgnoreKey("barcode")

        if not sub_x:
            raise MissingRequiredField(
                subfield="x",
                message=" this record is missing a barcode number",
            )
        if not sub_n:
            raise MissingRequiredField(
                subfield="n", message=" this record is missing a volume number"
            )

        number = extract_volume_number(
            sub_n, raise_exception=True, subfield="n"
        )
        _insert_volume(migration_data, number, {"barcode": sub_x})

    raise IgnoreKey("barcode")
def standard_numbers(self, key, value):
    """Append a ``STANDARD_NUMBER`` identifier built from subfield a or b.

    Subfield ``a`` takes precedence; ``b`` is used only when ``a`` is empty.

    :return: the record's identifiers list with the new entry appended.
    :raises MissingRequiredField: neither subfield holds a value.
    """
    collected = self.get("identifiers", [])
    from_a = clean_val("a", value, str)
    from_b = clean_val("b", value, str)

    number = from_a or from_b
    if not number:
        raise MissingRequiredField(subfield="a or b")

    collected.append(dict(value=number, scheme="STANDARD_NUMBER"))
    return collected
def report_numbers(self, key, value):
    """Translate report number fields (``037__`` and ``088__``).

    The identifier value is taken from subfield ``a``, then ``z``, then
    ``9``; the entry is flagged ``hidden`` when ``z`` or ``9`` is set.

    * ``037__``: an ``arXiv`` source (subfield ``9``) is delegated to
      ``arxiv_eprints`` and the key is ignored; anything else becomes a
      ``REPORT_NUMBER`` identifier.
    * ``088__``: fields carrying ``n``/``x`` are first handed to
      ``barcodes``; a ``REPORT_NUMBER`` identifier is added only when at
      least one of ``a``/``z``/``9`` has a value.

    :return: the record's identifiers list, possibly extended.
    :raises MissingRequiredField: none of the expected subfields is set.
    :raises IgnoreKey: for arXiv entries in ``037__``.
    """
    _identifiers = self.get("identifiers", [])
    sub_9 = clean_val("9", value, str)
    sub_a = clean_val("a", value, str)
    sub_z = clean_val("z", value, str)
    all_empty = not (sub_z or sub_a or sub_9)

    def build_entry():
        """Build the identifier dict from the cleaned subfields."""
        entry = {"value": sub_a or sub_z or sub_9, "scheme": "REPORT_NUMBER"}
        if sub_z or sub_9:
            entry["hidden"] = True
        return entry

    if key == "037__":
        if all_empty:
            raise MissingRequiredField(subfield="9 or a or z")
        if sub_9 == "arXiv":
            arxiv_eprints(self, key, value)
            raise IgnoreKey("identifiers")
        _identifiers.append(build_entry())

    if key == "088__":
        has_volume_data = "n" in value or "x" in value
        if has_volume_data:
            barcodes(self, key, value)
        if all_empty:
            if not has_volume_data:
                raise MissingRequiredField(subfield="9 or a or z or n or x")
            # Only volume/barcode data was present: there is no report
            # number to record, so do not append an identifier whose
            # value would be None.
        else:
            _identifiers.append(build_entry())

    return _identifiers
def number_of_volumes(self, key, value):
    """Translate the number of volumes from subfield a.

    The value is used only when ``extract_parts`` reports no
    ``number_of_pages`` for it and the text contains a ``v`` (which also
    covers ``vol``); the first run of digits is then returned.

    :return: the first group of digits found in subfield ``a``, as a string.
    :raises MissingRequiredField: the record has no ``title`` yet.
    :raises IgnoreKey: subfield ``a`` is absent or holds no volume count.
    """
    if not self.get("title", None):
        raise MissingRequiredField(
            subfield="a", message=" this record is missing a main title"
        )

    val_a = clean_val("a", value, str)
    if val_a is None:
        # Nothing to parse; without this guard the membership test below
        # fails with TypeError on None.
        raise IgnoreKey("number_of_volumes")

    parsed_a = extract_parts(val_a)
    # '"vol" in val_a' implies '"v" in val_a', so a single test suffices.
    if not parsed_a["number_of_pages"] and "v" in val_a:
        _volumes = re.findall(r"\d+", val_a)
        if _volumes:
            return _volumes[0]

    raise IgnoreKey("number_of_volumes")
def migration(self, key, value):
    """Translate volume titles into the record's ``_migration`` data.

    For every field in ``value`` the volume number (subfield ``n``) is
    parsed and a volume entry is inserted through ``_insert_volume`` with
    the volume title (subfield ``p``, falling back to the record title)
    and, when present, the publication year (subfield ``y``).

    :return: the updated ``_migration`` object.
    :raises UnexpectedValue: the field has neither ``n`` nor ``p``, has a
        title without a number, has several digit groups in ``n``, or has
        a publication year that is not a number within 1600-2021.
    :raises MissingRequiredField: the volume number cannot be parsed or
        the record has no main title.
    """
    volume_title = self.get("title", None)
    _migration = self["_migration"]

    for v in force_list(value):
        # check if it is a multipart monograph
        val_n = clean_val("n", v, str)
        val_p = clean_val("p", v, str)
        val_y = clean_val("y", v, str)

        if not val_n and not val_p:
            raise UnexpectedValue(
                subfield="n", message=" this record is probably not a series"
            )
        if val_p and not val_n:
            raise UnexpectedValue(
                subfield="n",
                message=" volume title exists but no volume number",
            )

        # val_n is guaranteed non-empty past the two checks above.
        if len(re.findall(r"\d+", val_n)) > 1:
            raise UnexpectedValue(
                subfield="n", message=" volume has more than one digit "
            )

        volume_number = extract_volume_number(
            val_n, raise_exception=True, subfield="n"
        )
        obj = {"title": val_p or volume_title}

        if val_y:
            # re.match only anchors the start, so values such as "1999a"
            # pass the pattern but make int() fail; report those as an
            # unexpected value instead of leaking a bare ValueError.
            try:
                year = int(val_y) if re.match(r"\d+", val_y) else None
            except ValueError:
                year = None
            # NOTE(review): the upper bound 2021 is hard-coded - confirm it
            # is still the intended cut-off.
            if year is None or not 1600 <= year <= 2021:
                raise UnexpectedValue(
                    subfield="y", message=" unrecognized publication year"
                )
            obj["publication_year"] = year

        _insert_volume(_migration, volume_number, obj)

    if not volume_title:
        raise MissingRequiredField(
            subfield="a", message=" this record is missing a main title"
        )

    # series created
    return _migration
def alternative_titles_doc(self, key, value):
    """Translate alternative title fields (``242__`` and ``246__``).

    * ``242__`` is delegated to ``alternative_titles`` and its result is
      added to the record's list.
    * ``246__`` with subfields ``n`` and ``p`` describes a volume: it is
      recorded under ``_migration["volumes"]``, the record is marked as
      multipart and the key is ignored.
    * ``246__`` without them contributes subfield ``a`` as an
      ``ALTERNATIVE_TITLE`` and subfield ``b`` as a ``SUBTITLE``.

    :return: the record's alternative titles list.
    :raises MissingRequiredField: only one of ``n``/``p`` is present, a
        present ``a``/``b`` subfield fails cleaning, or the volume number
        cannot be parsed.
    :raises IgnoreKey: after a volume title has been stored.
    """
    titles = self.get("alternative_titles", [])

    if key == "242__":
        titles += alternative_titles(self, key, value)
        return titles
    if key != "246__":
        return titles

    has_number = "n" in value
    has_part_title = "p" in value
    # Volume number and volume title must come together or not at all.
    if has_number != has_part_title:
        raise MissingRequiredField(subfield="n or p")

    if has_part_title:
        migration_data = self.get("_migration", {})
        migration_data.setdefault("volumes", [])

        raw_number = clean_val("n", value, str)
        volume_number = extract_volume_number(raw_number, raise_exception=True)
        volume_title = clean_val("p", value, str)
        migration_data["volumes"].append(
            {"volume": volume_number, "title": volume_title}
        )

        migration_data["is_multipart"] = True
        migration_data["record_type"] = "multipart"
        self["_migration"] = migration_data
        raise IgnoreKey("alternative_titles")

    for code, title_type in (("a", "ALTERNATIVE_TITLE"), ("b", "SUBTITLE")):
        if code in value:
            cleaned = clean_val(code, value, str, req=True)
            titles.append({"value": cleaned, "type": title_type})

    return titles
def extract_volume_number(value, search=False, raise_exception=False, subfield=None):
    """Extract the volume number from a string.

    :param value: text to parse; ``None`` is treated as "no match".
    :param search: when true, look for the pattern anywhere in the text
        (``RE_VOLUME_NUMBER.search``) instead of only at its start
        (``RE_VOLUME_NUMBER.match``).
    :param raise_exception: raise instead of returning ``None`` when the
        text does not match.
    :param subfield: subfield name reported in the raised exception.
    :return: the first captured group of the match, or ``None``.
    :raises MissingRequiredField: no match and ``raise_exception`` is set.
    """
    result = None
    # A missing subfield reaches this function as None; without the guard
    # value.strip() raises AttributeError instead of following the
    # documented "not matched" path.
    if value is not None:
        func = RE_VOLUME_NUMBER.search if search else RE_VOLUME_NUMBER.match
        result = func(value.strip())

    if result:
        return result.group(1)
    if raise_exception:
        raise MissingRequiredField(
            subfield=subfield, message=" failed to parse volume number"
        )
    return None
def clean_val(
    subfield,
    value,
    var_type,
    req=False,
    regex_format=None,
    default=None,
    manual=False,
    transform=None,
    multiple_values=False,
):
    """
    Tests values using common rules.

    :param subfield: marcxml subfield indicator
    :param value: marcxml value
    :param var_type: expected type for value to be cleaned
        (``str``, ``bool`` or ``int``)
    :param req: specifies if the value is required in the end schema
    :param regex_format: specifies if the value should have a pattern
    :param default: if value is missing and required it outputs default;
        any default other than ``None`` is honoured, including falsy ones
        such as ``0`` or ``False``
    :param manual: if the value should be cleaned manually during the migration
    :param transform: string transform function (or callable)
    :param multiple_values: allow multiple values in subfield
    :return: cleaned output value; a list of cleaned values when
        ``multiple_values`` is set and the subfield holds a tuple;
        ``None`` when the subfield is absent and not required
    :raises ManualImportRequired: ``manual`` is set and the subfield has a value
    :raises MissingRequiredField: the subfield is required, missing and no
        default was given
    :raises UnexpectedValue: the value cannot be converted to ``var_type``,
        or a tuple was found while ``multiple_values`` is false
    :raises NotImplementedError: ``var_type`` is not supported
    """

    def _clean(value_to_clean):
        """Convert a single raw value to ``var_type``; None stays None."""
        if value_to_clean is None:
            return None
        try:
            if var_type is str:
                return clean_str(value_to_clean, regex_format, req, transform)
            elif var_type is bool:
                return bool(value_to_clean)
            elif var_type is int:
                return int(value_to_clean)
            else:
                raise NotImplementedError
        except (ValueError, TypeError) as exc:
            # Keep the original conversion error attached as the cause.
            raise UnexpectedValue(subfield=subfield) from exc
        except (UnexpectedValue, MissingRequiredField) as e:
            # Enrich errors coming from clean_str with the field context.
            e.subfield = subfield
            e.message += str(force_list(value))
            raise

    to_clean = value.get(subfield)

    if manual and to_clean:
        raise ManualImportRequired(subfield=subfield)

    if req and to_clean is None:
        # Compare against None so that falsy defaults are not discarded.
        if default is not None:
            return default
        raise MissingRequiredField(subfield=subfield)

    if isinstance(to_clean, tuple):
        if not multiple_values:
            raise UnexpectedValue(subfield=subfield)
        return [_clean(v) for v in to_clean]

    return _clean(to_clean)