Example #1
0
def barcode(self, key, value):
    """Translate barcode fields into identifiers or per-volume barcodes.

    Each field in ``value`` is handled in one of two ways:

    * If subfield ``a`` or ``9`` is set, the field is stored as a
      ``report_number`` identifier on the record (flagged ``hidden`` when
      it came from ``9``). Combining it with ``n``/``x``, or setting both
      ``a`` and ``9``, is rejected.
    * Otherwise both ``n`` (volume number) and ``x`` (barcode) are required
      and the barcode is attached to that volume in ``self["_migration"]``.

    :raises UnexpectedValue: on a conflicting combination of subfields
    :raises MissingRequiredField: when ``n`` or ``x`` is missing
    :raises IgnoreKey: always, once processing is done
    """
    migration_data = self["_migration"]
    for field in force_list(value):
        # Subfields are cleaned in the fixed order a, n, x, 9.
        sub_a, sub_n, sub_x, sub_9 = (
            clean_val(code, field, str) for code in ("a", "n", "x", "9")
        )

        report_number = sub_a or sub_9
        if report_number:
            conflicting = sub_n or sub_x or (sub_a and sub_9)
            if conflicting:
                raise UnexpectedValue()
            identifier = {"scheme": "report_number", "value": report_number}
            if sub_9:
                identifier["hidden"] = True
            identifiers = self.get("identifiers", [])
            identifiers.append(identifier)
            self["identifiers"] = identifiers
            # NOTE(review): raising here ends the loop, so any remaining
            # fields in ``value`` are not processed - confirm this is
            # intended.
            raise IgnoreKey("barcode")

        if not sub_x:
            raise MissingRequiredField(
                subfield="x",
                message=" this record is missing a barcode number",
            )
        if not sub_n:
            raise MissingRequiredField(
                subfield="n", message=" this record is missing a volume number"
            )

        volume_number = extract_volume_number(
            sub_n, raise_exception=True, subfield="n"
        )
        _insert_volume(migration_data, volume_number, {"barcode": sub_x})
    raise IgnoreKey("barcode")
Example #2
0
def standard_numbers(self, key, value):
    """Append a STANDARD_NUMBER identifier built from subfield a or b.

    Subfield ``a`` wins over ``b`` when both are set. The entry is appended
    to the list obtained from ``self.get("identifiers", [])`` and that list
    is returned.

    :raises MissingRequiredField: when neither ``a`` nor ``b`` has a value
    """
    identifiers = self.get("identifiers", [])
    # Both subfields are always cleaned, even when ``a`` already has a value.
    sub_a = clean_val("a", value, str)
    sub_b = clean_val("b", value, str)
    number = sub_a or sub_b
    if not number:
        raise MissingRequiredField(subfield="a or b")

    entry = {"value": number, "scheme": "STANDARD_NUMBER"}
    identifiers.append(entry)
    return identifiers
Example #3
0
def report_numbers(self, key, value):
    """Translates report_numbers fields.

    Builds a ``REPORT_NUMBER`` identifier from the first non-empty of the
    subfields ``a``, ``z`` and ``9`` and appends it to the list obtained
    from ``self.get("identifiers", [])``. The identifier is flagged
    ``hidden`` when ``z`` or ``9`` is set.

    * ``037__``: a ``9`` subfield equal to ``"arXiv"`` delegates to
      ``arxiv_eprints`` and the key is then ignored.
    * ``088__``: fields carrying ``n`` or ``x`` are additionally passed to
      ``barcodes``. No report number is appended when none of
      ``9``/``a``/``z`` is present, so a field that only carries ``n``/``x``
      does not produce an identifier whose value is ``None``.

    :return: the (possibly extended) identifiers list
    :raises MissingRequiredField: when no usable subfield is present
    :raises IgnoreKey: for arXiv entries on ``037__``
    """
    def build_entry(f_a, f_z, f_9):
        entry = {"value": f_a or f_z or f_9, "scheme": "REPORT_NUMBER"}
        if f_z or f_9:
            entry["hidden"] = True
        return entry

    _identifiers = self.get("identifiers", [])

    sub_9 = clean_val("9", value, str)
    sub_a = clean_val("a", value, str)
    sub_z = clean_val("z", value, str)

    all_empty = not (sub_z or sub_a or sub_9)

    if key == "037__":
        if all_empty:
            raise MissingRequiredField(subfield="9 or a or z")

        if sub_9 == "arXiv":
            arxiv_eprints(self, key, value)
            raise IgnoreKey("identifiers")

        _identifiers.append(build_entry(sub_a, sub_z, sub_9))
    elif key == "088__":
        has_volume_info = "n" in value or "x" in value
        if has_volume_info:
            barcodes(self, key, value)
        elif all_empty:
            raise MissingRequiredField(subfield="9 or a or z or n or x")

        # Only record a report number when there is one; previously a field
        # with just n/x appended {"value": None, ...}.
        if not all_empty:
            _identifiers.append(build_entry(sub_a, sub_z, sub_9))
    return _identifiers
Example #4
0
def number_of_volumes(self, key, value):
    """Return the first run of digits in subfield a as the volume count.

    The value is only used when ``extract_parts`` reports no number of
    pages for it and the text contains a ``v``/``vol`` marker. In every
    other case the key is ignored.

    :raises MissingRequiredField: when the record has no title yet
    :raises IgnoreKey: when no volume count can be derived
    """
    if not self.get("title", None):
        raise MissingRequiredField(
            subfield="a", message=" this record is missing a main title"
        )

    sub_a = clean_val("a", value, str)
    parts = extract_parts(sub_a)
    # The marker test is only evaluated when no page count was parsed.
    if not parts["number_of_pages"] and any(
        marker in sub_a for marker in ("v", "vol")
    ):
        first_number = re.search(r"\d+", sub_a)
        if first_number:
            return first_number.group()
    raise IgnoreKey("number_of_volumes")
Example #5
0
def migration(self, key, value):
    """Translates volumes titles.

    For every field in ``value`` the volume number (subfield ``n``), the
    optional volume title (``p``) and the optional publication year (``y``)
    are read and stored through ``_insert_volume`` in ``self["_migration"]``.
    When ``p`` is absent the record's main title is used as the volume
    title.

    :return: the updated ``_migration`` object
    :raises UnexpectedValue: when ``n`` is missing, ``n`` contains more than
        one number, or ``y`` is not a year between 1600 and 2021
    :raises MissingRequiredField: when the volume number cannot be parsed or
        the record has no main title
    """
    volume_title = self.get("title", None)

    _migration = self["_migration"]

    for v in force_list(value):
        # check if it is a multipart monograph
        val_n = clean_val("n", v, str)
        val_p = clean_val("p", v, str)
        val_y = clean_val("y", v, str)
        if not val_n and not val_p:
            raise UnexpectedValue(
                subfield="n", message=" this record is probably not a series"
            )
        if val_p and not val_n:
            raise UnexpectedValue(
                subfield="n",
                message=" volume title exists but no volume number",
            )

        # val_n is guaranteed to be non-empty from here on.
        if len(re.findall(r"\d+", val_n)) > 1:
            raise UnexpectedValue(
                subfield="n", message=" volume has more than one digit "
            )

        volume_number = extract_volume_number(
            val_n, raise_exception=True, subfield="n"
        )
        obj = {"title": val_p or volume_title}
        if val_y:
            year = None
            if re.match(r"\d+", val_y):
                # re.match only checks the prefix, so values such as
                # "1999a" get this far; report them as an unexpected value
                # instead of letting int() raise a bare ValueError.
                try:
                    year = int(val_y)
                except ValueError:
                    year = None
            # NOTE(review): the upper bound 2021 is hard-coded - confirm it
            # is still the desired limit.
            if year is None or not 1600 <= year <= 2021:
                raise UnexpectedValue(
                    subfield="y", message=" unrecognized publication year"
                )
            obj["publication_year"] = year
        _insert_volume(_migration, volume_number, obj)
    if not volume_title:
        raise MissingRequiredField(
            subfield="a", message=" this record is missing a main title"
        )

    # series created

    return _migration
Example #6
0
def alternative_titles_doc(self, key, value):
    """Alternative titles.

    * ``242__``: extends the record's alternative titles with the result of
      ``alternative_titles``.
    * ``246__`` with ``n`` and ``p``: registers a volume (number and title)
      in ``self["_migration"]``, marks the record as multipart and ignores
      the key.
    * ``246__`` without ``n``/``p``: adds subfield ``a`` as
      ``ALTERNATIVE_TITLE`` and subfield ``b`` as ``SUBTITLE``.

    :return: the list of alternative titles. It is returned for every path
        that does not raise, including ``242__``, which previously fell
        through and returned ``None``.
    :raises MissingRequiredField: when only one of ``n``/``p`` is present
    :raises IgnoreKey: when the field describes a volume
    """
    _alternative_titles = self.get("alternative_titles", [])

    if key == "242__":
        _alternative_titles += alternative_titles(self, key, value)
    elif key == "246__":
        # n and p must be given together or not at all.
        if ("n" in value) != ("p" in value):
            raise MissingRequiredField(subfield="n or p")

        if "p" in value:
            _migration = self.get("_migration", {})
            _migration.setdefault("volumes", [])

            val_n = clean_val("n", value, str)
            _migration["volumes"].append({
                "volume": extract_volume_number(val_n, raise_exception=True),
                "title": clean_val("p", value, str),
            })
            _migration["is_multipart"] = True
            _migration["record_type"] = "multipart"
            self["_migration"] = _migration
            raise IgnoreKey("alternative_titles")

        if "a" in value:
            _alternative_titles.append({
                "value": clean_val("a", value, str, req=True),
                "type": "ALTERNATIVE_TITLE",
            })
        if "b" in value:
            _alternative_titles.append({
                "value": clean_val("b", value, str, req=True),
                "type": "SUBTITLE",
            })
    return _alternative_titles
Example #7
0
def extract_volume_number(value,
                          search=False,
                          raise_exception=False,
                          subfield=None):
    """Extract the volume number from a string, returns None if not matched.

    :param value: text to parse; ``None`` is treated as "no match" instead
        of failing on ``.strip()``
    :param search: use ``RE_VOLUME_NUMBER.search`` (match anywhere) instead
        of ``RE_VOLUME_NUMBER.match`` (match at the start)
    :param raise_exception: raise instead of returning ``None`` when the
        volume number cannot be extracted
    :param subfield: subfield reported in the raised exception
    :return: the first group of the match, or ``None``
    :raises MissingRequiredField: when nothing matched and
        ``raise_exception`` is set
    """
    if value is not None:
        matcher = RE_VOLUME_NUMBER.search if search else RE_VOLUME_NUMBER.match
        result = matcher(value.strip())
        if result:
            return result.group(1)

    if raise_exception:
        raise MissingRequiredField(subfield=subfield,
                                   message=" failed to parse volume number")

    return None
Example #8
0
def clean_val(
    subfield,
    value,
    var_type,
    req=False,
    regex_format=None,
    default=None,
    manual=False,
    transform=None,
    multiple_values=False,
):
    """
    Tests values using common rules.

    :param subfield: marcxml subfield indicator
    :param value: marcxml value
    :param var_type: expected type for value to be cleaned
        (``str``, ``bool`` or ``int``)
    :param req: specifies if the value is required in the end schema
    :param regex_format: specifies if the value should have a pattern
    :param default: if value is missing and required it outputs default;
        any default other than ``None`` is honoured, including falsy ones
        such as ``0``, ``False`` or ``""``
    :param manual: if the value should be cleaned manually during the migration
    :param transform: string transform function (or callable)
    :param multiple_values: allow multiple values in subfield
    :return: cleaned output value; a list of cleaned values when
        ``multiple_values`` is set and the subfield holds a tuple; ``None``
        when the subfield is absent and not required
    :raises ManualImportRequired: when ``manual`` is set and a value exists
    :raises MissingRequiredField: when a required value is missing and no
        default is given
    :raises UnexpectedValue: when the value cannot be converted, or when a
        tuple is found although ``multiple_values`` is not set
    :raises NotImplementedError: when ``var_type`` is not supported
    """
    def _clean(value_to_clean):
        if value_to_clean is None:
            return None
        try:
            if var_type is str:
                return clean_str(value_to_clean, regex_format, req,
                                 transform)
            if var_type is bool:
                return bool(value_to_clean)
            if var_type is int:
                return int(value_to_clean)
            raise NotImplementedError
        except (ValueError, TypeError) as err:
            # Keep the original conversion error as the cause.
            raise UnexpectedValue(subfield=subfield) from err
        except (UnexpectedValue, MissingRequiredField) as e:
            # Enrich errors coming from clean_str with the offending field.
            e.subfield = subfield
            e.message += str(force_list(value))
            raise

    to_clean = value.get(subfield)

    if manual and to_clean:
        raise ManualImportRequired(subfield=subfield)
    if req and to_clean is None:
        if default is not None:
            return default
        raise MissingRequiredField(subfield=subfield)

    if isinstance(to_clean, tuple):
        if not multiple_values:
            raise UnexpectedValue(subfield=subfield)
        return [_clean(item) for item in to_clean]
    return _clean(to_clean)