Esempio n. 1
0
def tags(self, key, value):
    """Collect collection tags found in subfields ``a`` and ``b``.

    WARNING: this field can also carry document type and serial data.
    When neither subfield maps to a ``COLLECTION`` entry, the whole field
    is handed over to ``special_serials``.

    Returns the (possibly extended) list stored under ``"tags"``.
    """
    collected = self.get("tags", [])
    for entry in force_list(value):
        tag_a = mapping(COLLECTION, clean_val("a", entry, str))
        tag_b = mapping(COLLECTION, clean_val("b", entry, str))
        if tag_a and tag_a not in collected:
            collected.append(tag_a)
        if tag_b and tag_b not in collected:
            collected.append(tag_b)
        if not (tag_a or tag_b):
            # Not a known tag: delegate the complete field, not just the entry.
            special_serials(self, key, value)
    return collected
Esempio n. 2
0
def medium(self, key, value):
    """Record the medium of each barcoded item under ``_migration``.

    Subfield ``x`` supplies the barcodes (empty ones are dropped) and
    subfield ``a`` the medium code, which is upper-cased, stripped of
    dashes and mapped through ``ITEMS_MEDIUMS``.

    Returns the ``_migration`` dict; ``item_medium`` and ``has_medium``
    are only written when at least one item/medium pair is known.
    """
    migration = self.get("_migration", {})
    known_items = migration.get("item_medium", [])

    raw_barcodes = value.get("x")
    if raw_barcodes:
        barcodes = [code for code in force_list(raw_barcodes) if code]
    else:
        barcodes = []

    medium_code = clean_val("a", value, str)
    item_medium = None
    if medium_code:
        normalised_code = medium_code.upper().replace("-", "")
        item_medium = mapping(
            ITEMS_MEDIUMS,
            normalised_code,
            raise_exception=True,
            subfield="a",
        )

    for code in barcodes:
        item = {"barcode": code}
        if item_medium:
            item["medium"] = item_medium
        if item not in known_items:
            known_items.append(item)

    if known_items:
        migration.update(item_medium=known_items, has_medium=True)
    return migration
Esempio n. 3
0
def licenses(self, key, value):
    """Build a license entry from the field's subfields.

    Subfield ``3`` gives the material (mapped through ``MATERIALS``),
    ``g`` the internal notes and ``a`` the license id. When ``a`` is
    missing, an arXiv non-exclusive license URL in subfield ``u`` is
    accepted as a substitute.

    Raises ``UnexpectedValue`` when no license id can be determined.
    """
    arxiv_url_marker = "arxiv.org/licenses/nonexclusive-distrib/1.0/"

    url = clean_val("u", value, str)
    material = mapping(
        MATERIALS,
        clean_val("3", value, str, transform="lower"),
        raise_exception=True,
        subfield="3",
    )

    entry = {}
    if material:
        entry["material"] = material

    notes = clean_val("g", value, str)
    if notes:
        entry["internal_notes"] = notes

    identifier = clean_val("a", value, str)
    # The only URL currently recognised in place of an id is the arXiv one.
    if not identifier and url and arxiv_url_marker in url:
        identifier = "arXiv-nonexclusive-distrib-1.0"

    if not identifier:
        raise UnexpectedValue()

    entry["license"] = {"id": identifier}
    return entry
Esempio n. 4
0
def isbns(self, key, value):
    """Append ISBN identifiers to the record's ``identifiers`` list.

    The ISBN comes from subfield ``a`` or, failing that, ``z``. Subfield
    ``u`` may contain a volume marker, which is stored in
    ``self["volume"]``, and a material description, which is mapped
    through ``IDENTIFIERS_MEDIUM_TYPES``.

    Raises ``IgnoreKey`` when an entry has no ISBN value and
    ``ManualImportRequired`` when a volume is found but one is already set.
    """
    identifiers = self.get("identifiers", [])
    for entry in force_list(value):
        qualifier = clean_val("u", entry, str)
        isbn_value = clean_val("a", entry, str) or clean_val("z", entry, str)
        if not isbn_value:
            raise IgnoreKey("identifiers")

        isbn = {"value": isbn_value, "scheme": "ISBN"}
        if qualifier:
            volume_match = re.search(r"(\(*v[.| ]*\d+.*\)*)", qualifier)
            if volume_match:
                volume_text = volume_match.group(1)
                # Remove the volume part so only the material text is mapped.
                qualifier = qualifier.replace(volume_text, "").strip()
                if self.get("volume"):
                    raise ManualImportRequired(subfield="u")
                self["volume"] = volume_text
            # WARNING! vocabulary document_identifiers_materials
            material = mapping(
                IDENTIFIERS_MEDIUM_TYPES,
                qualifier,
                subfield="u",
            )
            if material:
                isbn["material"] = material

        if isbn not in identifiers:
            identifiers.append(isbn)
    return identifiers
Esempio n. 5
0
def collection(self, key, value):
    """Store collection tags from subfields ``a`` and ``b`` in ``_migration``.

    WARNING: this field can also carry the document type. When neither
    subfield maps to a ``COLLECTION`` entry, the field is passed to
    ``document_type`` and ``IgnoreKey`` is raised.

    Returns the ``_migration`` dict.
    """
    migration = self["_migration"]
    tag_list = migration["tags"]
    for entry in force_list(value):
        mapped = [
            mapping(COLLECTION, clean_val(code, entry, str))
            for code in ("a", "b")
        ]
        for tag in mapped:
            if not tag:
                continue
            if tag not in tag_list:
                tag_list.append(tag)
            migration["has_tags"] = True
        if not any(mapped):
            self["document_type"] = document_type(self, key, value)
            raise IgnoreKey("_migration")
    return migration
Esempio n. 6
0
def add_eitem_extra_metadata(eitem, document):
    """Copy the e-item internal notes from the document onto the e-item.

    The notes are read from ``document["_migration"]["eitems_internal_notes"]``.
    When they hold a single value (no ``;`` separator) that starts with
    3-4 uppercase letters, those letters are treated as the provider code
    and written to ``eitem["created_by"]["value"]``, translated through
    ``PROVIDERS_MAPPING`` with the raw code as fallback.

    :param eitem: e-item dict, modified in place.
    :param document: document dict containing a ``_migration`` entry.
    """
    PROVIDERS_MAPPING = {"safari": "SAF", "springer": "SPR", "ebl": "EBL"}
    internal_notes = document["_migration"].get("eitems_internal_notes")
    if not internal_notes:
        return

    eitem["internal_notes"] = internal_notes
    # It must be only one value to update created_by
    if ";" in internal_notes:
        return

    # A note without the uppercase provider prefix used to raise IndexError
    # from ``findall(...)[0]``; now created_by is simply left untouched.
    provider_match = re.match(r"[A-Z]{3,4}", internal_notes)
    if provider_match is None:
        return

    raw_provider = provider_match.group(0)
    provider = mapping(PROVIDERS_MAPPING,
                       raw_provider,
                       default_val=raw_provider)
    eitem["created_by"]["value"] = provider
Esempio n. 7
0
def dois(self, key, value):
    """Translate DOI fields into identifiers and external e-items.

    For every entry, subfield ``a`` is the DOI and subfield ``q`` an
    optional description. Each DOI is registered as an external e-item
    under ``_migration`` and added to the ``identifiers`` list with a
    material mapped through ``IDENTIFIERS_MEDIUM_TYPES``.

    Returns the (possibly extended) ``identifiers`` list.
    """
    identifiers = self.get("identifiers", [])
    url_template = current_app.config['CDS_ILS_DOI_URL_PREFIX']

    def _strip_open_access(text):
        # Lower-case the text and drop the "(open access)" marker.
        return text.lower().replace("(open access)", "").strip()

    def _strip_parenthesised(text):
        # Drop every "(...)" group before looking up the material.
        return re.sub(r'\([^)]*\)', '', text).strip()

    def _register_external_eitem(doi_value, description):
        external_eitems = self["_migration"]["eitems_external"]
        is_open_access = False
        if description:
            is_open_access = "open access" in description.lower()
            description = _strip_open_access(description)
        url = {
            "description": description or "e-book",
            "value": url_template.format(doi=doi_value),
        }
        external_eitems.append({"url": url, "open_access": is_open_access})

    for entry in force_list(value):
        qualifier = clean_val("q", entry, str)
        doi_value = clean_val("a", entry, str, req=True)
        _register_external_eitem(doi_value, qualifier)

        material_key = qualifier
        if material_key:
            material_key = _strip_parenthesised(material_key)

        # vocabulary controlled
        material = mapping(
            IDENTIFIERS_MEDIUM_TYPES,
            material_key,
            raise_exception=True,
        )
        candidate = {
            "value": doi_value,
            "material": material,
            "scheme": "DOI",
        }
        if candidate not in identifiers:
            identifiers.append(candidate)

    self["_migration"]["eitems_has_external"] = True

    return identifiers
Esempio n. 8
0
def copyright(self, key, value):
    """Build a copyright entry from the field's subfields.

    Subfield ``3`` (lower-cased) is mapped through ``MATERIALS``; ``d``,
    ``f``, ``g`` and ``u`` give the holder, statement, year and URL.
    """
    material_key = clean_val("3", value, str, transform="lower")
    material = mapping(MATERIALS, material_key, raise_exception=True)

    holder = clean_val("d", value, str)
    statement = clean_val("f", value, str)
    year = clean_val("g", value, int)
    url = clean_val("u", value, str)

    return dict(
        material=material,
        holder=holder,
        statement=statement,
        year=year,
        url=url,
    )
Esempio n. 9
0
def licenses(self, key, value):
    """Build a license entry from the field's subfields.

    Subfield ``3`` (lower-cased) is mapped through ``MATERIALS``; ``u``
    and ``a`` give the license URL and name, ``g`` the internal note.
    """
    material_key = clean_val("3", value, str, transform="lower")
    material = mapping(MATERIALS, material_key, raise_exception=True)

    license_info = dict(
        url=clean_val("u", value, str),
        name=clean_val("a", value, str),
    )
    note = clean_val("g", value, str)

    return dict(
        license=license_info,
        material=material,
        internal_note=note,
    )
Esempio n. 10
0
def special_serials(self, key, value):
    """Translate serial fields into ``_migration["serials"]``.

    Subfield ``a`` of every entry is mapped through ``SERIAL``. A mapped
    title is stored as a serial stub with empty ``volume`` and ``issn``.
    When the value is not a known serial, the field is passed to
    ``document_type`` and ``IgnoreKey`` is raised.

    Returns the ``_migration`` dict.
    """
    _migration = self["_migration"]
    _serials = _migration.get("serials", [])
    for v in force_list(value):
        result_a = mapping(SERIAL, clean_val("a", v, str))
        if not result_a:
            self["document_type"] = document_type(self, key, value)
            raise IgnoreKey("_migration")

        serial = {
            "title": result_a,
            "volume": None,
            "issn": None,
        }
        # Compare the stub itself: the list holds dicts, so checking the
        # bare title against it never matched and duplicates piled up.
        if serial not in _serials:
            _serials.append(serial)
        _migration.update({"serials": _serials, "has_serial": True})
    return _migration
Esempio n. 11
0
def dois(self, key, value):
    """Append DOI identifiers to the record's ``identifiers`` list.

    Subfield ``q`` (lower-cased) is mapped through ``MATERIALS``, ``a``
    is the DOI value (requested with ``req=True``) and ``9`` the source.
    Entries already in the list are not added again.
    """
    collected = self.get("identifiers", [])
    for entry in force_list(value):
        material_key = clean_val("q", entry, str, transform="lower")
        material = mapping(MATERIALS, material_key, raise_exception=True)
        doi_value = clean_val("a", entry, str, req=True)
        source = clean_val("9", entry, str)
        candidate = dict(
            value=doi_value,
            material=material,
            source=source,
            scheme="DOI",
        )
        if candidate in collected:
            continue
        collected.append(candidate)
    return collected
Esempio n. 12
0
def medium(self, key, value):
    """Record the medium of each barcoded item under ``_migration``.

    Subfield ``x`` supplies the barcodes and subfield ``a`` the medium
    code, which is upper-cased, stripped of dashes and mapped through
    ``MEDIUMS``.

    Returns the ``_migration`` dict with ``item_medium`` and
    ``has_medium`` set.
    """
    _migration = self.get("_migration", {})
    item_mediums = _migration.get("item_medium", [])
    # Drop empty barcodes: with no subfield x the "" default would otherwise
    # produce an item with an empty barcode (assuming force_list wraps
    # scalars - TODO confirm).
    barcodes = [
        barcode for barcode in force_list(value.get("x", "")) if barcode
    ]

    # clean_val may return nothing when subfield a is absent; calling
    # .upper() on that raised AttributeError. Leave the lookup to mapping.
    medium_code = clean_val("a", value, str)
    if medium_code:
        medium_code = medium_code.upper().replace('-', '')
    _medium = mapping(MEDIUMS,
                      medium_code,
                      raise_exception=True)

    for barcode in barcodes:
        current_item = {
            "barcode": barcode,
            "medium": _medium,
        }
        if current_item not in item_mediums:
            item_mediums.append(current_item)
    _migration.update({
        "item_medium": item_mediums,
        "has_medium": True,
    })
    return _migration
Esempio n. 13
0
def standard_review(self, key, value):
    """Translate the standard review field into ``extensions`` entries.

    Subfield ``i`` is mapped through ``APPLICABILITY`` and added to the
    applicability list; ``v`` and ``p`` give validity and expert. Subfield
    ``z``, when present, must hold a month name and a single number (the
    year); it is stored as the ISO date of the first day of that month.

    Returns the ``extensions`` dict.

    Raises ``UnexpectedValue`` (subfield ``z``) when the check date is
    empty, has no month name, has more than one number, or cannot be
    parsed.
    """
    _extensions = self.get("extensions", {})
    applicability_list = _extensions.get("standard_review_applicability", [])
    applicability = mapping(
        APPLICABILITY,
        clean_val("i", value, str),
    )
    if applicability and applicability not in applicability_list:
        applicability_list.append(applicability)
    if "z" in value:
        try:
            check_date = clean_val("z", value, str)
            if not check_date:
                # An empty value used to fail with AttributeError on .lower().
                raise UnexpectedValue(subfield="z")
            # Normalise date
            check_date_lower = check_date.lower()
            check_date_month = None
            for month in month_name[1:]:
                if month.lower() in check_date_lower:
                    check_date_month = month
            if check_date_month is None:
                # Without this guard a missing month name surfaced as an
                # UnboundLocalError, which the handler below does not catch.
                raise UnexpectedValue(subfield="z")
            check_date_year = re.findall(r"\d+", check_date)
            if len(check_date_year) > 1:
                raise UnexpectedValue(subfield="z")
            # IndexError (no number at all) and ValueError (unparsable date)
            # are converted to UnexpectedValue below.
            datetime_object = datetime.datetime.strptime(
                "{} 1 {}".format(check_date_month, check_date_year[0]),
                "%B %d %Y",
            )

            check_date_iso = datetime_object.date().isoformat()
            _extensions.update({
                "standard_review_checkdate": check_date_iso,
            })
        except (ValueError, IndexError):
            raise UnexpectedValue(subfield="z")
    _extensions.update({
        "standard_review_applicability":
        applicability_list,
        "standard_review_standard_validity":
        clean_val("v", value, str),
        "standard_review_expert":
        clean_val("p", value, str),
    })
    return _extensions
Esempio n. 14
0
def created(self, key, value):
    """Translates created information to fields.

    Handles two keys:

    * ``916__``: when subfield ``s`` is present, sets ``created_by.type``
      from the ``ACQUISITION_METHOD`` mapping and, if subfield ``w`` holds
      a six-digit value (four-digit year followed by a week number),
      returns the ISO date produced by ``get_week_start``.
    * ``595__``: when subfield ``a`` matches three uppercase letters
      followed by six digits, stores the letters in ``self["source"]`` and
      the first day of the encoded year/month in ``self["_created"]``.

    Every path that does not return a date ends by raising
    ``IgnoreKey("_created")``.
    """
    _created_by = self.get("created_by", {})
    if key == "916__":
        if "s" in value:
            _created_by.update({
                "type":
                mapping(
                    ACQUISITION_METHOD,
                    clean_val("s", value, str, default="migration"),
                    raise_exception=True,
                )
            })
            self["created_by"] = _created_by
            date = clean_val("w", value, int, regex_format=r"\d{6}$")
            if date:
                # First four digits are the year, the rest the week number.
                year, week = str(date)[:4], str(date)[4:]
                date = get_week_start(int(year), int(week))
                return date.isoformat()
    elif key == "595__":
        try:
            sub_a = clean_val("a",
                              value,
                              str,
                              regex_format=r"[A-Z]{3}[0-9]{6}$")
            if sub_a:
                # Layout: 3-letter source, 4-digit year, 2-digit month.
                source = sub_a[:3]
                self["source"] = source
                year, month = int(sub_a[3:7]), int(sub_a[7:])
                self["_created"] = datetime.date(year, month, 1).isoformat()
                # The value was written to self directly, so skip the key.
                raise IgnoreKey("_created")
        except UnexpectedValue as e:
            # The exception is annotated but not re-raised; the field is
            # passed to the internal_notes translator instead.
            e.subfield = "a"
            self["internal_notes"] = internal_notes(self, key, value)
            raise IgnoreKey("_created")

    raise IgnoreKey("_created")
Esempio n. 15
0
def dois(self, key, value):
    """Append DOI identifiers to the record's ``identifiers`` list.

    Subfield ``q`` is lower-cased and stripped of the "(open access)"
    marker before being mapped through ``MATERIALS``; ``a`` is the DOI
    value (requested with ``req=True``) and ``9`` the source. Entries
    already in the list are not added again.
    """
    collected = self.get("identifiers", [])

    def _normalise_material(text):
        # Used as the clean_val transform for subfield q.
        lowered = text.lower()
        return lowered.replace("(open access)", "")

    for entry in force_list(value):
        material_key = clean_val("q", entry, str,
                                 transform=_normalise_material)
        material = mapping(MATERIALS, material_key, raise_exception=True)
        doi_value = clean_val("a", entry, str, req=True)
        source = clean_val("9", entry, str)
        candidate = dict(
            value=doi_value,
            material=material,
            source=source,
            scheme="DOI",
        )
        if candidate in collected:
            continue
        collected.append(candidate)
    return collected
Esempio n. 16
0
 def doc_type_mapping(val):
     """Map a truthy ``val`` through ``DOCUMENT_TYPE``; otherwise return None."""
     if not val:
         return None
     return mapping(DOCUMENT_TYPE, val)
Esempio n. 17
0
def created(self, key, value):
    """Translates created information to fields.

    Handles two keys:

    * ``916__``: when subfield ``s`` is present, sets ``created_by.type``
      from the ``ACQUISITION_METHOD`` mapping and returns an ISO date.
      The date comes from subfield ``w`` (six digits: four-digit year
      followed by a week number; the smallest value is used when several
      are given). Today's date is returned instead when ``w`` is missing
      or the computed date is not before today.
    * ``595__``: appends subfield ``a`` to
      ``_migration["eitems_internal_notes"]`` when it matches the e-item
      note pattern, then, when it matches three uppercase letters followed
      by six digits, stores the letters in ``self["source"]`` and the
      first day of the encoded year/month in ``self["_created"]``.

    Every path that does not return a date ends by raising
    ``IgnoreKey("_created")``. ``UnexpectedValue`` is raised when the
    ``w`` value is outside the six-digit range.
    """
    _created_by = self.get("created_by", {})
    if key == "916__":
        if "s" in value:
            _created_by.update({
                "type":
                mapping(
                    ACQUISITION_METHOD,
                    clean_val("s", value, str, default="migration"),
                    raise_exception=True,
                    subfield="s",
                )
            })
            self["created_by"] = _created_by
            date_values = clean_val("w",
                                    value,
                                    int,
                                    regex_format=r"^\d{6}$",
                                    multiple_values=True)
            if not date_values:
                return datetime.date.today().isoformat()
            # With several values, keep the smallest one.
            if type(date_values) is list:
                date = min(date_values)
            else:
                date = date_values
            # Only six-digit integers (strictly above 100000) are accepted.
            if not (100000 < date < 999999):
                raise UnexpectedValue("Wrong date format", subfield='w')
            if date:
                # First four digits are the year, the rest the week number.
                year, week = str(date)[:4], str(date)[4:]
                date = get_week_start(int(year), int(week))
                # Never report a creation date later than today.
                if date < datetime.date.today():
                    return date.isoformat()
                else:
                    return datetime.date.today().isoformat()
    elif key == "595__":
        try:
            _migration = self["_migration"]
            _eitems_internal_notes = _migration.get("eitems_internal_notes",
                                                    "")
            sub_a_internal_notes = clean_val(
                "a", value, str, regex_format=r"[A-Z]{3,4}[0-9]{4,6}$")
            if sub_a_internal_notes:
                # Several notes are joined with "; ".
                if not _eitems_internal_notes:
                    _eitems_internal_notes = sub_a_internal_notes
                else:
                    _eitems_internal_notes += f"; {sub_a_internal_notes}"
                _migration.update(
                    {"eitems_internal_notes": _eitems_internal_notes})
        except UnexpectedValue as e:
            # A value that is not an e-item note is ignored here; the same
            # subfield is examined again below.
            pass
        try:
            sub_a = clean_val("a",
                              value,
                              str,
                              regex_format=r"[A-Z]{3}[0-9]{6}$")
            if sub_a:
                # Layout: 3-letter source, 4-digit year, 2-digit month.
                source = sub_a[:3]
                self["source"] = source
                year, month = int(sub_a[3:7]), int(sub_a[7:])
                self["_created"] = datetime.date(year, month, 1).isoformat()
                # The value was written to self directly, so skip the key.
                raise IgnoreKey("_created")
        except UnexpectedValue as e:
            # The exception is annotated but not re-raised; the field is
            # passed to the internal_notes translator instead.
            e.subfield, e.key = "a", key
            self["internal_notes"] = internal_notes(self, key, value)
            raise IgnoreKey("_created")

    raise IgnoreKey("_created")
Esempio n. 18
0
 def doc_type_mapping(val):
     """Map a truthy ``val`` through ``DOCUMENT_TYPE``; otherwise return None."""
     if not val:
         return None
     return mapping(DOCUMENT_TYPE, val, subfield="a or b")