def choose_term(terms, programme: str = None, akvo: str = None, is_program: bool = False):
    """Pick one study-field term out of ``terms`` and return its taxonomy JSON.

    With a single candidate that candidate is used. With several candidates:

    1. if ``is_program`` is set, candidates without a ``parent_slug`` are
       preferred (when at least one exists);
    2. if ``akvo`` is given, the first candidate whose ``extra_data["AKVO"]``
       equals it is returned immediately;
    3. if ``programme`` is given, the first candidate whose ``parent_slug`` is
       ``p-<slugify(programme)>`` is used, falling back to the first candidate;
    4. otherwise the first candidate is used.

    :param terms: sequence of term objects exposing ``slug``, ``parent_slug``
        and ``extra_data``
    :param programme: programme name used to derive the expected parent slug
    :param akvo: AKVO code compared against ``extra_data["AKVO"]``
    :param is_program: prefer candidates that have no parent
    :return: ``{"studyField": [...]}``; an empty list when ``terms`` is empty.
        An AKVO match returns the bare ``paginated_data`` instead (see note
        in the body).
    """

    def fetch(term):
        return get_taxonomy_json("studyfields", slug=term.slug).paginated_data

    found = []
    count = len(terms)
    if count == 1:
        found.extend(fetch(terms[0]))
    elif count > 1:
        candidates = terms
        if is_program:
            top_level = [candidate for candidate in terms if not candidate.parent_slug]
            candidates = top_level or terms
        if akvo:
            for candidate in candidates:
                candidate_akvo = candidate.extra_data.get("AKVO")
                if candidate_akvo and candidate_akvo == akvo:
                    # NOTE(review): this path returns the raw taxonomy JSON, not
                    # the {"studyField": ...} wrapper used everywhere else --
                    # confirm callers expect that.
                    return fetch(candidate)
        if programme:
            wanted_parent = f"p-{slugify(programme)}"
            for candidate in candidates:
                if candidate.parent_slug == wanted_parent:
                    found.extend(fetch(candidate))
                    break
            if not found:
                found.extend(fetch(candidates[0]))
        else:
            found.extend(fetch(candidates[0]))
    return {"studyField": found}
def get_subject_by_title(
        value: str,
        lang: str,
) -> Union[None, List]:
    """Look up ``subjects`` taxonomy terms by their title in a given language.

    The title is normalised by removing double quotes and backslashes and then
    stripping surrounding whitespace. Only the first two characters of ``lang``
    are used.

    :param value: title to search for
    :param lang: language code; truncated to two characters
    :return: ``None`` when the normalised title is empty or nothing is found;
        the taxonomy JSON of the single hit; or, for several hits, the
        concatenated taxonomy JSON of those whose title in ``lang`` equals the
        normalised value exactly (possibly an empty list)
    """
    # Strip AFTER removing quotes/backslashes: whitespace that was wrapped in
    # the removed characters (e.g. '" title "') must not leak into the lookup.
    value = value.replace('"', '').replace('\\', '').strip()
    lang = lang[:2]
    if not value:
        return None
    terms = find_in_title_jsonb("subjects", f"title.{lang}", value).all()
    if not terms:
        return None
    if len(terms) == 1:
        return get_taxonomy_json(code="subjects", slug=terms[0].slug).paginated_data
    res = []
    for term in terms:
        title = term.extra_data.get("title", {}).get(lang)
        if title == value:
            res.extend(
                get_taxonomy_json(code="subjects", slug=term.slug).paginated_data)
    return res
def certified_methodologies(el, **kwargs):
    """Return resource type and N-type JSON for a certified methodology.

    :param el: source value; compared case-insensitively and ignoring
        surrounding whitespace with ``"certifikovaná metodika"``
    :return: dict with ``resourceType`` and ``N_type`` taxonomy JSON when the
        value matches, otherwise ``None``
    """
    if el.lower().strip() != "certifikovaná metodika":
        return None
    return {
        "resourceType": get_taxonomy_json(
            code="resourceType",
            slug="methodologies-and-procedures/certified-methodologies",
        ).paginated_data,
        "N_type": get_taxonomy_json(code="Ntype", slug="a").paginated_data,
    }
def accessibility(el, **kwargs):
    """Translate an accessibility status code into access rights and a statement.

    Fix over the previous version: the Czech statements of the ``'P'`` branch
    and of the fallback branch were swapped, so each contradicted its own
    English text and access-rights term.

    :param el: nested sequence; the status code is read from ``el[0][0]``
    :param kwargs: must contain ``results``, whose first item provides
        ``dateIssued``
    :return: ``OAITransformer.PROCESSED`` when ``dateIssued <= "2005"``,
        otherwise a dict with ``accessRights`` (taxonomy JSON) and
        ``accessibility`` (Czech and English statement)
    """
    status = el[0][0]
    date = kwargs["results"][0]["dateIssued"]
    # NOTE(review): this is a plain string comparison, so a full date such as
    # "2005-06-30" is NOT <= "2005" and is processed below -- confirm whether
    # `date < "2006"` was intended.
    if date <= "2005":
        return OAITransformer.PROCESSED

    excluded_statement = {
        "cs": "Práce je dočasně nebo trvale vyloučena ze zveřejnění. Pro vyžádání kopie "
              "dokumentu je potřeba vyplnit formulář v digitálním repozitáři UK.",
        "en": "The work is temporarily or permanently excluded from publication. To request a "
              "copy of the document, it is necessary to fill in the form in the Charles "
              "University Digital Repository."
    }
    if status == 'V':
        slug = "c_abf2"
        statement = {
            "cs": "Volně dostupné v digitálním repozitáři UK.",
            "en": "Freely available in the Charles University Digital Repository."
        }
    elif status == 'P':
        slug = "c_abf2"
        statement = {
            "cs": "Volně dostupné vyjma příloh práce v digitálním repozitáři UK.",
            "en": "Freely available without attachments of the thesis in the Charles University "
                  "Digital Repository."
        }
    else:
        # 'N' and every unrecognised code share the "excluded" statement.
        slug = "c_16ec"
        statement = excluded_statement
    access_rights = get_taxonomy_json(code="accessRights", slug=slug).paginated_data
    return {"accessRights": access_rights, "accessibility": statement}
def test_get_taxonomy_term(app, db, taxonomy_tree):
    """Default representation of term ``a/b`` has ancestors and links, no children."""
    res = get_taxonomy_json(code="test_taxonomy", slug="a/b").paginated_data
    assert isinstance(res, dict)
    assert "ancestors" in res
    assert "children" not in res
    assert "links" in res
def add_item_relation_type(data):
    """Attach an ``itemRelationType`` to every related item of a record.

    The relation type is derived from the record's non-ancestor resource
    types: the last path segment of each type's ``links.self`` is mapped to a
    relation-type slug, whose taxonomy JSON is written to each entry of
    ``data["relatedItem"]``. Processing stops at the first resource type that
    has no mapping.

    :param data: record dict; modified in place
    :return: the same ``data`` object
    """
    if "relatedItem" not in data:
        return data
    # A missing or null resourceType means there is nothing to derive from;
    # previously this raised TypeError.
    leaf_types = [
        resource for resource in (data.get("resourceType") or [])
        if not resource["is_ancestor"]
    ]
    mapping = {
        "conference-papers": "isPartOf",
        "articles": "isPartOf",
        "conference-proceedings": "hasVersion",
        "books": "hasVersion",
        "conference-posters": "isPartOf",
        "research-reports": "isPartOf",
    }
    for resource in leaf_types:
        # The slug is the last path segment; tolerate trailing slashes.
        slug = resource["links"]["self"].rstrip("/").split("/")[-1]
        relation_type_slug = mapping.get(slug)
        if not relation_type_slug:
            return data
        taxonomy_json = get_taxonomy_json(
            code="itemRelationType", slug=relation_type_slug.lower()).paginated_data
        if taxonomy_json:
            for related in data["relatedItem"]:
                related["itemRelationType"] = taxonomy_json
    return data
def get_psh(el):
    """Resolve a subject term from the URL stored under key ``"0"``.

    The lower-cased last path segment of the URL is used as the slug in the
    ``subjects`` taxonomy.

    :param el: mapping with the term URL under ``"0"``
    :return: taxonomy JSON of the term, or ``None`` when the URL is missing or
        no term matches
    """
    url = el.get("0")
    if not url:
        # Without a URL there is no slug; previously this raised AttributeError.
        return None
    slug = url.split("/")[-1].lower()
    term = get_query_by_slug("subjects", slug).one_or_none()
    if not term:
        return None
    return get_taxonomy_json(code="subjects", slug=term.slug).paginated_data
def add_provider(data):
    """Fill in ``provider`` and ``entities`` with institution ``00216208``.

    Keys already present in ``data`` are left untouched. The taxonomy lookup
    is performed on every call, and the same JSON object is assigned to both
    keys.

    :param data: record dict; modified in place
    :return: the same ``data`` object
    """
    institution = get_taxonomy_json(code="institutions", slug="00216208").paginated_data
    for key in ("provider", "entities"):
        if key not in data:
            data[key] = institution
    return data
def test_get_taxonomy_json_2(app, db, taxonomy_tree):
    """Requesting ``INCLUDE_DESCENDANTS`` adds ``children`` to the term JSON."""
    representation = Representation("representation", include=[INCLUDE_DESCENDANTS])
    res = get_taxonomy_json(
        code="test_taxonomy", slug="a/b", prefer=representation
    ).paginated_data
    assert "children" in res.keys()
def get_contributor_role(role: str):
    """Return taxonomy JSON for a contributor role slug.

    :param role: slug looked up in the ``contributor-type`` taxonomy
    :return: taxonomy JSON of the matching term, or ``[]`` when none exists
    """
    term = get_query_by_slug(taxonomy_code="contributor-type", slug=role).one_or_none()
    if not term:
        return []
    return get_taxonomy_json(code="contributor-type", slug=term.slug).paginated_data
def taxonomy_term_update(*args, **kwargs):
    """Propagate an updated taxonomy term to whatever references it.

    Reads ``term`` and ``taxonomy`` from ``kwargs``, fetches the term's current
    JSON and hands it to ``async_reference_content_changed.apply_async``
    together with the term's self URL; ``unlock_term.s(url=url)`` is passed as
    the ``link`` argument.
    """
    term, taxonomy = kwargs["term"], kwargs["taxonomy"]
    url = term.links().envelope["self"]
    content = get_taxonomy_json(code=taxonomy.code, slug=term.slug).paginated_data
    async_reference_content_changed.apply_async(
        args=[content],
        kwargs={"ref_url": url},
        link=unlock_term.s(url=url),
    )
def get_right_json(text):
    """Return ``licenses`` taxonomy JSON for a rights statement.

    ``text`` is translated through ``rights_dict()``; the result, with ``-``
    replaced by ``_`` and lower-cased, is used as the slug.

    :param text: rights statement as found in the source record
    :return: taxonomy JSON of the licence, or ``None`` when the statement is
        unknown or no term with the derived slug exists
    """
    license_name = rights_dict().get(text)
    if not license_name:
        return None
    slug = license_name.replace("-", "_").lower()
    term = get_query_by_slug(taxonomy_code="licenses", slug=slug).one_or_none()
    if not term:
        # one_or_none() yields None for an unknown slug; previously this fell
        # through to `term.slug` and raised AttributeError.
        return None
    return get_taxonomy_json(code="licenses", slug=term.slug).paginated_data
def get_access_rights(text=None, slug=None):
    """Return ``accessRights`` taxonomy JSON for a slug or an availability sentence.

    When ``slug`` is given it is used directly. Otherwise ``text`` is looked up
    in a table of known sentences to obtain a level (``"0"``, ``"1"`` or
    ``"2"``; unknown sentences map to ``"0"``), and the level is translated to
    a slug through ``get_access_rights_dict()``.

    :param text: availability sentence from the source record
    :param slug: explicit access-rights slug; takes precedence over ``text``
    :return: taxonomy JSON of the access-rights term
    """
    slug_by_level = get_access_rights_dict()
    if slug:
        return get_taxonomy_json(code="accessRights", slug=slug).paginated_data

    level_by_sentence = {
        "Dokument je dostupný v repozitáři Akademie věd.": "1",
        "Dokumenty jsou dostupné v systému NK ČR.": "1",
        "Plný text je dostupný v Digitální knihovně VUT.": "1",
        "Dostupné v digitálním repozitáři VŠE.": "1",
        "Plný text je dostupný v digitálním repozitáři JČU.": "1",
        "Dostupné v digitálním repozitáři UK.": "1",
        "Dostupné v digitálním repozitáři Mendelovy univerzity.": "1",
        "Dostupné v repozitáři ČZU.": "1",
        "Dostupné registrovaným uživatelům v digitálním repozitáři AMU.": "2",
        "Dokument je dostupný v NLK. Dokument je dostupný též v digitální formě v Digitální "
        "knihovně NLK. Přístup může být vázán na prohlížení z počítačů NLK.": "2",
        "Dostupné v digitálním repozitáři UK (pouze z IP adres univerzity).": "2",
        "Text práce je neveřejný, pro více informací kontaktujte osobu uvedenou v repozitáři "
        "Mendelovy univerzity.": "2",
        "Dokument je dostupný na vyžádání prostřednictvím repozitáře Akademie věd.": "2",
        "Dokument je dostupný v příslušném ústavu Akademie věd ČR.": "0",
        "Dokument je po domluvě dostupný v budově Ministerstva životního prostředí.": "0",
        "Plný text není k dispozici.": "0",
        "Dokument je dostupný v NLK.": "0",
        'Dokument je po domluvě dostupný v budově <a '
        'href=\"http://www.mzp.cz/__C125717D00521D29.nsf/index.html\" '
        'target=\"_blank\">Ministerstva životního prostředí</a>.': "0",
        "Dostupné registrovaným uživatelům v knihovně Mendelovy univerzity v Brně.": "0",
        'Dostupné registrovaným uživatelům v repozitáři ČZU.': "2",
        'Dokument je dostupný na externích webových stránkách.': "0",
    }
    level = level_by_sentence.get(text, "0")
    return get_taxonomy_json(
        code="accessRights", slug=slug_by_level.get(level)
    ).paginated_data
def resource_type(el, **kwargs):
    """Resolve the ``resourceType`` taxonomy JSON for a source type value.

    ``"metodiky"`` is special-cased: when the record has no ``336__`` field it
    maps to methodologies without certification; when the field is present the
    rule yields ``OAITransformer.PROCESSED``. Any other value is looked up by
    its ``nuslType`` in the ``resourceType`` taxonomy.

    :param el: source type value
    :param kwargs: must contain ``record`` when ``el == "metodiky"``
    :return: ``{"resourceType": <taxonomy JSON>}`` or
        ``OAITransformer.PROCESSED``
    """
    if el == "metodiky":
        if "336__" in kwargs["record"]:
            return OAITransformer.PROCESSED
        slug = "methodologies-and-procedures/methodologies-without-certification"
    else:
        # NOTE(review): one_or_none() may return None for an unknown nuslType,
        # in which case `.slug` raises AttributeError -- confirm that failing
        # the record is the intended outcome.
        slug = find_in_json_list("resourceType", "nuslType", el).one_or_none().slug
    return {
        "resourceType": get_taxonomy_json(code="resourceType", slug=slug).paginated_data
    }
def degree_grantor(el, **kwargs):
    """Resolve the degree grantor from the last item of ``el``.

    The unit is looked up with ``get_institution_term`` using the hierarchy
    ``[unit, "Univerzita Karlova"]``.

    :param el: sequence whose last item is the unit name
    :return: ``{"degreeGrantor": <taxonomy JSON>}`` when a term is found,
        otherwise ``OAITransformer.PROCESSED``
    """
    unit = el[-1]
    term = get_institution_term(
        unit, reversed_grantor_array=[unit, "Univerzita Karlova"])
    if not term:
        return OAITransformer.PROCESSED
    return {
        "degreeGrantor": get_taxonomy_json("institutions", slug=term.slug).paginated_data
    }
def publication_place(el, **kwargs):
    """Build ``publicationPlace`` from the last item of ``el``.

    The country is always the ``cz`` term of the ``countries`` taxonomy.

    :param el: sequence whose last item is the place name (must be ``str``)
    :return: ``{"publicationPlace": {"place": ..., "country": ...}}``
    """
    place_name = el[-1]
    assert isinstance(place_name, str)
    country = get_taxonomy_json(code="countries", slug="cz").paginated_data
    return {"publicationPlace": {"place": place_name, "country": country}}
def get_mednas(el):
    """Resolve a ``subjects`` term from subfield ``"7"``, falling back to ``"a"``.

    :param el: mapping of subfields
    :return: taxonomy JSON of the matching term, or ``None`` when no term is
        found or the query raises ``ProgrammingError``
    """
    slug = el.get("7") or el.get("a", "")
    query = get_query_by_slug("subjects", slug)
    try:
        term = query.one_or_none()
    except ProgrammingError:  # pragma: no cover
        # The session is committed before giving up on this value.
        db.session.commit()
        return
    if not term:
        return None
    return get_taxonomy_json(code="subjects", slug=term.slug).paginated_data
def get_czmesh(el):
    """Resolve a ``subjects`` term from the lower-cased subfield ``"7"``.

    :param el: mapping of subfields; a missing ``"7"`` yields an empty slug
    :return: taxonomy JSON of the matching term, or ``None`` when no term is
        found or the query raises ``ProgrammingError``
    """
    query = get_query_by_slug("subjects", el.get("7", "").lower())
    try:
        term = query.one_or_none()
    except ProgrammingError:  # pragma: no cover
        # The session is committed before giving up on this value.
        db.session.commit()
        return
    if term:
        return get_taxonomy_json(code="subjects", slug=term.slug).paginated_data
    return  # pragma: no cover
def add_reference(self, ref):
    """Register every taxonomy term behind a reference link as validated.

    The link is split into slug and taxonomy code; each term of the resulting
    taxonomy JSON is stored via ``_add_term_internal`` and its link is added to
    ``validated_terms``.

    :param ref: link to a taxonomy term
    :raises ValidationError: when ``NoResultFound`` is raised while resolving
        or registering the terms
    """
    slug, taxonomy_code = get_slug_from_link(ref)
    try:
        for term in get_taxonomy_json(code=taxonomy_code, slug=slug).paginated_data:
            term_url = self.term_link(term)
            self._add_term_internal(term_url, term)
            self.validated_terms.add(term_url)
    except NoResultFound:
        raise ValidationError(
            f"Taxonomy term '{taxonomy_code}/{slug}' has not been found")
def provider(el, **kwargs):
    """Build provider and administration metadata for a source provider code.

    :param el: provider code translated through ``provider_mapping()``
    :return: dict with ``_administration`` (state ``"new"``, the slug as
        primary community, no further communities) and ``provider`` (taxonomy
        JSON of the institution), or ``OAITransformer.PROCESSED`` when the
        code has no mapping
    """
    slug = provider_mapping().get(el)
    if slug:
        administration = {
            "state": "new",
            "primaryCommunity": slug,
            "communities": []
        }
        institution = get_taxonomy_json(code="institutions", slug=slug).paginated_data
        return {"_administration": administration, "provider": institution}
    return OAITransformer.PROCESSED  # pragma: no cover
def test_get_taxonomy_json_3(app, db, taxonomy_tree):
    """Smoke test: request every representation flag and print the result."""
    everything = Representation(
        "representation",
        include=[
            INCLUDE_URL,
            INCLUDE_DESCENDANTS_URL,
            INCLUDE_DESCENDANTS_COUNT,
            INCLUDE_ANCESTORS_HIERARCHY,
            INCLUDE_ANCESTORS,
            INCLUDE_ANCESTOR_LIST,
            INCLUDE_DATA,
            INCLUDE_ID,
            INCLUDE_DESCENDANTS,
            INCLUDE_ENVELOPE,
            INCLUDE_DELETED,
            INCLUDE_SLUG,
            INCLUDE_LEVEL,
            INCLUDE_STATUS,
            INCLUDE_SELF,
        ],
    )
    res = get_taxonomy_json(
        code="test_taxonomy", slug="a/b", prefer=everything
    ).paginated_data
    pprint(res)
def parse_place(place: str):
    """Split ``"Place (country)"`` into a place name and a country term.

    The text is split at the last ``"("``. The part before it is the place
    name; the part after it, with ``")"`` removed and lower-cased, is used as
    a slug in the ``countries`` taxonomy. Without a ``"("`` the whole text
    serves as both place name and country slug.

    :param place: raw place string
    :return: dict with ``place`` (when non-empty) and ``country`` (when a
        matching term exists); if the lookup raises ``ProgrammingError`` the
        session is committed and the dict is returned without ``country``
    """
    parts = place.strip().rsplit("(", 1)
    place_name = parts[0].strip()
    country_slug = parts[-1].replace(")", "").strip().lower()
    result = {"place": place_name} if place_name else {}
    try:
        term = get_query_by_slug(taxonomy_code="countries", slug=country_slug).one_or_none()
    except ProgrammingError:
        db.session.commit()
        return result
    if term:
        result["country"] = get_taxonomy_json(code="countries", slug=term.slug).paginated_data
    return result
def degree_grantor_2(el, **kwargs): term = None if isinstance(el, (list, tuple)): for _ in el: term = get_grantor_term(_) if term: break if isinstance(el, dict): term = get_grantor_term(el) if term: return { "degreeGrantor": get_taxonomy_json(code="institutions", slug=term.slug).paginated_data }
def get_person(person, contributor_list, creator_list):
    """Append the creator and/or contributor described by ``person``.

    A truthy ``'a'`` value is appended to ``creator_list`` as ``{"name": ...}``.
    A truthy ``'i'`` value is appended to ``contributor_list``; when
    ``get_role(person.get('e'))`` finds a term, its ``contributor-type``
    taxonomy JSON is stored under ``role``.

    :param person: mapping of subfields
    :param contributor_list: list extended in place
    :param creator_list: list extended in place
    """
    creator_name = person.get('a')
    if creator_name:
        creator_list.append({"name": creator_name})

    contributor_name = person.get('i')
    if not contributor_name:
        return
    contributor = {"name": contributor_name}
    role_term = get_role(person.get('e'))
    if role_term:
        contributor["role"] = get_taxonomy_json(
            code="contributor-type", slug=role_term.slug
        ).paginated_data
    contributor_list.append(contributor)
def degree_grantor(el, **kwargs):
    """Resolve the degree grantor from a delimited institution string.

    The string is split on ``","`` if it contains one, otherwise on ``"."``
    (at most two splits, empty parts dropped); a string with neither is used
    as a single unit. Units are then tried in reverse order and the first one
    for which ``get_institution_term`` returns a term wins.

    :param el: institution string
    :return: ``{"degreeGrantor": <taxonomy JSON>}`` for the first resolved
        unit, otherwise ``OAITransformer.PROCESSED``
    """
    separator = next((sep for sep in (",", ".") if sep in el), None)
    if separator is None:
        units = [el]
    else:
        units = [
            part.strip() for part in el.split(separator, maxsplit=2) if part.strip()
        ]
    reversed_units = units[::-1]
    for level, unit in enumerate(reversed_units):
        term = get_institution_term(unit, reversed_units, level)
        if term:
            return {
                "degreeGrantor": get_taxonomy_json(
                    code="institutions", slug=term.slug).paginated_data
            }
    # No unit could be resolved.
    return OAITransformer.PROCESSED
def resourceType(el, **kwargs):
    """Map a Czech thesis type label to its ``resourceType`` taxonomy JSON.

    :param el: list holding at most one label (e.g. ``"diplomová práce"``)
    :return: ``{"resourceType": <taxonomy JSON>}`` for a known label;
        ``None`` for an unknown label or an empty list
    """
    assert isinstance(el, list)
    assert len(el) <= 1
    if not el:
        # The length check above admits an empty list; previously `el[-1]`
        # then raised IndexError.
        return None
    rt_dict = {
        "diplomová práce": "master_theses",
        "bakalářská práce": "bachelor_theses",
        "dizertační práce": "doctoral_theses",
        "rigorózní práce": "rigorous_theses",
    }
    slug = rt_dict.get(el[-1])
    if not slug:
        return None
    return {
        "resourceType": get_taxonomy_json(
            code="resourceType", slug="theses_etds." + slug).paginated_data
    }
def degree_grantor_3(el, **kwargs):
    """Resolve the degree grantor from a comma-separated institution string.

    The last item of ``el`` is split on ``","``, the stripped units are
    reversed and ``"Univerzita Karlova"`` is appended when absent. Units are
    tried in that order and the first one for which ``get_institution_term``
    returns a term wins.

    :param el: sequence whose last item is the institution string
    :return: ``{"degreeGrantor": <taxonomy JSON>}`` for the first resolved
        unit, otherwise ``OAITransformer.PROCESSED``
    """
    value = el[-1]
    assert isinstance(value, str)
    units = [part.strip() for part in value.split(",")][::-1]
    if "Univerzita Karlova" not in units:
        units.append("Univerzita Karlova")
    for level, unit in enumerate(units):
        term = get_institution_term(unit, units, level)
        if term:
            return {
                "degreeGrantor": get_taxonomy_json(
                    code="institutions", slug=term.slug).paginated_data
            }
    # No unit could be resolved.
    return OAITransformer.PROCESSED
def date_issued(el, **kwargs):
    """Return the issue date, plus access metadata for dates before 2006.

    The date is read from ``el[-1][-1]``. When it parses (via ``arrow``) to
    something earlier than 2006-01-01 and the ``c_16ec`` access-rights lookup
    returns a non-empty result, ``accessRights`` and a Czech/English
    ``accessibility`` statement are added.

    :param el: list of sequences; the last item of the last sequence is the
        date string
    :return: dict with ``dateIssued`` and optionally ``accessRights`` and
        ``accessibility``
    """
    assert isinstance(el, list), "Element should be list"
    issued = el[-1][-1]
    result = {"dateIssued": issued}
    if arrow.get(issued) < arrow.get("2006-01-01"):
        access_rights = get_taxonomy_json(code="accessRights", slug="c_16ec").paginated_data
        if access_rights:
            result["accessRights"] = access_rights
            result["accessibility"] = {
                "cs": "Dostupné v digitálním repozitáři UK (pouze z IP adres univerzity).",
                "en": "Available in the Charles University Digital Repository (accessible only from "
                      "computers with university IP address)."
            }
    return result
def resolve_links(self, in_data, **kwargs):
    """Expand a taxonomy reference into the full taxonomy JSON.

    A dict is expected to carry the link under ``links.self``; a string is
    passed through ``extract_link`` and, when a link is found, wrapped into
    ``{"links": {"self": link}}``. The referenced term's JSON is then merged
    into that dict.

    :param in_data: dict with ``links.self`` or a string containing a link
    :return: the dict updated with the taxonomy JSON
    :raises TypeError: when ``in_data`` is neither dict nor str
    :raises ValidationError: when no link could be obtained
    :raises NoResultFound: when the lookup raises ``NoResultFound``
        (re-raised with a descriptive message)
    """
    if isinstance(in_data, str):
        link = extract_link(in_data)
        if link:
            in_data = {"links": {"self": link}}
    elif isinstance(in_data, dict):
        try:
            link = in_data["links"]["self"]
        except KeyError:
            link = None
    else:
        raise TypeError("Input data have to be json or string")

    if not link:
        raise ValidationError(
            "Input data does not contain link to taxonomy reference")

    slug, taxonomy_code = get_slug_from_link(link)
    try:
        in_data.update(**get_taxonomy_json(code=taxonomy_code, slug=slug).paginated_data)
    except NoResultFound:
        raise NoResultFound(
            f"Taxonomy '{taxonomy_code}/{slug}' has not been found")
    return in_data
def get_funder_from_id(funder_id: str):
    """Return ``funders`` taxonomy JSON for a project identifier.

    The first two characters of ``funder_id`` are translated to a funder slug
    through a fixed table; the slug is then looked up in the ``funders``
    taxonomy.

    :param funder_id: project identifier; only its two-character prefix is used
    :return: taxonomy JSON of the funder, or ``None`` when the prefix is
        unknown or no term with the mapped slug exists
    """
    slug_by_prefix = {
        '1A': 'MZ0', '1B': 'MZE', '1C': 'MZP', '1D': 'MZP', '1E': 'AV0', '1F': 'MD0',
        '1G': 'MZE', '1H': 'MPO', '1I': 'MZP', '1J': 'MPS', '1K': 'MSM', '1L': 'MSM',
        '1M': 'MSM', '1N': 'MSM', '1P': 'MSM', '1Q': 'AV0', '1R': 'MZE', '2A': 'MPO',
        '2B': 'MSM', '2C': 'MSM', '2D': 'MSM', '2E': 'MSM', '2F': 'MSM', '2G': 'MSM',
        '7A': 'MSM', '7B': 'MSM', '7C': 'MSM', '7D': 'MSM', '7E': 'MSM', '7F': 'MSM',
        '7G': 'MSM', '7H': 'MSM', '8A': 'MSM', '8B': 'MSM', '8C': 'MSM', '8D': 'MSM',
        '8E': 'MSM', '8F': 'MSM', '8G': 'MSM', '8H': 'MSM', '8J': 'MSM', '8X': 'MSM',
        'AA': 'CBU', 'AB': 'CBU', 'BI': 'BIS', 'CA': 'MD0', 'CB': 'MD0', 'CC': 'MD0',
        'CD': 'MI0', 'CE': 'MD0', 'CF': 'MI0', 'CG': 'MD0', 'CI': 'MD0', 'DA': 'MK0',
        'DB': 'MK0', 'DC': 'MK0', 'DD': 'MK0', 'DE': 'MK0', 'DF': 'MK0', 'DG': 'MK0',
        'DM': 'MK0', 'EA': 'MPO', 'EB': 'MPO', 'EC': 'MPO', 'ED': 'MSM', 'EE': 'MSM',
        'EF': 'MSM', 'EG': 'MPO', 'EP': 'MZE', 'FA': 'MPO', 'FB': 'MPO', 'FC': 'MPO',
        'FD': 'MPO', 'FE': 'MPO', 'FF': 'MPO', 'FI': 'MPO', 'FR': 'MPO', 'FT': 'MPO',
        'FV': 'MPO', 'GA': 'GA0', 'GB': 'GA0', 'GC': 'GA0', 'GD': 'GA0', 'GE': 'GA0',
        'GF': 'GA0', 'GH': 'GA0', 'GJ': 'GA0', 'GK': 'MK0', 'GP': 'GA0', 'GS': 'GA0',
        'GV': 'GA0', 'GX': 'GA0', 'HA': 'MPS', 'HB': 'MPS', 'HC': 'MPS', 'HR': 'MPS',
        'HS': 'MPS', 'IA': 'AV0', 'IB': 'AV0', 'IC': 'AV0', 'ID': 'MSM', 'IE': 'MZE',
        'IN': 'MSM', 'IP': 'AV0', 'IS': 'MSM', 'JA': 'SUJ', 'JB': 'SUJ', 'JC': 'SUJ',
        'KA': 'AV0', 'KJ': 'AV0', 'KK': 'MK0', 'KS': 'AV0', 'KZ': 'MK0', 'LA': 'MSM',
        'LB': 'MSM', 'LC': 'MSM', 'LD': 'MSM', 'LE': 'MSM', 'LF': 'MSM', 'LG': 'MSM',
        'LH': 'MSM', 'LI': 'MSM', 'LJ': 'MSM', 'LK': 'MSM', 'LL': 'MSM', 'LM': 'MSM',
        'LN': 'MSM', 'LO': 'MSM', 'LP': 'MSM', 'LQ': 'MSM', 'LR': 'MSM', 'LS': 'MSM',
        'LT': 'MSM', 'LZ': 'MSM', 'ME': 'MSM', 'MI': 'URV', 'MP': 'MPO', 'NA': 'MZ0',
        'NB': 'MZ0', 'NC': 'MZ0', 'ND': 'MZ0', 'NE': 'MZ0', 'NF': 'MZ0', 'NG': 'MZ0',
        'NH': 'MZ0', 'NI': 'MZ0', 'NJ': 'MZ0', 'NK': 'MZ0', 'NL': 'MZ0', 'NM': 'MZ0',
        'NN': 'MZ0', 'NO': 'MZ0', 'NR': 'MZ0', 'NS': 'MZ0', 'NT': 'MZ0', 'NV': 'MZ0',
        'OB': 'MO0', 'OC': 'MSM', 'OD': 'MO0', 'OE': 'MSM', 'OF': 'MO0', 'OK': 'MSM',
        'ON': 'MO0', 'OP': 'MO0', 'OR': 'MO0', 'OS': 'MO0', 'OT': 'MO0', 'OV': 'MO0',
        'OW': 'MO0', 'PD': 'MD0', 'PG': 'MSM', 'PK': 'MK0', 'PL': 'MZ0', 'PR': 'MPO',
        'QA': 'MZE', 'QB': 'MZE', 'QC': 'MZE', 'QD': 'MZE', 'QE': 'MZE', 'QF': 'MZE',
        'QG': 'MZE', 'QH': 'MZE', 'QI': 'MZE', 'QJ': 'MZE', 'QK': 'MZE', 'RC': 'MS0',
        'RD': 'MS0', 'RE': 'MZE', 'RK': 'MK0', 'RM': 'MZV', 'RP': 'MPO', 'RZ': 'MZ0',
        'SA': 'MZP', 'SC': 'MZP', 'SD': 'MZP', 'SE': 'MZP', 'SG': 'MZP', 'SH': 'MZP',
        'SI': 'MZP', 'SJ': 'MZP', 'SK': 'MZP', 'SL': 'MZP', 'SM': 'MZP', 'SN': 'MZP',
        'SP': 'MZP', 'ST': 'NBU', 'SU': 'NBU', 'SZ': 'MZP', 'TA': 'TA0', 'TB': 'TA0',
        'TD': 'TA0', 'TE': 'TA0', 'TF': 'TA0', 'TG': 'TA0', 'TH': 'TA0', 'TI': 'TA0',
        'TJ': 'TA0', 'TK': 'TA0', 'TL': 'TA0', 'TM': 'TA0', 'TN': 'TA0', 'TO': 'TA0',
        'TR': 'MPO', 'UB': 'KHK', 'UC': 'KHK', 'UD': 'KLI', 'UE': 'KKV', 'UF': 'KHP',
        'UH': 'KHP', 'US': 'MV0', 'VA': 'MV0', 'VD': 'MV0', 'VE': 'MV0', 'VF': 'MV0',
        'VG': 'MV0', 'VH': 'MV0', 'VI': 'MV0', 'VS': 'MSM', 'VV': 'MSM', 'VZ': 'MSM',
        'WA': 'MMR', 'WB': 'MMR', 'WD': 'MMR', 'WE': 'MMR', 'YA': 'MI0', 'ZO': 'MZP',
        'ZZ': 'MZP', 'RV': 'MPS', 'PV': 'MSM', 'PT': 'MH0', 'RS': 'MSM', 'PZ': 'MH0',
        'RB': 'MZV', 'PE': 'MH0', 'RN': 'MV0', 'PI': 'MH0', 'RH': 'MH0', 'PO': 'MH0',
        'IZ': 'MZ0', 'ZK': 'CUZ', 'RO': 'MO0', 'RR': 'MZP', 'TC': 'MPO', 'MO': 'MO0',
        'MH': 'MH0', 'MJ': 'URV', 'MR': 'MZP', 'SB': 'MZP', 'OU': 'MSM', 'UA': 'KUL',
        'SF': 'MZP', '8I': 'MSM', 'CK': 'TA0', 'FW': 'TA0', 'FX': 'MPO', 'GM': 'GA0',
        'NU': 'MZ0', 'OY': 'MO0', 'SS': 'TA0', 'TP': 'TA0', 'VJ': 'MV0'
    }
    slug = slug_by_prefix.get(funder_id[:2])
    if not slug:
        return None
    term = get_query_by_slug(taxonomy_code="funders", slug=slug).one_or_none()
    if term:
        return get_taxonomy_json(code="funders", slug=term.slug).paginated_data
    return  # pragma: no cover