def register_update_doc_into_kernel(xml_data):
    """Register or update a document, then its PDF renditions, in the Kernel.

    The document is sent with a PUT to ``/documents/<scielo_id>`` carrying the
    XML URL and its assets. Once that succeeds, every entry found under the
    optional ``"pdfs"`` key is sent with a PATCH to
    ``/documents/<scielo_id>/renditions``.

    :param xml_data: mapping that must provide ``"xml_url"``, ``"assets"`` and
        ``"scielo_id"``; ``"xml_package_name"`` is read only to build the error
        message and ``"pdfs"`` is optional. Each PDF entry must provide
        ``"filename"``.
    :raises RegisterUpdateDocIntoKernelException: when the Kernel answers
        either request with an HTTP error. The original exception context is
        suppressed on purpose (``from None``).
    """
    document_payload = {
        "data": xml_data["xml_url"],
        "assets": xml_data["assets"],
    }
    document_endpoint = "/documents/{}".format(xml_data["scielo_id"])

    try:
        hooks.kernel_connect(document_endpoint, "PUT", document_payload)
    except requests.exceptions.HTTPError as exc:
        raise RegisterUpdateDocIntoKernelException(
            'Could not PUT document "{}" in Kernel : {}'.format(
                xml_data["xml_package_name"], str(exc)
            )
        ) from None

    # Renditions are only sent after the document itself was accepted.
    renditions_endpoint = "/documents/{}/renditions".format(xml_data["scielo_id"])
    for rendition in xml_data.get("pdfs", []):
        Logger.info('Putting Rendition "%s" to Kernel', rendition["filename"])
        try:
            hooks.kernel_connect(renditions_endpoint, "PATCH", rendition)
        except requests.exceptions.HTTPError as exc:
            raise RegisterUpdateDocIntoKernelException(
                'Could not PATCH rendition "{}" in Kernel : {}'.format(
                    rendition["filename"], str(exc)
                )
            ) from None
def register_or_update(_id: str, payload: dict, entity_url: str):
    """Register or update a Kernel entity from a payload.

    The entity is first fetched with a GET on ``entity_url + _id``:

    * If the Kernel answers 404, the entity is created with a PUT carrying
      only the truthy payload values.
    * Any other HTTP error is re-raised unchanged.
    * If the entity exists, the payload is filtered against the stored
      metadata and a PATCH is sent only when ``DeepDiff`` reports a
      difference between the stored metadata and the filtered payload.

    :param _id: entity identifier, appended to ``entity_url``.
    :param payload: entity data to be registered or updated.
    :param entity_url: endpoint prefix of the entity.
    :return: the response of the last request performed (PUT, PATCH, or the
        initial GET when no update was needed).
    """
    endpoint = "{}{}".format(entity_url, _id)

    try:
        response = hooks.kernel_connect(endpoint=endpoint, method="GET")
    except requests.exceptions.HTTPError as exc:
        status_code = exc.response.status_code
        logging.info("hooks.kernel_connect HTTPError: %d", status_code)
        if status_code != http.client.NOT_FOUND:
            raise
        # Entity does not exist yet: create it, skipping empty values.
        payload = {key: value for key, value in payload.items() if value}
        return hooks.kernel_connect(endpoint=endpoint, method="PUT", data=payload)

    stored_metadata = response.json()["metadata"]

    def _must_send(key, value):
        # Keep a key when the Kernel already stores something for it, when
        # the stored (falsy) value equals the new one, or when the new value
        # is truthy.
        stored_value = stored_metadata.get(key)
        return bool(stored_value or stored_value == value or value)

    payload = {
        key: value for key, value in payload.items() if _must_send(key, value)
    }
    if DeepDiff(stored_metadata, payload, ignore_order=True):
        response = hooks.kernel_connect(
            endpoint=endpoint, method="PATCH", data=payload
        )
    return response
def update_aop_bundle_items(issn_id, documents_list):
    """Remove from a journal's Ahead of Print bundle the given documents.

    The journal is fetched from ``/journals/<issn_id>`` to discover its
    ``"aop"`` bundle id. When there is one, the bundle items are fetched, the
    items whose id appears in ``documents_list`` are dropped, and the
    remaining items are sent back through ``update_documents_in_bundle``.

    :param issn_id: journal identifier used in the ``/journals/`` endpoint.
    :param documents_list: list of dicts, each providing an ``"id"`` key.
    :return: list with one dict per item removed from the AOP bundle, with
        the keys ``"pid"``, ``"bundle_id"``, ``"ex_ahead"`` and ``"removed"``.
        Empty when the journal has no AOP bundle.
    :raises LinkDocumentToDocumentsBundleException: when fetching the journal
        or the AOP bundle fails with an HTTP error.
    """
    try:
        journal_resp = hooks.kernel_connect(f"/journals/{issn_id}", "GET")
    except requests.exceptions.HTTPError as exc:
        raise LinkDocumentToDocumentsBundleException(str(exc))

    removed_items = []
    aop_bundle_id = journal_resp.json().get("aop")
    if aop_bundle_id is None:
        return removed_items

    try:
        aop_bundle_resp = hooks.kernel_connect(f"/bundles/{aop_bundle_id}", "GET")
    except requests.exceptions.HTTPError as exc:
        raise LinkDocumentToDocumentsBundleException(str(exc))

    current_items = aop_bundle_resp.json()["items"]
    incoming_ids = [document["id"] for document in documents_list]

    remaining_items = []
    for item in current_items:
        item_id = item["id"]
        if item_id in incoming_ids:
            Logger.info(
                'Movindo ex-Ahead of Print "%s" to bundle',
                item_id,
            )
            removed_items.append(
                {
                    "pid": item_id,
                    "bundle_id": aop_bundle_id,
                    "ex_ahead": True,
                    "removed": True,
                }
            )
        else:
            remaining_items.append(item)

    # The bundle is rewritten even when no item was removed.
    update_documents_in_bundle(aop_bundle_id, remaining_items)
    return removed_items
def create_aop_bundle(bundle_id):
    """Create an Ahead of Print bundle and attach it to its journal.

    A PUT on ``/bundles/<bundle_id>`` creates the bundle; afterwards a PATCH
    on ``/journals/<journal>/aop`` registers it as the journal's AOP bundle.

    :param bundle_id: bundle identifier; its first 9 characters are used as
        the journal identifier (presumably the ISSN prefix -- confirm against
        the bundle id format).
    :raises LinkDocumentToDocumentsBundleException: when the bundle PUT fails
        with an HTTP error. An HTTP error from the journal PATCH is not
        converted and propagates as raised by ``hooks.kernel_connect``.
    """
    bundle_endpoint = "/bundles/" + bundle_id
    try:
        hooks.kernel_connect(bundle_endpoint, "PUT")
    except requests.exceptions.HTTPError as exc:
        raise LinkDocumentToDocumentsBundleException(str(exc))

    journal_id = bundle_id[:9]
    hooks.kernel_connect(
        "/journals/{}/aop".format(journal_id), "PATCH", {"aop": bundle_id}
    )
def get_or_create_bundle(bundle_id, is_aop):
    """Fetch a bundle from the Kernel, creating it first if it is a missing AOP.

    :param bundle_id: bundle identifier used in the ``/bundles/`` endpoint.
    :param is_aop: when truthy and the Kernel answers 404, the bundle is
        created through ``create_aop_bundle`` and fetched again.
    :return: the response of the successful GET request.
    :raises LinkDocumentToDocumentsBundleException: when the GET fails for any
        other reason, or when the GET after creation fails. The exception
        carries the failed HTTP response in ``response``.
    """
    endpoint = "/bundles/" + bundle_id
    try:
        return hooks.kernel_connect(endpoint, "GET")
    except requests.exceptions.HTTPError as exc:
        missing_aop = is_aop and exc.response.status_code == http.client.NOT_FOUND
        if not missing_aop:
            raise LinkDocumentToDocumentsBundleException(
                str(exc), response=exc.response
            )

        create_aop_bundle(bundle_id)
        try:
            return hooks.kernel_connect(endpoint, "GET")
        except requests.exceptions.HTTPError as retry_exc:
            raise LinkDocumentToDocumentsBundleException(
                str(retry_exc), response=retry_exc.response
            )
def fetch_data(endpoint, json=True):
    """Fetch the content of a Kernel endpoint with a GET request.

    When the ``KERNEL_FETCH_DATA_TIMEOUT`` variable is set to a truthy value,
    it is converted to ``int`` and passed to ``kernel_connect`` as ``timeout``.

    :param endpoint: Kernel endpoint to be requested.
    :param json: when truthy (default) the decoded JSON body is returned,
        otherwise the raw response content. Note that inside this function
        the name shadows the ``json`` module.
    :return: ``response.json()`` or ``response.content``.
    """
    request_args = {"endpoint": endpoint, "method": "GET"}

    configured_timeout = Variable.get("KERNEL_FETCH_DATA_TIMEOUT", default_var=None)
    if configured_timeout:
        request_args["timeout"] = int(configured_timeout)

    response = kernel_connect(**request_args)
    return response.json() if json else response.content
def register_document_to_documentsbundle(bundle_id, payload):
    """Link documents to their issue (DocumentsBundle).

    Sends a PUT to the Kernel endpoint ``/bundles/<bundle_id>/documents``.

    :param bundle_id: bundle identifier interpolated into the endpoint.
    :param payload: data sent as the request payload.
    :return: the response returned by ``hooks.kernel_connect``.
    :raises LinkDocumentToDocumentsBundleException: when the Kernel answers
        with an HTTP error; the original exception context is suppressed.
    """
    endpoint = "/bundles/%s/documents" % bundle_id
    try:
        return hooks.kernel_connect(endpoint, "PUT", payload)
    except requests.exceptions.HTTPError as exc:
        raise LinkDocumentToDocumentsBundleException(str(exc)) from None
def get_document_manifest(doc_id):
    """Fetch and decode a document manifest from the Kernel.

    :param doc_id: document identifier used in
        ``/documents/<doc_id>/manifest``.
    :return: the response text decoded with ``json.loads``.
    :raises GetDocManifestFromKernelException: when the Kernel answers with an
        HTTP error; the original exception context is suppressed.
    """
    manifest_endpoint = "/documents/" + doc_id + "/manifest"
    try:
        manifest_response = hooks.kernel_connect(manifest_endpoint, "GET")
    except requests.exceptions.HTTPError as exc:
        error_message = 'Could not GET document "{}" in Kernel : {}'.format(
            doc_id, str(exc)
        )
        raise GetDocManifestFromKernelException(error_message) from None
    return json.loads(manifest_response.text)
def update_documents_in_bundle(bundle_id, payload):
    """Replace the documents linked to an issue (DocumentsBundle).

    Logs the update and sends a PUT to the Kernel endpoint
    ``/bundles/<bundle_id>/documents``.

    :param bundle_id: bundle identifier interpolated into the endpoint.
    :param payload: data sent as the request payload.
    :return: the response returned by ``hooks.kernel_connect``.
    :raises LinkDocumentToDocumentsBundleException: when the Kernel answers
        with an HTTP error; the original exception context is suppressed.
    """
    Logger.info('Updating Bundle "%s" with Documents: %s', bundle_id, payload)
    endpoint = "/bundles/%s/documents" % bundle_id
    try:
        return hooks.kernel_connect(endpoint, "PUT", payload)
    except requests.exceptions.HTTPError as exc:
        raise LinkDocumentToDocumentsBundleException(str(exc)) from None
def _try_journal_patch(payload, endpoint):
    """Try to PATCH a Kernel endpoint, reporting success as a boolean.

    This is a best-effort operation: every failure is logged and swallowed so
    the caller can decide what to do based on the returned flag.

    :param payload: data sent as the PATCH body.
    :param endpoint: Kernel endpoint to be patched.
    :return: ``True`` when the request succeeded, ``False`` when
        ``hooks.kernel_connect`` raised any exception.
    """
    try:
        # Only the request can raise; logging is kept out of the try body.
        hooks.kernel_connect(endpoint=endpoint, method="PATCH", data=payload)
    except requests.exceptions.HTTPError:
        logging.info(
            "Erro ao tentar realizar um PATCH no endpoint: %s, payload: %s",
            endpoint,
            payload,
        )
        return False
    except Exception as exc:
        # Deliberately broad: unexpected failures must not abort the caller.
        logging.info(
            "Erro inesperado ao tentar realizar um PATCH no endpoint: %s, payload: %s, erro: %s",
            endpoint,
            payload,
            str(exc),
        )
        return False

    logging.info(
        "Sucesso ao realizar um PATCH no endpoint: %s, payload: %s",
        endpoint,
        payload,
    )
    return True
def fetch_data(endpoint):
    """Return the decoded JSON body of a GET request to a Kernel endpoint.

    :param endpoint: Kernel endpoint to be requested.
    :return: the result of calling ``.json()`` on the response.
    """
    response = kernel_connect(endpoint=endpoint, method="GET")
    return response.json()
def delete_doc_from_kernel(doc_to_delete):
    """Delete a document from the Kernel.

    :param doc_to_delete: document identifier used in
        ``/documents/<doc_to_delete>``.
    :raises DeleteDocFromKernelException: when the Kernel answers with an HTTP
        error; the original exception context is suppressed.
    """
    document_endpoint = "/documents/" + doc_to_delete
    try:
        hooks.kernel_connect(document_endpoint, "DELETE")
    except requests.exceptions.HTTPError as exc:
        raise DeleteDocFromKernelException(str(exc)) from None
def link_documents_to_documentsbundle(documents, issn_index_json_path):
    """Link documents to their issues (DocumentsBundle) in the Kernel.

    Documents are grouped by the bundle id computed by ``issue_id`` from the
    journal ISSN id (looked up in the ISSN index file), year, volume, number
    and supplement. For each bundle, the current items are fetched from
    ``/bundles/<bundle_id>``, merged with the new ones and, only when the
    merged list differs from the current one, sent through
    ``register_document_to_documentsbundle``.

    :param documents: list of dicts with the attributes needed to discover
        the issue. Documents whose ISSN is not in the index are logged and
        skipped. Minimal example::

            [
                {
                    "scielo_id": "S0034-8910.2014048004923",
                    "issn": "0034-8910",
                    "year": "2014",
                    "volume": "48",
                    "number": "2",
                    "order": "347",
                },
                {
                    "scielo_id": "S0034-8910.20140078954641",
                    "issn": "1518-8787",
                    "year": "2014",
                    "volume": "02",
                    "number": "2",
                    "order": "978",
                    "supplement": "1",
                },
            ]

    :param issn_index_json_path: path of a JSON file mapping each ISSN to the
        journal ISSN id.
    :return: list with one entry per bundle that was actually updated, e.g.
        ``[{"id": "0034-8910-2014-v48-n2", "status": 204}]``. Empty when there
        are no documents or nothing needed updating.
    :raises LinkDocumentToDocumentsBundleException: when the Kernel answers
        with an HTTP error.
    """

    def _update_items_list(new_items: list, current_items: list) -> list:
        """Return an updated items list built from the current and new items."""
        items = deepcopy(current_items)
        for new_item in new_items:
            for index, current_item in enumerate(items):
                if new_item["id"] == current_item["id"]:
                    # Same document: replace it in place, keeping its position.
                    items[index] = new_item
                    break
            else:
                items.append(new_item)
        return items

    Logger.info("link_documents_to_documentsbundle PUT")

    ret = []
    bundle_id_doc = {}
    if documents:
        Logger.info('Reading ISSN index file %s', issn_index_json_path)
        # JSON text is UTF-8 by specification.
        with open(issn_index_json_path, encoding="utf-8") as issn_index_file:
            issn_index = json.load(issn_index_file)

        # Group the documents by the bundle they belong to.
        for doc in documents:
            issn = doc["issn"]
            try:
                issn_id = issn_index[issn]
            except KeyError:
                Logger.info(
                    'Could not get journal ISSN ID: ISSN id "%s" not found',
                    issn,
                )
                continue

            bundle_id = issue_id(
                issn_id=issn_id,
                year=doc.get("year"),
                volume=doc.get("volume", None),
                number=doc.get("number", None),
                supplement=doc.get("supplement", None),
            )
            bundle_id_doc.setdefault(bundle_id, []).append(
                {"id": doc.get("scielo_id"), "order": doc.get("order")}
            )

        for bundle_id, new_items in bundle_id_doc.items():
            try:
                conn_response = kernel_connect("/bundles/" + bundle_id, "GET")
                current_items = conn_response.json()["items"]
                payload = _update_items_list(new_items, current_items)
                # Avoid a needless PUT when the merge changed nothing.
                if DeepDiff(current_items, payload, ignore_order=True):
                    response = register_document_to_documentsbundle(
                        bundle_id, payload
                    )
                    ret.append({"id": bundle_id, "status": response.status_code})
                    logging.info(
                        "The bundle %s items list has been updated.", bundle_id
                    )
                else:
                    logging.info(
                        "The bundle %s items does not need to be updated.",
                        bundle_id,
                    )
            except requests.exceptions.HTTPError as exc:
                raise LinkDocumentToDocumentsBundleException(str(exc)) from None

    # Log before returning so the message is always emitted, and always
    # return the list, even when there were no documents.
    Logger.info("link_documents_to_documentsbundle OUT")
    return ret