def test_put_assets_and_pdfs_in_object_store_reads_each_asset_from_xml(
    self, mk_put_object_in_object_store
):
    # Every asset listed in the XML data must be read from the ZIP file by
    # its ``asset_id``, and whatever was read must be passed to the mocked
    # object-store upload along with the document ISSN and SciELO ID.
    zip_mock = MagicMock()
    put_assets_and_pdfs_in_object_store(zip_mock, self.xml_data)

    read_mock = zip_mock.read
    for asset in self.xml_data["assets"]:
        with self.subTest(asset=asset):
            asset_id = asset["asset_id"]
            read_mock.assert_any_call(asset_id)
            expected_args = (
                read_mock.return_value,
                self.xml_data["issn"],
                self.xml_data["scielo_id"],
                asset_id,
            )
            mk_put_object_in_object_store.assert_any_call(*expected_args)
    def test_put_assets_and_pdfs_in_object_store_reads_each_pdf_from_xml(
        self, mk_put_object_in_object_store
    ):
        # Every PDF listed in the XML data must be read from the ZIP file by
        # its ``filename``, and the bytes read must be passed to the mocked
        # object-store upload along with the document ISSN and SciELO ID.
        zip_mock = MagicMock()
        read_mock = zip_mock.read
        read_mock.return_value = b""
        put_assets_and_pdfs_in_object_store(zip_mock, self.xml_data)

        for pdf in self.xml_data["pdfs"]:
            with self.subTest(pdf=pdf):
                pdf_name = pdf["filename"]
                read_mock.assert_any_call(pdf_name)
                expected_args = (
                    read_mock.return_value,
                    self.xml_data["issn"],
                    self.xml_data["scielo_id"],
                    pdf_name,
                )
                mk_put_object_in_object_store.assert_any_call(*expected_args)
    def test_put_assets_and_pdfs_in_object_store_return_data_pdf(
            self, mk_put_object_in_object_store):
        # Each returned PDF must carry the URL given back by the object store
        # and the size (``len``) of the content read from the ZIP file.
        expected = copy.deepcopy(self.xml_data)
        for pdf in expected["pdfs"]:
            pdf["data_url"] = "http://minio/documentstore/{}".format(
                pdf["filename"])
            pdf["size_bytes"] = random.randint(80000, 100000)
        pdfs_size = [pdf["size_bytes"] for pdf in expected["pdfs"]]

        # The object returned by ``read`` reports one expected size per
        # ``len()`` call, in the order the PDFs are listed.
        mk_read_file = MagicMock(return_value=b"")
        mk_read_file.__len__.side_effect = pdfs_size
        MockZipFile = Mock()
        MockZipFile.read.return_value = mk_read_file
        # The two leading ``None`` values are consumed by the uploads made
        # before the PDFs (presumably the two fixture assets - confirm against
        # ``xml_data``); the trailing ``None`` is spare.
        mk_put_object_in_object_store.side_effect = (
            [None, None] + [pdf["data_url"]
                            for pdf in expected["pdfs"]] + [None])

        result = put_assets_and_pdfs_in_object_store(MockZipFile,
                                                     self.xml_data)

        # zip() stops at the shortest input, so without this check an empty
        # or truncated result would let the loop below pass vacuously.
        self.assertEqual(len(expected["pdfs"]), len(result["pdfs"]))
        for expected_pdf, result_pdf in zip(expected["pdfs"], result["pdfs"]):
            self.assertEqual(expected_pdf["filename"], result_pdf["filename"])
            self.assertEqual(expected_pdf["data_url"], result_pdf["data_url"])
            self.assertEqual(expected_pdf["size_bytes"],
                             result_pdf["size_bytes"])
    def test_put_assets_and_pdfs_in_object_store_returns_only_read_assets_and_pdfs(
            self, mk_put_object_in_object_store, MockLogger):
        # Only files that could be read from the ZIP file must be returned.
        # The second and third reads raise ``KeyError``, so the second asset
        # and the first PDF are expected to be left out of the result.
        MockZipFile = MagicMock()
        MockZipFile.read.side_effect = [
            b"",
            KeyError("File not found in the archive"),
            KeyError("File not found in the archive"),
            b"",
        ]
        # Deep-copy the fixture entries: a plain slice shares the inner dicts
        # with ``self.xml_data``, so filling in the expected URLs below would
        # also pre-populate the input handed to the function under test and
        # could hide a function that never sets those keys.
        expected = {
            "assets": copy.deepcopy(self.xml_data["assets"][:1]),
            "pdfs": copy.deepcopy(self.xml_data["pdfs"][1:]),
        }
        mk_minio_result = [
            "http://minio/documentstore/{}".format(
                expected["assets"][0]["asset_id"]),
            "http://minio/documentstore/{}".format(
                expected["pdfs"][0]["filename"]),
        ]
        mk_put_object_in_object_store.side_effect = mk_minio_result
        expected["assets"][0]["asset_url"] = mk_minio_result[0]
        expected["pdfs"][0]["data_url"] = mk_minio_result[1]
        # The successful reads return ``b""``, hence a size of zero bytes.
        expected["pdfs"][0]["size_bytes"] = 0

        result = put_assets_and_pdfs_in_object_store(MockZipFile,
                                                     self.xml_data)
        self.assertEqual(result, expected)
 def test_put_assets_and_pdfs_in_object_store_logs_error_if_file_not_found_in_zip(
     self, mk_put_object_in_object_store, MockLogger
 ):
     # When a file cannot be read from the ZIP file (``KeyError``), a message
     # naming the asset or PDF, the ZIP file and the error must be logged.
     zip_mock = MagicMock()
     read_outcomes = [b""]
     read_outcomes.append(KeyError("File not found in the archive"))
     read_outcomes.append(KeyError("File not found in the archive"))
     read_outcomes.append(b"")
     zip_mock.read.side_effect = read_outcomes

     put_assets_and_pdfs_in_object_store(zip_mock, self.xml_data)

     # The second read fails, which corresponds to the second asset.
     unread_asset = self.xml_data["assets"][1]
     MockLogger.info.assert_any_call(
         'Could not read asset "%s" from zipfile "%s": %s',
         unread_asset["asset_id"],
         zip_mock,
         "'File not found in the archive'",
     )
     # The third read fails, which corresponds to the first PDF.
     unread_pdf = self.xml_data["pdfs"][0]
     MockLogger.info.assert_any_call(
         'Could not read PDF "%s" from zipfile "%s": %s',
         unread_pdf["filename"],
         zip_mock,
         "'File not found in the archive'",
     )
Ejemplo n.º 6
0
def register_update_documents(sps_package, xmls_to_preserve):
    """Register or update documents, their digital assets and renditions.

    Each XML in ``xmls_to_preserve`` is read from the ``sps_package`` ZIP
    file and put into the object store (Minio) together with its assets and
    PDFs; the resulting metadata is then registered or updated in Kernel.
    A failure on one document is logged and does not stop the others.

    Args:
        sps_package: path (or file object) of the SPS package, passed to
            ``ZipFile``.
        xmls_to_preserve: list of XML file names to keep in Kernel
            (register or update).

    Returns:
        list: the ``xml_data`` of every document successfully registered or
        updated in Kernel.
    """
    Logger.debug("register_update_documents IN")
    synchronized_docs_metadata = []
    with ZipFile(sps_package) as zipfile:
        total = len(xmls_to_preserve)
        # Count from 1 so the progress reads [1/N] ... [N/N] instead of
        # stopping one short of the total.
        for position, xml_filename in enumerate(xmls_to_preserve, start=1):
            Logger.info(
                'Reading XML file "%s" from ZIP file "%s" [%s/%s]',
                xml_filename,
                sps_package,
                position,
                total,
            )
            try:
                xml_data = put_xml_into_object_store(zipfile, xml_filename)
            except PutXMLInObjectStoreException as exc:
                Logger.info(
                    'Could not put document "%s" in object store: %s',
                    xml_filename,
                    str(exc),
                )
                continue

            assets_and_pdfs_data = put_assets_and_pdfs_in_object_store(
                zipfile, xml_data)
            # Kernel receives a copy enriched with the asset/PDF data; the
            # returned list keeps the original ``xml_data``.
            _document_metadata = deepcopy(xml_data)
            _document_metadata.update(assets_and_pdfs_data)
            try:
                register_update_doc_into_kernel(_document_metadata)
            except RegisterUpdateDocIntoKernelException as exc:
                Logger.info(
                    'Could not register or update document "%s" in Kernel: %s',
                    xml_filename,
                    str(exc),
                )
                continue

            synchronized_docs_metadata.append(xml_data)

    Logger.debug("register_update_documents OUT")

    return synchronized_docs_metadata
Ejemplo n.º 7
0
    def test_put_assets_and_pdfs_in_object_store_return_data_asset(
        self, mk_put_object_in_object_store
    ):
        # Each returned asset must carry the URL given back by the object
        # store for that asset.
        expected = copy.deepcopy(self.xml_data)
        for asset in expected["assets"]:
            asset["asset_url"] = "http://minio/documentstore/{}".format(
                asset["asset_id"]
            )
        MockZipFile = MagicMock()
        MockZipFile.read.return_value = b""
        # Asset URLs are returned first; the trailing ``None`` values cover
        # the uploads made afterwards (presumably the PDFs - confirm against
        # the ``xml_data`` fixture).
        mk_put_object_in_object_store.side_effect = [
            asset["asset_url"] for asset in expected["assets"]
        ] + [None, None, None]

        result = put_assets_and_pdfs_in_object_store(MockZipFile, self.xml_data)

        # zip() stops at the shortest input, so without this check an empty
        # or truncated result would let the loop below pass vacuously.
        self.assertEqual(len(expected["assets"]), len(result["assets"]))
        for expected_asset, result_asset in zip(expected["assets"], result["assets"]):
            self.assertEqual(expected_asset["asset_id"], result_asset["asset_id"])
            self.assertEqual(expected_asset["asset_url"], result_asset["asset_url"])
def register_update_documents(sps_package, xmls_to_preserve):
    """Register or update documents, their digital assets and renditions.

    Each XML in ``xmls_to_preserve`` is read from the ``sps_package`` ZIP
    file and put into the object store (Minio) together with its assets and
    PDFs; the resulting metadata is then registered or updated in Kernel.
    A failure on one document is logged and does not stop the others.

    Documents without a ``scielo-id`` must not be registered (original
    note; presumably enforced by ``put_xml_into_object_store`` raising
    ``Pidv3Exception`` - confirm).

    Args:
        sps_package: path (or file object) of the SPS package, passed to
            ``ZipFile``.
        xmls_to_preserve: list of XML file names to keep in Kernel
            (register or update).

    Returns:
        tuple: ``(synchronized_docs_metadata, executions)`` where

        * ``synchronized_docs_metadata`` is the list of ``xml_data`` of every
          document successfully registered or updated in Kernel;
        * ``executions`` has one dict per XML file with ``file_name`` and,
          depending on the outcome, ``pid``, ``payload`` (success) or
          ``failed``/``error`` (failure).
    """
    executions = []
    synchronized_docs_metadata = []

    Logger.debug("register_update_documents IN")
    with ZipFile(sps_package) as zipfile:
        total = len(xmls_to_preserve)
        # Count from 1 so the progress reads [1/N] ... [N/N] instead of
        # stopping one short of the total.
        for position, xml_filename in enumerate(xmls_to_preserve, start=1):
            Logger.info(
                'Reading XML file "%s" from ZIP file "%s" [%s/%s]',
                xml_filename,
                sps_package,
                position,
                total,
            )

            # Recorded up front and filled in below, so every outcome of this
            # iteration ends up in ``executions``.
            execution = {"file_name": xml_filename}
            executions.append(execution)

            try:
                xml_data = put_xml_into_object_store(zipfile, xml_filename)
            except (PutXMLInObjectStoreException, Pidv3Exception) as exc:
                Logger.error(
                    'Could not put document "%s" in object store: %s',
                    xml_filename,
                    str(exc),
                )
                execution.update({"failed": True, "error": str(exc)})
                continue

            assets_and_pdfs_data = put_assets_and_pdfs_in_object_store(zipfile, xml_data)
            # Kernel receives a copy enriched with the asset/PDF data; the
            # returned list keeps the original ``xml_data``.
            _document_metadata = deepcopy(xml_data)
            _document_metadata.update(assets_and_pdfs_data)
            try:
                register_update_doc_into_kernel(_document_metadata)
            except RegisterUpdateDocIntoKernelException as exc:
                Logger.error(
                    'Could not register or update document "%s" in Kernel: %s',
                    xml_filename,
                    str(exc),
                )
                execution.update(
                    {
                        "pid": xml_data.get("scielo_id"),
                        "failed": True,
                        "error": str(exc),
                    }
                )
                continue

            synchronized_docs_metadata.append(xml_data)
            execution.update(
                {
                    "pid": xml_data.get("scielo_id"),
                    "payload": _document_metadata,
                }
            )

    Logger.debug("register_update_documents OUT")

    return (synchronized_docs_metadata, executions)