def reading_article_xml(file_xml_path, move_success=True):
    """Parse an article XML file and report whether it references media.

    The file is read through ``files.read_file`` and parsed with
    ``etree.fromstring``. When ``xml.find_medias`` returns a truthy result
    for the parsed tree, an info message is logged.

    Args:
        file_xml_path: Path of the XML file to read.
        move_success: When true (the default), the path with the configured
            ``CONVERSION_PATH`` value stripped out is handed to
            ``files.move_xml_conversion2success``.
    """
    xml_tree = etree.fromstring(files.read_file(file_xml_path))

    if xml.find_medias(xml_tree):
        logger.info("%s possui midias", file_xml_path)

    if not move_success:
        return

    # The helper receives the path without the CONVERSION_PATH portion.
    stripped_path = file_xml_path.replace(config.get("CONVERSION_PATH"), "")
    files.move_xml_conversion2success(stripped_path)
def conversion_article_xml(file_xml_path):
    """Replace the ``body/p`` node of an article XML and save the result.

    The file is parsed, ``xml.parser_body_xml`` builds the replacement node
    from the parsed tree, and that node is swapped in for the first
    ``body/p`` element. The resulting document is serialized as UTF-8 text
    and written under the configured ``CONVERSION_PATH`` directory, keeping
    the original file name.

    Args:
        file_xml_path: Path of the XML file to convert.
    """
    xml_tree = etree.fromstring(files.read_file(file_xml_path))
    converted_body = xml.parser_body_xml(xml_tree)

    # Overwrite the previously escaped HTML with the newly processed XML.
    escaped_node = xml_tree.find("body/p")
    escaped_node.getparent().replace(escaped_node, converted_body)

    conversion_dir = config.get("CONVERSION_PATH")
    target_path = os.path.join(conversion_dir, os.path.basename(file_xml_path))

    serialized = etree.tostring(xml_tree).decode("utf-8")
    files.write_file(target_path, serialized)
def manage_error_file(errors, err_file, converted_file):
    """Refresh the error report that accompanies a converted file.

    An existing ``err_file`` is removed first (best effort). When ``errors``
    is non-empty, a new report is written containing the content of
    ``converted_file``, a separator of 80 ``=`` characters, and then each
    error key followed by its ``lineno:message`` pairs, one per line.

    Args:
        errors: Mapping of error key to a dict holding parallel ``"lineno"``
            and ``"message"`` sequences. A falsy value means no report is
            written.
        err_file: Path of the error report to remove and, if needed, rewrite.
        converted_file: Path of the file whose content heads the report.
    """
    if os.path.isfile(err_file):
        try:
            os.unlink(err_file)
        except OSError:
            # Best-effort cleanup: a stale report that cannot be removed must
            # not abort processing. Only filesystem errors are ignored, so
            # KeyboardInterrupt/SystemExit still propagate.
            pass

    if not errors:
        return

    msg = []
    for err, data in errors.items():
        msg.append(err)
        msg.extend(
            "{}:{}".format(ln, text)
            for ln, text in zip(data["lineno"], data["message"])
        )

    files.write_file(
        err_file,
        "%s %s\n%s" % (files.read_file(converted_file), "=" * 80, "\n".join(msg)),
    )
def get_document_renditions(
    folder: str, file_prefix: str, storage: object
) -> List[dict]:
    """Register the package renditions listed in ``manifest.json``.

    The manifest found in ``folder`` maps a language to a URL. For each
    entry, the file named after the last component of the URL path is
    looked up in ``folder`` and passed to ``storage.register``.

    Args:
        folder: Directory holding ``manifest.json`` and the rendition files.
        file_prefix: Prefix forwarded to ``storage.register``.
        storage: Object exposing ``register(path, prefix, legacy_path)``.

    Returns:
        One dict per manifest entry with the keys ``filename``, ``url`` (the
        value returned by ``storage.register``), ``size_bytes``, ``mimetype``
        and ``lang``. An empty list is returned when the manifest cannot be
        read or parsed; that failure is logged rather than raised.
    """
    mime_guesser = MimeTypes()

    try:
        manifest_data = json.loads(
            files.read_file(os.path.join(folder, "manifest.json"))
        )
    except Exception as exc:
        logger.error("Could not read manifest: %s", str(exc))
        return []

    # Only the path component of each legacy URL is kept.
    legacy_paths = {
        lang: urlparse(url).path for lang, url in manifest_data.items()
    }
    logger.debug("Renditions lang and legacy url: %s", legacy_paths)

    renditions = []
    for lang, legacy_path in legacy_paths.items():
        file_name = os.path.basename(legacy_path)
        guessed_type = mime_guesser.guess_type(file_name)[0]
        local_path = os.path.join(folder, file_name)

        # Register first, then read the size, in that order.
        registered_url = storage.register(local_path, file_prefix, legacy_path)
        renditions.append(
            {
                "filename": file_name,
                "url": registered_url,
                "size_bytes": os.path.getsize(local_path),
                "mimetype": guessed_type,
                "lang": lang,
            }
        )

    return renditions
def test_read_file(self):
    """``files.read_file`` returns content containing the expected ISSN text."""
    sample_path = os.path.join(SAMPLES_PATH, "S0036-36341997000100001.xml")
    content = files.read_file(sample_path)
    self.assertIn("0036-3634", content)
def read_json_file(file_path: str) -> List[dict]:
    """Read a JSON file and return its content as Python data structures.

    Args:
        file_path: Path of the JSON file, read through ``files.read_file``.

    Returns:
        The value produced by ``json.loads`` for the file content.
    """
    raw_content = files.read_file(file_path)
    parsed = json.loads(raw_content)
    return parsed