def test_write_file(self):
    """Round-trip a UTF-8 fixture through ``write_file`` and ``read_file``.

    The text read back from the written copy must be identical to the text
    read from the source fixture.
    """
    read_text = fs_utils.read_file("./tests/fixtures/arquivo-utf8.txt")
    fs_utils.write_file("./tests/fixtures/arquivo-utf8-written.txt", read_text)
    written_text = fs_utils.read_file(
        "./tests/fixtures/arquivo-utf8-written.txt")

    # A containment check would also pass for an empty or truncated written
    # file; a round trip has to reproduce the text exactly.
    self.assertEqual(written_text, read_text)
def write_etree_to_file(tree: etree.ElementTree, path: str) -> None:
    """Write an lxml tree to a destination file.

    The tree is serialized with ``xml_utils.tostring``; per the original
    note, going through that function ensures entities are not modified.
    Nothing is written when either ``tree`` or ``path`` is ``None``.
    """
    if tree is not None and path is not None:
        serialized = xml_utils.tostring(tree)
        fs_utils.write_file(path, serialized)
def write(self, dest_file_path, pretty_print=True, dtd_location_type=None):
    """Write this document to ``dest_file_path``.

    When a parsed tree is available in ``self.xml`` it is serialized as
    UTF-8 XML with the doctype returned by ``self.get_doctype``; otherwise
    the text in ``self.original`` is written as is.
    """
    doctype = self.get_doctype(dtd_location_type)
    if self.xml is not None:
        serialization_options = {
            "encoding": "utf-8",
            "method": "xml",
            "xml_declaration": self.xml_declaration,
            "pretty_print": pretty_print,
            "doctype": doctype,
        }
        self.xml.write(dest_file_path, **serialization_options)
        return
    fs_utils.write_file(dest_file_path, self.original)
def temp_xml_filename(self):
    """Create the temporary XML file and return its path.

    The file is placed in ``self.issue_stuff.temp_path`` and named
    ``pubmed_tmp_`` plus the base name of ``self.pubmed_filename``. Its
    content is a ``<root>`` element wrapping the article file names and
    article PIDs XML fragments.
    """
    base_name = os.path.basename(self.pubmed_filename)
    target = os.path.join(self.issue_stuff.temp_path, 'pubmed_tmp_' + base_name)
    fragments = (
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<root>',
        self.articles_filenames_xml_content,
        self.articles_pids_xml_content,
        '</root>',
    )
    fs_utils.write_file(target, ''.join(fragments))
    return target
def load_xml(self) -> None:
    """Load the XML file at ``self.file_path`` into ``self.tree``.

    The file text goes through ``xml_utils.insert_break_lines`` before being
    parsed by ``xml_utils.load_xml``, which supplies both ``self.tree`` and
    ``self.loading_error``. When an error is reported, ``self.loading_error``
    is rebuilt as: file path, the original error, and the line-numbered text.
    The resulting ``content`` is then written back to ``self.file_path``.
    """
    content = fs_utils.read_file(self.file_path)
    content = xml_utils.insert_break_lines(content)
    self.tree, self.loading_error = xml_utils.load_xml(content)
    if self.loading_error:
        content = xml_utils.numbered_lines(content)
        # Drop the leading XML declaration from the numbered listing.
        if content.startswith("1: <?xml"):
            content = content[content.find("?>") + 2:].strip()
        self.loading_error = (self.file_path + "\n\n" + self.loading_error + "\n\n" + content)
    # NOTE(review): on the error path ``content`` holds the numbered listing
    # at this point, so that listing is what gets written over the XML file.
    # Confirm this is intended, and confirm that this write is meant to run
    # on the success path as well.
    fs_utils.write_file(self.file_path, content)
def rename(self) -> None:
    """Create the destination package files and write the XML content there.

    When ``self.new_name`` is set, href values are fixed and the referenced
    files and the other files are renamed first; each step is announced
    with a debug log entry.
    """
    self._create_dest_pkgfiles()
    if self.new_name:
        logger.debug("PackageNamer._fix_href_values")
        self._fix_href_values()
        logger.debug("PackageNamer._rename_href_files")
        self._rename_href_files()
        logger.debug("PackageNamer._rename_other_files")
        self._rename_other_files()
    # NOTE(review): confirm the write below is meant to happen even when
    # ``self.new_name`` is empty.
    logger.debug("PackageNamer.write_file")
    fs_utils.write_file(self.dest_pkgfiles.filename, self.xml.content)
def _report(self, blocking_error, pkg):
    """Write the markup-to-XML report, plus the images report on success.

    With a blocking error, the report is that error formatted by
    ``html_reports.p_message``. Without one, the report is the package
    namer's report and an images-origin report is saved as well.
    """
    report_text = html_reports.p_message(blocking_error or "")
    if not blocking_error:
        report_text = self.pkg_namer.report()
        images_origin_report = ImagesOriginReport(
            self.enhancer.images_origin,
            self.pkg_namer.href_replacements,
            pkg.package_folder.path,
        )
        html_reports.save(
            self.FILES.sgmxml_outputs.images_report_filename,
            '',
            images_origin_report.report(),
        )
    fs_utils.write_file(
        self.FILES.sgmxml_outputs.mkp2xml_report_filename, report_text)
def _sgmxml(self):
    """Enhance the SGML-XML source file and write the result back to it.

    A ``SGMLXMLContentEnhancer`` is built from the source package files and
    the companion HTML; its ``content`` replaces the source file. A
    ``SGMLXMLError`` raised while building the enhancer is logged and the
    source file is left untouched.
    """
    logger.info("Enhance SGMLXML %s", self.FILES.src_pkgfiles.filename)
    try:
        self.enhancer = SGMLXMLContentEnhancer(
            self.FILES.src_pkgfiles,
            SGMLHTML(self.FILES.sgmxml_fname, self.FILES.html_filename)
        )
    except SGMLXMLError as e:
        logger.exception("%s %s", self.FILES.src_pkgfiles.filename, e)
    else:
        # Write only after the enhancer was built: doing it in ``finally``
        # reads ``self.enhancer.content`` even when construction failed,
        # which hits a missing or stale enhancer.
        fs_utils.write_file(
            self.FILES.src_pkgfiles.filename, self.enhancer.content)
def run(self):
    """Consume the collection scilista and process it.

    If the status manager is not free, a "busy" mail is sent and nothing
    else happens. Otherwise the status is blocked and the scilista is
    consumed; an empty scilista frees the status again, while a non-empty
    one is sorted, written to the configured scilista file and processed.
    """
    if not self.status_manager.is_free:
        self.mail_gerapadrao_is_busy()
        return

    self.status_manager.block()
    scilista_content = self.col_scilista.consume_collection_scilista()
    if not scilista_content:
        self.status_manager.free()
        return

    self.config.update_title_and_issue()
    scilista_content = sort_scilista(scilista_content)
    fs_utils.write_file(self.config.gerapadrao_scilista, scilista_content)
    self._gerapadrao(scilista_content)
    self._update_web_site(scilista_content)
def structure_validation_report(self, dtd_report_filename) -> bool:
    """Write the structure validation report and tell whether it passed.

    Errors come from ``self.validator.validate_structure()``. With no
    errors, the report holds the translated 'Validates fine' text; otherwise
    it holds a status line followed by the errors.

    Returns True when the final error list is empty.
    """
    status = None
    content = _('Validates fine')
    errors = []
    dtd_is_valid, errors = self.validator.validate_structure()
    if errors:
        if self.validator.xml_validator is None:
            status = validation_status.STATUS_BLOCKING_ERROR
        else:
            status = validation_status.STATUS_FATAL_ERROR
            # NOTE(review): confirm the doctype errors are meant to be added
            # only in this branch and not for the blocking case as well.
            errors += self.validator.validate_doctype()
        content = '\n' + status + '\n'
        content += '\n'.join(errors) + '\n' * 10
    fs_utils.write_file(dtd_report_filename, content)
    return len(errors) == 0
def validate_style(self, xml_obj, report_filename):
    """Generate the style report file and return its statistics.

    Any existing report file is removed first. ``xml_obj`` is transformed
    with ``xsl_prep_report`` and then ``xsl_report``; the outcome is written
    to ``report_filename`` and read back. If the report file does not exist
    afterwards, an 'ERROR: ...' text is written there instead. The report
    text is passed to ``style_checker_statistics`` and its result returned.
    """
    if os.path.isfile(report_filename):
        os.unlink(report_filename)
    transformed = None
    if xml_obj:
        transformed = xml_utils.transform(xml_obj, self.dtd_files.xsl_prep_report)
    if transformed:
        transformed = xml_utils.transform(transformed, self.dtd_files.xsl_report)
        # NOTE(review): confirm the write/read pair belongs inside this
        # branch, i.e. runs only when the first transformation gave a result.
        xml_utils.write(report_filename, transformed)
        result = fs_utils.read_file(report_filename)
    # The file was removed above, so it is missing whenever nothing was written.
    if not os.path.isfile(report_filename):
        result = 'ERROR: ' + _('Unable to create') + ' ' + report_filename
        fs_utils.write_file(report_filename, result)
    return style_checker_statistics(result)
def validate_structure(self, xml_filename, dtd_report_filename):
    """Validate the structure of an XML file and write the report.

    Args:
        xml_filename: path of the XML file to load and validate.
        dtd_report_filename: path where the report is written.

    Returns:
        A ``(xml_obj, valid)`` tuple. ``xml_obj`` is what
        ``xml_utils.get_xml_object`` returned; ``valid`` is the first value
        returned by ``xml_utils.validate``, or ``False`` when the XML could
        not be loaded.

    The report is empty when no status was set; otherwise it holds the
    status on the first line followed by the message.
    """
    # Defaults for the paths that assign none of these names: a loadable
    # XML without errors sets no status, and an unloadable XML never
    # reaches the validation that sets ``valid``.
    status = None
    content = ""
    valid = False

    xml_obj = xml_utils.get_xml_object(xml_filename)
    if not xml_obj:
        status = validation_status.STATUS_BLOCKING_ERROR
        content = "Unable to load {}".format(xml_filename)
    else:
        valid, errors = xml_utils.validate(
            xml_obj,
            self.dtd_files.data['dtd_id'],
            self.dtd_files.real_dtd_path)
        if errors:
            status = validation_status.STATUS_FATAL_ERROR
            content = "\n".join(errors)

    # A single write is enough: an earlier write of the raw message to this
    # same file would be overwritten by this one straight away.
    content = "" if not status else status + '\n' + content + '\n' * 10
    fs_utils.write_file(dtd_report_filename, content)
    return xml_obj, valid
def _sgmxml2xml(self):
    """Convert the SGML-XML source file to XML, in place.

    Nothing is done when the source file fails to load. If the root element
    has no ``sps`` attribute, it is set from the latest SPS version. The
    tree is then transformed with the XSL for that version and the result,
    with namespaces inserted in the ``article`` root, overwrites the source.
    """
    logger.info("Convert sgml to xml")
    xml_obj, xml_error = xml_utils.load_xml(self.FILES.src_pkgfiles.filename)
    if xml_error:
        return

    root = xml_obj.find(".")
    sps_version = root.get("sps")
    if sps_version is None:
        sps_version = xml_versions.get_latest_sps_version()[4:]
        root.set("sps", sps_version)

    transformed = xml_utils.transform(
        xml_obj, xml_versions.xsl_getter(sps_version))
    new_content = xml_utils.insert_namespaces_in_root(
        "article", str(transformed))
    fs_utils.write_file(self.FILES.src_pkgfiles.filename, new_content)
def write(self, filename, records):
    """Format ``records`` and write them to ``filename`` as iso-8859-1.

    The directory of ``filename`` is created when missing. The formatted
    text is HTML-unescaped, ``PRESERVECIRC`` markers become ``\\^``, and the
    text is round-tripped through iso-8859-1 before being written. Write
    failures (``UnicodeError``, ``IOError``, ``OSError``) are logged and not
    raised.
    """
    path = os.path.dirname(filename)
    # ``dirname`` is '' for a bare file name and ``os.makedirs('')`` raises,
    # so only create a directory when there is one to create.
    if path and not os.path.isdir(path):
        os.makedirs(path)

    content = self._format_file(records)
    content = html.unescape(content)
    content = content.replace(PRESERVECIRC, "\\^")

    # Per the original note: this converts to entities the UTF-8 characters
    # that have no iso-8859-1 equivalent.
    content = encoding.encode(content, "iso-8859-1")
    content = encoding.decode(content, "iso-8859-1")

    try:
        fs_utils.write_file(filename, content, 'iso-8859-1')
    except (UnicodeError, IOError, OSError) as e:
        logger.error("Nao foi possivel escrever o arquivo %s: %s", filename, e)
def html2table():
    """Extract symbol rows from ``SYMBOLS_HTML`` into ``SYMBOLS_CSV``.

    Only ``<tr>...</tr>`` rows that contain 'Symbol' and split into exactly
    seven cells are used. Each one yields a tab-separated line built from the
    character (first cell), the entity (fifth cell) and the definition
    (sixth cell).
    """
    markup = fs_utils.read_file(SYMBOLS_HTML)
    markup = markup.replace('<tr', '~BREAK~<tr')
    markup = markup.replace('</tr>', '</tr>~BREAK~')

    lines = []
    for row in markup.split('~BREAK~'):
        is_symbol_row = (
            row.startswith('<tr') and row.endswith('</tr>') and 'Symbol' in row
        )
        if not is_symbol_row:
            continue
        cells = row.replace('<td ', '<td>').split('</td><td>')
        if len(cells) != 7:
            continue

        # Character: the text just before the last '</font>' of the cell.
        char_cell = cells[0]
        char_cell = char_cell[0:char_cell.rfind('</font>')]
        symbol = char_cell[char_cell.rfind('>') + 1:]

        # Entity: from the last '&' up to and including the last ';'.
        entity_cell = cells[4]
        entity_cell = entity_cell[entity_cell.rfind('&'):]
        entity = entity_cell[0:entity_cell.rfind(';') + 1]

        lines.append('\t'.join((symbol, entity, cells[5])))

    fs_utils.write_file(SYMBOLS_CSV, '\n'.join(lines))
def get_files(self, package_files_path):
    """Copy the package files to the web site folders and report what was done.

    Files are routed by extension: ``.pdf``, ``.xml`` and ``.html`` have
    their own destinations, and anything else goes to the images folder.
    A PDF is also copied under the name given by
    ``new_name_for_pdf_filename`` when there is one. An XML is written from
    the result of ``self._remove_dtd_url_schema`` when that returns content,
    and plainly copied otherwise.

    Args:
        package_files_path: folder whose files are copied; entries that are
            not regular files are ignored.

    Returns:
        An HTML string with one ``<p>`` per log message.
    """
    msg = ['\n']
    msg.append('copying files from ' + package_files_path)

    path = {
        '.pdf': self.paths.web_bases_pdf,
        '.xml': self.paths.web_bases_xml,
        '.html': self.paths.web_htdocs_img_html,
        '.img': self.paths.web_htdocs_img,
    }
    for p in path.values():
        if not os.path.isdir(p):
            os.makedirs(p)

    for f in os.listdir(package_files_path):
        file_path = os.path.join(package_files_path, f)
        if not os.path.isfile(file_path):
            continue
        ext = os.path.splitext(file_path)[1]
        destination_path = path.get(ext)

        if destination_path is None:
            # Unknown extensions are stored along with the images.
            shutil.copy(file_path, path['.img'])
            msg.append(' {} => {}'.format(f, path['.img']))
        elif ext == '.pdf':
            pdf_filenames = [f]
            new_pdf_filename = new_name_for_pdf_filename(f)
            if new_pdf_filename:
                pdf_filenames.append(new_pdf_filename)
            for pdf_filename in pdf_filenames:
                # Copy to the full target name. Copying to the folder alone
                # always keeps the original name, so the renamed copy
                # reported below would never be created.
                pdf_destination = os.path.join(destination_path, pdf_filename)
                shutil.copy(file_path, pdf_destination)
                msg.append(' {} => {}'.format(f, pdf_destination))
        elif ext == '.xml':
            xml_content = self._remove_dtd_url_schema(file_path)
            if xml_content:
                fs_utils.write_file(
                    os.path.join(destination_path, f), xml_content)
            else:
                shutil.copy(file_path, destination_path)
            msg.append(' {} => {}'.format(f, path[ext]))
        else:
            shutil.copy(file_path, destination_path)
            msg.append(' {} => {}'.format(f, path[ext]))

    return '\n'.join(['<p>{}</p>'.format(item) for item in msg])
def validate(self, file_path, outputs): separator = '\n\n\n' + '.........\n\n\n' # erro no nome do arquivo name_error = self._name_error(file_path, separator) # erro de conversao de markup a xml, se aplicavel mkp2xml_error = self._mkp2xml_error(outputs.mkp2xml_report_filename) # cria relatorio de errors de dtd valid_dtd, dtd_errors = self._dtd_error(outputs.dtd_report_filename) # cria relatorio de erros gerais fs_utils.write_file(outputs.err_filename, mkp2xml_error + name_error + dtd_errors) # cria relatorio de errors de estilo xml_f, xml_e, xml_w = self.style_validation_report( outputs.style_report_filename) # conta e monta mensagem de erro sumarizada err_messages = self._err_messages(valid_dtd, name_error) xml_f += len(err_messages) if err_messages: err_messages = ''.join(err_messages) err_messages = rst_title(_('Summary')) + err_messages + separator err_messages = [err_messages.replace('\n', '<br/>')] if outputs.ctrl_filename: # aviso para o Markup de que terminou de gerar os relatorios fs_utils.write_file(outputs.ctrl_filename, 'Finished') elif xml_f + xml_e + xml_w == 0: fs_utils.delete_file_or_folder(outputs.style_report_filename) report_content = err_messages for rep_file in [outputs.err_filename, outputs.style_report_filename]: if os.path.isfile(rep_file): text = extract_report_core(fs_utils.read_file(rep_file)) report_content.append(text) r = validations_module.ValidationsResult() r.message = ''.join(report_content) return r
def validate_package_item(self, article, pkgfiles, outputs):
    """Run every validation for one article and return the collected results.

    A 'Processing... ' placeholder is written to the data report file
    first. Journal, issue, XML structure and XML content validations are
    then run in that order and stored on an ``ArticleValidations`` object.
    In XML generation mode the data report file is replaced by the 'Data
    Quality Control' HTML report.
    """
    structure_validator = XMLStructureValidator(
        pkgfiles.filename, article.tree, article.sps)
    fs_utils.write_file(outputs.data_report_filename, _('Processing... '))

    results = ArticleValidations()
    results.journal_validations = (
        self.xml_journal_data_validator.validate(article))
    results.issue_validations = (
        self.xml_issue_data_validator.validate(article))
    results.xml_structure_validations = structure_validator.validate(
        pkgfiles.filename, outputs)
    (results.xml_content_validations,
     results.article_display_report) = self.xml_content_validator.validate(
        article, outputs, pkgfiles)

    if not self.xml_content_validator.is_xml_generation:
        return results

    statistics = results.xml_content_validations.statistics_display(False)
    report_title = [_('Data Quality Control'), article.new_prefix]
    report_html = html_reports.html(
        report_title, statistics + results.xml_content_validations.message)
    fs_utils.write_file(outputs.data_report_filename, report_html)
    return results
def save(filename, title, body, teste=None):
    """Build an HTML page from ``title`` and ``body`` and write it to ``filename``.

    The directory of ``filename`` is created when it does not exist.
    ``teste`` is not used; it is kept so existing callers keep working.
    """
    r = html(title, body)
    d = os.path.dirname(filename)
    # ``dirname`` is '' for a bare file name and ``os.makedirs('')`` raises,
    # so only create a directory when there is one to create.
    if d and not os.path.isdir(d):
        os.makedirs(d)
    fs_utils.write_file(filename, r)
def update_journals_file(self):
    """Request the journals data and store it in the downloaded journals file.

    The file is left untouched when the request returns nothing.
    """
    data = self.ws_requester.request(self.journals_url)
    if not data:
        return
    fs_utils.write_file(self.downloaded_journals_filename, data)
def _write(self):
    """Write ``self.message`` to ``self.filename``; ``None`` is written as ''."""
    text = self.message
    if text is None:
        text = ''
    fs_utils.write_file(self.filename, text)