def report_differences(old, new, deleted_report, added_report, fixed_report, replaced_report):
    """Diff two line-oriented files and classify the differences.

    Lines present only in `old` are candidates for deletion; lines present
    only in `new` are candidates for addition.  Each candidate deletion is
    matched against the additions (found_similar / found_similar_2): a match
    means the line was "replaced"/"fixed" rather than deleted.

    Writes four report files (replaced, fixed, deleted, added) and returns
    [len(deleted), len(added), len(fixed)].

    Fix: membership tests were O(n) list scans inside loops (quadratic for
    large files); they now use sets, preserving the original ordering of the
    results because iteration still follows the source lists.
    """
    old_items = fs_utils.read_file(old).split('\n')
    print('current:')
    print(len(old_items))
    new_items = fs_utils.read_file(new).split('\n')
    print('new:')
    print(len(new_items))
    # Sets give O(1) membership; iterate the lists to keep original order.
    new_set = set(new_items)
    old_set = set(old_items)
    maybe_deleted = [item for item in old_items if item not in new_set]
    maybe_added = [item for item in new_items if item not in old_set]
    print('=>')
    print([len(maybe_deleted), len(maybe_added)])
    organized_items = classify_items_by_len(maybe_added)
    deleted = []
    replaced = []
    fixed = []
    total = '/' + str(len(maybe_deleted))
    i = 0
    for item in maybe_deleted:
        i += 1
        # progress marker every 500 items
        if str(i).endswith('500') or str(i).endswith('000'):
            print(str(i) + total)
        similar = found_similar(item, maybe_added)
        if similar is None:
            # fall back to the length-bucketed candidates
            similar = found_similar_2(item, organized_items.get(len(item), []))
        if similar is None:
            deleted.append(item)
        else:
            replaced.append(item + '\n' + similar + '\n')
            fixed.append(similar)
    fixed_set = set(fixed)
    added = [item for item in maybe_added if item not in fixed_set]
    fs_utils.write_file(replaced_report, '\n'.join(replaced))
    fs_utils.write_file(fixed_report, '\n'.join(fixed))
    fs_utils.write_file(deleted_report, '\n'.join(deleted))
    fs_utils.write_file(added_report, '\n'.join(added))
    return [len(deleted), len(added), len(fixed)]
def java_xml_utils_style_validation(xml_filename, doctype, report_filename, xsl_prep_report, xsl_report):
    """Run the two-pass XSLT style checker and report whether the XML passes.

    Produces an intermediate XML report, transforms it to the final HTML
    report, then inspects the report text for error/warning totals.
    Restores the original XML (the DTD was swapped in for checking) and
    removes the intermediate report before returning.
    """
    register_log('java_xml_utils_style_validation: inicio')
    is_valid_style = False
    xml_report = report_filename.replace('.html', '.xml')
    # start from a clean slate: drop any leftover report files
    for leftover in (xml_report, report_filename):
        if os.path.exists(leftover):
            os.unlink(leftover)
    parameters = {}
    bkp_xml_filename = xml_utils.apply_dtd(xml_filename, doctype)
    first_pass_ok = java_xml_utils.xml_transform(xml_filename, xsl_prep_report, xml_report, parameters)
    if first_pass_ok:
        java_xml_utils.xml_transform(xml_report, xsl_report, report_filename, parameters)
    else:
        fs_utils.write_file(report_filename, validation_status.STATUS_FATAL_ERROR + ': ' + _('Unable to create') + ' ' + report_filename)
    if os.path.isfile(report_filename):
        report_text = fs_utils.read_file(report_filename)
        no_errors = 'Total of errors = 0' in report_text
        no_warnings = ('Total of warnings = 0' in report_text) or ('Total of warnings =' not in report_text)
        is_valid_style = no_errors and no_warnings
    if os.path.isfile(bkp_xml_filename):
        xml_utils.restore_xml_file(xml_filename, bkp_xml_filename)
    if os.path.isfile(xml_report):
        os.unlink(xml_report)
    register_log('java_xml_utils_style_validation: fim')
    return is_valid_style
def validate(self, xml_filename, dtd_report_filename, style_report_filename):
    """Validate an XML file against its DTD and style rules.

    Returns (parsed_xml, is_valid_dtd, (fatal, errors, warnings)).
    On a parse failure the style report file receives a fatal-error message
    instead of the style checker output.
    """
    log = self.logger.register
    log('XMLValidator.validate - inicio')
    log('XMLValidator.validate - self.validator.setup()')
    self.validator.logger = self.logger
    self.validator.setup(xml_filename)
    log('XMLValidator.validate - xml_utils.load_xml')
    xml, load_error = xml_utils.load_xml(self.validator.xml.content)
    log('XMLValidator.validate - self.validator.dtd_validation')
    is_valid_dtd = self.validator.dtd_validation(dtd_report_filename)
    if load_error is None:
        log('XMLValidator.validate - self.validator.style_validation')
        self.validator.style_validation(style_report_filename)
        log('XMLValidator.validate - fs_utils.read_file')
        report_content = fs_utils.read_file(style_report_filename)
    else:
        log('XMLValidator.validate - e is not None')
        report_content = validation_status.STATUS_FATAL_ERROR + ': ' + _('Unable to load {xml}. ').format(xml=xml_filename) + '\n' + load_error
        fs_utils.write_file(style_report_filename, report_content)
    log('XMLValidator.validate - style_checker_statistics')
    f, e, w = style_checker_statistics(report_content)
    log('XMLValidator.validate - self.validator.finish()')
    self.validator.finish()
    log('XMLValidator.validate - fim')
    return (xml, is_valid_dtd, (f, e, w))
def __init__(self, xml, doctype=None):
    """Wrap an XML document given either as raw content or as a filename.

    `xml` is treated as content when it contains a '<'; otherwise it is a
    path and the content is read from disk.  When a doctype is given, the
    file is backed up and its DOCTYPE rewritten immediately.
    """
    is_content = '<' in xml
    self.xml_filename = None if is_content else xml
    self.content = xml if is_content else fs_utils.read_file(xml)
    self.doctype = doctype
    self.logger = None
    if doctype is not None:
        self._backup_xml_file()
        self._change_doctype()
def load_articles(filenames):
    """Load and normalize a set of XML files.

    `filenames` maps a name to a file path.  Returns a dict mapping each
    name to the serialized root element of its parsed XML; files that fail
    to parse are reported on stdout and omitted.
    """
    loaded = {}
    for name, path in filenames.items():
        xmlcontent = xml_utils.XMLContent(fs_utils.read_file(path))
        xmlcontent.normalize()
        xml, error = xml_utils.load_xml(xmlcontent.content)
        if xml is None:
            print(' ERROR 1: Invalid XML {}'.format(name))
        else:
            loaded[name] = xml_utils.tostring(xml.getroot())
    return loaded
def xml_content_transform(content, xsl_filename):
    """Apply an XSL transformation to in-memory XML content.

    Writes `content` to a temp file, transforms it into a second temp file,
    and returns the transformed text (or the original `content` unchanged
    when the transformation fails).

    Fix: the output temp file (f2) was only deleted on the success path, so
    a failed transformation leaked it; both temp files are now always
    removed via try/finally.
    """
    f = tempfile.NamedTemporaryFile(delete=False)
    f.close()
    fs_utils.write_file(f.name, content)
    f2 = tempfile.NamedTemporaryFile(delete=False)
    f2.close()
    try:
        if xml_transform(f.name, xsl_filename, f2.name):
            content = fs_utils.read_file(f2.name)
    finally:
        # clean up both temp files regardless of transform outcome
        for temp_name in (f.name, f2.name):
            if os.path.exists(temp_name):
                os.unlink(temp_name)
    return content
def validate_article_xml(xml_filename, dtd_files, dtd_report_filename, style_report_filename):
    """Validate an article XML file: DTD validation plus style checking.

    Returns (parsed_xml, is_valid_dtd, (fatal, errors, warnings)).  When the
    XML cannot be parsed, the style report file is filled with a fatal-error
    message and the statistics are computed from that message.
    """
    register_log('validate_article_xml: inicio')
    is_valid_style = False
    xml, load_error = xml_utils.load_xml(xml_filename)
    is_valid_dtd = dtd_validation(xml_filename, dtd_report_filename, dtd_files.doctype_with_local_path, dtd_files.database_name)
    if load_error is None:
        is_valid_style = style_validation(xml_filename, dtd_files.doctype_with_local_path, style_report_filename, dtd_files.xsl_prep_report, dtd_files.xsl_report, dtd_files.database_name)
        report_content = fs_utils.read_file(style_report_filename)
    else:
        report_content = validation_status.STATUS_FATAL_ERROR + ': ' + _('Unable to load {xml}. ').format(xml=xml_filename) + '\n' + load_error
        fs_utils.write_file(style_report_filename, report_content)
    f, e, w = style_checker_statistics(report_content)
    register_log('validate_article_xml: fim')
    return (xml, is_valid_dtd, (f, e, w))
def format_reports_for_web(report_path, pkg_path, issue_path):
    """Publish report files into the web app's htdocs tree.

    Zip archives and xml_converter.txt are deleted from report_path; every
    other file has its local file:/// links rewritten to web paths and is
    written under htdocs/reports/<issue_path>.
    """
    target_dir = converter_env.local_web_app_path + '/htdocs/reports/' + issue_path
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    for report_file in os.listdir(report_path):
        source = report_path + '/' + report_file
        if report_file.endswith('.zip') or report_file == 'xml_converter.txt':
            os.unlink(source)
            continue
        content = fs_utils.read_file(source)
        content = content.replace('file:///' + pkg_path, '/img/revistas/' + issue_path)
        content = content.replace('file:///' + report_path, '/reports/' + issue_path)
        # Python 2: encode unicode content before writing bytes out
        if isinstance(content, unicode):
            content = content.encode('utf-8')
        fs_utils.write_file(target_dir + '/' + report_file, content)
def article_data_and_validations_report(journal, article, new_name, package_path, images_generation_report_filename, is_db_generation, is_sgml_generation):
    """Assemble the HTML data-and-validations report for one article.

    Returns a fatal-error message when the article's XML tree is missing;
    otherwise joins the display, validation, and (optional) image-generation
    report sections into a single HTML string.
    """
    if article.tree is None:
        # no parsed XML: nothing to report beyond a fatal-error message
        return validation_status.STATUS_FATAL_ERROR + ': ' + _('Unable to get data of ') + new_name + '.'
    validation = article_validations.ArticleContentValidation(journal, article, is_db_generation, False)
    sheet_data = ArticleSheetData(article, validation)
    display = ArticleDisplayReport(article, sheet_data, package_path, new_name)
    validation_report = ArticleValidationReport(validation)
    img_report_content = ''
    if os.path.isfile(images_generation_report_filename):
        img_report_content = fs_utils.read_file(images_generation_report_filename)
    parts = []
    if len(img_report_content) > 0:
        parts.append(html_reports.tag('h1', _('ATTENTION'), 'warning'))
        parts.append(html_reports.tag('h1', _('New report: Images Report at the bottom'), 'warning'))
    if is_sgml_generation:
        parts.extend([
            display.issue_header,
            display.article_front,
            validation_report.validations(display_all_message_types=False),
            display.table_tables,
            display.article_body,
            display.article_back,
        ])
    else:
        parts.extend([
            validation_report.validations(display_all_message_types=False),
            display.table_tables,
            sheet_data.files_and_href(package_path),
        ])
    if len(img_report_content) > 0:
        parts.append(img_report_content)
    return html_reports.join_texts(parts)
def transform_content(self, xsl_filename):
    """Apply an XSL transformation to self.content via temp files.

    Returns the transformed text, or '' when the transformation fails.

    Fix: the cleanup loop iterated over [f.name, f2.name] but always called
    os.unlink(f.name) — deleting the input file twice (the second call
    raises OSError) and never removing the output temp file.  It now unlinks
    each item once, guarded by an existence check.
    """
    if self.logger is not None:
        self.logger.register('XML.transform_content - inicio')
    f = tempfile.NamedTemporaryFile(delete=False)
    f.close()
    f2 = tempfile.NamedTemporaryFile(delete=False)
    f2.close()
    fs_utils.write_file(f.name, self.content)
    content = ''
    if self.transform_file(f.name, xsl_filename, f2.name):
        content = fs_utils.read_file(f2.name)
    for item in [f.name, f2.name]:
        if os.path.exists(item):
            os.unlink(item)
    if self.logger is not None:
        self.logger.register('XML.transform_content - fim')
    return content
def xml_validate(xml_filename, result_filename, doctype=None):
    """Validate an XML file with the Java XMLCheck tool and write a result report.

    The file's DTD is temporarily swapped in (apply_dtd) and restored at the
    end via shutil.move.  Returns True when the report contains no 'ERROR'
    (case-insensitive).  Python 2 code: mixes str/unicode deliberately.
    """
    #register_log('xml_validate: inicio')
    validation_type = ''
    if doctype is None:
        doctype = ''
    else:
        # only validate against a DTD when a doctype was supplied
        validation_type = '--validate'
    bkp_xml_filename = xml_utils.apply_dtd(xml_filename, doctype)
    temp_result_filename = TMP_DIR + '/' + os.path.basename(result_filename)
    if os.path.isfile(result_filename):
        os.unlink(result_filename)
    if not os.path.isdir(os.path.dirname(result_filename)):
        os.makedirs(os.path.dirname(result_filename))
    # shell redirection captures XMLCheck's stdout into the temp report file
    cmd = JAVA_PATH + ' -cp "' + JAR_VALIDATE + '" br.bireme.XMLCheck.XMLCheck "' + xml_filename + '" ' + validation_type + '>"' + temp_result_filename + '"'
    cmd = cmd.encode(encoding=sys.getfilesystemencoding())
    os.system(cmd)
    if os.path.exists(temp_result_filename):
        result = fs_utils.read_file(temp_result_filename, sys.getfilesystemencoding())
        if 'ERROR' in result.upper():
            # append a numbered source listing to help locate the error
            n = 0
            s = ''
            for line in open(xml_filename, 'r').readlines():
                # NOTE(review): the first line is skipped and numbering starts
                # at 1 on the second line — presumably to skip the XML
                # declaration; confirm this is intended
                if n > 0:
                    s += str(n) + ':' + line
                n += 1
            result += '\n' + s.decode('utf-8')
            fs_utils.write_file(temp_result_filename, result)
    else:
        # XMLCheck produced no output at all
        result = 'ERROR: Not valid. Unknown error.\n' + cmd
        fs_utils.write_file(temp_result_filename, result)
    shutil.move(temp_result_filename, result_filename)
    # restore the original XML (undo apply_dtd's doctype rewrite)
    shutil.move(bkp_xml_filename, xml_filename)
    #register_log('xml_validate: fim')
    return not 'ERROR' in result.upper()
def copy_files_to_local_web_app(self, xml_path, web_path):
    """Copy a package's files from xml_path into the local web app tree.

    Files are routed by extension: pdf/xml/html go to their bases/htdocs
    directories, everything else (except .tif/.tiff and backup artifacts)
    goes to the images directory.  PDFs may additionally be copied under a
    renamed filename.  Returns an HTML log of the copies performed.

    Fix: removed the unused `xml_files` and `xml_content` locals — they read
    and concatenated every .xml file in xml_path and then discarded the
    result (pure wasted I/O).
    """
    msg = ['\n']
    msg.append('copying files from ' + xml_path)
    path = {}
    path['pdf'] = web_path + '/bases/pdf/' + self.relative_issue_path
    path['xml'] = web_path + '/bases/xml/' + self.relative_issue_path
    path['html'] = web_path + '/htdocs/img/revistas/' + self.relative_issue_path + '/html/'
    path['img'] = web_path + '/htdocs/img/revistas/' + self.relative_issue_path
    for p in path.values():
        if not os.path.isdir(p):
            os.makedirs(p)
    for f in os.listdir(xml_path):
        if f.endswith('.xml.bkp') or f.endswith('.xml.replaced.txt') or f.endswith('.rep.xml'):
            # skip backup/intermediate artifacts
            pass
        elif os.path.isfile(xml_path + '/' + f):
            ext = f[f.rfind('.')+1:]
            if path.get(ext) is None:
                # unrouted extension: treat as an image, but never copy TIFFs
                if not f.endswith('.tif') and not f.endswith('.tiff'):
                    shutil.copy(xml_path + '/' + f, path['img'])
                    msg.append(' ' + f + ' => ' + path['img'])
            elif ext == 'pdf':
                # copy the PDF under its own name and, when applicable,
                # under the alternative name as well
                pdf_filenames = [f]
                new_pdf_filename = new_name_for_pdf_filename(f)
                if new_pdf_filename is not None:
                    pdf_filenames.append(new_pdf_filename)
                for pdf_filename in pdf_filenames:
                    if os.path.isfile(path[ext] + '/' + pdf_filename):
                        os.unlink(path[ext] + '/' + pdf_filename)
                    shutil.copyfile(xml_path + '/' + f, path[ext] + '/' + pdf_filename)
                    msg.append(' ' + f + ' => ' + path[ext] + '/' + pdf_filename)
            else:
                shutil.copy(xml_path + '/' + f, path[ext])
                msg.append(' ' + f + ' => ' + path[ext])
    return '\n'.join(['<p>' + item + '</p>' for item in msg])
def style_validation(self, report_filename):
    """Run the two-pass style check and return True when the report is clean.

    A prep transformation produces an intermediate XML report, which a
    second transformation turns into the final report file; the report text
    is then inspected for error/warning totals.
    """
    xml_report = report_filename.replace('.html', '.xml')
    # remove leftovers from previous runs
    for leftover in (xml_report, report_filename):
        if os.path.exists(leftover):
            os.unlink(leftover)
    parameters = {}
    if self.xml.transform_file(self.xsl_prep_report, xml_report, parameters):
        report_xml = java_xml_utils.XML(xml_report, None)
        report_xml.logger = self.logger
        report_xml.transform_file(self.xsl_report, report_filename, parameters)
        result = fs_utils.read_file(report_filename)
    if os.path.isfile(xml_report):
        os.unlink(xml_report)
    if not os.path.isfile(report_filename):
        result = 'ERROR: ' + _('Unable to create') + ' ' + report_filename
        fs_utils.write_file(report_filename, result)
    no_errors = 'Total of errors = 0' in result
    no_warnings = ('Total of warnings = 0' in result) or ('Total of warnings =' not in result)
    return no_errors and no_warnings
def xml_validate(self, result_filename):
    """Validate self.xml_filename with the Java XMLCheck tool.

    Writes the tool's report to result_filename (appending a numbered source
    listing when errors are found) and returns True when the report contains
    no 'ERROR' (case-insensitive).  Python 2 code: mixes str/unicode
    deliberately.
    """
    if self.logger is not None:
        self.logger.register('XML.xml_validate - inicio')
    # '--validate' enables DTD validation; empty doctype means well-formedness only
    validation_type = '' if self.doctype == '' else '--validate'
    temp_result_filename = self.prepare(result_filename)
    if self.logger is not None:
        self.logger.register('XML.transform_file - command - inicio')
    # shell redirection captures XMLCheck's stdout into the temp report file
    cmd = JAVA_PATH + ' -cp "' + JAR_VALIDATE + '" br.bireme.XMLCheck.XMLCheck "' + self.xml_filename + '" ' + validation_type + '>"' + temp_result_filename + '"'
    cmd = cmd.encode(encoding=sys.getfilesystemencoding())
    os.system(cmd)
    if self.logger is not None:
        self.logger.register('XML.transform_file - command - fim')
    if os.path.exists(temp_result_filename):
        result = fs_utils.read_file(temp_result_filename, sys.getfilesystemencoding())
        if 'ERROR' in result.upper():
            # append a numbered source listing to help locate the error
            n = 0
            s = ''
            for line in open(self.xml_filename, 'r').readlines():
                # NOTE(review): the first line is skipped and numbering starts
                # at 1 on the second line — presumably to skip the XML
                # declaration; confirm this is intended
                if n > 0:
                    s += str(n) + ':' + line
                n += 1
            result += '\n' + s.decode('utf-8')
            fs_utils.write_file(result_filename, result)
            os.unlink(temp_result_filename)
        else:
            # clean result: just move the temp report into place
            shutil.move(temp_result_filename, result_filename)
    else:
        # XMLCheck produced no output at all
        result = 'ERROR: Not valid. Unknown error.\n' + cmd
        fs_utils.write_file(result_filename, result)
    if self.logger is not None:
        self.logger.register('XML.transform_file - command - fim')
    if self.logger is not None:
        self.logger.register('XML.xml_validate - fim')
    return not 'ERROR' in result.upper()
def email_header(self, filename):
    """Return the e-mail header text read from CONFIG_PATH/<filename>.

    Returns '' when no filename is given.
    """
    if filename is None:
        return ''
    return fs_utils.read_file(CONFIG_PATH + '/' + filename)
def apply_dtd(xml_filename, doctype):
    """Back up xml_filename into a fresh temp dir, then rewrite the file in
    place with its DOCTYPE replaced by `doctype`.

    Returns the path of the backup copy so the caller can restore it later.
    """
    bkp_filename = tempfile.mkdtemp() + '/' + os.path.basename(xml_filename)
    shutil.copyfile(xml_filename, bkp_filename)
    updated_content = replace_doctype(fs_utils.read_file(xml_filename), doctype)
    fs_utils.write_file(xml_filename, updated_content)
    return bkp_filename
if len(parts) == 6: bad, correct, country_name, country_code, state, city = parts results.append('\t'.join([correct, city, state, country_code, country_name])) results = list(set(results)) print('downloaded:') print(len(results)) fs_utils.write_file(wayta_orgname_location_country, '\n'.join(sorted(results))) execute_update = False if len(sys.argv) == 1: update_wayta_orgname_location_country(source, wayta_normalized_aff, wayta_orgname_location_country) counts = report_differences(local_orgname_location_country, wayta_orgname_location_country, deleted_report, added_report, fixed_report, replaced_report) print('->') print(counts) print(sum(counts)) elif len(sys.argv) == 2: execute_update = (sys.argv[1] == 'update') if sys.argv[1] == 'fix_local': fs_utils.write_file(local_orgname_location_country, remove_exceding_blank_spaces(fs_utils.read_file(local_orgname_location_country))) if execute_update is True: import institutions_service a = institutions_service.OrgManager() a.create_db() print('db updated') else: print('No update')
def fix_endoflines(filename, destination):
    """Strip surrounding whitespace from each line of `filename` and write
    the sorted, stripped lines to `destination`.

    Fix: the original built the stripped lines in a list but then wrote
    sorted(items), where `items` was the whole file CONTENT string — so the
    output was the file's individual characters sorted and the stripped
    lines were discarded.  It now sorts the stripped lines.
    """
    content = fs_utils.read_file(filename)
    stripped_lines = [line.strip() for line in content.split('\n')]
    fs_utils.write_file(destination, '\n'.join(sorted(stripped_lines)))
'supplementary-material', 'table-wrap', 'verse-group', ] related_articles_type = ['corrected-article', 'commentary-article', 'press-release', 'retracted-article'] CONTRIB_ID_URLS = { 'lattes': 'http://lattes.cnpq.br/', 'orcid': 'http://orcid.org/', 'researchid': 'http://www.researcherid.com/rid/', 'scopus': 'https://www.scopus.com/authid/detail.uri?authorId=', } LICENSES = read_file(CURRENT_PATH + '/../tables/licenses.csv') if LICENSES is None: LICENSES = [] else: LICENSES = LICENSES.split() LICENSES.extend([item.replace('http:', 'https:') for item in LICENSES]) SPS_HELP_ELEMENTS = [ 'abbrev-journal-title', 'abstract', 'ack', 'addr-line', 'aff', 'app', 'article-categories', 'article-id',