def report_differences(old, new, deleted_report, added_report, fixed_report, replaced_report):
    """Compare two line-oriented files and write four difference reports.

    Lines present only in ``old`` are candidates for deletion and lines
    present only in ``new`` are candidates for addition. Each deletion
    candidate is looked up with ``found_similar`` among all addition
    candidates and, failing that, with ``found_similar_2`` among the
    addition candidates of the same length. A candidate with a match is
    reported as replaced (old line, matching line, blank line) and the
    match is reported as fixed; a candidate without a match is reported
    as deleted. Addition candidates never used as a match are reported as
    added.

    :param old: path of the current file
    :param new: path of the new file
    :param deleted_report: path where the deleted lines are written
    :param added_report: path where the added lines are written
    :param fixed_report: path where the matching new lines are written
    :param replaced_report: path where the old/new pairs are written
    :return: list ``[number of deleted, number of added, number of fixed]``
    """
    old_items = fs_utils.read_file(old).split('\n')
    print('current:')
    print(len(old_items))

    new_items = fs_utils.read_file(new).split('\n')
    print('new:')
    print(len(new_items))

    # Sets make each membership test O(1); the lists are still iterated so
    # that the original order (and any duplicated line) is preserved.
    old_lookup = set(old_items)
    new_lookup = set(new_items)
    maybe_deleted = [item for item in old_items if item not in new_lookup]
    maybe_added = [item for item in new_items if item not in old_lookup]

    print('=>')
    print([len(maybe_deleted), len(maybe_added)])
    # presumably a dict mapping line length to the lines of that length;
    # it is only accessed through .get(len(item), []) below
    organized_items = classify_items_by_len(maybe_added)

    deleted = []
    replaced = []
    fixed = []
    total = '/' + str(len(maybe_deleted))
    for i, item in enumerate(maybe_deleted, 1):
        # progress feedback every 500 items
        if i % 500 == 0:
            print(str(i) + total)

        similar = found_similar(item, maybe_added)
        if similar is None:
            similar = found_similar_2(item, organized_items.get(len(item), []))
        if similar is None:
            deleted.append(item)
        else:
            replaced.append(item + '\n' + similar + '\n')
            fixed.append(similar)

    fixed_lookup = set(fixed)
    added = [item for item in maybe_added if item not in fixed_lookup]

    fs_utils.write_file(replaced_report, '\n'.join(replaced))
    fs_utils.write_file(fixed_report, '\n'.join(fixed))
    fs_utils.write_file(deleted_report, '\n'.join(deleted))
    fs_utils.write_file(added_report, '\n'.join(added))

    return [len(deleted), len(added), len(fixed)]
Beispiel #2
0
def java_xml_utils_style_validation(xml_filename, doctype, report_filename, xsl_prep_report, xsl_report):
    """Produce the style checker report and tell whether it is clean.

    The XML file is transformed with ``xsl_prep_report`` into an
    intermediate XML report (same name as ``report_filename`` with
    ``.html`` replaced by ``.xml``), which is then transformed with
    ``xsl_report`` into ``report_filename``. When the first transformation
    fails, a fatal error message is written to ``report_filename`` instead.

    :return: True when the report contains 'Total of errors = 0' and either
        'Total of warnings = 0' or no 'Total of warnings =' at all
    """
    register_log('java_xml_utils_style_validation: inicio')
    xml_report = report_filename.replace('.html', '.xml')

    # remove the results of any previous execution
    for previous in (xml_report, report_filename):
        if os.path.exists(previous):
            os.unlink(previous)

    parameters = {}
    # apply_dtd changes xml_filename in place and returns the backup path
    bkp_xml_filename = xml_utils.apply_dtd(xml_filename, doctype)
    prepared = java_xml_utils.xml_transform(xml_filename, xsl_prep_report, xml_report, parameters)
    if not prepared:
        fs_utils.write_file(report_filename, validation_status.STATUS_FATAL_ERROR + ': ' + _('Unable to create') + ' ' + report_filename)
    else:
        java_xml_utils.xml_transform(xml_report, xsl_report, report_filename, parameters)

    is_valid_style = False
    if os.path.isfile(report_filename):
        report = fs_utils.read_file(report_filename)
        no_errors = 'Total of errors = 0' in report
        no_warnings = ('Total of warnings = 0' in report) or ('Total of warnings =' not in report)
        is_valid_style = no_errors and no_warnings

    if os.path.isfile(bkp_xml_filename):
        xml_utils.restore_xml_file(xml_filename, bkp_xml_filename)

    # the intermediate report is not kept
    if os.path.isfile(xml_report):
        os.unlink(xml_report)
    register_log('java_xml_utils_style_validation: fim')
    return is_valid_style
 def validate(self, xml_filename, dtd_report_filename, style_report_filename):
     """Run the DTD and style validations of an XML file, logging each step.

     The style validation only runs when the XML content can be loaded;
     otherwise a fatal error message is written to
     ``style_report_filename``. In both cases the resulting report content
     is handed to ``style_checker_statistics``.

     :return: tuple ``(xml, is_valid_dtd, (f, e, w))`` where ``xml`` is the
         first value returned by ``xml_utils.load_xml`` and ``(f, e, w)``
         are the three values returned by ``style_checker_statistics``
         (presumably fatal error, error and warning counts - confirm there)
     """
     log = self.logger.register
     log('XMLValidator.validate - inicio')
     log('XMLValidator.validate - self.validator.setup()')
     self.validator.logger = self.logger
     self.validator.setup(xml_filename)
     log('XMLValidator.validate - xml_utils.load_xml')
     xml, load_error = xml_utils.load_xml(self.validator.xml.content)
     log('XMLValidator.validate - self.validator.dtd_validation')
     is_valid_dtd = self.validator.dtd_validation(dtd_report_filename)
     if load_error is not None:
         log('XMLValidator.validate - e is not None')
         report_content = validation_status.STATUS_FATAL_ERROR + ': ' + _('Unable to load {xml}. ').format(xml=xml_filename) + '\n' + load_error
         fs_utils.write_file(style_report_filename, report_content)
     else:
         log('XMLValidator.validate - self.validator.style_validation')
         self.validator.style_validation(style_report_filename)
         log('XMLValidator.validate - fs_utils.read_file')
         report_content = fs_utils.read_file(style_report_filename)
     log('XMLValidator.validate - style_checker_statistics')
     f_total, e_total, w_total = style_checker_statistics(report_content)
     log('XMLValidator.validate - self.validator.finish()')
     self.validator.finish()
     log('XMLValidator.validate - fim')
     return (xml, is_valid_dtd, (f_total, e_total, w_total))
 def __init__(self, xml, doctype=None):
     """Keep the XML content, given either inline or as a file name.

     ``xml`` is taken as content when it contains '<'; otherwise it is
     taken as a file name and the content is read from that file. When a
     doctype is given, the file is backed up and its doctype changed.
     """
     if '<' in xml:
         self.xml_filename = None
         self.content = xml
     else:
         self.xml_filename = xml
         self.content = fs_utils.read_file(xml)
     self.doctype = doctype
     self.logger = None
     if doctype is None:
         return
     self._backup_xml_file()
     self._change_doctype()
Beispiel #5
0
def load_articles(filenames):
    """Load the XML files and return their serialized root elements.

    :param filenames: dict mapping a name to the path of an XML file
    :return: dict mapping each name to ``xml_utils.tostring`` of the root
        of its XML; names whose XML can not be loaded are reported on the
        standard output and left out
    """
    loaded = {}
    for name, path in filenames.items():
        normalizer = xml_utils.XMLContent(fs_utils.read_file(path))
        normalizer.normalize()
        xml, error = xml_utils.load_xml(normalizer.content)
        if xml is None:
            print(' ERROR 1: Invalid XML {}'.format(name))
            continue
        loaded[name] = xml_utils.tostring(xml.getroot())
    return loaded
def xml_content_transform(content, xsl_filename):
    """Apply an XSL file to XML content using temporary files.

    The content is written to a temporary file, transformed with
    ``xml_transform`` into a second temporary file and read back.

    :param content: XML content to transform
    :param xsl_filename: path of the XSL file
    :return: the transformed content, or ``content`` unchanged when the
        transformation fails
    """
    source = tempfile.NamedTemporaryFile(delete=False)
    source.close()
    target = tempfile.NamedTemporaryFile(delete=False)
    target.close()

    try:
        fs_utils.write_file(source.name, content)
        if xml_transform(source.name, xsl_filename, target.name):
            content = fs_utils.read_file(target.name)
    finally:
        # Both temporary files are removed whatever the outcome, so a
        # failed transformation no longer leaves the output file behind.
        for temp_name in (source.name, target.name):
            if os.path.exists(temp_name):
                os.unlink(temp_name)
    return content
def validate_article_xml(xml_filename, dtd_files, dtd_report_filename, style_report_filename):
    """Run the DTD and style validations of an article XML file.

    The style validation only runs when ``xml_utils.load_xml`` reports no
    error; otherwise a fatal error message is written to
    ``style_report_filename``. In both cases the resulting report content
    is handed to ``style_checker_statistics``.

    :return: tuple ``(xml, is_valid_dtd, (f, e, w))`` where ``(f, e, w)``
        are the three values returned by ``style_checker_statistics``
        (presumably fatal error, error and warning counts - confirm there)
    """
    register_log('validate_article_xml: inicio')
    xml, load_error = xml_utils.load_xml(xml_filename)
    is_valid_dtd = dtd_validation(xml_filename, dtd_report_filename, dtd_files.doctype_with_local_path, dtd_files.database_name)
    if load_error is not None:
        report_content = validation_status.STATUS_FATAL_ERROR + ': ' + _('Unable to load {xml}. ').format(xml=xml_filename) + '\n' + load_error
        fs_utils.write_file(style_report_filename, report_content)
    else:
        # the boolean result is not used here: the report is read back instead
        style_validation(xml_filename, dtd_files.doctype_with_local_path, style_report_filename, dtd_files.xsl_prep_report, dtd_files.xsl_report, dtd_files.database_name)
        report_content = fs_utils.read_file(style_report_filename)
    f_total, e_total, w_total = style_checker_statistics(report_content)
    register_log('validate_article_xml: fim')
    return (xml, is_valid_dtd, (f_total, e_total, w_total))
Beispiel #8
0
def format_reports_for_web(report_path, pkg_path, issue_path):
    """Publish the reports of ``report_path`` in the local web application.

    '.zip' files and 'xml_converter.txt' are deleted from ``report_path``.
    Every other entry is read, has its 'file:///' references to
    ``pkg_path`` and ``report_path`` replaced by web paths, and is written
    to the reports folder of the issue under
    ``converter_env.local_web_app_path``.
    """
    web_reports_path = converter_env.local_web_app_path + '/htdocs/reports/' + issue_path
    if not os.path.isdir(web_reports_path):
        os.makedirs(web_reports_path)

    for name in os.listdir(report_path):
        source = report_path + '/' + name
        if name.endswith('.zip') or name == 'xml_converter.txt':
            os.unlink(source)
            continue
        text = fs_utils.read_file(source)
        text = text.replace('file:///' + pkg_path, '/img/revistas/' + issue_path)
        text = text.replace('file:///' + report_path, '/reports/' + issue_path)
        # unicode text is handed to write_file as UTF-8 encoded bytes
        if isinstance(text, unicode):
            text = text.encode('utf-8')
        fs_utils.write_file(web_reports_path + '/' + name, text)
def article_data_and_validations_report(journal, article, new_name, package_path, images_generation_report_filename, is_db_generation, is_sgml_generation):
    """Build the report of the data and validations of one article.

    :return: a fatal error message when ``article.tree`` is None; otherwise
        the report sections joined by ``html_reports.join_texts``. When the
        images generation report exists and is not empty, a warning is
        placed at the top and its content at the bottom of the report.
    """
    if article.tree is None:
        return validation_status.STATUS_FATAL_ERROR + ': ' + _('Unable to get data of ') + new_name + '.'

    article_validation = article_validations.ArticleContentValidation(journal, article, is_db_generation, False)
    sheet_data = ArticleSheetData(article, article_validation)
    article_display_report = ArticleDisplayReport(article, sheet_data, package_path, new_name)
    article_validation_report = ArticleValidationReport(article_validation)

    img_report_content = ''
    if os.path.isfile(images_generation_report_filename):
        img_report_content = fs_utils.read_file(images_generation_report_filename)
    has_img_report = len(img_report_content) > 0

    sections = []
    if has_img_report:
        sections.append(html_reports.tag('h1', _('ATTENTION'), 'warning'))
        sections.append(html_reports.tag('h1', _('New report: Images Report at the bottom'), 'warning'))

    if not is_sgml_generation:
        sections.append(article_validation_report.validations(display_all_message_types=False))
        sections.append(article_display_report.table_tables)
        sections.append(sheet_data.files_and_href(package_path))
    else:
        # SGML generation: the validations come between the front matter
        # and the body of the article
        sections.append(article_display_report.issue_header)
        sections.append(article_display_report.article_front)
        sections.append(article_validation_report.validations(display_all_message_types=False))
        sections.append(article_display_report.table_tables)
        sections.append(article_display_report.article_body)
        sections.append(article_display_report.article_back)

    if has_img_report:
        sections.append(img_report_content)

    return html_reports.join_texts(sections)
Beispiel #10
0
    def transform_content(self, xsl_filename):
        """Apply an XSL file to ``self.content`` using temporary files.

        The content is written to a temporary file, transformed with
        ``self.transform_file`` into a second temporary file and read back.

        :param xsl_filename: path of the XSL file
        :return: the transformed content, or '' when the transformation
            fails
        """
        if self.logger is not None:
            self.logger.register('XML.transform_content - inicio')
        f = tempfile.NamedTemporaryFile(delete=False)
        f.close()

        f2 = tempfile.NamedTemporaryFile(delete=False)
        f2.close()

        content = ''
        try:
            fs_utils.write_file(f.name, self.content)
            if self.transform_file(f.name, xsl_filename, f2.name):
                content = fs_utils.read_file(f2.name)
        finally:
            # Remove each temporary file. Unlinking f.name on every
            # iteration would fail on the second pass and never remove
            # f2.name.
            for item in (f.name, f2.name):
                if os.path.exists(item):
                    os.unlink(item)
        if self.logger is not None:
            self.logger.register('XML.transform_content - fim')
        return content
Beispiel #11
0
def xml_validate(xml_filename, result_filename, doctype=None):
    """Validate an XML file with the Java XMLCheck tool.

    The doctype is applied to the file with ``xml_utils.apply_dtd`` before
    the validation and the backup returned by it is moved back over
    ``xml_filename`` at the end. The output of the tool is written to
    ``result_filename``; when it contains 'ERROR' (in any case), the lines
    of the XML file, numbered and without the first one, are appended.

    :param xml_filename: path of the XML file
    :param result_filename: path of the validation result file
    :param doctype: doctype to apply; when None, an empty doctype is
        applied and the tool runs without '--validate'
    :return: True when the result does not contain 'ERROR'
    """
    validation_type = ''

    if doctype is None:
        doctype = ''
    else:
        validation_type = '--validate'

    bkp_xml_filename = xml_utils.apply_dtd(xml_filename, doctype)
    temp_result_filename = TMP_DIR + '/' + os.path.basename(result_filename)
    if os.path.isfile(result_filename):
        os.unlink(result_filename)
    if not os.path.isdir(os.path.dirname(result_filename)):
        os.makedirs(os.path.dirname(result_filename))

    # NOTE(review): the command line is built by string concatenation and
    # executed through the shell; file names are only wrapped in double
    # quotes.
    cmd = JAVA_PATH + ' -cp "' + JAR_VALIDATE + '" br.bireme.XMLCheck.XMLCheck "' + xml_filename + '" ' + validation_type + '>"' + temp_result_filename + '"'
    cmd = cmd.encode(encoding=sys.getfilesystemencoding())
    os.system(cmd)

    if os.path.exists(temp_result_filename):
        result = fs_utils.read_file(temp_result_filename, sys.getfilesystemencoding())

        if 'ERROR' in result.upper():
            # Append the numbered XML lines (first line left out) so that
            # the line numbers of the messages can be located. The file is
            # closed as soon as it has been read.
            with open(xml_filename, 'r') as xml_file:
                numbered = [str(n) + ':' + line for n, line in enumerate(xml_file) if n > 0]
            result += '\n' + ''.join(numbered).decode('utf-8')
            fs_utils.write_file(temp_result_filename, result)
    else:
        result = 'ERROR: Not valid. Unknown error.\n' + cmd
        fs_utils.write_file(temp_result_filename, result)

    shutil.move(temp_result_filename, result_filename)
    # put the original XML file back in place
    shutil.move(bkp_xml_filename, xml_filename)
    return not 'ERROR' in result.upper()
Beispiel #12
0
    def copy_files_to_local_web_app(self, xml_path, web_path):
        """Copy the files of a package to the folders of the web application.

        The destination depends on the file extension: 'pdf', 'xml' and
        'html' files have their own folders; any other file goes to the
        images folder of the issue, except '.tif'/'.tiff' files, which are
        not copied. Files ending with '.xml.bkp', '.xml.replaced.txt' or
        '.rep.xml' and entries which are not files are ignored. A PDF is
        also copied under the name given by ``new_name_for_pdf_filename``
        when that name is not None.

        :param xml_path: folder which contains the files of the package
        :param web_path: root folder of the local web application
        :return: the executed copies, one '<p>' element per line
        """
        msg = ['\n']
        msg.append('copying files from ' + xml_path)

        path = {}
        path['pdf'] = web_path + '/bases/pdf/' + self.relative_issue_path
        path['xml'] = web_path + '/bases/xml/' + self.relative_issue_path
        path['html'] = web_path + '/htdocs/img/revistas/' + self.relative_issue_path + '/html/'
        path['img'] = web_path + '/htdocs/img/revistas/' + self.relative_issue_path

        # The folder is listed once, before anything is created. The content
        # of the XML files is not needed to copy them, so it is not read.
        filenames = os.listdir(xml_path)

        for p in path.values():
            if not os.path.isdir(p):
                os.makedirs(p)

        ignored_suffixes = ('.xml.bkp', '.xml.replaced.txt', '.rep.xml')
        for f in filenames:
            source = xml_path + '/' + f
            if f.endswith(ignored_suffixes) or not os.path.isfile(source):
                continue

            # a name without '.' is its own "extension"
            ext = f[f.rfind('.')+1:]
            if ext not in path:
                if not f.endswith(('.tif', '.tiff')):
                    shutil.copy(source, path['img'])
                    msg.append('  ' + f + ' => ' + path['img'])
            elif ext == 'pdf':
                pdf_filenames = [f]
                new_pdf_filename = new_name_for_pdf_filename(f)
                if new_pdf_filename is not None:
                    pdf_filenames.append(new_pdf_filename)
                for pdf_filename in pdf_filenames:
                    destination = path[ext] + '/' + pdf_filename
                    if os.path.isfile(destination):
                        os.unlink(destination)
                    shutil.copyfile(source, destination)
                    msg.append('  ' + f + ' => ' + destination)
            else:
                shutil.copy(source, path[ext])
                msg.append('  ' + f + ' => ' + path[ext])
        return '\n'.join(['<p>' + item + '</p>' for item in msg])
    def style_validation(self, report_filename):
        """Produce the style checker report and tell whether it is clean.

        ``self.xml`` is transformed with ``self.xsl_prep_report`` into an
        intermediate XML report (same name as ``report_filename`` with
        '.html' replaced by '.xml'), which is then transformed with
        ``self.xsl_report`` into ``report_filename``. When the report file
        does not exist at the end, an error message is written to it.

        :return: True when the report contains 'Total of errors = 0' and
            either 'Total of warnings = 0' or no 'Total of warnings =' at
            all
        """
        xml_report = report_filename.replace('.html', '.xml')

        # remove the results of any previous execution
        for previous in (xml_report, report_filename):
            if os.path.exists(previous):
                os.unlink(previous)

        parameters = {}
        if self.xml.transform_file(self.xsl_prep_report, xml_report, parameters):
            report_transformer = java_xml_utils.XML(xml_report, None)
            report_transformer.logger = self.logger
            report_transformer.transform_file(self.xsl_report, report_filename, parameters)
            result = fs_utils.read_file(report_filename)
            # the intermediate report is not kept
            if os.path.isfile(xml_report):
                os.unlink(xml_report)

        if not os.path.isfile(report_filename):
            result = 'ERROR: ' + _('Unable to create') + ' ' + report_filename
            fs_utils.write_file(report_filename, result)

        no_errors = 'Total of errors = 0' in result
        no_warnings = ('Total of warnings = 0' in result) or ('Total of warnings =' not in result)
        return no_errors and no_warnings
Beispiel #14
0
    def xml_validate(self, result_filename):
        """Validate ``self.xml_filename`` with the Java XMLCheck tool.

        The output of the tool is written to ``result_filename``; when it
        contains 'ERROR' (in any case), the lines of the XML file, numbered
        and without the first one, are appended. The tool runs with
        '--validate' unless ``self.doctype`` is ''.

        :param result_filename: path of the validation result file
        :return: True when the result does not contain 'ERROR'
        """
        def log(message):
            # logging is optional: the logger may not have been set
            if self.logger is not None:
                self.logger.register(message)

        log('XML.xml_validate - inicio')
        validation_type = '' if self.doctype == '' else '--validate'
        temp_result_filename = self.prepare(result_filename)

        log('XML.transform_file - command - inicio')
        # NOTE(review): the command line is built by string concatenation
        # and executed through the shell; file names are only wrapped in
        # double quotes.
        cmd = JAVA_PATH + ' -cp "' + JAR_VALIDATE + '" br.bireme.XMLCheck.XMLCheck "' + self.xml_filename + '" ' + validation_type + '>"' + temp_result_filename + '"'
        cmd = cmd.encode(encoding=sys.getfilesystemencoding())
        os.system(cmd)
        log('XML.transform_file - command - fim')

        if os.path.exists(temp_result_filename):
            result = fs_utils.read_file(temp_result_filename, sys.getfilesystemencoding())
            if 'ERROR' in result.upper():
                # Append the numbered XML lines (first line left out) so
                # that the line numbers of the messages can be located. The
                # file is closed as soon as it has been read.
                with open(self.xml_filename, 'r') as xml_file:
                    numbered = [str(n) + ':' + line for n, line in enumerate(xml_file) if n > 0]
                result += '\n' + ''.join(numbered).decode('utf-8')
                fs_utils.write_file(result_filename, result)
                os.unlink(temp_result_filename)
            else:
                shutil.move(temp_result_filename, result_filename)
        else:
            result = 'ERROR: Not valid. Unknown error.\n' + cmd
            fs_utils.write_file(result_filename, result)
        # NOTE(review): this message repeats the one registered right after
        # os.system; kept so that the log output does not change.
        log('XML.transform_file - command - fim')
        log('XML.xml_validate - fim')
        return not 'ERROR' in result.upper()
Beispiel #15
0
 def email_header(self, filename):
     """Return the content of a header file stored in CONFIG_PATH.

     :param filename: name of the file, relative to CONFIG_PATH, or None
     :return: the content of the file, or '' when ``filename`` is None
     """
     if filename is None:
         return ''
     return fs_utils.read_file(CONFIG_PATH + '/' + filename)
Beispiel #16
0
def apply_dtd(xml_filename, doctype):
    """Replace the doctype of an XML file in place, keeping a backup.

    The file is first copied, under the same base name, to a new temporary
    folder; its content is then rewritten with the result of
    ``replace_doctype``.

    :return: path of the backup copy (the temporary folder is not removed
        here)
    """
    backup_folder = tempfile.mkdtemp()
    backup_filename = backup_folder + '/' + os.path.basename(xml_filename)
    shutil.copyfile(xml_filename, backup_filename)
    original_content = fs_utils.read_file(xml_filename)
    fs_utils.write_file(xml_filename, replace_doctype(original_content, doctype))
    return backup_filename
        if len(parts) == 6:
            bad, correct, country_name, country_code, state, city = parts
            results.append('\t'.join([correct, city, state, country_code, country_name]))
    results = list(set(results))
    print('downloaded:')
    print(len(results))
    fs_utils.write_file(wayta_orgname_location_country, '\n'.join(sorted(results)))


# Command line driver:
#   no argument  -> download the data and report the differences
#   'update'     -> recreate the database
#   'fix_local'  -> remove the exceeding blank spaces of the local file
execute_update = False
if len(sys.argv) == 2:
    execute_update = (sys.argv[1] == 'update')
    if sys.argv[1] == 'fix_local':
        fs_utils.write_file(local_orgname_location_country, remove_exceding_blank_spaces(fs_utils.read_file(local_orgname_location_country)))

elif len(sys.argv) == 1:
    update_wayta_orgname_location_country(source, wayta_normalized_aff, wayta_orgname_location_country)
    counts = report_differences(local_orgname_location_country, wayta_orgname_location_country, deleted_report, added_report, fixed_report, replaced_report)

    print('->')
    print(counts)
    print(sum(counts))

if not execute_update:
    print('No update')
else:
    # imported only when the database is really going to be recreated
    import institutions_service
    a = institutions_service.OrgManager()
    a.create_db()
    print('db updated')
def fix_endoflines(filename, destination):
    """Write the stripped and sorted lines of a file to another file.

    :param filename: path of the file to read
    :param destination: path of the file to write
    """
    content = fs_utils.read_file(filename)
    stripped_lines = [line.strip() for line in content.split('\n')]
    # Sort the stripped lines. Sorting the raw content instead would sort
    # its individual characters and write one character per line.
    fs_utils.write_file(destination, '\n'.join(sorted(stripped_lines)))
Beispiel #19
0
    'supplementary-material',
    'table-wrap',
    'verse-group',
]

# values accepted as type of a related article
related_articles_type = [
    'corrected-article',
    'commentary-article',
    'press-release',
    'retracted-article',
]

# base URL of each kind of contributor identifier
CONTRIB_ID_URLS = {
    'lattes': 'http://lattes.cnpq.br/',
    'orcid': 'http://orcid.org/',
    'researchid': 'http://www.researcherid.com/rid/',
    'scopus': 'https://www.scopus.com/authid/detail.uri?authorId=',
}


# Licenses are the whitespace separated items of the licenses table; each
# one is also accepted with 'http:' replaced by 'https:'. When the table
# can not be read (read_file returns None), no license is known.
LICENSES = read_file(CURRENT_PATH + '/../tables/licenses.csv')
LICENSES = [] if LICENSES is None else LICENSES.split()
LICENSES += [item.replace('http:', 'https:') for item in LICENSES]

SPS_HELP_ELEMENTS = [
    'abbrev-journal-title',
    'abstract',
    'ack',
    'addr-line',
    'aff',
    'app',
    'article-categories',
    'article-id',