def evaluate_xml_path(xml_path):
    """Classify *xml_path* as an SGML-XML file, a plain XML file, or a
    folder of XML files.

    Returns a ``(sgm_xml, xml_list, errors)`` tuple: ``sgm_xml`` is the
    path when it ends with ``.sgm.xml``; ``xml_list`` is a list of XML
    file paths; ``errors`` collects the error messages.
    """
    sgm_xml = None
    xml_list = None
    errors = []
    if xml_path is None:
        errors.append(_('Missing XML location. '))
        return sgm_xml, xml_list, errors
    if os.path.isfile(xml_path):
        if xml_path.endswith('.sgm.xml'):
            sgm_xml = xml_path
        elif xml_path.endswith('.xml'):
            xml_list = [xml_path]
        else:
            errors.append(_('Invalid file. XML file required. '))
    elif os.path.isdir(xml_path):
        xml_list = [
            os.path.join(xml_path, entry)
            for entry in os.listdir(xml_path)
            if entry.endswith('.xml')
        ]
        if not xml_list:
            errors.append(_('Invalid folder. Folder must have XML files. '))
    else:
        # path given but neither file nor folder
        errors.append(_('Missing XML location. '))
    return sgm_xml, xml_list, errors
def validate_folder(self):
    """Return a ``(message, color)`` pair describing the selected folder.

    Valid folder: the folder path on white; invalid: an error message
    on yellow.
    """
    if self.is_valid_folder():
        return self.selected_folder, 'white'
    return _('Invalid folder. ') + _('No .xml files was found'), 'yellow'
def report(self):
    """Build a plain-text report of the source/destination package files
    and the href/related-files statistics."""
    title = _('Report of files')
    lines = [
        title,
        "",
        '-' * len(title),
        "",
        _('Source path') + ': ' + self.src_pkgfiles.path,
        _('Package path') + ': ' + self.dest_pkgfiles.path,
        _('Source XML name') + ': ' + self.src_pkgfiles.name,
        _('Package XML name') + ': ' + self.dest_pkgfiles.name,
        text_report.display_labeled_list(
            _('Total of related files'),
            text_report.display_pairs_list(self.related_files_copy)),
        text_report.display_labeled_list(
            _('Total of files in package'),
            text_report.display_pairs_list(self.href_files_copy)),
        text_report.display_labeled_list(
            _('Total of @href in XML'),
            text_report.display_pairs_list(self.href_replacements)),
        text_report.display_labeled_list(
            _('Total of files not found in package'),
            self.missing_href_files),
    ]
    return '\n'.join(lines)
def download(self):
    """Download the journals list of the chosen collection and generate
    the Markup program input file, reporting progress via labels.

    Reads the current combobox choice; 'All' means every collection.
    """
    choice = self.choice.get()
    msg = ttk.Label(self, text=_("Select one collection to use its journals "
                                 "data for the Markup Program"))
    msg.grid(column=0, row=ROW_MSG)
    # bug fix: translate the literal msgid first, then format — formatting
    # before _() produces a string that never matches the catalog
    label1 = ttk.Label(self, text=_("Selecionado: {}").format(choice))
    label1.grid(column=0, row=ROW_SELECTED)
    if choice == 'All':
        choice = None  # None selects every collection downstream
    label2 = ttk.Label(self, text=_("Downloading.."))
    label2.grid(column=0, row=ROW_DOWNLOADING)
    journals = get_journals_list(self.collections, choice)
    generate_input_for_markup(journals, self.temp_filename, self.filename)
    label4 = ttk.Label(self, text=_("Downloaded: {} journals").format(
        len(journals)))
    label4.grid(column=0, row=ROW_DOWNLOADED)
    label3 = ttk.Label(self, text=_("Finished"))
    label3.grid(column=0, row=ROW_FINISHED)
def is_fulldate(label, dateiso):
    """Validate *dateiso* (a ``YYYYMMDD`` string) as a full calendar date.

    *label* identifies the date being checked in the error messages.
    Returns a list of error messages; empty means the date is valid.
    """
    y, m, d = dateiso[0:4], dateiso[4:6], dateiso[6:8]
    y = int(y) if y.isdigit() else 0
    m = int(m) if m.isdigit() else 0
    d = int(d) if d.isdigit() else 0
    msg = []
    if not y > 0:
        msg.append(
            _('{value} is an invalid value for {label}. ').format(
                value=y, label='year (' + label + ')'))
    if not 0 < m <= 12:
        msg.append(
            _('{value} is an invalid value for {label}. ').format(
                value=m, label='month (' + label + ')'))
    # bug fix: 0 (and negatives) are invalid days too; the old check only
    # rejected d > 31
    if not 0 < d <= 31:
        msg.append(
            _('{value} is an invalid value for {label}. ').format(
                value=d, label='day (' + label + ')'))
    if len(msg) == 0:
        try:
            # the calendar check (e.g. Feb 30) is delegated to datetime
            datetime(y, m, d)
        except ValueError:  # bug fix: narrowed from a bare except
            msg.append(
                _('{value} is an invalid value for {label}. ').format(
                    value=d, label=label + ': day '))
    return msg
def validations_table(results):
    """Render validation *results* as an HTML div containing a sheet.

    Each result is a (label, status, message[, xml]) sequence; 3-item
    results get an empty xml column.  Malformed results are logged at
    debug level and skipped.  Returns '' when *results* is None.
    """
    r = ''
    if results is not None:
        rows = []
        for result in results:
            result = list(result)
            if len(result) == 3:
                result.append('')  # pad the missing xml column
            if len(result) == 4:
                label, status, msg, xml = result
                rows.append({
                    'label': attributes.sps_help(label),
                    'status': status,
                    'message': msg,
                    'xml': xml,
                    _('why it is not a valid message?'): ' '
                })
            else:
                # bug fix: pass the value as a lazy %-style argument;
                # the old call gave logging a positional arg for a format
                # string with no placeholder
                logger.debug('validations_table: %s', result)
        r = html_reports.tag(
            'div',
            html_reports.sheet([
                'label', 'status', 'message', 'xml',
                _('why it is not a valid message?')
            ], rows, table_style='validation_sheet'))
    return r
def configure(self):
    """Build the "download journals data" window widgets and start the
    Tk main loop."""
    self.master.minsize(400, 200)
    self.master.title(_('Download journals data'))
    self.master.wm_iconbitmap(ICON)
    self.pack()

    ttk.Label(self, text=_('Select a collection:')).grid(
        column=0, row=ROW_SELECT_A_COLLECTION)

    # 'All' first, then the collections sorted by name
    options = ['All'] + sorted(self.collections.keys())
    self.choice = tk.StringVar(self)
    self.choice.set(options[0])
    combobox = ttk.Combobox(self, width=30, textvariable=self.choice)
    combobox['values'] = tuple(options)
    combobox.grid(column=0, row=ROW_COMBOBOX)

    ttk.Button(self, text=_('download'), command=self.download).grid(
        column=0, row=ROW_DOWNLOAD_BUTTON)
    ttk.Button(self, text=_('close'),
               command=lambda: self.master.destroy()).grid(
        column=0, row=ROW_CLOSE_BUTTON)
    self.mainloop()
def validate_doctype(self):
    """Validate the DOCTYPE (PUBLIC and SYSTEM id) against the declared
    SPS version.

    Returns a list of error messages; empty when the DOCTYPE is valid or
    no SPS version is declared.
    """
    sps_version = self.sps_version
    public_id = self.tree.docinfo.public_id
    system_id = self.tree.docinfo.system_url
    if not sps_version:
        return []
    errors = []
    dtd_public_id_items = xml_versions.SPS_VERSIONS.get(sps_version)
    if dtd_public_id_items is None:
        # unknown SPS version: nothing further can be checked
        errors.append(
            _('{value} is an invalid value for {label}. ').format(
                value=sps_version, label='article/@specific-use'))
        return errors
    if public_id not in dtd_public_id_items:
        errors.append(
            _('{value} is an invalid value for {label}. ').format(
                value=public_id or '', label='DTD PUBLIC ID'))
        errors.append(
            _('{requirer} requires {required}. ').format(
                requirer='SPS version {}'.format(sps_version),
                required=_(" or ").join(dtd_public_id_items)))
        return errors
    _location = None
    # bug fix: .get(public_id) may return None; iterate an empty tuple
    # instead of raising TypeError
    for location in self.locations.get(public_id) or ():
        if system_id in location:
            _location = location
            break
    if not _location:
        errors.append(
            _('{value} is an invalid value for {label}. ').format(
                value=system_id, label='DTD SYSTEM ID'))
    return errors
def validate(self, article):
    """Validate every DOI declared in *article*.

    Collects (label, status, message) tuples into ``self.messages`` and
    returns that list.  Per DOI it checks the embedded ISSN, the prefix
    and, when the DOI web service is reachable, the registration and the
    journal/article title consistency.
    """
    self.messages = []
    # lower-cased journal ISSNs (print and electronic), skipping absent ones
    journal_issns = [
        issn.lower() for issn in [article.print_issn, article.e_issn]
        if issn is not None
    ]
    # publication year used to look up the journal's registered DOI prefixes
    year = (article.real_pubdate or article.expected_pubdate or {}).get('year')
    journal_prefixes = self.journal_prefixes(journal_issns, year)
    for lang, doi in article.doi_by_lang:
        if not doi:
            continue
        self.validate_issn_in_doi(journal_issns, doi)
        self.validate_doi_prefix(journal_prefixes, journal_issns,
                                 article.journal_title, doi)
        if self.validate_format(doi):
            msg = ''
            if self.is_working:
                doi_data = self.ws_doi.doi_data(doi)
                if doi_data is None:
                    msg = _('{} is not registered for any article. '
                            ).format(doi)
                else:
                    self.validate_journal_title(article.journal_title,
                                                doi, doi_data)
                    self.validate_article_title(article.titles, doi,
                                                doi_data)
            else:
                # DOI web service is unreachable: warn instead of validating
                msg = _('{} is not working. ').format(self.ws_doi.URL)
            if msg:
                self.messages.append(
                    ('doi', validation_status.STATUS_WARNING, msg))
    return self.messages
def year(self, article_year):
    """Validate the reference's year against the article's year.

    *article_year* is a (label, value) pair.  Returns a list of
    ('year', status, message) tuples.
    """
    r = []
    label_year, value_year = article_year
    _y = self.refxml.reference.formatted_year
    if len(self.refxml.year) > 1:
        r.append(
            ('year', validation_status.STATUS_FATAL_ERROR,
             _('Identify as "year" the more recent publication date. ')))
    if _y is not None:
        if _y.isdigit():
            # NOTE(review): lexicographic comparison; assumes both values
            # are 4-digit year strings — confirm
            if _y > value_year:
                ref_year_label = 'ref/year ({})'.format(_y)
                art_year_label = '{}/year ({})'.format(
                    label_year, value_year)
                r.append(('year', validation_status.STATUS_FATAL_ERROR,
                          _('{} should not be greater than {}. ').format(
                              ref_year_label, art_year_label)))
        elif 's.d' in _y or 's/d' in _y:
            # "no date" markers (s.d. / s/d) are merely informative
            # (bug fix: collapsed three branches, one of which was an
            # unreachable duplicate of the 's/d' test)
            r.append(('year', validation_status.STATUS_INFO, _y))
        else:
            r.append(
                ('year', validation_status.STATUS_FATAL_ERROR,
                 _('{value} is not a number nor is in an expected format. '
                   ).format(value=_y)))
    return r
def validate_pubtype_and_ref_data(publication_type, label, values):
    """Check the *values* of subelement *label* against the rules for
    *publication_type* (required vs. not-allowed subelements).

    Returns an error message string, or None when there is nothing to
    report.
    """
    problem = None
    compl = ''
    items = []
    # bug fix: normalize once so the not-allowed branch does not crash on
    # len(None) / join(None)
    values = values or []
    required = label in attributes.REFERENCE_REQUIRED_SUBELEMENTS.get(
        publication_type, [])
    not_allowed = label in attributes.REFERENCE_NOT_ALLOWED_SUBELEMENTS.get(
        publication_type, [])
    if required and len(values) == 0:
        problem = _('{requirer} requires {required}. ').format(
            requirer='@publication-type="' + publication_type + '"',
            required=label)
        compl = _('If the reference has no {label}, ignore this message. '
                  ).format(label=label)
        items = ['@publication-type', _('the elements of this reference')]
    elif not_allowed and len(values) > 0:
        problem = _('{label} is not allowed for {item}. ').format(
            label=label, item='@publication-type=' + publication_type)
        items = ['@publication-type', label, ', '.join(values)]
    if problem is not None:
        problem += _('Be sure that you have correctly identified: '
                     ) + ' and/or '.join(items) + '. ' + compl
    return problem
def surname_validation_result(self):
    """Validate ``self.contrib.surname``.

    Returns a list of (label, status, message) tuples: the presence and
    suffix check, plus an optional warning about unexpected digits in
    the surname.
    """
    r = []
    label = 'surname'
    # the presence check supplies the (label, status, message) triple
    label, status, msg = data_validations.is_required_data(
        label, self.contrib.surname)
    if status == validation_status.STATUS_OK:
        msg = self.contrib.surname
        parts = self.contrib.surname.split(' ')
        # a trailing word that is a known suffix (Jr, Neto, ...) should
        # be tagged as <suffix>, not left inside <surname>
        if parts[-1] in attributes.identified_suffixes():
            msg = _(
                '{label} contains invalid {invalid_items_name}: {invalid_items}. '
            ).format(
                label=u'<surname>{v}</surname>'.format(
                    v=self.contrib.surname),
                invalid_items_name=_('terms'),
                invalid_items=parts[-1])
            msg += _(
                u'{value} should be identified as {label}, if {term} is the surname, ignore this message. '
            ).format(
                value=parts[-1],
                label=u' <suffix>' + parts[-1] + '</suffix>',
                term=parts[-1])
            status = validation_status.STATUS_ERROR
    r.append((label, status, msg))
    # digits in a surname are suspicious but not fatal
    _test_number = data_validations.warn_unexpected_numbers(
        label, self.contrib.surname)
    if _test_number is not None:
        r.append(_test_number)
    return r
def articles_dates_report(self):
    """Build an HTML report with the articles' date milestones and the
    reference years found in the package."""
    labels = [
        'name', '@article-type', 'received', 'accepted',
        'receive to accepted (days)', 'SciELO date', 'editorial date',
        'accepted to SciELO (days)', 'accepted to nowadays (days)'
    ]
    items = []
    for xml_name, doc in self.articles:
        row = [
            xml_name,
            doc.article_type,
            utils.display_datetime(doc.received_dateiso),
            utils.display_datetime(doc.accepted_dateiso),
            str(doc.history_days),
            utils.display_datetime(doc.isoformat(doc.real_pubdate)),
            utils.display_datetime(doc.isoformat(doc.expected_pubdate)),
            str(doc.accepted_to_real_in_days),
            str(doc.accepted_to_nowadays_in_days),
        ]
        items.append(html_reports.label_values(labels, row))
    article_dates = html_reports.sheet(labels, items, 'dbstatus')

    labels = [_('year'), _('location')]
    items = [
        html_reports.label_values(labels, [year, self.years[year]])
        for year in sorted(self.years.keys())
    ]
    reference_dates = html_reports.sheet(labels, items, 'dbstatus')

    return html_reports.tag(
        'h4', _('Articles Dates Report')) + article_dates + reference_dates
def _name_error(self, xml_filename, separator):
    """Return an error text when the XML base name contains forbidden
    characters ('_' or '.'); return '' otherwise."""
    base_name = os.path.splitext(os.path.basename(xml_filename))[0]
    if '_' not in base_name and '.' not in base_name:
        return ''
    return (rst_title(_('Name errors')) +
            _('{} has forbidden characters, which are {}').format(
                base_name, '_.') + separator)
def display_report(report_filename):
    """Print the report location and open it in the default web browser.

    Falls back to a "open it manually" message when the browser cannot
    be launched.
    """
    encoding.display_message(
        # bug fix: the msgid had no {filename} placeholder, so the report
        # path was never shown
        _('Report:\n {filename}').format(filename=report_filename))
    try:
        webbrowser.open('file://' + report_filename, new=2)
    except Exception as e:
        encoding.display_message(
            # bug fix: translate the literal msgid first, then format —
            # formatting before _() defeats the catalog lookup
            _("Unable to open {} automatically. Open it manually. ").format(
                report_filename))
        encoding.report_exception(e)
def previous_authors(self):
    """Flag references that use '______' (repeated-author marker) without
    a person-group element."""
    marker = '_' * 6
    has_marker = self.refxml.xml.count(marker) > 0
    if has_marker and len(self.refxml.person_group_nodes) == 0:
        return [(
            'person-group', validation_status.STATUS_FATAL_ERROR,
            _('{} indicates previous authors. ').format(marker) +
            _('Only in element-citation, replace ______ by person-group identifying the previous authors data such given-names, surnames etc. '
              ))]
    return []
def validate_package(self):
    """Validate every article of the package; return {name: result}."""
    articles = self.pkg.articles
    encoding.display_message(
        _('Validate package ({} files)').format(len(articles)))
    results = {}
    for name in sorted(articles):
        encoding.display_message(_('Validate {name}').format(name=name))
        results[name] = self.validate_package_item(
            articles[name], self.pkg.files[name], self.pkg.outputs[name])
    return results
def report_articles_merging_conflicts(self):
    """Build (and cache on the instance) an HTML report of merge
    conflicts between registered and package articles.

    A conflict is a title/author mismatch or a name/order mismatch; a
    comparison sheet is produced per conflicting document.  Returns ''
    when there are no conflicts.
    """
    if not hasattr(self, '_report_articles_merging_conflicts'):
        merging_errors = []
        if len(self.docs_merger.titaut_conflicts) + len(
                self.docs_merger.name_order_conflicts) > 0:
            # union of the document names involved in either conflict type
            keys = list(self.docs_merger.titaut_conflicts.keys()) + list(
                self.docs_merger.name_order_conflicts.keys())
            keys = sorted(list(set(keys)))
            merging_errors = [
                html_reports.p_message(
                    validation_status.STATUS_BLOCKING_ERROR + ': ' +
                    _('Unable to update because the registered article data and the package article data do not match. '
                      ))
            ]
            articles = self.docs_merger.articles
            registered_articles = self.docs_merger.registered_articles
            for name in keys:
                labels = [
                    name,
                    _('title/author conflicts'),
                    _('name/order conflicts')
                ]
                # first column: the package article being merged
                values = [
                    article_data_reports.display_article_data_to_compare(
                        articles.get(name))
                ]
                # second column: registered articles in title/author conflict
                articles_in_conflict = []
                for reg_name, art in self.docs_merger.titaut_conflicts.get(
                        name, {}).items():
                    articles_in_conflict.append(
                        article_data_reports.
                        display_article_data_to_compare(art))
                values.append(''.join(articles_in_conflict))
                # third column: articles in name/order conflict
                articles_in_conflict = []
                for pkg_name, art in self.docs_merger.name_order_conflicts.get(
                        name, {}).items():
                    articles_in_conflict.append(
                        article_data_reports.
                        display_article_data_to_compare(art))
                values.append(''.join(articles_in_conflict))
                merging_errors.append(
                    html_reports.sheet(
                        labels,
                        [html_reports.label_values(labels, values)],
                        table_style='dbstatus',
                        html_cell_content=labels))
        # cache: the report is computed at most once per instance
        self._report_articles_merging_conflicts = ''.join(merging_errors)
    return self._report_articles_merging_conflicts
def main():
    """CLI entry point: generate PubMed XML for the documents of an issue.

    Reads the issue path and date filters from argv; when no path is
    given, falls back to interactive input via ``read_form_inputs``.
    Runs the PubMed XML maker only if the ISIS utilities are available.
    """
    parser = argparse.ArgumentParser(description='XML PubMed cli utility')
    parser.add_argument(
        "issue_path", nargs="?", default='',
        help="filesystem path or URL to the issue directory")
    parser.add_argument(
        "from_date", nargs="?", default='',
        help="date iso YYYYMMDD, filter to get selected documents")
    parser.add_argument(
        "final_date", nargs="?", default='',
        help="date iso YYYYMMDD, filter to name file")
    parser.add_argument('--debug', action='store_true',
                        help='to register log')
    parser.add_argument('--loglevel', default='WARNING')
    args = parser.parse_args()
    logger.setLevel(args.loglevel.upper())
    issue_path = args.issue_path
    from_date = args.from_date
    final_date = args.final_date
    debug = args.debug
    if not issue_path:
        # no CLI argument: ask interactively
        issue_path, from_date = read_form_inputs()
        if issue_path is None:
            sys.exit("No issue path was informed. Unable to continue. ")
        # interactive mode names the file with the current date and
        # disables debug logging
        final_date = utils.now()[0]
        debug = False
    errors = []
    if not os.path.isdir(issue_path):
        errors.append(_('issue path is not a folder'))
    if len(errors) == 0:
        config = xc_config.Configuration()
        ucisis = dbm_isis.UCISIS(
            dbm_isis.CISIS(config.cisis1030),
            dbm_isis.CISIS(config.cisis1660))
        if ucisis.is_available:
            issue_stuff = IssueStuff(ucisis, issue_path, from_date,
                                     final_date)
            pubmed_xml_maker = PubMedXMLMaker(issue_stuff, XSL)
            pubmed_xml_maker.debug = debug
            pubmed_xml_maker.execute_procedures()
        else:
            errors.append(_('cisis expected'))
    if len(errors) > 0:
        print('\n'.join(errors))
def spf_message(self):
    """Build an HTML info message about the SPS package availability,
    including FTP details when a server is configured.

    Returns "" when there is no package info.
    """
    info = self.sps_pkg_info
    if not info:
        return ""
    server = info.get("server")
    if server:
        ftp = _("(FTP: {} | User: {})").format(server, info.get("user", ''))
    else:
        ftp = ""
    return html_reports.p_message(
        _("[INFO] {} is available for SPF {}").format(info.get("file"), ftp))
def articles_affiliations_report(self):
    """Build an HTML sheet of affiliations, their occurrence count and
    the files where they occur."""
    header = ['label', 'quantity', _('files')]
    rows = [
        {
            'label': label,
            'quantity': str(len(occs)),
            _('files'): sorted(list(set(occs))),
        }
        for label, occs in self.compiled_affiliations.items()
    ]
    return (html_reports.tag('h4', _('Affiliations Report')) +
            html_reports.sheet(header, rows, 'dbstatus'))
def xml_list(self):
    """Build an HTML listing of the package folder's XML files."""
    folder = self.package_folder
    header = (
        u'<p>{}: {}</p>'.format(_('XML path'), folder.path) +
        u'<p>{}: {}</p>'.format(_('Total of XML files'),
                                len(folder.pkgfiles_items)))
    entries = ''.join(
        '<li>{}</li>'.format(
            html_reports.format_list(name, 'ol', pkgfiles.files))
        for name, pkgfiles in folder.pkgfiles_items.items())
    body = header + '<ol>{}</ol>'.format(entries)
    return u'<div class="xmllist">{}</div>'.format(body)
def _err_messages(self, valid_dtd, name_error):
    """Collect the invalid-XML, DTD and name error messages for the
    validated file; return them as a list of strings."""
    errors = []
    if self.validator.xml_validator is None:
        errors.append(validation_status.STATUS_FATAL_ERROR + ' ' +
                      _('XML file is invalid') + '\n')
    if not valid_dtd:
        errors.append(_('XML file has DTD errors') + '\n')
    if name_error:
        errors.append(validation_status.STATUS_FATAL_ERROR + ' ' +
                      _('XML file has name errors') + '\n')
    return errors
def validate_doi_prefix(self, journal_prefixes, journal_issns,
                        journal_title, article_doi):
    """Validate the prefix of *article_doi*.

    When the journal has known prefixes the DOI must use one of them;
    otherwise the publisher registered for the journal's ISSNs and for
    the DOI prefix are compared.  Appends (label, status, message)
    tuples to ``self.messages``.
    """
    # bug fix: str.find returns -1 when there is no '/', which used to
    # silently truncate the last character of the DOI; partition keeps
    # the whole string in that case
    prefix = article_doi.partition('/')[0]
    if len(journal_prefixes) > 0 and \
            prefix not in journal_prefixes:
        self.messages.append(
            ('doi', validation_status.STATUS_FATAL_ERROR,
             _('{value} is an invalid value for {label}. ').format(
                 value=prefix, label=_('doi prefix')) +
             _('{label} must starts with: {expected}. ').format(
                 label='doi', expected=_(' or ').join(journal_prefixes))))
    elif len(journal_prefixes) == 0:
        publisher_by_issn = self.ws_doi.journal_publisher_by_issn(
            journal_issns) or ''
        publisher_by_prefix = self.ws_doi.journal_publisher_by_doi_prefix(
            prefix) or ''
        _publisher_by_issn = publisher_by_issn.lower()
        _publisher_by_prefix = publisher_by_prefix.lower()
        # accept when either publisher name contains the other
        if (_publisher_by_issn not in _publisher_by_prefix and
                _publisher_by_prefix not in _publisher_by_issn):
            msgs = [
                article_doi,
                _('{value} is an invalid value for {label}. ').format(
                    value=prefix, label=_('doi prefix')),
                _('"{}" belongs to {}. ').format(prefix,
                                                 publisher_by_prefix),
                _('DOI Publisher for {}: {}. ').format(
                    journal_title, publisher_by_issn)
            ]
            self.messages.append(
                ('doi', validation_status.STATUS_FATAL_ERROR, msgs))
def get_errors_if_xml_not_found(xml_path):
    """Check that *xml_path* is an XML file or a folder containing XML
    files.

    Returns a list of error messages; empty when the location is valid.
    """
    if xml_path is None:
        return [_('Missing XML location. ')]
    errors = []
    if os.path.isfile(xml_path):
        if not xml_path.endswith('.xml'):
            errors.append(_('Invalid file. XML file required. '))
    elif not is_valid_xml_dir(xml_path):
        errors.append(_('Invalid folder. Folder must have XML files. '))
    return errors
def sources_overview_report(self):
    """Build per-reference-type HTML sheets listing each source and its
    locations."""
    h = ''
    if len(self.reftype_and_sources) > 0:
        labels = ['source', _('location')]
        for reftype, sources in self.reftype_and_sources.items():
            h += html_reports.tag('h4', reftype)
            rows = [
                {'source': src, _('location'): sources[src]}
                for src in sorted(sources.keys())
            ]
            h += html_reports.sheet(labels, rows, 'dbstatus')
    return h
def validate(self, article):
    """Validate the article's issue data when generating the database.

    Returns a ``ValidationsResult`` whose message is empty outside of
    db-generation mode.
    """
    message = ''
    if self.is_db_generation:
        if self.issue_error_msg is not None:
            # the issue itself could not be identified: blocking error
            message = (validation_status.STATUS_BLOCKING_ERROR + ': ' +
                       _('Unable to identify {unidentified}. ').format(
                           unidentified=_('issue')))
            message += self.issue_error_msg
        elif self.issue_models:
            message = self.issue_models.validate_article_issue_data(article)
    result = validations_module.ValidationsResult()
    result.message = message
    return result
def spf_message(self):
    """Build an HTML info message about the SPS package for SPF.

    Returns "" when there is no package info; otherwise the message
    includes FTP details or a file-existence flag.
    """
    if not self.sps_pkg_info:
        return ""
    result = False
    if self.sps_pkg_info.get("server"):
        # FTP destination details when a server is configured
        result = _("FTP: {} | User: {}").format(
            self.sps_pkg_info.get("server"),
            self.sps_pkg_info.get("user", ''))
    elif self.sps_pkg_info.get("file"):
        # NOTE(review): this renders the boolean True/False inside the
        # message; presumably intended to report whether the package file
        # exists on disk — confirm
        result = os.path.isfile(self.sps_pkg_info.get("file"))
    return html_reports.p_message(
        _("[INFO] {} is available for SPF ({})").format(
            self.sps_pkg_info.get("file"), result))
def _is_valid_orcid(self, orcid, contrib_name):
    """Check *orcid* by probing its public URL.

    Returns a (label, status, message) tuple: a fatal error when the
    ORCID website is reachable and the ORCID URL does not resolve;
    otherwise a warning that ownership by *contrib_name* could not be
    verified (a resolving URL proves the ORCID exists, not that it
    belongs to the contributor).
    """
    contrib_orcid_url = '{}{}'.format(self.ORCID_MAIN_URL, orcid)
    if self.is_available_orcid_website:
        if not self.ws_requester.is_valid_url(contrib_orcid_url):
            return (
                'contrib-id', validation_status.STATUS_FATAL_ERROR,
                _('{value} is an invalid value for {label}. ').format(
                    value=orcid, label='ORCID'))
    # reached both when the website is unavailable and when the URL is
    # valid: in either case ownership remains unverified
    return ('contrib-id', validation_status.STATUS_WARNING,
            _('Unable to check if {} belongs to {}. ').format(
                html_reports.link(contrib_orcid_url, orcid),
                contrib_name.fullname))
def report_rejected_articles(self):
    """Build an HTML section listing documents rejected because they are
    no longer "ahead of print"; return '' when there are none."""
    rejected = self.docs_merger.rejected_articles
    if not rejected:
        return ''
    parts = [
        html_reports.tag('h3', _('Rejected documents')),
        html_reports.tag(
            'p',
            _('These documents were rejected because they are not '
              '"ahead of print" anymore, they were published in a '
              'regular issue, '
              'so they are not allowed to be reinserted as '
              '"ahead of print".'),
            'blockingerror'),
    ]
    parts.extend(html_reports.tag('p', name) for name in rejected)
    return ''.join(parts)