Ejemplo n.º 1
0
def evaluate_xml_path(xml_path):
    """Classify ``xml_path`` as an SGML-derived XML, an XML file or a folder.

    :param xml_path: path to a file or folder, or ``None``.
    :return: tuple ``(sgm_xml, xml_list, errors)`` where ``sgm_xml`` is the
        path when it is a file ending with ``.sgm.xml`` (else ``None``),
        ``xml_list`` is the list of ``.xml`` paths found (a single file or
        the matching entries of a folder; ``None`` when not applicable) and
        ``errors`` is a list of translated messages.
    """
    problems = []
    sgml_file = None
    xml_files = None

    if xml_path is not None and os.path.isfile(xml_path):
        # '.sgm.xml' has to be tested first because it also ends with '.xml'.
        if xml_path.endswith('.sgm.xml'):
            sgml_file = xml_path
        elif xml_path.endswith('.xml'):
            xml_files = [xml_path]
        else:
            problems.append(_('Invalid file. XML file required. '))
    elif xml_path is not None and os.path.isdir(xml_path):
        xml_files = []
        for entry in os.listdir(xml_path):
            if entry.endswith('.xml'):
                xml_files.append(os.path.join(xml_path, entry))
        if not xml_files:
            problems.append(
                _('Invalid folder. Folder must have XML files. '))
    else:
        # Either no path was given or it is neither a file nor a folder.
        problems.append(_('Missing XML location. '))
    return sgml_file, xml_files, problems
Ejemplo n.º 2
0
 def validate_folder(self):
     """Return a ``(message, color)`` pair describing the selected folder.

     The pair is ``(self.selected_folder, 'white')`` when
     ``self.is_valid_folder()`` is truthy; otherwise it is a translated
     warning text paired with ``'yellow'``.
     """
     text, shade = self.selected_folder, 'white'
     if self.is_valid_folder():
         return text, shade
     return _('Invalid folder. ') + _('No .xml files was found'), 'yellow'
Ejemplo n.º 3
0
 def report(self):
     """Build the plain-text "Report of files" and return it as one string.

     The report has an underlined title, the source/package paths and XML
     names, and four labeled lists produced by ``text_report``. Lines are
     joined with a newline character.
     """
     heading = _('Report of files')
     labeled = text_report.display_labeled_list
     pairs = text_report.display_pairs_list
     lines = [
         heading,
         "",
         # Underline as long as the translated title.
         '-' * len(heading),
         "",
         _('Source path') + ':   ' + self.src_pkgfiles.path,
         _('Package path') + ':  ' + self.dest_pkgfiles.path,
         _('Source XML name') + ': ' + self.src_pkgfiles.name,
         _('Package XML name') + ': ' + self.dest_pkgfiles.name,
         labeled(_('Total of related files'),
                 pairs(self.related_files_copy)),
         labeled(_('Total of files in package'),
                 pairs(self.href_files_copy)),
         labeled(_('Total of @href in XML'),
                 pairs(self.href_replacements)),
         # This list is passed as is, without display_pairs_list.
         labeled(_('Total of files not found in package'),
                 self.missing_href_files),
     ]
     return '\n'.join(lines)
Ejemplo n.º 4
0
    def download(self):
        """Download journals data for the chosen collection and show progress.

        Reads the current value of ``self.choice``, adds status labels to
        this widget's grid, obtains the journals with ``get_journals_list``
        and passes them to ``generate_input_for_markup`` together with
        ``self.temp_filename`` and ``self.filename``. The choice ``'All'`` is
        forwarded as ``None``.
        """
        choice = self.choice.get()

        msg = ttk.Label(self,
                        text=_("Select one collection to use its journals "
                               "data for the Markup Program"))
        msg.grid(column=0, row=ROW_MSG)

        # Translate the template first and interpolate afterwards: formatting
        # inside _() would look up the already-filled string, which can never
        # be present in a translation catalog.
        label1 = ttk.Label(self, text=_("Selecionado: {}").format(choice))
        label1.grid(column=0, row=ROW_SELECTED)

        if choice == 'All':
            choice = None
        label2 = ttk.Label(self, text=_("Downloading.."))
        label2.grid(column=0, row=ROW_DOWNLOADING)
        journals = get_journals_list(self.collections, choice)
        generate_input_for_markup(journals, self.temp_filename, self.filename)

        label4 = ttk.Label(self,
                           text=_("Downloaded: {} journals").format(
                               len(journals)))
        label4.grid(column=0, row=ROW_DOWNLOADED)

        label3 = ttk.Label(self, text=_("Finished"))
        label3.grid(column=0, row=ROW_FINISHED)
0
def is_fulldate(label, dateiso):
    """Check that ``dateiso`` holds a complete, real calendar date.

    ``dateiso`` is read as ``YYYYMMDD``: characters 0-3 are the year, 4-5 the
    month and 6-7 the day. A part that is missing or not made only of digits
    counts as ``0``.

    :param label: name of the date being checked; used in the messages.
    :param dateiso: date string in ``YYYYMMDD`` form.
    :return: list of translated error messages; empty when the date is valid.
    """
    def _as_int(text):
        # Non-numeric or empty slices are treated as 0 (always invalid).
        return int(text) if text.isdigit() else 0

    invalid_value = _('{value} is an invalid value for {label}. ')
    y = _as_int(dateiso[0:4])
    m = _as_int(dateiso[4:6])
    d = _as_int(dateiso[6:8])

    msg = []
    if not y > 0:
        msg.append(
            invalid_value.format(value=y, label='year (' + label + ')'))
    if not 0 < m <= 12:
        msg.append(
            invalid_value.format(value=m, label='month (' + label + ')'))
    # Day 0 is rejected here, like month 0, instead of relying on the
    # datetime constructor to fail later.
    if not 0 < d <= 31:
        msg.append(
            invalid_value.format(value=d, label='day (' + label + ')'))
    if msg:
        return msg

    try:
        # Range checks passed; let datetime reject days that do not exist in
        # the given month (e.g. February 30).
        datetime(y, m, d)
    except ValueError:
        msg.append(invalid_value.format(value=d, label=label + ': day '))
    return msg
Ejemplo n.º 6
0
def validations_table(results):
    """Render validation results as an HTML sheet wrapped in a ``div``.

    :param results: iterable of 3-item ``(label, status, message)`` or 4-item
        ``(label, status, message, xml)`` sequences, or ``None``.
    :return: ``''`` when ``results`` is ``None``; otherwise the markup
        returned by ``html_reports.tag('div', html_reports.sheet(...))``.
    """
    if results is None:
        return ''

    extra_column = _('why it is not a valid message?')
    rows = []
    for result in results:
        result = list(result)
        if len(result) == 3:
            # Three-item results carry no XML excerpt; pad with an empty one.
            result.append('')
        if len(result) != 4:
            # Pass the value as a logging argument with a placeholder;
            # without '%s' the logging module reports a formatting error
            # instead of the message.
            logger.debug('validations_table: %s', result)
            continue
        label, status, msg, xml = result
        rows.append({
            'label': attributes.sps_help(label),
            'status': status,
            'message': msg,
            'xml': xml,
            extra_column: ' '
        })
    return html_reports.tag(
        'div',
        html_reports.sheet(
            ['label', 'status', 'message', 'xml', extra_column],
            rows,
            table_style='validation_sheet'))
Ejemplo n.º 7
0
    def configure(self):
        """Set up the window and its widgets, then enter the main loop.

        Creates a label, a combobox listing ``'All'`` followed by the sorted
        keys of ``self.collections``, a download button bound to
        ``self.download`` and a close button that destroys ``self.master``.
        """
        window = self.master
        window.minsize(400, 200)
        window.title(_('Download journals data'))
        window.wm_iconbitmap(ICON)
        self.pack()

        ttk.Label(self, text=_('Select a collection:')).grid(
            column=0, row=ROW_SELECT_A_COLLECTION)

        options = ['All'] + sorted(self.collections.keys())
        self.choice = tk.StringVar(self)
        self.choice.set(options[0])
        selector = ttk.Combobox(self, width=30, textvariable=self.choice)
        selector.configure(values=tuple(options))
        selector.grid(column=0, row=ROW_COMBOBOX)

        ttk.Button(
            self, text=_('download'), command=self.download,
        ).grid(column=0, row=ROW_DOWNLOAD_BUTTON)

        def _close():
            # Looked up at click time, as the original callback did.
            self.master.destroy()

        ttk.Button(
            self, text=_('close'), command=_close,
        ).grid(column=0, row=ROW_CLOSE_BUTTON)
        self.mainloop()
Ejemplo n.º 8
0
    def validate_doctype(self):
        """Validate the DOCTYPE (PUBLIC and SYSTEM ids) against the SPS version.

        :return: list of translated error messages. It is empty when
            ``self.sps_version`` is falsy or when both ids are accepted.
        """
        sps_version = self.sps_version
        public_id = self.tree.docinfo.public_id
        system_id = self.tree.docinfo.system_url
        if not sps_version:
            return []

        invalid_value = _('{value} is an invalid value for {label}. ')
        dtd_public_id_items = xml_versions.SPS_VERSIONS.get(sps_version)
        if dtd_public_id_items is None:
            return [invalid_value.format(
                value=sps_version, label='article/@specific-use')]

        if public_id not in dtd_public_id_items:
            return [
                invalid_value.format(
                    value=public_id or '', label='DTD PUBLIC ID'),
                _('{requirer} requires {required}. ').format(
                    requirer='SPS version {}'.format(sps_version),
                    required=_(" or ").join(dtd_public_id_items)),
            ]

        errors = []
        matched_location = None
        # ``dict.get`` returns None for a PUBLIC ID that has no registered
        # locations; iterate over nothing instead of raising TypeError.
        for location in self.locations.get(public_id) or []:
            # NOTE(review): ``location`` presumably is a string, in which
            # case ``None in location`` would raise - hence the None guard.
            if system_id is not None and system_id in location:
                matched_location = location
                break
        if not matched_location:
            errors.append(
                invalid_value.format(value=system_id, label='DTD SYSTEM ID'))
        return errors
Ejemplo n.º 9
0
 def validate(self, article):
     """Validate every DOI of ``article`` and return the collected messages.

     ``self.messages`` is reset, then for each non-empty DOI in
     ``article.doi_by_lang`` the ISSN, prefix and format validations are
     run. When the format is valid, the DOI data is requested from
     ``self.ws_doi`` (if ``self.is_working``) to validate the journal and
     article titles; otherwise a warning is recorded.

     :return: ``self.messages``.
     """
     self.messages = []
     journal_issns = []
     for issn in (article.print_issn, article.e_issn):
         if issn is not None:
             journal_issns.append(issn.lower())
     pubdate = article.real_pubdate or article.expected_pubdate or {}
     journal_prefixes = self.journal_prefixes(
         journal_issns, pubdate.get('year'))

     for _lang, doi in article.doi_by_lang:
         if not doi:
             continue
         self.validate_issn_in_doi(journal_issns, doi)
         self.validate_doi_prefix(
             journal_prefixes, journal_issns, article.journal_title, doi)
         if not self.validate_format(doi):
             continue

         warning = ''
         if not self.is_working:
             warning = _('{} is not working. ').format(self.ws_doi.URL)
         else:
             doi_data = self.ws_doi.doi_data(doi)
             if doi_data is None:
                 warning = _(
                     '{} is not registered for any article. ').format(doi)
             else:
                 self.validate_journal_title(
                     article.journal_title, doi, doi_data)
                 self.validate_article_title(
                     article.titles, doi, doi_data)
         if warning:
             self.messages.append(
                 ('doi', validation_status.STATUS_WARNING, warning))
     return self.messages
Ejemplo n.º 10
0
 def year(self, article_year):
     """Validate the reference year against the article year.

     :param article_year: pair ``(label, value)`` of the article year.
     :return: list of ``('year', status, message)`` tuples.
     """
     results = []
     label_year, value_year = article_year
     ref_year = self.refxml.reference.formatted_year
     if len(self.refxml.year) > 1:
         results.append(
             ('year', validation_status.STATUS_FATAL_ERROR,
              _('Identify as "year" the more recent publication date. ')))
     if ref_year is None:
         return results

     if ref_year.isdigit():
         # NOTE(review): values are compared as given; this presumably
         # expects ``value_year`` to be a string like ``ref_year`` - confirm.
         if ref_year > value_year:
             ref_year_label = 'ref/year ({})'.format(ref_year)
             art_year_label = '{}/year ({})'.format(label_year, value_year)
             results.append(
                 ('year', validation_status.STATUS_FATAL_ERROR,
                  _('{} should not be greater than {}. ').format(
                      ref_year_label, art_year_label)))
     elif 's.d' in ref_year or 's/d' in ref_year:
         # The original had a second, unreachable ``'s/d'`` branch; both
         # accepted markers are handled here.
         results.append(('year', validation_status.STATUS_INFO, ref_year))
     else:
         results.append(
             ('year', validation_status.STATUS_FATAL_ERROR,
              _('{value} is not a number nor is in an expected format. '
                ).format(value=ref_year)))
     return results
Ejemplo n.º 11
0
def validate_pubtype_and_ref_data(publication_type, label, values):
    """Check a reference sub-element against its ``@publication-type``.

    :param publication_type: value of ``@publication-type``.
    :param label: name of the reference sub-element being checked.
    :param values: values found for ``label``; ``None`` or empty when absent.
    :return: translated problem description, or ``None`` when ``label`` is
        neither a missing required element nor a present not-allowed one.
    """
    # Normalize once so both branches accept ``values=None``; previously only
    # the "required" branch did and the "not allowed" one raised TypeError.
    values = values or []

    required = label in attributes.REFERENCE_REQUIRED_SUBELEMENTS.get(
        publication_type, [])
    not_allowed = label in attributes.REFERENCE_NOT_ALLOWED_SUBELEMENTS.get(
        publication_type, [])

    if required and not values:
        problem = _('{requirer} requires {required}. ').format(
            requirer='@publication-type="' + publication_type + '"',
            required=label)
        compl = _('If the reference has no {label}, ignore this message. '
                  ).format(label=label)
        items = ['@publication-type', _('the elements of this reference')]
    elif not_allowed and values:
        problem = _('{label} is not allowed for {item}. ').format(
            label=label, item='@publication-type=' + publication_type)
        compl = ''
        items = ['@publication-type', label, ', '.join(values)]
    else:
        return None

    return (problem + _('Be sure that you have correctly identified: ') +
            ' and/or '.join(items) + '. ' + compl)
Ejemplo n.º 12
0
 def surname_validation_result(self):
     """Validate the contributor's surname.

     Reports an error when the last space-separated term of the surname is
     one of ``attributes.identified_suffixes()``, and appends whatever
     ``data_validations.warn_unexpected_numbers`` returns when it is not
     ``None``.

     :return: list of validation results.
     """
     findings = []
     label, status, msg = data_validations.is_required_data(
         'surname', self.contrib.surname)
     if status == validation_status.STATUS_OK:
         last_term = self.contrib.surname.split(' ')[-1]
         if last_term in attributes.identified_suffixes():
             tagged_surname = u'<surname>{v}</surname>'.format(
                 v=self.contrib.surname)
             msg = _(
                 '{label} contains invalid {invalid_items_name}: {invalid_items}. '
             ).format(label=tagged_surname,
                      invalid_items_name=_('terms'),
                      invalid_items=last_term)
             msg += _(
                 u'{value} should be identified as {label}, if {term} is the surname, ignore this message. '
             ).format(value=last_term,
                      label=u' <suffix>' + last_term + '</suffix>',
                      term=last_term)
             findings.append(
                 (label, validation_status.STATUS_ERROR, msg))
     numbers_warning = data_validations.warn_unexpected_numbers(
         label, self.contrib.surname)
     if numbers_warning is not None:
         findings.append(numbers_warning)
     return findings
Ejemplo n.º 13
0
    def articles_dates_report(self):
        """Return the "Articles Dates Report" markup.

        The result is an ``h4`` title followed by two sheets: one row of
        dates per item of ``self.articles`` and one row per key of
        ``self.years`` (in sorted order).
        """
        labels = [
            'name', '@article-type', 'received', 'accepted',
            'receive to accepted (days)', 'SciELO date', 'editorial date',
            'accepted to SciELO (days)', 'accepted to nowadays (days)'
        ]
        rows = []
        for xml_name, doc in self.articles:
            values = [
                xml_name,
                doc.article_type,
                utils.display_datetime(doc.received_dateiso),
                utils.display_datetime(doc.accepted_dateiso),
                str(doc.history_days),
                utils.display_datetime(doc.isoformat(doc.real_pubdate)),
                utils.display_datetime(doc.isoformat(doc.expected_pubdate)),
                str(doc.accepted_to_real_in_days),
                str(doc.accepted_to_nowadays_in_days),
            ]
            rows.append(html_reports.label_values(labels, values))
        article_dates = html_reports.sheet(labels, rows, 'dbstatus')

        labels = [_('year'), _('location')]
        rows = [
            html_reports.label_values(labels, [year, self.years[year]])
            for year in sorted(self.years.keys())
        ]
        reference_dates = html_reports.sheet(labels, rows, 'dbstatus')

        title = html_reports.tag('h4', _('Articles Dates Report'))
        return title + article_dates + reference_dates
Ejemplo n.º 14
0
 def _name_error(self, xml_filename, separator):
     """Return a message when the XML file name contains ``_`` or ``.``.

     Only the base name without its last extension is inspected.

     :param xml_filename: path of the XML file.
     :param separator: text appended after the message.
     :return: ``''`` when the name is acceptable; otherwise a titled message
         followed by ``separator``.
     """
     stem = os.path.splitext(os.path.basename(xml_filename))[0]
     if '_' not in stem and '.' not in stem:
         return ''
     return (
         rst_title(_('Name errors')) +
         _('{} has forbidden characters, which are {}').format(stem, '_.') +
         separator)
Ejemplo n.º 15
0
def display_report(report_filename):
    """Announce the report location and try to open it in a web browser.

    If opening raises, a message asking the user to open the file manually
    is displayed and the exception is passed to ``encoding.report_exception``.

    :param report_filename: path of the report file.
    """
    encoding.display_message(
        _('Report:\n  {filename}').format(filename=report_filename))
    try:
        webbrowser.open('file://' + report_filename, new=2)
    except Exception as e:
        # Translate the template, then interpolate. Formatting inside _()
        # made the lookup use the already-filled text, which no translation
        # catalog can contain.
        encoding.display_message(
            _("Unable to open {} automatically. Open it manually. ").format(
                report_filename))
        encoding.report_exception(e)
Ejemplo n.º 16
0
 def previous_authors(self):
     """Flag references that use ``______`` with no ``person-group``.

     :return: a list with one fatal-error result when the reference XML
         contains six consecutive underscores and
         ``self.refxml.person_group_nodes`` is empty; otherwise ``[]``.
     """
     marker = '_' * 6
     occurrences = self.refxml.xml.count(marker)
     if len(self.refxml.person_group_nodes) != 0 or occurrences <= 0:
         return []
     explanation = (
         _('{} indicates previous authors. ').format(marker) +
         _('Only in element-citation, replace ______ by person-group identifying the previous authors data such given-names, surnames etc. '
           ))
     return [('person-group', validation_status.STATUS_FATAL_ERROR,
              explanation)]
Ejemplo n.º 17
0
 def validate_package(self):
     """Validate every article of the package, in name order.

     A progress message is displayed for the package and for each article.

     :return: dict mapping each article name to the value returned by
         ``self.validate_package_item``.
     """
     total = len(self.pkg.articles)
     encoding.display_message(
         _('Validate package ({} files)').format(total))
     outcome = {}
     for name in sorted(self.pkg.articles.keys()):
         encoding.display_message(_('Validate {name}').format(name=name))
         article = self.pkg.articles[name]
         files = self.pkg.files[name]
         outputs = self.pkg.outputs[name]
         outcome[name] = self.validate_package_item(article, files, outputs)
     return outcome
Ejemplo n.º 18
0
    def report_articles_merging_conflicts(self):
        """Return the markup that reports merging conflicts.

        The text is built on the first call, stored in
        ``self._report_articles_merging_conflicts`` and reused afterwards.
        It is empty when ``self.docs_merger`` has neither title/author nor
        name/order conflicts.
        """
        if hasattr(self, '_report_articles_merging_conflicts'):
            return self._report_articles_merging_conflicts

        merger = self.docs_merger
        fragments = []
        if len(merger.titaut_conflicts) + len(
                merger.name_order_conflicts) > 0:
            names = sorted(
                set(list(merger.titaut_conflicts.keys()) +
                    list(merger.name_order_conflicts.keys())))

            fragments.append(
                html_reports.p_message(
                    validation_status.STATUS_BLOCKING_ERROR + ': ' +
                    _('Unable to update because the registered article data and the package article data do not match. '
                      )))

            articles = merger.articles
            # Not used below; the attribute access is kept as the original
            # code performed it.
            registered_articles = merger.registered_articles
            show = article_data_reports.display_article_data_to_compare
            for name in names:
                labels = [
                    name,
                    _('title/author conflicts'),
                    _('name/order conflicts')
                ]
                package_article = show(articles.get(name))
                title_author = ''.join(
                    [show(art) for art in
                     merger.titaut_conflicts.get(name, {}).values()])
                name_order = ''.join(
                    [show(art) for art in
                     merger.name_order_conflicts.get(name, {}).values()])
                values = [package_article, title_author, name_order]

                fragments.append(
                    html_reports.sheet(
                        labels,
                        [html_reports.label_values(labels, values)],
                        table_style='dbstatus',
                        html_cell_content=labels))
        self._report_articles_merging_conflicts = ''.join(fragments)
        return self._report_articles_merging_conflicts
Ejemplo n.º 19
0
def main():
    """Command-line entry point that generates the PubMed XML of an issue.

    Reads ``issue_path``, ``from_date`` and ``final_date`` from the command
    line. When no issue path is given, they are obtained from
    ``read_form_inputs`` and ``utils.now()`` instead and debug is turned off.
    Problems found are printed, one per line.
    """
    parser = argparse.ArgumentParser(description='XML PubMed cli utility')
    positional_arguments = (
        ("issue_path", "filesystem path or URL to the issue directory"),
        ("from_date", "date iso YYYYMMDD, filter to get selected documents"),
        ("final_date", "date iso YYYYMMDD, filter to name file"),
    )
    for arg_name, arg_help in positional_arguments:
        parser.add_argument(arg_name, nargs="?", default='', help=arg_help)
    parser.add_argument('--debug', action='store_true',
                        help='to register log')
    parser.add_argument('--loglevel', default='WARNING')

    args = parser.parse_args()
    logger.setLevel(args.loglevel.upper())

    issue_path, from_date = args.issue_path, args.from_date
    final_date, debug = args.final_date, args.debug

    if not issue_path:
        # No path on the command line: ask for the inputs through the form.
        issue_path, from_date = read_form_inputs()
        if issue_path is None:
            sys.exit("No issue path was informed. Unable to continue. ")
        final_date = utils.now()[0]
        debug = False

    errors = []
    if os.path.isdir(issue_path):
        config = xc_config.Configuration()
        ucisis = dbm_isis.UCISIS(
            dbm_isis.CISIS(config.cisis1030), dbm_isis.CISIS(config.cisis1660))

        if ucisis.is_available:
            issue_stuff = IssueStuff(ucisis, issue_path, from_date, final_date)
            pubmed_xml_maker = PubMedXMLMaker(issue_stuff, XSL)
            pubmed_xml_maker.debug = debug
            pubmed_xml_maker.execute_procedures()
        else:
            errors.append(_('cisis expected'))
    else:
        errors.append(_('issue path is not a folder'))

    if errors:
        print('\n'.join(errors))
Ejemplo n.º 20
0
 def spf_message(self):
     """Return an informative message about the package available for SPF.

     :return: ``""`` when ``self.sps_pkg_info`` is falsy; otherwise the
         result of ``html_reports.p_message`` for a text naming the package
         file and, when a server is given, the FTP server and user.
     """
     info = self.sps_pkg_info
     if not info:
         return ""
     if info.get("server"):
         ftp = _("(FTP: {} | User: {})").format(
             info.get("server"), info.get("user", ''))
     else:
         ftp = ""
     text = _("[INFO] {} is available for SPF {}").format(
         info.get("file"), ftp)
     return html_reports.p_message(text)
Ejemplo n.º 21
0
 def articles_affiliations_report(self):
     """Return the "Affiliations Report" markup.

     The sheet has one row per item of ``self.compiled_affiliations`` with
     its label, the number of occurrences and the sorted distinct
     occurrences.
     """
     heading = html_reports.tag('h4', _('Affiliations Report'))
     rows = [
         {
             'label': label,
             'quantity': str(len(occs)),
             _('files'): sorted(set(occs)),
         }
         for label, occs in self.compiled_affiliations.items()
     ]
     columns = ['label', 'quantity', _('files')]
     return heading + html_reports.sheet(columns, rows, 'dbstatus')
Ejemplo n.º 22
0
    def xml_list(self):
        """Return a ``div`` listing the package XML files.

        It shows the package path, the number of items of
        ``self.package_folder.pkgfiles_items`` and, for each item, the list
        produced by ``html_reports.format_list`` from its files.
        """
        folder = self.package_folder
        parts = [
            u'<p>{}: {}</p>'.format(_('XML path'), folder.path),
            u'<p>{}: {}</p>'.format(_('Total of XML files'),
                                    len(folder.pkgfiles_items)),
        ]
        entries = ''.join([
            '<li>{}</li>'.format(
                html_reports.format_list(name, 'ol', pkgfiles.files))
            for name, pkgfiles in folder.pkgfiles_items.items()
        ])
        parts.append('<ol>{}</ol>'.format(entries))
        return u'<div class="xmllist">{}</div>'.format(''.join(parts))
Ejemplo n.º 23
0
 def _err_messages(self, valid_dtd, name_error):
     """Collect the error lines for an XML file.

     :param valid_dtd: falsy when the file has DTD errors.
     :param name_error: name error text; empty when there is none.
     :return: list of messages, each ending with a newline. The "invalid"
         and "name errors" messages are prefixed with the fatal-error status.
     """
     found = []
     if self.validator.xml_validator is None:
         found.append(
             validation_status.STATUS_FATAL_ERROR + ' ' +
             _('XML file is invalid') + '\n')
     if not valid_dtd:
         found.append(_('XML file has DTD errors') + '\n')
     if len(name_error) > 0:
         found.append(
             validation_status.STATUS_FATAL_ERROR + ' ' +
             _('XML file has name errors') + '\n')
     return found
Ejemplo n.º 24
0
 def validate_doi_prefix(self, journal_prefixes, journal_issns,
                         journal_title, article_doi):
     """Check that the DOI prefix belongs to the journal.

     When ``journal_prefixes`` is not empty, the prefix must be one of them.
     Otherwise the publisher names obtained from ``self.ws_doi`` by ISSN and
     by prefix are compared (case-insensitively, one containing the other).
     Problems are appended to ``self.messages`` as fatal errors.

     :param journal_prefixes: known DOI prefixes of the journal.
     :param journal_issns: ISSNs of the journal.
     :param journal_title: journal title, used in the message.
     :param article_doi: DOI being validated.
     """
     # ``partition`` keeps the whole DOI when it has no '/'. The previous
     # ``article_doi[:article_doi.find('/')]`` sliced with -1 in that case
     # and silently dropped the last character.
     prefix = article_doi.partition('/')[0]

     if len(journal_prefixes) > 0:
         if prefix not in journal_prefixes:
             self.messages.append(
                 ('doi', validation_status.STATUS_FATAL_ERROR,
                  _('{value} is an invalid value for {label}. ').format(
                      value=prefix, label=_('doi prefix')) +
                  _('{label} must starts with: {expected}. ').format(
                      label='doi',
                      expected=_(' or ').join(journal_prefixes))))
         return

     publisher_by_issn = self.ws_doi.journal_publisher_by_issn(
         journal_issns) or ''
     publisher_by_prefix = self.ws_doi.journal_publisher_by_doi_prefix(
         prefix) or ''
     issn_name = publisher_by_issn.lower()
     prefix_name = publisher_by_prefix.lower()
     # An empty name is contained in any string, so an unknown publisher
     # never produces a message here.
     if issn_name in prefix_name or prefix_name in issn_name:
         return

     msgs = [
         article_doi,
         _('{value} is an invalid value for {label}. ').format(
             value=prefix, label=_('doi prefix')),
         _('"{}" belongs to {}. ').format(prefix, publisher_by_prefix),
         _('DOI Publisher for {}: {}. ').format(
             journal_title, publisher_by_issn)
     ]
     self.messages.append(
         ('doi', validation_status.STATUS_FATAL_ERROR, msgs))
Ejemplo n.º 25
0
def get_errors_if_xml_not_found(xml_path):
    """Check whether ``xml_path`` leads to XML files.

    :param xml_path: path of a file or folder, or ``None``.
    :return: list with one translated message when ``xml_path`` is ``None``,
        is a file not ending with ``.xml``, or is rejected by
        ``is_valid_xml_dir``; otherwise an empty list.
    """
    if xml_path is None:
        return [_('Missing XML location. ')]
    if os.path.isfile(xml_path):
        if xml_path.endswith('.xml'):
            return []
        return [_('Invalid file. XML file required. ')]
    if is_valid_xml_dir(xml_path):
        return []
    return [_('Invalid folder. Folder must have XML files. ')]
Ejemplo n.º 26
0
 def sources_overview_report(self):
     """Return one titled sheet per reference type listing its sources.

     For each item of ``self.reftype_and_sources`` an ``h4`` with the
     reference type is followed by a sheet whose rows pair each source (in
     sorted order) with its mapped value.
     """
     labels = ['source', _('location')]
     if len(self.reftype_and_sources) == 0:
         return ''
     chunks = []
     for reftype, sources in self.reftype_and_sources.items():
         chunks.append(html_reports.tag('h4', reftype))
         rows = [
             {'source': source, _('location'): sources[source]}
             for source in sorted(sources.keys())
         ]
         chunks.append(html_reports.sheet(labels, rows, 'dbstatus'))
     return ''.join(chunks)
Ejemplo n.º 27
0
    def validate(self, article):
        """Validate the article's issue data when generating the database.

        :param article: article passed on to
            ``self.issue_models.validate_article_issue_data``.
        :return: a ``validations_module.ValidationsResult`` whose ``message``
            is ``''`` when ``self.is_db_generation`` is falsy, a blocking
            error text when ``self.issue_error_msg`` is set, or the issue
            models' validation result when ``self.issue_models`` is truthy.
        """
        if not self.is_db_generation:
            message = ''
        elif self.issue_error_msg is not None:
            message = validation_status.STATUS_BLOCKING_ERROR + ': ' + _(
                'Unable to identify {unidentified}. ').format(
                    unidentified=_('issue'))
            message += self.issue_error_msg
        elif self.issue_models:
            message = self.issue_models.validate_article_issue_data(article)
        else:
            message = ''

        outcome = validations_module.ValidationsResult()
        outcome.message = message
        return outcome
Ejemplo n.º 28
0
    def spf_message(self):
        """Return an informative message about the package available for SPF.

        The detail in parentheses is the FTP server and user when a server
        is given; otherwise whether the package file exists on disk, or
        ``False`` when no file is given.

        :return: ``""`` when ``self.sps_pkg_info`` is falsy; otherwise the
            result of ``html_reports.p_message``.
        """
        info = self.sps_pkg_info
        if not info:
            return ""
        if info.get("server"):
            detail = _("FTP: {} | User: {}").format(
                info.get("server"), info.get("user", ''))
        elif info.get("file"):
            detail = os.path.isfile(info.get("file"))
        else:
            detail = False

        text = _("[INFO] {} is available for SPF ({})").format(
            info.get("file"), detail)
        return html_reports.p_message(text)
Ejemplo n.º 29
0
 def _is_valid_orcid(self, orcid, contrib_name):
     """Return a validation result for the contributor's ORCID.

     :param orcid: ORCID value, appended to ``self.ORCID_MAIN_URL``.
     :param contrib_name: object whose ``fullname`` is used in the message.
     :return: a fatal-error tuple when the ORCID website is available and
         the ORCID URL is not valid; otherwise a warning tuple stating that
         ownership could not be checked.
     """
     orcid_url = '{}{}'.format(self.ORCID_MAIN_URL, orcid)
     website_is_up = self.is_available_orcid_website
     if website_is_up and not self.ws_requester.is_valid_url(orcid_url):
         problem = _('{value} is an invalid value for {label}. ').format(
             value=orcid, label='ORCID')
         return ('contrib-id', validation_status.STATUS_FATAL_ERROR, problem)
     notice = _('Unable to check if {} belongs to {}. ').format(
         html_reports.link(orcid_url, orcid), contrib_name.fullname)
     return ('contrib-id', validation_status.STATUS_WARNING, notice)
Ejemplo n.º 30
0
 def report_rejected_articles(self):
     """Return the markup listing the rejected documents.

     :return: ``''`` when ``self.docs_merger.rejected_articles`` is empty;
         otherwise a title, an explanation paragraph and one paragraph per
         rejected document name.
     """
     if not self.docs_merger.rejected_articles:
         return ''
     explanation = _('These documents were rejected because they are not '
                     '"ahead of print" anymore, they were published in a '
                     'regular issue, '
                     'so they are not allowed to be reinserted as '
                     '"ahead of print".')
     parts = [
         html_reports.tag('h3', _('Rejected documents')),
         html_reports.tag('p', explanation, 'blockingerror'),
     ]
     parts.extend(
         [html_reports.tag('p', name)
          for name in self.docs_merger.rejected_articles])
     return ''.join(parts)