def get_records(self, expr=None):
    """Return the records of ``self.db_filename``, optionally filtered.

    When ``expr`` is None the whole database is exported. Otherwise
    ``self.cisis.search`` writes the search result to a database with the
    same base name inside a fresh temporary folder, and that result is
    exported instead.

    The export goes through an intermediate ``<base>.id`` file produced by
    ``self.cisis.i2id`` and parsed by ``IDFile().read``. Both the ``.id``
    file and the temporary folder are removed before returning, even when
    the search or export step raises.

    Returns whatever ``IDFile().read`` returns, or ``[]`` when
    ``<base>.mst`` does not exist.
    """
    temp_dir = None
    if expr is None:
        base = self.db_filename
    else:
        # Normalise Windows separators because paths are joined with '/'.
        temp_dir = mkdtemp().replace('\\', '/')
        base = temp_dir + '/' + os.path.basename(self.db_filename)
    id_filename = base + '.id'
    records = []
    try:
        if temp_dir is not None:
            self.cisis.search(self.db_filename, expr, base)
        if os.path.isfile(base + '.mst'):
            self.cisis.i2id(base, id_filename)
            records = IDFile().read(id_filename)
    finally:
        # Cleanup is best-effort: a failure to delete must not hide the
        # records (or the original exception).
        if temp_dir is not None:
            try:
                fs_utils.delete_file_or_folder(temp_dir)
            except Exception:
                pass
        if os.path.isfile(id_filename):
            try:
                os.unlink(id_filename)
            except OSError:
                pass
    return records
def __init__(self, cisis, journal_path):
    """Index the ahead-of-print ``.id`` files found under ``journal_path``.

    Every sub-folder of ``journal_path`` whose name ends with ``ahead`` and
    does not start with ``ex-`` is taken as an ahead folder. For each one:

    * ``<folder>/base_xml/id`` is created when missing;
    * files of the legacy ``<folder>/id`` folder that are not yet present
      in ``base_xml/id`` are copied there, then the legacy folder is
      deleted (best effort);
    * every file of ``base_xml/id`` except ``i.id`` is loaded with
      ``IDFile().id2json`` and registered in ``self.ahead_filenames``
      under the DOI returned by ``self.doi`` (empty DOIs are skipped).
    """
    self.cisis = cisis
    self.journal_path = journal_path
    self.ahead_filenames = {}
    self.ahead_folders = [
        folder
        for folder in os.listdir(journal_path)
        if folder.endswith("ahead") and not folder.startswith("ex-")
    ]
    for ahead_folder in self.ahead_folders:
        # Folder names look like "2013nahead".
        ahead_path = journal_path + "/" + ahead_folder
        id_path = ahead_path + "/base_xml/id"
        if not os.path.isdir(id_path):
            os.makedirs(id_path)

        old_id_path = ahead_path + "/id"
        if os.path.isdir(old_id_path):
            for id_filename in os.listdir(old_id_path):
                # Never overwrite a file that already lives in the new place.
                if not os.path.isfile(id_path + "/" + id_filename):
                    shutil.copy(old_id_path + "/" + id_filename, id_path)
            # Best effort: a legacy folder left behind is harmless, so a
            # failure to import the helper or to delete is ignored. Only
            # ordinary errors are swallowed, not KeyboardInterrupt/SystemExit.
            try:
                import fs_utils
                fs_utils.delete_file_or_folder(old_id_path)
            except Exception:
                pass

        for id_filename in os.listdir(id_path):
            if id_filename == "i.id":
                continue
            filename = id_path + "/" + id_filename
            doi = self.doi(IDFile().id2json(filename))
            if doi != "":
                self.ahead_filenames[doi] = filename
def move_old_id_folder(self):
    """Migrate the content of ``self.old_id_path`` into ``self.id_path``.

    Does nothing when ``self.old_id_path`` is not a directory. Otherwise
    ``self.id_path`` is created if needed, every entry that does not
    already exist as a file in ``self.id_path`` is copied over (existing
    files are never overwritten), and the old folder is then deleted on a
    best-effort basis.
    """
    if not os.path.isdir(self.old_id_path):
        return
    if not os.path.isdir(self.id_path):
        os.makedirs(self.id_path)
    for item in os.listdir(self.old_id_path):
        target = self.id_path + '/' + item
        if not os.path.isfile(target):
            shutil.copyfile(self.old_id_path + '/' + item, target)
    # Best effort: the files are already copied, so a failure to remove the
    # old folder is not fatal. Only ordinary errors are swallowed, not
    # KeyboardInterrupt/SystemExit as the previous bare ``except`` did.
    try:
        fs_utils.delete_file_or_folder(self.old_id_path)
    except Exception:
        pass
def convert_package(src_path):
    """Validate and convert the XML package at ``src_path``.

    Steps, in order:

    1. open a progress log file under ``./../log`` (removed again on
       success, so a leftover file marks an interrupted run);
    2. prepare the working paths and normalise the package;
    3. run the package validations;
    4. run ``ArticlesConversion.convert``;
    5. save the ``xml_converter.html`` report and, when not on Windows,
       format the reports for the web;
    6. delete the temporary result folder when the conversion moved its
       results elsewhere.

    Returns a tuple ``(scilista_items, xc_status, statistics, report_location)``.
    """
    xc_process_logger = fs_utils.ProcessLogger()
    is_xml_generation = False
    scielo_dtd_files = xml_versions.DTDFiles('scielo', converter_env.version)

    # The last four characters are dropped from the name
    # (presumably an extension such as ".zip" -- confirm against callers).
    pkg_name = os.path.basename(src_path)[:-4]

    if not os.path.isdir('./../log'):
        os.makedirs('./../log')
    log_package = './../log/' + datetime.now().isoformat().replace(':', '_') + os.path.basename(pkg_name)

    fs_utils.append_file(log_package, 'preparing')
    tmp_report_path, wrk_path, scielo_pkg_path, tmp_result_path = package_paths_preparation(src_path)

    fs_utils.append_file(log_package, 'normalized_package')
    articles, articles_work_area = normalized_package(src_path, tmp_report_path, wrk_path, scielo_pkg_path, converter_env.version)

    doi_services = article_validations.DOI_Services()
    articles_pkg = pkg_validations.ArticlesPackage(scielo_pkg_path, articles, is_xml_generation)
    articles_data = pkg_validations.ArticlesData()
    articles_data.setup(articles_pkg, xc_models.JournalsManager(), db_manager=converter_env.db_manager)

    articles_set_validations = pkg_validations.ArticlesSetValidations(articles_pkg, articles_data, xc_process_logger)
    articles_set_validations.validate(doi_services, scielo_dtd_files, articles_work_area)

    conversion = ArticlesConversion(articles_set_validations, articles_data.articles_db_manager, not converter_env.is_windows)
    # The conversion starts from the temporary locations; the code below
    # reads them back from ``conversion`` after convert() has run.
    conversion.final_result_path = tmp_result_path
    conversion.final_report_path = tmp_report_path
    scilista_items = conversion.convert()

    reports = pkg_validations.ReportsMaker(articles_set_validations, None, conversion, display_report=converter_env.is_windows)
    reports.processing_result_location = conversion.final_result_path
    report_location = conversion.final_report_path + '/xml_converter.html'
    reports.save_report(conversion.final_report_path, 'xml_converter.html', _('XML Conversion (XML to Database)'))

    if not converter_env.is_windows:
        format_reports_for_web(conversion.final_report_path, scielo_pkg_path, conversion.acron_issue_label.replace(' ', '/'))

    # Compare against the path held by ``conversion``: the previous code
    # compared two local variables that were always equal, so the temporary
    # result folder was never removed.
    if tmp_result_path != conversion.final_result_path:
        fs_utils.delete_file_or_folder(tmp_result_path)

    os.unlink(log_package)
    return (scilista_items, conversion.xc_status, reports.validations.statistics_display(), report_location)
def organize(self, reception):
    """Process every entry of ``reception.download_path``.

    For each entry the destination folders are obtained from
    ``self.get_folders`` and created when missing. The entry is passed to
    ``fs_utils.unzip`` with ``folders[1]`` as second argument; when that
    call returns a truthy value the downloaded file is also copied to
    ``folders[0]``. If the downloaded file still exists it is registered
    through ``reception.register`` and removed. Finally the download
    folder itself is deleted, ignoring any failure.

    NOTE(review): the block nesting was reconstructed from a source whose
    indentation was lost -- confirm that the register/unlink step is meant
    to run regardless of the unzip result.
    """
    for f in os.listdir(reception.download_path):
        downloaded_item = DownloadedItem(f)
        folders = self.get_folders(downloaded_item, reception)
        # Make sure every destination folder exists before it is used.
        for item in folders:
            if not os.path.isdir(item):
                os.makedirs(item)
        # folders[1] is presumably the extraction target and folders[0] the
        # place where the original file is kept -- verify in get_folders.
        if fs_utils.unzip(reception.download_path + '/' + f, folders[1]):
            shutil.copy(reception.download_path + '/' + f, folders[0])
        if os.path.isfile(reception.download_path + '/' + f):
            reception.register(f, folders[1])
            os.unlink(reception.download_path + '/' + f)
    # Best-effort removal of the download folder.
    # NOTE(review): the bare ``except`` also hides KeyboardInterrupt and
    # SystemExit; ``except Exception`` would be safer.
    try:
        fs_utils.delete_file_or_folder(reception.download_path)
    except:
        pass
def convert_package(src_path):
    """Build, validate and convert the XML package found in ``src_path``.

    A progress log file is written under ``./../log`` and removed once the
    function finishes normally. The ``.xml`` files of ``src_path`` whose
    names do not contain ``incorrect`` are packaged by
    ``xpmaker.PackageMaker`` into ``src_path + '_xc'``, validated and then
    converted by ``ArticlesConversion``.

    Returns ``(scilista_items, xc_status, statistics_display, report_location)``,
    the last three read from the conversion object.
    """
    process_logger = fs_utils.ProcessLogger()

    log_dir = './../log'
    package_label = os.path.basename(src_path)[:-4]
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)
    timestamp = datetime.now().isoformat().replace(':', '_')
    log_package = log_dir + '/' + timestamp + os.path.basename(package_label)

    fs_utils.append_file(log_package, 'preparing')
    tmp_result_path = src_path + '_xc'

    fs_utils.append_file(log_package, 'normalized_package')
    xml_files = sorted(
        src_path + '/' + name
        for name in os.listdir(src_path)
        if name.endswith('.xml') and 'incorrect' not in name
    )
    pkg_maker = xpmaker.PackageMaker(
        xml_files,
        tmp_result_path,
        'acron',
        converter_env.version,
        is_db_generation=True,
    )
    pkg_maker.make_sps_package()

    doi_services = article_validations.DOI_Services()
    articles_pkg = pkg_validations.ArticlesPackage(
        pkg_maker.scielo_pkg_path,
        pkg_maker.article_items,
        False,  # is_xml_generation
    )
    articles_data = pkg_validations.ArticlesData()
    articles_data.setup(
        articles_pkg,
        xc_models.JournalsManager(),
        db_manager=converter_env.db_manager,
    )

    validations = pkg_validations.ArticlesSetValidations(articles_pkg, articles_data, process_logger)
    validations.validate(
        doi_services,
        pkg_maker.scielo_dtd_files,
        pkg_maker.article_work_area_items,
    )

    use_web_layout = not converter_env.is_windows
    conversion = ArticlesConversion(validations, articles_data.articles_db_manager, use_web_layout)
    scilista_items = conversion.convert()

    # The temporary folder is only disposable when the results ended up
    # somewhere else.
    if conversion.results_path != tmp_result_path:
        fs_utils.delete_file_or_folder(tmp_result_path)

    os.unlink(log_package)
    return (
        scilista_items,
        conversion.xc_status,
        conversion.statistics_display,
        conversion.report_location,
    )
def xml_transform(xml_filename, xsl_filename, result_filename, parameters=None):
    """Apply ``xsl_filename`` to ``xml_filename`` through the Java transformer.

    The transformation writes to a temporary file under ``TMP_DIR`` which is
    then moved to ``result_filename``. When the transformer produces no
    output, an error message (including the command line) is written to the
    result file instead.

    Args:
        xml_filename: XML input; a temporary copy made by
            ``create_temp_xml_filename`` is what gets transformed.
        xsl_filename: stylesheet to apply.
        result_filename: destination file; its folder is created if needed
            and any previous file is removed first.
        parameters: optional mapping handed to ``format_parameters``;
            ``None`` means no parameters.

    Returns:
        True when the transformer produced the output file, False otherwise.
    """
    # Avoid a shared mutable default argument.
    if parameters is None:
        parameters = {}

    error = False
    temp_result_filename = TMP_DIR + '/' + os.path.basename(result_filename)

    # dirname is '' for a bare file name, and os.makedirs('') raises.
    result_dir = os.path.dirname(result_filename)
    if result_dir and not os.path.isdir(result_dir):
        os.makedirs(result_dir)

    # Remove stale outputs so that a missing file after the run reliably
    # means the transformation failed.
    for f in [result_filename, temp_result_filename]:
        if os.path.isfile(f):
            os.unlink(f)

    tmp_xml_filename = create_temp_xml_filename(xml_filename)
    # NOTE(review): the command is a shell string assembled from file
    # names; names containing a double quote would break the quoting.
    cmd = JAVA_PATH + ' -jar "' + JAR_TRANSFORM + '" -novw -w0 -o "' + temp_result_filename + '" "' + tmp_xml_filename + '" "' + xsl_filename + '" ' + format_parameters(parameters)
    cmd = cmd.encode(encoding=sys.getfilesystemencoding())
    os.system(cmd)

    if not os.path.exists(temp_result_filename):
        fs_utils.write_file(temp_result_filename, 'ERROR: transformation error.\n' + cmd)
        error = True

    shutil.move(temp_result_filename, result_filename)
    fs_utils.delete_file_or_folder(tmp_xml_filename)
    return (not error)
def execute_converter(package_paths, collection_name=None):
    """Convert one or more packages and notify the results by e-mail.

    ``package_paths`` may be a single path, a list of paths, or None. When
    it is None the packages are taken from ``queue_packages`` using the
    paths of the collection configuration. Nothing is converted when
    ``xc.get_configuration`` returns None.

    For each package ``convert_package`` is called; afterwards, depending
    on the status and on the configuration, the scilista file is appended
    to, the website/report files are transferred and an e-mail with a link
    to the report is sent. Failures are reported to ``config.email_to_adm``
    and re-raised only when a single package is being processed.

    NOTE(review): the block nesting was reconstructed from a source whose
    indentation was lost -- confirm against the repository.
    """
    # collection_names is empty here, so the lookup always misses and the
    # given name is used as the acronym.
    collection_names = {}
    collection_acron = collection_names.get(collection_name)
    if collection_acron is None:
        collection_acron = collection_name

    config = xc.get_configuration(collection_acron)
    if config is not None:
        prepare_env(config)
        invalid_pkg_files = []

        mailer = xc.get_mailer(config)

        # No explicit package: take whatever is waiting in the download area.
        if package_paths is None:
            package_paths, invalid_pkg_files = queue_packages(config.download_path, config.temp_path, config.queue_path, config.archive_path)
        if package_paths is None:
            package_paths = []
        if not isinstance(package_paths, list):
            package_paths = [package_paths]

        for package_path in package_paths:
            package_folder = os.path.basename(package_path)
            utils.display_message(package_path)
            # Values used when convert_package fails before returning.
            scilista_items = []
            xc_status = 'interrupted'
            stats_msg = ''
            report_location = None

            # Step 1: conversion.
            try:
                scilista_items, xc_status, stats_msg, report_location = convert_package(package_path)
            except Exception as e:
                if config.queue_path is not None:
                    fs_utils.delete_file_or_folder(package_path)
                if config.email_subject_invalid_packages is not None:
                    send_message(mailer, config.email_to_adm, '[Step 1]' + config.email_subject_invalid_packages, config.email_text_invalid_packages + '\n' + package_folder + '\n' + str(e))
                # With several packages the failure is only e-mailed so the
                # remaining packages still get processed.
                if len(package_paths) == 1:
                    raise
            print(scilista_items)

            # Step 2: publication and notification.
            # NOTE(review): after a swallowed Step 1 failure scilista_items
            # is still [], so scilista_items[0] raises IndexError and the
            # "[Step 2]" handler below fires as well.
            try:
                acron, issue_id = scilista_items[0].split(' ')

                if xc_status in ['accepted', 'approved']:
                    if config.collection_scilista is not None:
                        # NOTE(review): the file object is never closed
                        # explicitly; a ``with`` block would be safer.
                        open(config.collection_scilista, 'a+').write('\n'.join(scilista_items) + '\n')

                    if config.is_enabled_transference:
                        transfer_website_files(acron, issue_id, config.local_web_app_path, config.transference_user, config.transference_servers, config.remote_web_app_path)

                if report_location is not None:
                    if config.email_subject_package_evaluation is not None:
                        results = ' '.join(EMAIL_SUBJECT_STATUS_ICON.get(xc_status, [])) + ' ' + stats_msg
                        link = config.web_app_site + '/reports/' + acron + '/' + issue_id + '/' + os.path.basename(report_location)
                        # From here on report_location holds the HTML body of
                        # the e-mail, not a path.
                        report_location = '<html><body>' + html_reports.link(link, link) + '</body></html>'

                        transfer_report_files(acron, issue_id, config.local_web_app_path, config.transference_user, config.transference_servers, config.remote_web_app_path)
                        send_message(mailer, config.email_to, config.email_subject_package_evaluation + u' ' + package_folder + u': ' + results, report_location)

            except Exception as e:
                if config.email_subject_invalid_packages is not None:
                    send_message(mailer, config.email_to_adm, '[Step 2]' + config.email_subject_invalid_packages, config.email_text_invalid_packages + '\n' + package_folder + '\n' + str(e))

                if len(package_paths) == 1:
                    print('exception as finishing')
                    raise

        # Report the files that could not be queued as packages.
        if len(invalid_pkg_files) > 0:
            if config.email_subject_invalid_packages is not None:
                send_message(mailer, config.email_to, config.email_subject_invalid_packages, config.email_text_invalid_packages + '\n'.join(invalid_pkg_files))

    utils.display_message(_('finished'))
def queue_packages(download_path, temp_path, queue_path, archive_path):
    """Move the downloaded packages into the processing queue.

    Every entry of ``download_path`` is removed from it. Entries accepted
    by ``is_valid_pkg_file`` are copied to a temporary folder, extracted to
    ``<queue_path>/<proc_id>/<pkg_name>`` and, when ``archive_path`` is
    given, archived there. Entries that are not valid package files, or
    that cannot be extracted, are reported as invalid.

    Args:
        download_path: folder holding the received package files.
        temp_path: base folder for the temporary copies.
        queue_path: base folder for the extracted packages.
        archive_path: folder where package files are archived, or None.

    Returns:
        ``(pkg_paths, invalid_pkg_files)``: the extracted package folders
        and the names of the rejected files.
    """
    invalid_pkg_files = []
    pkg_paths = []

    # HHMM of the current time identifies this run's sub-folders.
    proc_id = datetime.now().isoformat()[11:16].replace(':', '')
    temp_path = temp_path + '/' + proc_id
    queue_path = queue_path + '/' + proc_id

    # Start from clean folders in case an earlier run used the same id.
    if os.path.isdir(temp_path):
        fs_utils.delete_file_or_folder(temp_path)
    if os.path.isdir(queue_path):
        fs_utils.delete_file_or_folder(queue_path)

    if archive_path is not None:
        if not os.path.isdir(archive_path):
            os.makedirs(archive_path)

    if not os.path.isdir(temp_path):
        os.makedirs(temp_path)

    for pkg_name in os.listdir(download_path):
        downloaded = download_path + '/' + pkg_name
        if is_valid_pkg_file(downloaded):
            shutil.copyfile(downloaded, temp_path + '/' + pkg_name)
        else:
            # Rejected files belong in the invalid list; they were being
            # added to pkg_paths and thus returned as packages to convert.
            invalid_pkg_files.append(pkg_name)
        fs_utils.delete_file_or_folder(downloaded)

    for pkg_name in os.listdir(temp_path):
        temp_pkg_file = temp_path + '/' + pkg_name
        queued_pkg_path = queue_path + '/' + pkg_name
        if not os.path.isdir(queued_pkg_path):
            os.makedirs(queued_pkg_path)

        if fs_utils.extract_package(temp_pkg_file, queued_pkg_path):
            if archive_path is not None:
                if os.path.isdir(archive_path):
                    shutil.copyfile(temp_pkg_file, archive_path + '/' + pkg_name)
            pkg_paths.append(queued_pkg_path)
        else:
            invalid_pkg_files.append(pkg_name)
            fs_utils.delete_file_or_folder(queued_pkg_path)
        fs_utils.delete_file_or_folder(temp_pkg_file)

    fs_utils.delete_file_or_folder(temp_path)

    return (pkg_paths, invalid_pkg_files)
def convert_package(src_path):
    """Validate the package at ``src_path``, convert it and write the report.

    Each stage appends its name to a log file under ``./../log``; the file
    is removed just before returning, so a leftover file shows the last
    stage reached by an interrupted run.

    When ``pkg.identify_issue`` returns an error message the package is
    rejected without conversion. Otherwise the package is evaluated
    against the registered items, validated, and, when there are no
    blocking errors, converted.

    Returns ``(scilista_items, xc_status, statistics_message, report_location)``.

    NOTE(review): the block nesting was reconstructed from a source whose
    indentation was lost; in particular the extent of the
    ``conversion.blocking_errors == 0`` branch must be confirmed against
    the repository.
    """
    # Defaults used for the report when a stage is skipped.
    xc_conclusion_msg = ''
    pkg_xml_fatal_errors = 0
    xc_results_report = ''
    aop_results_report = ''
    before_conversion_report = ''
    after_conversion_report = ''
    registered_scilista_item = None
    report_components = {}
    scilista_items = []
    xc_status = 'not processed'
    is_db_generation = True
    converted = 0
    not_converted = 0
    total = 0

    dtd_files = xml_versions.DTDFiles('scielo', converter_env.version)

    # Drops the last four characters of the name
    # (presumably an extension such as ".zip" -- TODO confirm).
    pkg_name = os.path.basename(src_path)[:-4]

    if not os.path.isdir('./../log'):
        os.makedirs('./../log')
    log_package = './../log/' + datetime.now().isoformat().replace(':', '_') + os.path.basename(pkg_name)

    fs_utils.append_file(log_package, 'preparing')
    tmp_report_path, wrk_path, pkg_path, tmp_result_path = package_paths_preparation(src_path)
    # Until the conversion succeeds the final locations are the temporary ones.
    final_result_path = tmp_result_path
    final_report_path = tmp_report_path

    fs_utils.append_file(log_package, 'normalized_package')
    pkg_articles, doc_file_info_items = normalized_package(src_path, tmp_report_path, wrk_path, pkg_path, converter_env.version)

    pkg = pkg_reports.PkgArticles(pkg_articles, pkg_path)

    journals_list = xc_models.JournalsList()
    journal = journals_list.get_journal(pkg.pkg_p_issn, pkg.pkg_e_issn, pkg.pkg_journal_title)

    fs_utils.append_file(log_package, 'identify_issue')
    issue_error_msg = pkg.identify_issue(converter_env.db_manager, pkg_name)

    # FIXME: issue is always None when handed to ArticlesPkgReport below.
    issue = None

    fs_utils.append_file(log_package, 'pkg.xml_list()')
    report_components['xml-files'] = pkg.xml_list()

    scilista_items.append(pkg.acron_issue_label)
    if issue_error_msg is not None:
        xc_status = 'rejected'
        report_components['issue-report'] = issue_error_msg
    else:
        fs_utils.append_file(log_package, 'db_article')
        db_article = xc_models.ArticleDB(converter_env.db_manager.db_isis, pkg.issue_files, xc_models.AopManager(converter_env.db_manager.db_isis, pkg.issue_files.journal_files))

        conversion = Conversion(pkg, db_article)

        fs_utils.append_file(log_package, 'conversion.evaluate_pkg_and_registered_items')
        conversion.evaluate_pkg_and_registered_items(converter_env.skip_identical_xml)

        pkg_validator = pkg_reports.ArticlesPkgReport(tmp_report_path, pkg, journal, issue, conversion.previous_registered_articles, is_db_generation)
        pkg_validator.xc_validations = conversion.xc_validations

        fs_utils.append_file(log_package, 'pkg_validator.overview_report()')
        report_components['pkg_overview'] = pkg_validator.overview_report()
        fs_utils.append_file(log_package, 'pkg_validator.references_overview_report()')
        report_components['pkg_overview'] += pkg_validator.references_overview_report()
        fs_utils.append_file(log_package, 'pkg_validator.sources_overview_report()')
        report_components['references'] = pkg_validator.sources_overview_report()

        fs_utils.append_file(log_package, 'pkg_validator.issue_report')
        report_components['issue-report'] = pkg_validator.issue_report

        conversion.blocking_errors = pkg_validator.blocking_errors

        fs_utils.append_file(log_package, 'conversion.initial_status_report')
        before_conversion_report = conversion.initial_status_report()

        # Articles are validated and converted only without blocking errors.
        if conversion.blocking_errors == 0:
            fs_utils.append_file(log_package, 'pkg_validator.validate_articles_pkg_xml_and_data')
            pkg_validator.validate_articles_pkg_xml_and_data(doc_file_info_items, dtd_files, False, conversion.selected_articles.keys())

            pkg_xml_fatal_errors = pkg_validator.pkg_xml_structure_validations.fatal_errors + pkg_validator.pkg_xml_content_validations.fatal_errors

            fs_utils.append_file(log_package, 'pkg_validator.detail_report')
            report_components['detail-report'] = pkg_validator.detail_report()

            fs_utils.append_file(log_package, 'conversion.convert_articles')
            registered_scilista_item = conversion.convert_articles(pkg_validator)

            fs_utils.append_file(log_package, 'conversion.pkg_xc_validations.report')
            report_components['conversion-report'] = conversion.pkg_xc_validations.report()
            if conversion.pkg_xc_validations.fatal_errors == 0:
                after_conversion_report = conversion.final_status_report()

            fs_utils.append_file(log_package, 'Conversion results')
            xc_results_report = report_status(_('Conversion results'), conversion.conversion_status, 'conversion')

            fs_utils.append_file(log_package, 'AOP status')
            aop_results_report = report_status(_('AOP status'), conversion.db.aop_manager.aop_sorted_by_status, 'aop-block')
            if len(aop_results_report) == 0:
                aop_results_report = _('this journal has no aop.')

            # From now on reports and results live in the issue folders.
            final_report_path = pkg.issue_files.base_reports_path
            final_result_path = pkg.issue_files.issue_path

            if registered_scilista_item is not None:
                fs_utils.append_file(log_package, 'pkg.issue_files.copy_files_to_local_web_app()')
                pkg.issue_files.copy_files_to_local_web_app()

            fs_utils.append_file(log_package, 'xc_status = get_xc_status()')
            xc_status = get_xc_status(registered_scilista_item, conversion.pkg_xc_validations.fatal_errors, pkg_xml_fatal_errors, conversion.blocking_errors)

            # Extra scilista entries listed by the AOP manager.
            if conversion.db.aop_manager.aop_sorted_by_status.get('aop scilista item to update') is not None:
                for item in conversion.db.aop_manager.aop_sorted_by_status.get('aop scilista item to update'):
                    scilista_items.append(item)

        total = len(conversion.selected_articles) if conversion.selected_articles is not None else 0
        converted = len(conversion.conversion_status.get('converted', [])) if conversion.conversion_status.get('converted', []) is not None else 0
        not_converted = len(conversion.conversion_status.get('not converted', [])) if conversion.conversion_status.get('not converted', []) is not None else 0

        fs_utils.append_file(log_package, 'conversion.conclusion()')
        xc_conclusion_msg = ''.join([html_reports.p_message(item) for item in conversion.error_messages])
        xc_conclusion_msg += conclusion_message(total, converted, not_converted, xc_status, pkg.acron_issue_label)
        # Without a final status report the conclusion takes its place.
        if len(after_conversion_report) == 0:
            after_conversion_report = xc_conclusion_msg

    if converter_env.is_windows:
        fs_utils.append_file(log_package, 'pkg_reports.processing_result_location')
        report_components['xml-files'] += pkg_reports.processing_result_location(final_result_path)

    report_components['db-overview'] = before_conversion_report + after_conversion_report
    report_components['summary-report'] = xc_conclusion_msg + xc_results_report + aop_results_report

    fs_utils.append_file(log_package, 'pkg_reports.format_complete_report')
    xc_validations = pkg_reports.format_complete_report(report_components)
    content = xc_validations.message
    # The report text may mention the temporary report folder; point those
    # mentions to the final location.
    if tmp_report_path in content:
        fs_utils.append_file(log_package, 'content.replace(tmp_report_path, final_report_path)')
        content = content.replace(tmp_report_path, final_report_path)
    report_location = final_report_path + '/xml_converter.html'
    pkg_reports.save_report(report_location, [_('XML Conversion (XML to Database)'), pkg.acron_issue_label], content)

    if not converter_env.is_windows:
        fs_utils.append_file(log_package, 'format_reports_for_web')
        format_reports_for_web(final_report_path, pkg_path, pkg.acron_issue_label.replace(' ', '/'))

    # The temporary folder is disposable once the results moved elsewhere.
    if tmp_result_path != final_result_path:
        fs_utils.delete_file_or_folder(tmp_result_path)

    fs_utils.append_file(log_package, 'antes de return - convert_package')
    os.unlink(log_package)
    return (scilista_items, xc_status, xc_validations.statistics_message(), report_location)