def load_message_table():
    ret_val = None
    fname = "get_user_message"
    ocd = opasCentralDBLib.opasCentralDB()
    ocd.open_connection(caller_name=fname)  # make sure connection is open
    sql = f"SELECT * from vw_api_messages;"
    if ocd.db is not None:
        with closing(ocd.db.cursor(buffered=True, dictionary=True)) as curs:
            try:
                curs.execute(sql)
                warnings = curs.fetchwarnings()
                if warnings:
                    for warning in warnings:
                        logger.warning(warning)

                if curs.rowcount >= 1:
                    try:
                        ret_val = curs.fetchall()
                    except Exception as e:
                        logger.error(f"{fname}: error fetching messages: {e}")
                        ret_val = None
            except Exception as e:
                logger.error(f"{fname}: error executing '{sql}': {e}")
                ret_val = None

    ocd.close_connection(caller_name=fname)

    return ret_val
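
# A minimal usage sketch: index the rows returned by load_message_table() by message
# code. Assumes the vw_api_messages view exposes a 'msg_num_code' column (hypothetical
# name; adjust to the actual schema).
def _example_load_messages():
    messages = load_message_table()
    if messages is not None:
        # build a lookup table keyed by the (assumed) message code column
        msg_by_code = {row["msg_num_code"]: row for row in messages}
        print(f"Loaded {len(msg_by_code)} user messages")
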
def main():
    #  open databases
    try:
        stage_ocd = opasCentralDBLib.opasCentralDB(host=STAGE_DB_HOST, password=STAGE_PW, user=STAGE_USER)
        production_ocd = opasCentralDBLib.opasCentralDB(host=PRODUCTION_DB_HOST, password=PROD_PW, user=PROD_USER)
    except Exception as e:
        logger.error(f"Cannot open stage or production databases: {e}.  Terminating without changes")
    else:
        client_id = "2"
        if not stage_ocd.open_connection(caller_name="opasPushSettings"):
            raise Exception(f"Stage DB Connection Error: {STAGE_DB_HOST}")

        if not production_ocd.open_connection(caller_name="opasPushSettings"):
            raise Exception(f"Production DB Connection Error: {PRODUCTION_DB_HOST}")
            
        for config in CLIENT_CONFIGS:
            logger.info(f"Reading '{config}' from Staging DB")
            client_config = stage_ocd.get_client_config(client_id=client_id, client_config_name=config)
            # get old production config as backup
            production_config = production_ocd.get_client_config(client_id=client_id, client_config_name=config)
            if client_config is not None:
                curr_client_config_list_item = client_config.configList[0]
                client_config_settings = curr_client_config_list_item.configSettings
                logger.info(f"{config}: {client_config_settings}")
                if options.preview_bool:
                    print(f"Preview mode...when non-preview it would copy `{config}` to Production DB")
                else:
                    try:
                        print(f"Replacing prod {config}:{production_config.configList[0].configSettings}")
                        production_ocd.save_client_config_item(session_id=curr_client_config_list_item.session_id,
                                                               client_id=curr_client_config_list_item.api_client_id, 
                                                               client_configuration_item=curr_client_config_list_item,
                                                               replace=True)
                    except Exception as e:
                        logger.error(f"Cannot write to production database {e}.  Terminating without changes")
                    else:
                        logger.info(f"Wrote `{config}` to Production DB ({PRODUCTION_DB_HOST})")
            else:
                logger.error(f"Cannot find settings for {config} for client: {client_id}.")
                
     
        stage_ocd.close_connection(caller_name="opasPushSettings")
        production_ocd.close_connection(caller_name="opasPushSettings")

    print ("Run terminating")
Example 3
def test_opasdb_getsources(self):
    ocd = opasCentralDB()
    sources = ocd.get_sources()
    assert (sources[0] > 100)
    sources = ocd.get_sources(src_code="IJP")
    assert (sources[0] == 1)
    sources = ocd.get_sources(src_type="journal")
    assert (sources[0] > 70)
    sources = ocd.get_sources(src_type="videos")
    assert (sources[0] >= 12)
Example 4
def test_opasdb_getsourcetypes(self):
    ocd = opasCentralDB()
    sources = ocd.get_sources()
    assert (sources[0] > 100)
    # unlike the API higher level function, both of these src_types--videos and stream--return streams on this direct call
    #  because the database vw_api_productbase_instance_counts view only has the stream information.
    #  see testAPIMetadata.test_3B_meta_videostreams to see the difference, which is driven by the
    #  query parameter streams
    sources = ocd.get_sources(src_type="videos")
    assert (sources[0] >= 12)
    sources = ocd.get_sources(src_type="stream")
    assert (sources[0] >= 12)
Example 5
    def test_1_testviews(self):
        ocd = opasCentralDBLib.opasCentralDB()
        dbok = ocd.open_connection(
            caller_name="test_views")  # make sure connection is open
        assert (dbok == True)
        tables = [
            "vw_active_sessions",
            "vw_api_productbase",
            "vw_api_productbase_instance_counts",
            "vw_api_sourceinfodb",
            "vw_api_volume_limits",
            "vw_instance_counts_books",
            "vw_instance_counts_src",
            "vw_latest_session_activity",
            "vw_reports_document_activity",
            "vw_reports_document_views",
            "vw_reports_session_activity",
            "vw_reports_user_searches",
            "vw_stat_cited_crosstab",
            "vw_stat_cited_crosstab_with_details",
            "vw_stat_cited_in_all_years",
            "vw_stat_cited_in_last_5_years",
            "vw_stat_cited_in_last_10_years",
            "vw_stat_cited_in_last_20_years",
            "vw_stat_docviews_crosstab",
            "vw_stat_docviews_last12months",
            "vw_stat_docviews_lastmonth",
            "vw_stat_docviews_lastsixmonths",
            "vw_stat_docviews_lastweek",
            "vw_stat_most_viewed",
            "vw_stat_to_update_solr_docviews",
            # "vw_stat_docviews_lastcalyear" # for now, nothing from last year
        ]

        for table in tables:
            curs = ocd.db.cursor(buffered=True, dictionary=True)
            sql = f"SELECT * from {table} LIMIT 10;"
            try:
                curs.execute(sql)
                row_count = curs.rowcount
                print(f"Found {row_count} rows (limit was 10)")
                sourceData = curs.fetchall()
                assert (len(sourceData) >= 1)
            except Exception as e:
                print(f"Exception: can't query table {table} ({e})")
                assert (False)

        ocd.close_connection(
            caller_name="test_views")  # make sure connection is closed
Example 6
def test_count_open_sessions(self):
    ocd = opasCentralDB()
    count = ocd.count_open_sessions()
    assert (count > 0)
Example 7
def test_0_art_year(self):
    ocd = opasCentralDB()
    year = ocd.get_article_year("FD.026.0007A")
    assert (year == 2020)
Example 8
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import unittest
import opasXMLHelper
import opasCentralDBLib

from unitTestConfig import base_api, base_plus_endpoint_encoded

ocd = opasCentralDBLib.opasCentralDB()

class TestStandalonePageExtractFunctions(unittest.TestCase):
    """
    Tests
    
    Note: tests are performed in alphabetical order, hence the function naming
          with forced order in the names.
    
    """
    
    def test_0a_get_article_first_page_data_html_INCOMPLETE(self):
        """
        Retrieve an article; extract first page
        """
        # This old function wasn't used elsewhere in the code, so this call was removed;
        #  it retrieves an article but doesn't include search highlighting.
        # data = opasAPISupportLib.get_article_data("ANIJP-DE.009.0189A", fields=None)
        # this newer function includes the search parameters if there were some
        xmlstr = opasXMLHelper.xml_file_to_xmlstr(r"X:\_PEPA1\_PEPa1v\_PEPArchive\AOP\014\AOP.014.0125A(bEXP_ARCH1).XML", dtd_validations=False)
        htmlpages = opasXMLHelper.xml_get_pages_html(xmlstr, offset=0, limit=1, inside="div[@id='body']", env="body", pagebrk="div[@class='pagebreak']", pagenbr="p[@class='pagenumber']")
        extract_size = len(htmlpages)
        orig_size = len(xmlstr)
Example 9
def end_test_session(headers):
    from opasCentralDBLib import opasCentralDB
    ocd = opasCentralDB()
    ocd.end_session(session_id=headers["client-session"])
    print("Session End")
Example 10
def test_0_min_max_from_db(self):
    ocd = opasCentralDB()
    ret_val = ocd.get_min_max_volumes("FD")
    print(ret_val)
Example 11
def main():

    global options  # so the information can be used in support functions

    cumulative_file_time_start = time.time()
    randomizer_seed = None

    # scriptSourcePath = os.path.dirname(os.path.realpath(__file__))

    processed_files_count = 0
    ocd = opasCentralDBLib.opasCentralDB()
    fs = opasFileSupport.FlexFileSystem(key=localsecrets.S3_KEY,
                                        secret=localsecrets.S3_SECRET,
                                        root="pep-web-xml")

    # set toplevel logger to specified loglevel
    logger = logging.getLogger()
    logger.setLevel(options.logLevel)
    # get local logger
    logger = logging.getLogger(programNameShort)

    logger.info('Started at %s',
                datetime.today().strftime('%Y-%m-%d %H:%M:%S'))
    # logging.basicConfig(filename=logFilename, level=options.logLevel)

    solrurl_docs = None
    #solrurl_refs = None
    solrurl_authors = None
    solrurl_glossary = None
    if options.rootFolder == localsecrets.XML_ORIGINALS_PATH or options.rootFolder is None:
        start_folder = pathlib.Path(localsecrets.XML_ORIGINALS_PATH)
    else:
        start_folder = pathlib.Path(options.rootFolder)

    if 1:  # (options.biblio_update or options.fulltext_core_update or options.glossary_core_update) == True:
        try:
            solrurl_docs = localsecrets.SOLRURL + configLib.opasCoreConfig.SOLR_DOCS  # e.g., http://localhost:8983/solr/ + pepwebdocs
            solrurl_authors = localsecrets.SOLRURL + configLib.opasCoreConfig.SOLR_AUTHORS
            solrurl_glossary = localsecrets.SOLRURL + configLib.opasCoreConfig.SOLR_GLOSSARY
            # print("Logfile: ", logFilename)
            print("Messaging verbose: ", options.display_verbose)
            print("Input data Root: ", start_folder)
            print("Input data Subfolder: ", options.subFolder)
            print("Reset Core Data: ", options.resetCoreData)
            if options.forceRebuildAllFiles == True:
                msg = "Forced Rebuild - All files added, regardless of whether they are the same as in Solr."
                logger.info(msg)
                print(msg)

            print(80 * "*")
            print(f"Database will be updated. Location: {localsecrets.DBHOST}")
            if not options.glossary_only:  # options.fulltext_core_update:
                print("Solr Full-Text Core will be updated: ", solrurl_docs)
                print("Solr Authors Core will be updated: ", solrurl_authors)
            if 1:  # options.glossary_core_update:
                print("Solr Glossary Core will be updated: ", solrurl_glossary)

            print(80 * "*")
            if options.include_paras:
                print(
                    "--includeparas option selected. Each paragraph will also be stored individually for *Docs* core. Increases core size markedly!"
                )
            else:
                try:
                    print(
                        f"Paragraphs only stored for sources indicated in loaderConfig. Currently: [{', '.join(loaderConfig.src_codes_to_include_paras)}]"
                    )
                except:
                    print(
                        "Paragraphs only stored for sources indicated in loaderConfig."
                    )

            if options.halfway:
                print(
                    "--halfway option selected.  Processing approximately one-half of the files that match."
                )

            if options.run_in_reverse:
                print(
                    "--reverse option selected.  Running the files found in reverse order."
                )

            if options.file_key:
                print(
                    f"--key supplied.  Running for files matching the article id {options.file_key}"
                )

            print(80 * "*")
            if not options.no_check:
                cont = input(
                    "The above databases will be updated.  Do you want to continue (y/n)?"
                )
                if cont.lower() == "n":
                    print("User requested exit.  No data changed.")
                    sys.exit(0)

        except Exception as e:
            msg = f"cores specification error ({e})."
            print((len(msg) * "-"))
            print(msg)
            print((len(msg) * "-"))
            sys.exit(0)

    # import data about the PEP codes for journals and books.
    #  Codes are like APA, PAH, ... and special codes like ZBK000 for a particular book
    sourceDB = opasCentralDBLib.SourceInfoDB()
    solr_docs2 = None
    # The connection call is to solrpy (import was just solr)
    if localsecrets.SOLRUSER is not None and localsecrets.SOLRPW is not None:
        if 1:  # options.fulltext_core_update:
            solr_docs2 = pysolr.Solr(solrurl_docs,
                                     auth=(localsecrets.SOLRUSER,
                                           localsecrets.SOLRPW))
    else:  #  no user and password needed
        solr_docs2 = pysolr.Solr(solrurl_docs)

    # Reset core's data if requested (mainly for early development)
    if options.resetCoreData:
        if not options.glossary_only:  # options.fulltext_core_update:
            msg = "*** Deleting all data from the docs and author cores and database tables ***"
            logger.warning(msg)
            print(msg)
            msg2 = "Biblio and Articles table contents will be reset"
            logger.info(msg2)
            print(msg2)
            ocd.delete_all_article_data()
            solr_docs2.delete(q='*:*')
            solr_docs2.commit()
            solr_authors2.delete(q="*:*")
            solr_authors2.commit()

        # reset glossary core when others are reset, or when --resetcore is selected with --glossaryonly
        if 1:  # options.glossary_core_update:
            msg = "*** Deleting all data from the Glossary core ***"
            logger.warning(msg)
            print(msg)
            solr_gloss2.delete(q="*:*")
            solr_gloss2.commit()
    else:
        # check for missing files and delete them from the core, since we didn't empty the core above
        pass

    # Go through a set of XML files
    bib_total_reference_count = 0  # zero this here, it's checked at the end whether references are processed or not

    # ########################################################################
    # Get list of files to process
    # ########################################################################
    new_files = 0
    total_files = 0

    if options.subFolder is not None:
        start_folder = start_folder / pathlib.Path(options.subFolder)

    print(
        f"Locating files for processing at {start_folder} with pattern {loaderConfig.file_match_pattern}. Started at ({time.ctime()})."
    )
    if options.file_key is not None:
        # print (f"File Key Specified: {options.file_key}")
        pat = fr"({options.file_key}.*){loaderConfig.file_match_pattern}"
        filenames = fs.get_matching_filelist(filespec_regex=pat,
                                             path=start_folder,
                                             max_items=1000)
        if len(filenames) == 0:
            msg = f"File {pat} not found.  Exiting."
            logger.warning(msg)
            print(msg)
            exit(0)
        else:
            options.forceRebuildAllFiles = True
    elif options.file_only is not None:
        fileinfo = FileInfo()
        filespec = options.file_only
        fileinfo.mapLocalFS(filespec)
        filenames = [fileinfo]
    else:
        pat = fr"(.*?){loaderConfig.file_match_pattern}"
        filenames = []

    if filenames != []:
        total_files = len(filenames)
        new_files = len(filenames)
    else:
        # get a list of all the XML files that are new
        if options.forceRebuildAllFiles:
            # get a complete list of filenames for start_folder tree
            filenames = fs.get_matching_filelist(filespec_regex=pat,
                                                 path=start_folder)
        else:
            filenames = fs.get_matching_filelist(
                filespec_regex=pat,
                path=start_folder,
                revised_after_date=options.created_after)

    print((80 * "-"))
    files_found = len(filenames)
    if options.forceRebuildAllFiles:
        #maybe do this only during core resets?
        #print ("Clearing database tables...")
        #ocd.delete_all_article_data()
        print(
            f"Ready to import records from {files_found} files at path {start_folder}"
        )
    else:
        print(
            f"Ready to import {files_found} files *if modified* at path: {start_folder}"
        )

    timeStart = time.time()
    print(f"Processing started at ({time.ctime()}).")

    print((80 * "-"))
    precommit_file_count = 0
    skipped_files = 0
    stop_after = 0
    cumulative_file_time_start = time.time()
    issue_updates = {}
    if files_found > 0:
        if options.halfway:
            stop_after = round(files_found / 2) + 5  # go a bit further

        if options.run_in_reverse:
            filenames.reverse()

        # ----------------------------------------------------------------------
        # Now walk through all the filenames selected
        # ----------------------------------------------------------------------
        print(f"Load process started ({time.ctime()}).  Examining files.")

        for n in filenames:
            fileTimeStart = time.time()
            file_updated = False
            if not options.forceRebuildAllFiles:
                if not options.display_verbose and processed_files_count % 100 == 0 and processed_files_count != 0:
                    print(
                        f"Processed Files ...loaded {processed_files_count} out of {files_found} possible."
                    )

                if not options.display_verbose and skipped_files % 100 == 0 and skipped_files != 0:
                    print(
                        f"Skipped {skipped_files} so far...loaded {processed_files_count} out of {files_found} possible."
                    )

                if file_is_same_as_in_solr(solr_docs2,
                                           filename=n.basename,
                                           timestamp_str=n.timestamp_str):
                    skipped_files += 1
                    if options.display_verbose:
                        print(f"Skipped - No refresh needed for {n.basename}")
                    continue
                else:
                    file_updated = True

            # get mod date/time, filesize, etc. for mysql database insert/update
            processed_files_count += 1
            if stop_after > 0:
                if processed_files_count > stop_after:
                    print(
                        f"Halfway mark reached on file list ({stop_after})...file processing stopped per halfway option"
                    )
                    break

            fileXMLContents = fs.get_file_contents(n.filespec)

            # get file basename without build (which is in paren)
            base = n.basename
            artID = os.path.splitext(base)[0]
            # watch out for comments in file name, like:
            #   JICAP.018.0307A updated but no page breaks (bEXP_ARCH1).XML
            #   so skip data after a space
            m = re.match(r"([^ ]*).*\(.*\)", artID)
            # Note: We could also get the artID from the XML, but since it's also important
            # that the file names are correct, we'll do it here.  Also, it "could" have been left
            # out of the artinfo (attribute), whereas the filename is always there.
            artID = m.group(1)
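            # e.g., "JICAP.018.0307A updated but no page breaks (bEXP_ARCH1)" -> "JICAP.018.0307A"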
            # all IDs to upper case.
            artID = artID.upper()
            msg = "Processing file #%s of %s: %s (%s bytes). Art-ID:%s" % (
                processed_files_count, files_found, base, n.filesize, artID)
            logger.info(msg)
            if options.display_verbose:
                print(msg)

            # import into lxml
            parser = lxml.etree.XMLParser(encoding='utf-8',
                                          recover=True,
                                          resolve_entities=True,
                                          load_dtd=True)
            root = etree.fromstring(
                opasxmllib.remove_encoding_string(fileXMLContents), parser)
            pepxml = root

            # save common document (article) field values into artInfo instance for both databases
            artInfo = opasSolrLoadSupport.ArticleInfo(sourceDB.sourceData,
                                                      pepxml, artID, logger)
            artInfo.filedatetime = n.timestamp_str
            artInfo.filename = base
            artInfo.file_size = n.filesize
            artInfo.file_updated = file_updated
            # not a new journal, see if it's a new article.
            if opasSolrLoadSupport.add_to_tracker_table(
                    ocd,
                    artInfo.art_id):  # if true, added successfully, so new!
                # don't log to issue updates for journals that are new sources added during the annual update
                if artInfo.src_code not in loaderConfig.DATA_UPDATE_PREPUBLICATION_CODES_TO_IGNORE:
                    art = f"<article id='{artInfo.art_id}'>{artInfo.art_citeas_xml}</article>"
                    try:
                        issue_updates[artInfo.issue_id_str].append(art)
                    except Exception as e:
                        issue_updates[artInfo.issue_id_str] = [art]

            try:
                artInfo.file_classification = re.search(
                    "(?P<class>current|archive|future|free|special|offsite)",
                    str(n.filespec), re.IGNORECASE).group("class")
                # set it to lowercase for ease of matching later
                if artInfo.file_classification is not None:
                    artInfo.file_classification = artInfo.file_classification.lower()
            except Exception as e:
                logger.warning(
                    "Could not determine file classification for %s (%s)" %
                    (n.filespec, e))

            # walk through bib section and add to refs core database

            precommit_file_count += 1
            if precommit_file_count > configLib.opasCoreConfig.COMMITLIMIT:
                print(
                    f"Committing info for {configLib.opasCoreConfig.COMMITLIMIT} documents/articles"
                )

            # input to the glossary
            if 1:  # options.glossary_core_update:
                # load the glossary core if this is a glossary item
                glossary_file_pattern = r"ZBK.069(.*)\(bEXP_ARCH1\)\.(xml|XML)$"
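                # e.g., a basename like "ZBK.069.0001A(bEXP_ARCH1).XML" (hypothetical id) would match this pattern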
                if re.match(glossary_file_pattern, n.basename):
                    opasSolrLoadSupport.process_article_for_glossary_core(
                        pepxml,
                        artInfo,
                        solr_gloss2,
                        fileXMLContents,
                        verbose=options.display_verbose)

            # input to the full-text and authors cores
            if not options.glossary_only:  # options.fulltext_core_update:
                # load the docs (pepwebdocs) core
                opasSolrLoadSupport.process_article_for_doc_core(
                    pepxml,
                    artInfo,
                    solr_docs2,
                    fileXMLContents,
                    include_paras=options.include_paras,
                    verbose=options.display_verbose)
                # load the authors (pepwebauthors) core.
                opasSolrLoadSupport.process_info_for_author_core(
                    pepxml,
                    artInfo,
                    solr_authors2,
                    verbose=options.display_verbose)
                # load the database
                opasSolrLoadSupport.add_article_to_api_articles_table(
                    ocd, artInfo, verbose=options.display_verbose)

                if precommit_file_count > configLib.opasCoreConfig.COMMITLIMIT:
                    precommit_file_count = 0
                    solr_docs2.commit()
                    solr_authors2.commit()

            # input to the references core
            if 1:  # options.biblio_update:
                if artInfo.ref_count > 0:
                    bibReferences = pepxml.xpath(
                        "/pepkbd3//be"
                    )  # this is the second time we do this (also in artInfo); not sure which is better per space vs. time considerations
                    if options.display_verbose:
                        print((
                            "   ...Processing %s references for the references database."
                            % (artInfo.ref_count)))

                    #processedFilesCount += 1
                    bib_total_reference_count = 0
                    ocd.open_connection(caller_name="processBibliographies")
                    for ref in bibReferences:
                        bib_total_reference_count += 1
                        bib_entry = opasSolrLoadSupport.BiblioEntry(
                            artInfo, ref)
                        opasSolrLoadSupport.add_reference_to_biblioxml_table(
                            ocd, artInfo, bib_entry)

                    try:
                        ocd.db.commit()
                    except mysql.connector.Error as e:
                        print("SQL Database -- Biblio Commit failed!", e)

                    ocd.close_connection(caller_name="processBibliographies")

            # close the file, and do the next
            if options.display_verbose:
                print(("   ...Time: %s seconds." %
                       (time.time() - fileTimeStart)))

        print(f"Load process complete ({time.ctime()}).")
        if processed_files_count > 0:
            try:
                print("Performing final commit.")
                if not options.glossary_only:  # options.fulltext_core_update:
                    solr_docs2.commit()
                    solr_authors2.commit()
                    # fileTracker.commit()
                if 1:  # options.glossary_core_update:
                    solr_gloss2.commit()
            except Exception as e:
                print(("Exception: ", e))
            else:
                # Use date time as seed, hoping multiple instances don't get here at the same time
                # but only if caller did not specify
                if randomizer_seed is None:
                    randomizer_seed = int(datetime.utcnow().timestamp())

    # end of docs, authors, and/or references Adds

    # write updated file
    if issue_updates != {}:
        # now seed randomizer, hopefully all instances have a different seed (or the caller must force one)
        random.seed(randomizer_seed)
        try:
            # temp exception block just until localsecrets has been updated with DATA_UPDATE_LOG_DIR
            try:
                fname = f"{localsecrets.DATA_UPDATE_LOG_DIR}/updated_issues_{dtime.datetime.now().strftime('%Y%m%d_%H%M%S')}({random.randint(1000,9999)}).xml"
            except Exception as e:
                fname = f"updated_issues_{dtime.datetime.now().strftime('%Y%m%d_%H%M%S')}({random.randint(1000,9999)}).xml"

            print(f"Issue updates.  Writing file {fname}")
            with open(fname, 'w', encoding="utf8") as fo:
                fo.write(f'<?xml version="1.0" encoding="UTF-8"?>\n')
                fo.write('<issue_updates>\n')
                for k, a in issue_updates.items():
                    fo.write(f"\n\t<issue>\n\t\t{str(k)}\n\t\t<articles>\n")
                    for ref in a:
                        try:
                            #ref = re.sub(ref, "([Q ])&([ A])", r"\1&amp;\2", flags=re.IGNORECASE)
                            fo.write(f"\t\t\t{ref}\n")
                        except Exception as e:
                            print(f"Issue Update Article Write Error: ({e})")

                    fo.write("\t\t</articles>\n\t</issue>")
                fo.write('\n</issue_updates>')

        except Exception as e:
            print(f"Issue Update File Write Error: ({e})")

    # ---------------------------------------------------------
    # Closing time
    # ---------------------------------------------------------
    timeEnd = time.time()
    #currentfile_info.close()

    # for logging
    if 1:  # (options.biblio_update or options.fulltext_core_update) == True:
        elapsed_seconds = timeEnd - cumulative_file_time_start  # actual processing time going through files
        elapsed_minutes = elapsed_seconds / 60
        if bib_total_reference_count > 0:
            msg = f"Finished! Imported {processed_files_count} documents and {bib_total_reference_count} references. Total file inspection/load time: {elapsed_seconds:.2f} secs ({elapsed_minutes:.2f} minutes.) "
            logger.info(msg)
            print(msg)
        else:
            msg = f"Finished! Imported {processed_files_count} documents. Total file load time: {elapsed_seconds:.2f} secs ({elapsed_minutes:.2f} minutes.)"
            logger.info(msg)
            print(msg)
        if processed_files_count > 0:
            msg = f"...Files loaded per Min: {processed_files_count/elapsed_minutes:.4f}"
            logger.info(msg)
            print(msg)
            msg = f"...Files evaluated per Min (includes skipped files): {len(filenames)/elapsed_minutes:.4f}"
            logger.info(msg)
            print(msg)

    elapsed_seconds = timeEnd - timeStart  # actual processing time going through files
    elapsed_minutes = elapsed_seconds / 60
    msg = f"Note: File load time is not total elapsed time. Total elapsed time is: {elapsed_seconds:.2f} secs ({elapsed_minutes:.2f} minutes.)"
    logger.info(msg)
    print(msg)
    if processed_files_count > 0:
        msg = f"Files per elapsed min: {processed_files_count/elapsed_minutes:.4f}"
        logger.info(msg)
        print(msg)