def CreateFulltextIdsFile(ids_output_file, log_file_name):
    """Write the IDs of already existing fulltexts to ids_output_file.

    The extraction script is only started if the Elasticsearch configuration
    file exists.  If it does not, the log file is truncated to zero bytes and
    a notice explaining the skip is emitted through "echo", with
    log_file_name handed to util.ExecOrDie as its third argument.
    """
    elasticsearch_access_conf = "/usr/local/var/lib/tuelib/Elasticsearch.conf"

    if not os.access(elasticsearch_access_conf, os.F_OK):
        # Skip if configuration is not present
        skip_notice = ("Skip extraction since " + elasticsearch_access_conf +
                       " not present")
        util.ExecOrDie(util.Which("truncate"), ["-s", "0", log_file_name])
        util.ExecOrDie(util.Which("echo"), [skip_notice], log_file_name)
        return

    extraction_script = "/usr/local/bin/extract_existing_fulltext_ids.sh"
    util.ExecOrDie(extraction_script, [ids_output_file], log_file_name)
Beispiel #2
0
def DumpTranslationsDB(database, user, password, outfile_name):
    """Dump a MySQL database with mysqldump.

    One leading and one trailing double quote are stripped from database,
    user and password before they are put on the mysqldump command line.
    outfile_name is first passed to ClearOutFile and then handed to
    util.ExecOrDie as its third argument.

    NOTE(review): the "--user=" and "--password=" arguments were
    reconstructed from a source in which the credentials handling had been
    masked out; they mirror the quote stripping visible for the database
    argument -- confirm against the original script.
    """
    def StripEnclosingQuotes(value):
        # Removes a double quote at the very start and/or very end only.
        return re.sub('^"|"$', '', value)

    ClearOutFile(outfile_name)
    util.ExecOrDie("/usr/bin/mysqldump", [
        "--single-transaction", "--database",
        StripEnclosingQuotes(database),
        "--user=" + StripEnclosingQuotes(user),
        "--password=" + StripEnclosingQuotes(password)
    ], outfile_name)
Beispiel #3
0
def Main():
    """Download the Crossref data and upload it to the BSZ FTP server.

    Expects exactly one command line argument, the email address used as
    util.default_email_recipient.  The FTP host and credentials are read
    from the "FTP" section of the config file loaded via
    util.LoadConfigFile().
    """
    if len(sys.argv) != 2:
        util.SendEmail(os.path.basename(sys.argv[0]),
                       "This script needs to be called with an email address as the only argument!\n", priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]
    try:
        config = util.LoadConfigFile()
        ftp_host   = config.get("FTP", "host")
        ftp_user   = config.get("FTP", "username")
        ftp_passwd = config.get("FTP", "password")
    except Exception as e:
        util.Error("failed to read config file! (" + str(e) + ")")

    # Download data from Crossref:
    log_file_name = CreateLogFileName()
    crossref_xml_file = "/tmp/crossref.xml"
    # Get rid of a leftover from an earlier run.  The file does not exist on
    # a first run (or after a successful one), so only unlink it if present.
    if os.path.exists(crossref_xml_file):
        os.unlink(crossref_xml_file)
    util.ExecOrDie("/usr/local/bin/crossref_downloader", [ crossref_xml_file ], log_file_name)

    # Upload the XML data to the BSZ FTP server:
    ftp = util.FTPLogin(ftp_host, ftp_user, ftp_passwd)
    try:
        with open(crossref_xml_file, "rb") as xml_file:
            ftp.storbinary("STOR crossref.xml", xml_file)
    except Exception as e:
        # Report what actually failed: the upload, not the config parsing.
        util.Error("failed to upload the Crossref data to the FTP server! (" + str(e) + ")")
    os.unlink(crossref_xml_file)

    util.SendEmail("Crossref Data Import",
                   "Successfully imported Crossref data and uploaded it to the BSZ FTP server.", priority=5)
def CleanUp(title_data_file, log_file_name):
    """Shut down the temporary Solr instance and drop the temporary title data.

    The shutdown script always runs; title_data_file is only removed when it
    is not None.
    """
    shutdown_script = "/usr/local/bin/shutdown_refterm_solr.sh"
    util.ExecOrDie(shutdown_script, [], log_file_name)

    if title_data_file is None:
        return  # no temporary title data to clean up
    util.Remove(title_data_file)
Beispiel #5
0
def DownloadCrossrefData(output_marc_filename):
    """Download Crossref data and return the number reported by marc_size.

    Runs crossref_downloader with the fixed journal list, writing to
    output_marc_filename, then runs "marc_size" on that file and parses the
    first line of its standard output as an integer.

    Returns:
        The parsed integer, or 0 if marc_size printed nothing or its first
        output line is blank.

    Raises:
        ValueError: if the first output line is not a valid integer.
    """
    util.ExecOrDie("/usr/local/bin/crossref_downloader", [
        "/usr/local/var/lib/tuelib/crossref_downloader/crossref_journal_list",
        output_marc_filename
    ], "/proc/self/fd/1")
    process = subprocess.Popen(["marc_size", output_marc_filename],
                               stdout=subprocess.PIPE)
    # communicate() reads the pipe to EOF, closes it and waits for the child,
    # so neither an open file descriptor nor a zombie process is left behind.
    output, _ = process.communicate()
    lines = output.splitlines()
    if not lines or not lines[0].strip():
        return 0
    return int(lines[0])
Beispiel #6
0
def StartPipeline(pipeline_script_name, marc_title, conf):
    """Run the pipeline script on marc_title, then import into VuFind.

    Each of the two steps gets its own log file name, created with
    util.MakeLogFileName in the directory returned by
    util.GetLogDirectory().  The file name patterns for the import are read
    from the "FileNames" section of conf.
    """
    pipeline_log = util.MakeLogFileName(pipeline_script_name,
                                        util.GetLogDirectory())
    util.ExecOrDie(pipeline_script_name, [marc_title], pipeline_log)

    import_log = util.MakeLogFileName("import_into_vufind",
                                      util.GetLogDirectory())
    title_pattern = conf.get("FileNames", "title_marc_data")
    authority_pattern = conf.get("FileNames", "authority_marc_data")
    ImportIntoVuFind(title_pattern, authority_pattern, import_log)
Beispiel #7
0
def DumpMongoDB(config, log_file_name="/dev/stderr"):
    """Create a gzip-compressed mongodump and move it to its final location.

    The dump is first written to a hidden sibling directory
    (<dump_base_path>/.<dump_root>) and only afterwards moved to
    <dump_base_path>/<dump_root>, replacing an already existing directory of
    that name.  Both path components come from the "LocalConfig" section of
    config.
    """
    base_path = config.get("LocalConfig", "dump_base_path")
    root_name = config.get("LocalConfig", "dump_root")

    # Back up to an intermediate hidden directory that is excluded from the
    # backup, to prevent an inconsistent state from being saved.
    hidden_dump_dir = "/".join([base_path, "." + root_name])
    mongodump_args = ["--out=" + hidden_dump_dir, "--gzip"]
    util.ExecOrDie(util.Which("mongodump"), mongodump_args, log_file_name)

    target_dump_dir = "/".join([base_path, root_name])
    # isdir() is already False for a non-existent path.
    if os.path.isdir(target_dump_dir):
        rmtree(target_dump_dir)
    move(hidden_dump_dir, target_dump_dir)
Beispiel #8
0
def RunPipelineAndImportIntoSolr(pipeline_script_name, marc_title, conf):
    """Run the pipeline script, import into VuFind and record the import time.

    Each of the two steps gets its own log file name, created with
    util.MakeLogFileName in the directory returned by
    util.GetLogDirectory().  The file name patterns for the import are read
    from the "FileNames" section of conf.  Afterwards the current local time
    is written to /usr/local/vufind/public/last_solr_import.
    """
    log_file_name = util.MakeLogFileName(pipeline_script_name,
                                         util.GetLogDirectory())
    util.ExecOrDie(pipeline_script_name, [marc_title], log_file_name)
    log_file_name = util.MakeLogFileName("import_into_vufind",
                                         util.GetLogDirectory())
    ImportIntoVuFind(conf.get("FileNames", "title_marc_data"),
                     conf.get("FileNames", "authority_marc_data"),
                     log_file_name)

    # Write timestamp file for last successful Solr import.  os.open() is
    # used so that a newly created file gets mode 0o644 (subject to the
    # umask).  O_TRUNC is required: str(datetime) is shorter whenever the
    # microsecond part is 0, and without truncation the tail of a previous,
    # longer timestamp would remain in the file.
    timestamp_fd = os.open('/usr/local/vufind/public/last_solr_import',
                           os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o644)
    with open(timestamp_fd, 'w') as output:
        output.write(str(datetime.datetime.now()))
Beispiel #9
0
def ImportOADOIsToMongo(update_list,
                        source_directory=None,
                        log_file_name="/dev/stderr"):
    """Import every not yet imported file of update_list into MongoDB.

    If source_directory is given, the process changes its working directory
    to it first.  A file counts as already imported when a symlink with its
    name exists in the "imported" subdirectory of the current working
    directory; such files are skipped.  Every other file is passed to
    import_oadois_to_mongo.sh and then marked via CreateImportedSymlink.
    """
    if source_directory is not None:
        os.chdir(source_directory)
    marker_directory = os.getcwd() + "/imported"

    for filename in update_list:
        marker_path = marker_directory + "/" + filename
        if not os.path.islink(marker_path):
            print("Importing \"" + filename + "\"")
            util.ExecOrDie(util.Which("import_oadois_to_mongo.sh"),
                           [filename], log_file_name)
            CreateImportedSymlink(filename, marker_path)
        else:
            print("Skipping " + filename +
                  " since apparently already imported")
Beispiel #10
0
def Main():
    """Transfer new fulltext directories to the SFTP server and report by email.

    Expects exactly one command line argument, the email address used as
    util.default_email_recipient.  Connection data is read from the "SFTP"
    section and the directories from the "Upload" section of the config file
    loaded via util.LoadConfigFile().
    """
    if len(sys.argv) != 2:
        script_name = os.path.basename(sys.argv[0])
        util.SendEmail(
            script_name,
            "This script needs to be called with an email address as the only argument!\n",
            priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]

    try:
        config = util.LoadConfigFile()
        sftp_host = config.get("SFTP", "host")
        sftp_user = config.get("SFTP", "username")
        sftp_keyfile = config.get("SFTP", "keyfile")
        local_directory = config.get("Upload", "local_directory")
        directory_on_sftp_server = config.get("Upload",
                                              "directory_on_sftp_server")
    except Exception as e:
        util.Error("failed to read config file! (" + str(e) + ")")

    # Determine which directories hold new data
    fulltext_files = GetExistingFiles(local_directory)
    dirs_to_transfer = GetFulltextDirectoriesToTransfer(
        local_directory, fulltext_files)

    # Nothing to do: report that and stop
    if not dirs_to_transfer:
        util.SendEmail("Transfer Fulltexts",
                       "No directories to transfer",
                       priority=5)
        return

    # Transfer the data
    transfer_args = [
        sftp_host, sftp_user, sftp_keyfile, local_directory,
        directory_on_sftp_server
    ]
    transfer_args = transfer_args + list(dirs_to_transfer)
    util.ExecOrDie("/usr/local/bin/transfer_fulltext.sh", transfer_args)

    # Clean up on the server
    CleanUpFiles(fulltext_files)

    found_section = "Found Files:\n\n" + '\n'.join(fulltext_files)
    transferred_section = "\n\nTransferred directories:\n\n" + '\n'.join(
        dirs_to_transfer)
    email_msg_body = found_section + transferred_section
    util.SendEmail("Transfer Fulltexts", email_msg_body, priority=5)
def Main():
    """Collect Solr statistics for one system type and report by email.

    Expects two command line arguments: the email address used as
    util.default_email_recipient and the system type, which must be one of
    "krimdok", "relbib" or "ixtheo".
    """
    script_name = os.path.basename(sys.argv[0])
    if len(sys.argv) != 3:
        util.SendEmail(script_name,
                       "This script needs to be called with an email address and the system type!\n", priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]

    system_type = sys.argv[2]
    if system_type not in ("krimdok", "relbib", "ixtheo"):
        util.SendEmail(script_name,
                       "This system_type must be one of {krimdok,relbib,ixtheo}!\n", priority=1)
        sys.exit(-1)

    # Remove a previous output file before the collector writes a new one.
    output_file = "/tmp/collect_solr_stats_data.csv"
    util.Remove(output_file)
    collector_args = [ system_type, output_file ]
    util.ExecOrDie("/usr/local/bin/collect_solr_stats_data", collector_args,
                   "/usr/local/var/log/tuefind/collect_solr_stats_data.log")

    util.SendEmail("Solr Stats Collector", "Successfully generated Solr statistics and updated Ingo's MySQL database.", priority=5)
Beispiel #12
0
def _LastMatchingFile(pattern):
    """Return the lexicographically last path matching the glob pattern.

    Reports via util.Error if nothing matches instead of failing with an
    IndexError.  NOTE(review): util.Error is assumed not to return, as the
    surrounding code relies on throughout -- confirm.
    """
    matches = sorted(glob.glob(pattern), reverse=True)
    if not matches:
        util.Error("\"" + pattern + "\" matched no files! " +
                   "(Should have matched at least 1 file!)")
    return matches[0]


def ImportIntoVuFind(title_pattern, authority_pattern, log_file_name):
    """Import title and authority MARC data into the VuFind Solr indexes.

    For each of the two glob patterns the lexicographically last matching
    file is imported (presumably the most recent one if the file names embed
    a date -- verify against the naming scheme).  The "biblio" and
    "authority" indexes are cleared before and optimized after their
    respective import, then the alphabetic browse index is rebuilt as user
    "solr" and the logs are summarized and rotated.

    Requires the environment variable VUFIND_HOME.  Uses the names
    solrmarc_log_summary and import_log_summary, which are not defined in
    this function and must be provided by the enclosing module.
    """
    vufind_dir = os.getenv("VUFIND_HOME")
    if vufind_dir is None:
        util.Error("VUFIND_HOME not set, cannot start solr import!")

    # Resolve both input files up front so that a missing file is reported
    # before any index has been cleared.
    title_args = [_LastMatchingFile(title_pattern)]
    authority_args = [_LastMatchingFile(authority_pattern)]

    # import title data
    title_index = 'biblio'
    ClearSolrIndex(title_index)
    util.ExecOrDie(vufind_dir + "/import-marc.sh", title_args, log_file_name)
    OptimizeSolrIndex(title_index)

    # import authority data
    authority_index = 'authority'
    ClearSolrIndex(authority_index)
    util.ExecOrDie(vufind_dir + "/import-marc-auth.sh", authority_args,
                   log_file_name)
    OptimizeSolrIndex(authority_index)
    util.ExecOrDie(
        util.Which("sudo"),
        ["-u", "solr", "-E", vufind_dir + "/index-alphabetic-browse.sh"],
        log_file_name)

    # cleanup logs
    util.ExecOrDie("/usr/local/bin/summarize_logs",
                   [vufind_dir + "/import/solrmarc.log", solrmarc_log_summary])
    util.ExecOrDie("/usr/local/bin/log_rotate",
                   [vufind_dir + "/import/", "solrmarc\\.log"])
    util.ExecOrDie("/usr/local/bin/summarize_logs",
                   [log_file_name, import_log_summary])
    util.ExecOrDie(
        "/usr/local/bin/log_rotate",
        [os.path.dirname(log_file_name),
         os.path.basename(log_file_name)])
Beispiel #13
0
def ExtractRefDataMarcFile(gzipped_tar_archive, output_marc_file, log_file_name):
    """Run extract_refterm_archive.sh on the archive, naming the MARC output file."""
    extraction_script = "/usr/local/bin/extract_refterm_archive.sh"
    script_args = [gzipped_tar_archive, output_marc_file]
    util.ExecOrDie(extraction_script, script_args, log_file_name)
Beispiel #14
0
def CompressAndEncryptFile(infile, outfile, archive_password):
    """Add infile to the archive outfile using 7za with a password switch.

    The password is passed on the command line as "-p<password>".
    """
    password_switch = "-p" + archive_password
    sevenzip_args = ['a', password_switch, outfile, infile]
    util.ExecOrDie("/usr/bin/7za", sevenzip_args)
Beispiel #15
0
def ExtractOADOIURLs(share_directory, all_dois_file, urls_file, log_file_name):
    """Run extract_oadoi_urls.sh on the DOI file located in share_directory.

    The script receives the path <share_directory>/<all_dois_file> and
    urls_file as its arguments.
    """
    print("Extract URLs for DOI's in " + all_dois_file)
    dois_path = share_directory + '/' + all_dois_file
    extraction_script = util.Which("extract_oadoi_urls.sh")
    util.ExecOrDie(extraction_script, [dois_path, urls_file], log_file_name)
Beispiel #16
0
def Main():
    """Generate a Beacon file from the most recent BSZ authority and title data.

    Command line arguments: an email address (used as
    util.default_email_recipient), the beacon header file, the output path
    and, optionally, a ppn-filter file that is passed to count_gnd_refs as
    --control-number-list.

    The script exits with status -1 after sending a priority-1 email if the
    arguments are wrong, if GetMostRecentBSZFile yields None for the
    authority or title data, or if the final output could not be written.
    """
    if len(sys.argv) != 4 and len(sys.argv) != 5:
        util.SendEmail(os.path.basename(
            sys.argv[0]
        ), "This script needs to be called with an email address, the beacon header file, an output "
                       "path and an optional ppn-filter file as arguments!\n",
                       priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]

    # Raw strings keep "\d" a regex escape instead of an invalid string escape.
    most_recent_authority_filename = GetMostRecentBSZFile(
        r"^Normdaten-(\d\d\d\d\d\d).mrc$")
    if most_recent_authority_filename is None:
        util.SendEmail("Beacon Generator",
                       "Found no matching authority files!",
                       priority=1)
        # Without input data the tools below would be called with None.
        sys.exit(-1)

    most_recent_titles_filename = GetMostRecentBSZFile(
        r"^GesamtTiteldaten-(\d\d\d\d\d\d).mrc$")
    if most_recent_titles_filename is None:
        util.SendEmail("Beacon Generator",
                       "Found no matching title files!",
                       priority=1)
        sys.exit(-1)

    # Extract the GND numbers from the 035$a subfield of the MARC authority data for authors:
    gnd_numbers_path = "/tmp/gnd_numbers"
    util.ExecOrDie("/usr/local/bin/extract_person_gnd_numbers",
                   [most_recent_authority_filename], gnd_numbers_path)

    # Count GND references in the title data:
    gnd_counts_filename = "/tmp/gnd_counts"
    if len(sys.argv) == 4:
        util.ExecOrDie("/usr/local/bin/count_gnd_refs", [
            gnd_numbers_path, most_recent_titles_filename, gnd_counts_filename
        ])
    else:
        util.ExecOrDie("/usr/local/bin/count_gnd_refs", [
            "--control-number-list=" + sys.argv[4], gnd_numbers_path,
            most_recent_titles_filename, gnd_counts_filename
        ])

    # Generate a file with a timestamp in the Beacon format:
    timestamp_filename = "/tmp/beacon_timestamp"
    with open(timestamp_filename, "w") as timestamp_file:
        timestamp_file.write("#TIMESTAMP: " + str(datetime.date.today()) +
                             "\n")

    # Now generate the final output (header + counts):
    output_written = util.ConcatenateFiles(
        [sys.argv[2], timestamp_filename, gnd_counts_filename], sys.argv[3])

    # Cleanup of temp files (also when writing the output failed):
    os.unlink(gnd_numbers_path)
    os.unlink(timestamp_filename)
    os.unlink(gnd_counts_filename)

    if not output_written:
        util.SendEmail("Beacon Generator",
                       "An unexpected error occurred: could not write \"" +
                       sys.argv[3] + "\"!",
                       priority=1)
        # Do not follow an error report with a success message.
        sys.exit(-1)

    util.SendEmail("Beacon File Generator",
                   "Successfully created a Beacon file.",
                   priority=5)
Beispiel #17
0
def SetupTemporarySolrInstance(title_data_file, conf, log_file_name):
    """Run setup_refterm_solr.sh with title_data_file as its only argument.

    Per the original comment this sets up a temporary Solr instance in a
    ramdisk and imports the title data.  The conf parameter is not used.
    """
    setup_script = "/usr/local/bin/setup_refterm_solr.sh"
    util.ExecOrDie(setup_script, [title_data_file], log_file_name)
Beispiel #18
0
def CreateMatchDB(title_marc_data, log_file_name):
    """Run create_match_db on title_marc_data, passing setsid=False to util.ExecOrDie."""
    program_args = [title_marc_data]
    util.ExecOrDie("/usr/local/bin/create_match_db", program_args,
                   log_file_name, setsid=False)
def Main():
    """Generate a Beacon file from the most recent BSZ authority and title data.

    Command line arguments: an optional leading "--filter-field=..." option
    (forwarded unchanged to count_gnd_refs), an email address (used as
    util.default_email_recipient), the beacon header file, the output path
    and, optionally, a ppn-filter file that is passed to count_gnd_refs as
    --control-number-list.

    Wrong arguments lead to SendUsageAndExit(); all other outcomes are
    reported through util.SendEmailAndExit().
    """
    if len(sys.argv) != 4 and len(sys.argv) != 5 and len(sys.argv) != 6 \
        or (len(sys.argv) == 6 and not sys.argv[1].startswith("--filter-field=")):
        SendUsageAndExit()

    count_gnd_refs_args = []
    if sys.argv[1].startswith("--filter-field="):
        count_gnd_refs_args.append(sys.argv[1])
        # Drop the option so that the positional arguments below have the
        # same indexes with and without it.
        del sys.argv[1]

    # Re-check: removing the option may have left too few arguments.
    if len(sys.argv) != 4 and len(sys.argv) != 5:
        SendUsageAndExit()

    util.default_email_recipient = sys.argv[1]

    # Raw strings keep "\d" a regex escape instead of an invalid string escape.
    most_recent_authority_filename = GetMostRecentBSZFile(
        r"^Normdaten-(\d\d\d\d\d\d).mrc$")
    if most_recent_authority_filename is None:
        util.SendEmailAndExit("Beacon Generator",
                              "Found no matching authority files!",
                              priority=1)

    most_recent_titles_filename = GetMostRecentBSZFile(
        r"^GesamtTiteldaten-post-pipeline-(\d\d\d\d\d\d).mrc$")
    if most_recent_titles_filename is None:
        util.SendEmailAndExit("Beacon Generator",
                              "Found no matching title files!",
                              priority=1)

    # Extract the GND numbers from the 035$a subfield of the MARC authority data for authors:
    gnd_numbers_path = "/tmp/gnd_numbers"
    util.ExecOrDie("/usr/local/bin/extract_person_gnd_numbers",
                   [most_recent_authority_filename], gnd_numbers_path)

    # Count GND references in the title data:
    gnd_counts_filename = "/tmp/gnd_counts"
    if len(sys.argv) > 4:
        count_gnd_refs_args.append("--control-number-list=" + sys.argv[4])
    count_gnd_refs_args.extend(
        [gnd_numbers_path, most_recent_titles_filename, gnd_counts_filename])
    util.ExecOrDie("/usr/local/bin/count_gnd_refs", count_gnd_refs_args)

    # Generate a file with a timestamp in the Beacon format:
    timestamp_filename = "/tmp/beacon_timestamp"
    with open(timestamp_filename, "w") as timestamp_file:
        timestamp_file.write("#TIMESTAMP: " + str(datetime.date.today()) +
                             "\n")

    # Now generate the final output (header + counts):
    output_written = util.ConcatenateFiles(
        [sys.argv[2], timestamp_filename, gnd_counts_filename], sys.argv[3])

    # Cleanup of temp files.  This happens before the result is reported so
    # that the files are also removed when writing the output failed.
    os.unlink(gnd_numbers_path)
    os.unlink(timestamp_filename)
    os.unlink(gnd_counts_filename)

    if not output_written:
        util.SendEmailAndExit(
            "Beacon Generator",
            "An unexpected error occurred: could not write \"" + sys.argv[3] +
            "\"!",
            priority=1)

    util.SendEmailAndExit("Beacon File Generator",
                          "Successfully created a Beacon file.",
                          priority=5)