Example #1
0
def Main():
    """Download the Crossref data and upload it to the BSZ FTP server.

    Expects exactly one command-line argument, the email address that is
    stored in util.default_email_recipient.  The FTP credentials are read
    from the "FTP" section of the config file.  A notification email is sent
    once the upload has finished.
    """
    if len(sys.argv) != 2:
        util.SendEmail(os.path.basename(sys.argv[0]),
                       "This script needs to be called with an email address as the only argument!\n", priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]
    try:
        config = util.LoadConfigFile()
        ftp_host   = config.get("FTP", "host")
        ftp_user   = config.get("FTP", "username")
        ftp_passwd = config.get("FTP", "password")
    except Exception as e:
        # NOTE(review): the code below relies on util.Error() not returning -- confirm.
        util.Error("failed to read config file! (" + str(e) + ")")

    # Download data from Crossref:
    log_file_name = CreateLogFileName()
    crossref_xml_file = "/tmp/crossref.xml"
    # Get rid of a leftover file from an earlier run.  On a clean system there
    # is nothing to delete, and an unconditional unlink would raise.
    if os.path.exists(crossref_xml_file):
        os.unlink(crossref_xml_file)
    util.ExecOrDie("/usr/local/bin/crossref_downloader", [ crossref_xml_file ], log_file_name)

    # Upload the XML data to the BSZ FTP server:
    ftp = util.FTPLogin(ftp_host, ftp_user, ftp_passwd)
    try:
        with open(crossref_xml_file, "rb") as xml_file:
            ftp.storbinary("STOR crossref.xml", xml_file)
    except Exception as e:
        # Report what actually went wrong here: the upload, not the config file.
        util.Error("failed to upload \"" + crossref_xml_file + "\" to the FTP server! (" + str(e) + ")")
    os.unlink(crossref_xml_file)

    util.SendEmail("Crossref Data Import",
                   "Successfully imported Crossref data and uploaded it to the BSZ FTP server.", priority=5)
Example #2
0
def Main():
    """Download Crossref data as MARC and upload it to the BSZ FTP server.

    Expects exactly one command-line argument, the email address that is
    stored in util.default_email_recipient.  The FTP credentials and the
    remote upload directory are read from the config file.  If
    DownloadCrossrefData() reports zero records, nothing is uploaded.  In
    either case the local MARC file is removed and a status email is sent.
    """
    if len(sys.argv) != 2:
        util.SendEmail(
            os.path.basename(sys.argv[0]),
            "This script needs to be called with an email address as the only argument!\n",
            priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]
    try:
        config = util.LoadConfigFile()
        ftp_host = config.get("FTP", "host")
        ftp_user = config.get("FTP", "username")
        ftp_passwd = config.get("FTP", "password")
        directory_on_ftp_server = config.get("Upload",
                                             "directory_on_ftp_server")
    except Exception as e:
        # NOTE(review): the code below relies on util.Error() not returning -- confirm.
        util.Error("failed to read config file! (" + str(e) + ")")

    marc_filename = "/tmp/crossref_marc.xml"
    no_of_records = DownloadCrossrefData(marc_filename)
    if no_of_records == 0:
        email_msg_body = "No new records.\n\n"
    else:
        ftp = util.FTPLogin(ftp_host, ftp_user, ftp_passwd)
        try:
            ftp.cwd(directory_on_ftp_server)
        except Exception as e:
            # Catch Exception rather than everything, so that SystemExit and
            # KeyboardInterrupt still propagate, and report the reason.
            util.Error("failed to change directory on the FTP server to \"" +
                       directory_on_ftp_server + "\"!" + " (" + str(e) + ")")
        UploadFile(ftp, marc_filename, GenerateRemoteFilename())
        email_msg_body = "Uploaded " + str(
            no_of_records) + " MARC records to the BSZ FTP server.\n\n"
    os.unlink(marc_filename)
    util.SendEmail("BSZ Crossref File Upload", email_msg_body, priority=5)
def Main():
    """Merge the differential data and deletion list into the complete dump.

    Expects exactly one command-line argument, the email address that is
    stored in util.default_email_recipient.  The three input file names come
    from the "Files" section of the config file.  The script stops early when
    the complete dump is unreadable, when neither the deletion list nor the
    differential dump is readable, or when the complete dump is at least as
    recent as one of the readable update files.
    """
    if len(sys.argv) != 2:
        # NOTE(review): the code below relies on util.Error() not returning -- confirm.
        util.Error("This script expects one argument: default_email_recipient")
    util.default_email_recipient = sys.argv[1]
    config = util.LoadConfigFile()
    try:
        deletion_list = config.get("Files", "loesch_liste")
        complete_data = config.get("Files", "komplett_abzug")
        differential_data = config.get("Files", "differenz_abzug")
    except Exception as e:
        util.Error("failed to read config file! (" + str(e) + ")")

    # The complete dump is mandatory; at least one of the update files must be usable.
    if not os.access(complete_data, os.R_OK):
        util.Error("Fehlender oder nicht lesbarer Komplettabzug. (" +
                   complete_data + ")")
    have_deletion_list = os.access(deletion_list, os.R_OK)
    if not have_deletion_list:
        deletion_list = None
    have_differential_data = os.access(differential_data, os.R_OK)
    if not (have_deletion_list or have_differential_data):
        util.Error(
            "Fehlende oder nicht lesbare Löschliste und Differenzabzug..")

    # Bail out if the most recent complete data set is at least as recent as the deletion list or the differential
    # data:
    complete_mtime = os.path.getmtime(complete_data)
    deletion_mtime = os.path.getmtime(deletion_list) if have_deletion_list else None
    differential_mtime = (os.path.getmtime(differential_data)
                          if have_differential_data else None)
    update_mtimes = [
        mtime for mtime in (deletion_mtime, differential_mtime)
        if mtime is not None
    ]
    if any(complete_mtime >= mtime for mtime in update_mtimes):
        util.SendEmail(
            "Nichts zu tun!",
            "Komplettabzug ist neuer als eventuell vorhandene Differenzabzüge.\n",
            priority=5)
        sys.exit(0)

    PrepareDataDirectory()  # After this we're in the data directory...

    util.ExtractAndRenameBSZFiles("../" + complete_data)
    # NOTE(review): this runs even if the differential dump was found to be
    # unreadable above -- confirm that this is intended.
    util.ExtractAndRenameBSZFiles("../" + differential_data, "Diff")
    # ...and after the next call we're back in the original directory.
    title_superior_norm_tuple = UpdateAllMarcFiles(deletion_list)

    today_stamp = datetime.date.today().strftime("%y%m%d")
    new_tarball_name = complete_data.replace("current", today_stamp)
    CreateNewTarballAndDeletePreviousTarball(new_tarball_name,
                                             title_superior_norm_tuple,
                                             complete_data)
    for position in range(3):
        util.RemoveLinkTargetAndLink(title_superior_norm_tuple[position])
    util.Info("Successfully created updated MARC files.")
Example #4
0
def Main():
    """Look up one config file entry and emit its value via util.Info().

    Expects two command-line arguments, the section name and the entry name.
    With any other argument count a usage message is emitted and the script
    exits with status -1.
    """
    arguments = sys.argv
    if len(arguments) != 3:
        util.Info("usage: " + arguments[0] + " section entry", file=sys.stderr)
        sys.exit(-1)

    util.default_email_recipient = "*****@*****.**"
    config = util.LoadConfigFile()
    section_name, entry_name = arguments[1], arguments[2]
    util.Info(config.get(section_name, entry_name))
Example #5
0
def Main():
    """Dump the translations database, compress and encrypt it, and notify the user.

    Expects exactly one command-line argument, an email address.  It is used
    both as util.default_email_recipient and as the user passed to
    NotifyUser().  The database credentials are read from
    /usr/local/var/lib/tuelib/translations.conf and the archive password from
    the default config file.  tmp_file_path, output_filename,
    compressed_extension, encrypted_extension and web_server_path are defined
    outside this function.
    """
    if len(sys.argv) != 2:
        util.SendEmail(
            os.path.basename(sys.argv[0]),
            "This script requires an email address as the only argument\n",
            priority=1,
            recipient=util.default_email_recipient)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]
    user = sys.argv[1]
    try:
        sql_config = util.LoadConfigFile(
            "/usr/local/var/lib/tuelib/translations.conf")
        sql_database = sql_config.get("Database", "sql_database")
        sql_username = sql_config.get("Database", "sql_username")
        sql_password = sql_config.get("Database", "sql_password")
    except Exception as e:
        # NOTE(review): the code below relies on util.Error() not returning -- confirm.
        util.Error("Failed to read sql_config file (" + str(e) + ")")

    try:
        config = util.LoadConfigFile()
        archive_password = config.get("Passwords", "archive_password")
    except Exception as e:
        util.Error("Failed to read config file (" + str(e) + ")")

    raw_dump_file = tmp_file_path + "/" + output_filename
    DumpTranslationsDB(sql_database, sql_username, sql_password, raw_dump_file)

    # Cut everything from the first "." onwards and append the new extensions.
    # Raw strings keep the backslash away from Python's own escape processing
    # ("\." is an invalid string escape).
    compressed_and_encrypted_dump_file = re.sub(
        r'\..*$', '',
        raw_dump_file) + compressed_extension + encrypted_extension
    # Strip one leading and one trailing double quote from the configured password.
    CompressAndEncryptFile(raw_dump_file, compressed_and_encrypted_dump_file,
                           re.sub(r'^"|"$', '', archive_password))

    MoveToDownloadPosition(compressed_and_encrypted_dump_file, web_server_path)
    CleanUp(raw_dump_file)
    servername = DetermineServerName()
    NotifyUser(user, servername,
               os.path.basename(compressed_and_encrypted_dump_file))
Example #6
0
def Main():
    """Fetch new Unpaywall changelists, import them into MongoDB and export URL files.

    Expects exactly one command-line argument, the email address that is
    stored in util.default_email_recipient.  MongoDB is started before the
    update and stopped afterwards, also when one of the steps in between
    fails.  A notification email is sent after a successful run.
    """
    util.default_email_sender = "*****@*****.**"
    util.default_email_recipient = "*****@*****.**"
    if len(sys.argv) != 2:
        util.SendEmail("Create Refterm File (Kickoff Failure)",
                       "This script must be called with one argument,\n" +
                       "the default email recipient\n",
                       priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]
    # Download needed differential files
    config = util.LoadConfigFile()
    log_file_name = util.MakeLogFileName(sys.argv[0], util.GetLogDirectory())
    changelist_url = config.get("Unpaywall", "changelist_url")
    api_key = config.get("Unpaywall", "api_key")
    oadoi_download_directory = config.get("LocalConfig", "download_dir")
    oadoi_imported_directory = oadoi_download_directory + "/imported/"
    StartMongoDB()
    try:
        json_update_objects = GetChangelists(changelist_url, api_key)
        remote_update_files = GetRemoteUpdateFiles(json_update_objects)
        local_update_files = GetLocalUpdateFiles(config,
                                                 oadoi_download_directory)
        download_lists = GetAllFilesStartingAtFirstMissingLocal(
            remote_update_files, local_update_files)
        DownloadUpdateFiles(download_lists['download'], json_update_objects,
                            api_key, oadoi_download_directory)

        # Update the Database
        ImportOADOIsToMongo(
            GetImportFiles(config, oadoi_download_directory,
                           oadoi_imported_directory),
            oadoi_download_directory, log_file_name)

        # Generate the files to be used by the pipeline
        share_directory = config.get("LocalConfig", "share_directory")
        ixtheo_dois_file = config.get("LocalConfig", "ixtheo_dois_file")
        ixtheo_urls_file = config.get("LocalConfig", "ixtheo_urls_file")
        ExtractOADOIURLs(share_directory, ixtheo_dois_file, ixtheo_urls_file,
                         log_file_name)
        ShareOADOIURLs(share_directory, ixtheo_urls_file)
        krimdok_dois_file = config.get("LocalConfig", "krimdok_dois_file")
        krimdok_urls_file = config.get("LocalConfig", "krimdok_urls_file")
        ExtractOADOIURLs(share_directory, krimdok_dois_file, krimdok_urls_file,
                         log_file_name)
        ShareOADOIURLs(share_directory, krimdok_urls_file)
        DumpMongoDB(config, log_file_name)
    finally:
        # Do not leave the database running if any step above failed or exited.
        StopMongoDB()
    util.SendEmail("Update OADOI Data",
                   "Successfully created \"" + ixtheo_urls_file + "\" and \"" +
                   krimdok_urls_file + "\" in " + share_directory,
                   priority=5)
Example #7
0
def Main():
    """Download missing Unpaywall update files and apply them to the database.

    The changelist URL, the API key and the working directory are read from
    the config file.  If the comparison of remote and local update files
    yields nothing, a message is printed and the script exits with status 0.
    """
    config = util.LoadConfigFile()
    changelist_url = config.get("Unpaywall", "changelist_url")
    api_key = config.get("Unpaywall", "api_key")
    working_dir = config.get("LocalConfig", "working_dir")

    json_update_objects = GetRemoteUpdateObjects(changelist_url, api_key)
    remote_files = GetRemoteUpdateFiles(json_update_objects)
    local_files = GetLocalUpdateFiles(config, working_dir)
    pending = GetAllFilesFromLastMissingLocal(remote_files, local_files)

    if pending:
        DownloadUpdateFiles(pending['download'], json_update_objects, api_key,
                            working_dir)
        UpdateDatabase(pending['update'], config)
    else:
        print("Received empty list - so nothing to do")
        sys.exit(0)
Example #8
0
def Main():
    """Download new BSZ data files via FTP and email a summary.

    Expects exactly one command-line argument, the email address that is
    stored in util.default_email_recipient.  The FTP credentials are read
    from the "FTP" section of the config file.  Optional data categories are
    only downloaded if the config file has a section for them.  The messages
    collected in msg during the downloads form the body of the final email.
    """
    if len(sys.argv) != 2:
        util.SendEmail(
            os.path.basename(sys.argv[0]),
            "This script needs to be called with an email address as the only argument!\n",
            priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]
    try:
        config = util.LoadConfigFile()
        ftp_host = config.get("FTP", "host")
        ftp_user = config.get("FTP", "username")
        ftp_passwd = config.get("FTP", "password")
    except Exception as e:
        # NOTE(review): the code below relies on util.Error() not returning -- confirm.
        util.Error("failed to read config file! (" + str(e) + ")")

    ftp = util.FTPLogin(ftp_host, ftp_user, ftp_passwd)
    msg = []

    download_cutoff_date = IncrementStringDate(
        GetCutoffDateForDownloads(config))
    complete_data_filenames = DownloadCompleteData(config, ftp,
                                                   download_cutoff_date, msg)
    if complete_data_filenames is not None:
        # A complete dump was downloaded: use the date taken from its first
        # file name as the cutoff for the remaining downloads.
        download_cutoff_date = ExtractDateFromFilename(
            complete_data_filenames[0])
    DownloadData(config, "Differenzabzug", ftp, download_cutoff_date, msg)
    DownloadData(config, "Loeschlisten", ftp, download_cutoff_date, msg)
    if config.has_section("Loeschlisten2"):
        DownloadData(config, "Loeschlisten2", ftp, download_cutoff_date, msg)
    if config.has_section("Hinweisabzug"):
        DownloadData(config, "Hinweisabzug", ftp, "000000", msg)
    if config.has_section("Errors"):
        DownloadData(config, "Errors", ftp, download_cutoff_date, msg)
    incremental_authority_cutoff_date = ShiftDateToTenDaysBefore(
        download_cutoff_date)
    if config.has_section("Normdatendifferenzabzug"):
        if (not CurrentIncrementalAuthorityDumpPresent(
                config, incremental_authority_cutoff_date)):
            DownloadData(config, "Normdatendifferenzabzug", ftp,
                         incremental_authority_cutoff_date, msg)
        else:
            msg.append(
                "Skipping Download of \"Normdatendifferenzabzug\" since already present\n"
            )
    CleanUpCumulativeCollection(config)
    # str.join() works on Python 2 and 3; string.join() was removed in Python 3.
    util.SendEmail("BSZ File Update", "".join(msg), priority=5)
Example #9
0
def Main():
    """Run the MARC-21 pipeline if a new BSZ data file is available.

    Expects two command-line arguments: the default email recipient and the
    path of the pipeline script, which must be executable.  If no new data
    file is found, only a notification email is sent.  Otherwise the BSZ
    archive is extracted, the pipeline and Solr import are run, a success
    email is sent, and the timestamp, refterm mutex and import-finished file
    are updated.  solrmarc_log_summary and import_log_summary are defined
    outside this function.
    """
    util.default_email_sender = "*****@*****.**"
    if len(sys.argv) != 3:
        print(
            "invalid arguments! usage: initiate_marc_pipeline.py <default email recipient> <MARC21 pipeline script name>"
        )
        usage_text = ("This script needs to be called with two arguments,\n"
                      "the default email recipient and the name of the MARC-21\n"
                      "pipeline script to be executed.\n")
        util.SendEmail("MARC-21 Pipeline Kick-Off (Failure)",
                       usage_text,
                       priority=1)
        sys.exit(-1)

    util.default_email_recipient = sys.argv[1]
    pipeline_script_name = sys.argv[2]
    script_is_executable = os.access(pipeline_script_name, os.X_OK)
    if not script_is_executable:
        problem = "Pipeline script not found or not executable: "
        print(problem + pipeline_script_name)
        util.SendEmail("MARC-21 Pipeline Kick-Off (Failure)",
                       problem + "\"" + pipeline_script_name + "\"\n",
                       priority=1)
        sys.exit(-1)

    conf = util.LoadConfigFile()
    link_name = conf.get("Misc", "link_name")

    # Nothing new to process: report that and stop.
    if not FoundNewBSZDataFile(link_name):
        util.SendEmail("MARC-21 Pipeline Kick-Off",
                       "No new data was found.",
                       priority=5)
        return

    if not FoundReftermMutex():
        # NOTE(review): the code below relies on util.Error() not returning -- confirm.
        util.Error("No Refterm Mutex found")
    bsz_data = util.ResolveSymlink(link_name)
    if not bsz_data.endswith(".tar.gz"):
        util.Error("BSZ data file must end in .tar.gz!")
    extracted_files = util.ExtractAndRenameBSZFiles(bsz_data)

    RunPipelineAndImportIntoSolr(pipeline_script_name, extracted_files[0],
                                 conf)
    util.SendEmail("MARC-21 Pipeline",
                   "Pipeline completed successfully.",
                   priority=5,
                   attachments=[solrmarc_log_summary, import_log_summary])
    util.WriteTimestamp()
    DeleteReftermMutex()
    WriteImportFinishedFile()
Example #10
0
def Main():
    """Purge old generations of timestamped files in the current directory.

    Expects exactly one command-line argument, the email address that is
    stored in util.default_email_recipient.  The number of generations to
    keep is read from the "PurgeFiles" section of the config file and must be
    at least 1.  Candidate files are those whose name contains six
    consecutive digits.
    """
    if len(sys.argv) != 2:
        # NOTE(review): the code below relies on util.Error() not returning -- confirm.
        util.Error("This script expects one argument: default_email_recipient")
    util.default_email_recipient = sys.argv[1]

    config = util.LoadConfigFile()
    try:
        keep_count = config.getint("PurgeFiles", "generations_to_keep")
    except Exception as e:
        util.Error("failed to read config file! ("+ str(e) + ")")
    if keep_count < 1:
        util.Error("generations_to_keep must be at least 1!")

    candidates = glob.glob("*[0-9][0-9][0-9][0-9][0-9][0-9]*")
    if not candidates:
        # Only a warning email is sent; PurgeFiles() is still called with the empty list.
        util.SendEmail("File Purge Failed", "No timestamped files found!", priority=1)
    PurgeFiles(keep_count, candidates)
Example #11
0
def Main():
    """Transfer new fulltext directories to the SFTP server and report by email.

    Expects exactly one command-line argument, the email address that is
    stored in util.default_email_recipient.  The SFTP connection data and the
    local and remote directories are read from the config file.  If there is
    nothing to transfer, a short notification is sent and the function
    returns.
    """
    if len(sys.argv) != 2:
        util.SendEmail(
            os.path.basename(sys.argv[0]),
            "This script needs to be called with an email address as the only argument!\n",
            priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]
    try:
        config = util.LoadConfigFile()
        sftp_host = config.get("SFTP", "host")
        sftp_user = config.get("SFTP", "username")
        sftp_keyfile = config.get("SFTP", "keyfile")
        local_directory = config.get("Upload", "local_directory")
        directory_on_sftp_server = config.get("Upload",
                                              "directory_on_sftp_server")
    except Exception as e:
        # NOTE(review): the code below relies on util.Error() not returning -- confirm.
        util.Error("failed to read config file! (" + str(e) + ")")

    # Determine which directories contain new data
    found_files = GetExistingFiles(local_directory)
    pending_dirs = GetFulltextDirectoriesToTransfer(local_directory,
                                                    found_files)

    # Nothing to do
    if not pending_dirs:
        util.SendEmail("Transfer Fulltexts",
                       "No directories to transfer",
                       priority=5)
        return

    # Transfer the data: fixed connection arguments first, then one argument per directory
    transfer_args = [
        sftp_host, sftp_user, sftp_keyfile, local_directory,
        directory_on_sftp_server
    ]
    transfer_args.extend(pending_dirs)
    util.ExecOrDie("/usr/local/bin/transfer_fulltext.sh", transfer_args)

    # Clean up on the server
    CleanUpFiles(found_files)

    found_listing = '\n'.join(found_files)
    transferred_listing = '\n'.join(pending_dirs)
    email_msg_body = ("Found Files:\n\n" + found_listing +
                      "\n\nTransferred directories:\n\n" + transferred_listing)
    util.SendEmail("Transfer Fulltexts", email_msg_body, priority=5)
Example #12
0
def Main():
    """Create the refterm file and related data if a new BSZ data file exists.

    Expects exactly one command-line argument, the email address that is
    stored in util.default_email_recipient.  If new title data is found, a
    temporary Solr instance is set up and three worker processes (reference
    terms, serial sort dates, match DB) are handed to ExecuteInParallel().
    The elapsed time is reported by email.
    """
    util.default_email_sender = "*****@*****.**"
    util.default_email_recipient = "*****@*****.**"
    if len(sys.argv) != 2:
        util.SendEmail("Create Refterm File (Kickoff Failure)",
                       "This script must be called with one argument,\n"
                       + "the default email recipient\n", priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]
    conf = util.LoadConfigFile()
    title_data_link_name = conf.get("Misc", "link_name")
    ref_data_pattern = conf.get("Hinweisabzug", "filename_pattern")
    if ref_data_pattern != "":
        ref_data_archive = util.getMostRecentFileMatchingGlob(ref_data_pattern)
        if ref_data_archive is None:
            # Only a warning: processing continues without a reference data archive.
            util.SendEmail("Create Refterm File (No Reference Data File Found)",
                           "No File matching pattern \"" + ref_data_pattern + "\" found\n", priority=1)
    else:
        ref_data_archive = None

    if FoundNewBSZDataFile(title_data_link_name):
        start = datetime.datetime.now()
        log_file_name = CreateLogFile()
        title_data_file_orig = ExtractTitleDataMarcFile(title_data_link_name)
        date_string = GetDateFromFilename(title_data_file_orig)
        title_data_file = RenameTitleDataFile(title_data_file_orig, date_string)
        # Make sure CleanUp() runs when the interpreter exits.
        atexit.register(CleanUp, title_data_file, log_file_name)
        SetupTemporarySolrInstance(title_data_file, conf, log_file_name)
        create_ref_term_process = multiprocessing.Process(target=CreateRefTermFile, name="Create Reference Terms File",
                                      args=[ ref_data_archive, date_string, conf, log_file_name ])
        create_serial_sort_term_process = multiprocessing.Process(target=CreateSerialSortDate, name="Serial Sort Date",
                                              args=[ title_data_file, date_string, log_file_name ])
        create_match_db_log_file_name = util.MakeLogFileName("create_match_db", util.GetLogDirectory())
        create_match_db_process = multiprocessing.Process(target=CreateMatchDB, name="Create Match DB",
                                      args=[ title_data_file, create_match_db_log_file_name ])
        ExecuteInParallel(create_ref_term_process, create_serial_sort_term_process, create_match_db_process)
        end = datetime.datetime.now()
        # timedelta.seconds only holds the part below one day; total_seconds()
        # also counts whole days.  int() keeps the previous whole-second resolution.
        duration_in_minutes = str(int((end - start).total_seconds()) / 60.0)
        util.SendEmail("Create Refterm File", "Refterm file successfully created in " + duration_in_minutes + " minutes.", priority=5)
    else:
        util.SendEmail("Create Refterm File", "No new data was found.", priority=5)
Example #13
0
def Main():
    """Run the black box test of every config section and email on failures.

    Expects the default email recipient as the first command-line argument.
    Every config section other than "Global" describes one test with a
    mandatory "url" and optional "expected" and "timeout" entries.  If
    "validate_ssl_certificates" in the "Global" section is false, HTTPS
    certificate verification is switched off for the whole process.
    """
    if len(sys.argv) < 2:
        # Fail with a clear message instead of an IndexError below.
        # NOTE(review): the code below relies on util.Error() not returning -- confirm.
        util.Error("This script expects one argument: default_email_recipient")
    util.default_email_recipient = sys.argv[1]
    config = util.LoadConfigFile()
    if config.has_option("Global", "validate_ssl_certificates"):
        if not config.getboolean("Global", "validate_ssl_certificates"):
            # Deliberate, config-controlled opt-out: this replaces the default
            # HTTPS context factory with the unverified one.
            ssl._create_default_https_context = ssl._create_unverified_context

    for section in config.sections():
        if section == "Global":
            continue
        url = config.get(section, "url")
        expected = None
        if config.has_option(section, "expected"):
            expected = config.get(section, "expected")
        timeout = None
        if config.has_option(section, "timeout"):
            timeout = config.getfloat(section, "timeout")
        if not RunTest(section, url, timeout, expected):
            util.SendEmail("Black Box Test Failed!",
                           "Test " + section +
                           " failed!\n\n--Your friendly black box monitor",
                           "*****@*****.**",
                           priority=1)
Example #14
0
def Main():
    """Download new BSZ data files via FTP into a temporary directory and publish them.

    Expects exactly one command-line argument, the email address that is
    stored in util.default_email_recipient.  The downloads happen inside a
    temporary directory.  The files collected in all_downloaded_files are
    then copied to the directory the script was started in and added to the
    cumulative collection.  If a complete dump or a differential dump was
    downloaded, the marker file /tmp/bsz_download_happened is touched.  The
    messages collected in msg form the body of the final email.
    """
    if len(sys.argv) != 2:
        util.SendEmail(
            os.path.basename(sys.argv[0]),
            "This script needs to be called with an email address as the only argument!\n",
            priority=1)
        sys.exit(-1)
    util.default_email_recipient = sys.argv[1]
    try:
        config = util.LoadConfigFile()
        ftp_host = config.get("FTP", "host")
        ftp_user = config.get("FTP", "username")
        ftp_passwd = config.get("FTP", "password")
    except Exception as e:
        # NOTE(review): the code below relies on util.Error() not returning -- confirm.
        util.Error("failed to read config file! (" + str(e) + ")")

    ftp = util.FTPLogin(ftp_host, ftp_user, ftp_passwd)
    msg = []
    # Keep a reference to the TemporaryDirectory object for the whole
    # function; the directory is removed once the object is cleaned up.
    tempdir = tempfile.TemporaryDirectory()
    bsz_dir = os.getcwd()
    os.chdir(tempdir.name)
    download_cutoff_date = IncrementStringDate(
        GetCutoffDateForDownloads(config))
    complete_data_filenames = DownloadCompleteData(config, ftp,
                                                   download_cutoff_date, msg)
    all_downloaded_files = [] if complete_data_filenames is None else complete_data_filenames
    downloaded_at_least_some_new_titles = False
    if complete_data_filenames is not None:
        # A complete dump was downloaded: use the date taken from its first
        # file name as the cutoff for the remaining downloads.
        download_cutoff_date = ExtractDateFromFilename(
            complete_data_filenames[0])
        downloaded_at_least_some_new_titles = True
    all_downloaded_files += DownloadData(config, "Differenzabzug", ftp,
                                         download_cutoff_date, msg)
    # Test for a non-empty list.  The former "is not []" compared identity
    # with a fresh list object and was therefore always true.
    if all_downloaded_files:
        downloaded_at_least_some_new_titles = True
    all_downloaded_files += DownloadData(config, "Loeschlisten", ftp,
                                         download_cutoff_date, msg)
    if config.has_section("Loeschlisten2"):
        all_downloaded_files += DownloadData(config, "Loeschlisten2", ftp,
                                             download_cutoff_date, msg)
    if config.has_section("Hinweisabzug"):
        # NOTE(review): the result of this download is not added to
        # all_downloaded_files -- confirm that this is intended.
        DownloadData(config, "Hinweisabzug", ftp, "000000", msg)
    if config.has_section("Errors"):
        all_downloaded_files += DownloadData(config, "Errors", ftp,
                                             download_cutoff_date, msg)
    incremental_authority_cutoff_date = ShiftDateToTenDaysBefore(
        download_cutoff_date)
    if config.has_section("Normdatendifferenzabzug"):
        if (not CurrentIncrementalAuthorityDumpPresent(
                config, incremental_authority_cutoff_date)):
            all_downloaded_files += DownloadData(
                config, "Normdatendifferenzabzug", ftp,
                incremental_authority_cutoff_date, msg)
        else:
            msg.append(
                "Skipping Download of \"Normdatendifferenzabzug\" since already present\n"
            )
    try:
        for downloaded_file in all_downloaded_files:
            shutil.copy(downloaded_file, bsz_dir)
    except Exception as e:
        util.Error(
            "Moving a downloaded file to the BSZ download directory failed! ("
            + str(e) + ")")

    AddToCumulativeCollection(all_downloaded_files, config)
    CleanUpCumulativeCollection(config)
    if downloaded_at_least_some_new_titles:
        util.Touch("/tmp/bsz_download_happened"
                   )  # Must be the same path as in the merge script!
    util.SendEmail("BSZ File Update", ''.join(msg), priority=5)