    def __init__(self):
        # Load the configuration for the filters
        try:
            filter_list_from_config = Config.filters()
        except Exception as e:
            log("ERROR", "Filter configuration error: " + str(e))
            sys.exit(1)  # exit non-zero: configuration errors should prevent execution

        # Create (and empty if needed) base filter path
        self.filter_base_dir = os.path.join(Config.get('Saved_Log_Dir'),
                                            'temp_filter_processing')
        if os.path.isdir(self.filter_base_dir):
            shutil.rmtree(self.filter_base_dir)

        # Create staging (all original files go here) and final (all filtered files end up here) directories
        os.makedirs(os.path.join(self.filter_base_dir, 'staging'),
                    exist_ok=True)
        os.makedirs(os.path.join(self.filter_base_dir, 'final'), exist_ok=True)

        # Build the filter instance list
        self.instance_list = []
        for filter_from_config in filter_list_from_config:
            self.instance_list.append(
                FilterInstance(filter_from_config.get("name"),
                               filter_from_config.get("exe"),
                               filter_from_config.get("type"),
                               filter_from_config.get("timeout"),
                               self.filter_base_dir))

        log("INFO",
            str(len(filter_list_from_config)) + " valid filters have been found")
Example #2
def main():
    # Parse command line arguments (if any)
    parser = argparse.ArgumentParser(
        description="GlideinMonitor's indexing script for GlideIn .out & .err files")
    parser.add_argument('-c', help="Path to Config File")
    parser.add_argument('-f',
                        help="Ignore the lock file and force an index anyway",
                        action='store_true')
    args = parser.parse_args()

    # Process config file
    Config.init(args.c)

    # Begin Indexing
    begin_indexing(args)
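
For reference, a self-contained check of how argparse handles these two options; this demonstrates the behavior and is not part of the original script:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-c', help="Path to Config File")
parser.add_argument('-f', action='store_true')

# store_true means -f defaults to False and becomes True when passed
print(parser.parse_args([]).f)                      # False
print(parser.parse_args(['-f', '-c', 'x.json']).c)  # x.json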
Example #3
def main():
    # Parse command line arguments (if any)
    parser = argparse.ArgumentParser(
        description="GlideinMonitor's indexing script for GlideIn .out & .err files")
    parser.add_argument('-c', help="Path to Config File")
    parser.add_argument('-f',
                        help="Ignore the lock file and force an index anyway",
                        action='store_true')
    args = parser.parse_args()

    # Process config file
    Config.init(args.c)

    # Check for index job lock
    lock_location = os.path.join(Config.get('Saved_Log_Dir'), "index_lock")
    if not pathlib.Path(lock_location).exists():
        try:
            os.mkdir(Config.get('Saved_Log_Dir'))
            log("INFO", "Creating new directory for index_lock")
        except FileExistsError:
            pass
        pathlib.Path(lock_location).touch()
    else:
        # Job index already running/did not complete
        if not args.f:
            log("ERROR", "Lock file present in saved log directory")
            return

    # Connect to the database
    db = Database()

    # Get list of job data that should be indexed
    job_index_list = determine_indexing(db)

    # Archive the original files
    archive_files(db, job_index_list)

    # Indexing & filtering complete
    db.commit()
    log("INFO", "Indexing Complete")

    # Delete the lock file
    os.remove(lock_location)
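
The exists()/touch() pair above leaves a small window in which two indexers can both pass the check; here is a minimal sketch of an atomic alternative under the same lock-file convention (the helper name is hypothetical):

import os

def acquire_lock(lock_location, force=False):
    # Hypothetical helper: O_CREAT | O_EXCL makes creation fail if the
    # lock file already exists, closing the exists()/touch() race window.
    try:
        fd = os.open(lock_location, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        os.close(fd)
        return True
    except FileExistsError:
        return force  # with -f, proceed despite an existing lock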
Example #4
def log(error_level, message):
    if Config.get("Log_Level") == "NONE":
        return

    if Config.get("Log_Level") == "ERROR":
        if error_level != "ERROR":
            return

    if Config.get("Log_Level") == "WARNING":
        if error_level == "INFO":
            return

    # Write to error log
    log_location_dir = os.path.join(Config.get('Log_Dir'), 'indexer')
    if not os.path.exists(log_location_dir):
        os.makedirs(log_location_dir)
    log_location = os.path.join(log_location_dir, datetime.datetime.now().strftime("%Y-%m-%d") + ".txt")
    with open(log_location, "a") as log_file:
        log_file.write(error_level + " - " + str(int(datetime.datetime.now().timestamp())) + " - " + message + "\n")
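
The same threshold logic can be read as a severity comparison; the following is a minimal restatement (names hypothetical, level names taken from log() above):

LOG_LEVELS = {"INFO": 0, "WARNING": 1, "ERROR": 2, "NONE": 3}

def should_log(configured_level, message_level):
    # Hypothetical restatement of the filtering in log() above: NONE
    # suppresses everything; otherwise a message passes when its severity
    # reaches the configured threshold.
    if configured_level == "NONE":
        return False
    return LOG_LEVELS[message_level] >= LOG_LEVELS[configured_level]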
Example #5
def main():
    # Parse command line arguments (if any)
    parser = argparse.ArgumentParser(description="GlideinMonitor's Flask Web Server")
    parser.add_argument('-c', help="Path to Config File")
    args = parser.parse_args()

    # Process config file
    Config.init(args.c)

    # Redirect Flask output to log file
    log_location_dir = os.path.join(Config.get('Log_Dir'), 'server')
    if not os.path.exists(log_location_dir):
        os.makedirs(log_location_dir)
    log_location = os.path.join(log_location_dir, datetime.datetime.now().strftime("%Y-%m-%d") + ".txt")
    log_file = open(log_location, "a")
    sys.stderr = log_file
    sys.stdout = log_file

    # Start the Server
    app.run(host=Config.get('Host'), port=Config.get('Port'))
Example #6
def archive_files(db, job_index_list):
    saved_dir_name = Config.get('Saved_Log_Dir')
    datetime_name = datetime.datetime.now().strftime("%Y-%m-%d")

    # Initialize the filter
    index_filter = Filter()

    for job_data in job_index_list:
        # Check if the current instance is in the database, if not then add it
        final_dir_name_original = os.path.join(saved_dir_name, "original",
                                               job_data["instance_name"],
                                               job_data["frontend_user"],
                                               datetime_name)
        final_dir_name_filter = os.path.join(saved_dir_name, "filter",
                                             job_data["instance_name"],
                                             job_data["frontend_user"],
                                             datetime_name)

        # Create the directories if they do not exist
        if not os.path.exists(final_dir_name_original):
            os.makedirs(final_dir_name_original)
        if not os.path.exists(final_dir_name_filter):
            os.makedirs(final_dir_name_filter)

        # Tar the output and error file
        save_file_name = (job_data["instance_name"] + "_" +
                          job_data["entry_name"] + "_" +
                          job_data["job_id"] + ".tar.gz")
        file_path_original = os.path.join(final_dir_name_original,
                                          "original_" + save_file_name)
        file_path_filter = os.path.join(final_dir_name_filter,
                                        "filter_" + save_file_name)

        # Save the original immediately
        with tarfile.open(file_path_original, "w:gz") as tar:
            tar.add(job_data["out_file_path"],
                    arcname=os.path.basename(job_data["out_file_path"]))
            tar.add(job_data["err_file_path"],
                    arcname=os.path.basename(job_data["err_file_path"]))

        # An archive of the original files now exists at file_path_original.
        # Add the job to the filter queue with its final destination, file_path_filter.
        index_filter.add_job(file_path_filter, job_data)

        # Add/Update it in the database
        db.add_job(job_data, file_path_original, file_path_filter)

    # Ensure filters have completed running before the archive is complete
    while index_filter.filters_still_running():
        time.sleep(1)

    # Cleanup filter folders
    index_filter.cleanup()
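
The tar step above is the same in both archiving paths; here is a self-contained demonstration with throwaway stand-in files:

import os
import tarfile

# Create two stand-in files in place of the real .out/.err pair
for name in ("job.out", "job.err"):
    with open(name, "w") as f:
        f.write("sample\n")

with tarfile.open("original_demo.tar.gz", "w:gz") as tar:
    # arcname drops the directory part, as in archive_files()
    tar.add("job.out", arcname=os.path.basename("job.out"))
    tar.add("job.err", arcname=os.path.basename("job.err"))

with tarfile.open("original_demo.tar.gz", "r:gz") as tar:
    print(tar.getnames())  # ['job.out', 'job.err']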
Example #7
def api_job_file(job_id, given_guid):
    # Get configuration
    file_type = Config.get('DisplayType')

    # Look up the path of the job file itself
    db = Database()
    path = db.getFile(job_id, given_guid, file_type)
    db.quit()

    # If it's not found, send a 404
    if path is None:
        abort(404)

    return path
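
The route registration for this handler is not shown in these examples; the following is a hypothetical sketch of how it might be wired into the Flask app (the URL pattern and the send_file call are assumptions, not the project's confirmed API):

from flask import Flask, send_file

app = Flask(__name__)

@app.route('/api/job/<job_id>/<given_guid>')
def api_job_file_route(job_id, given_guid):
    # Hypothetical wrapper: api_job_file() above returns a path (or
    # aborts with 404), which send_file turns into a file response.
    return send_file(api_job_file(job_id, given_guid))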
Example #8
def begin_indexing(args):
    # Check for index job lock
    lock_location = os.path.join(Config.get('Saved_Log_Dir'), "index_lock")
    if not pathlib.Path(lock_location).exists():
        pathlib.Path(lock_location).touch()
    else:
        # Job index already running/did not complete
        if not args.f:
            log("ERROR", "Lock file present in saved log directory")
            return

    # Entry point for indexing
    db = Database()
    jobs_updated = 0
    saved_dir_name = Config.get('Saved_Log_Dir')
    datetime_name = datetime.datetime.now().strftime("%Y-%m-%d")

    log("INFO", "Begin Indexing")

    # Get a dictionary of jobs from the GWMS_Log_Dir directory
    tree = directory_jobs(Config.get('GWMS_Log_Dir'))

    log("INFO", "Directory Listing Completion")

    # Iterate through each job checking the database if it needs to be updated
    for job_name, job_data in tree.items():
        # Skip entries that are missing an err/out file
        if "err_file_path" not in job_data or "out_file_path" not in job_data:
            log("INFO", "Missing ERR/OUT file for entry " +
                job_data["entry_name"] + " - jobID: " + str(job_data["job_id"]))
            continue

        # Check if the current instance is in the database, if not then add it
        final_dir_name = os.path.join(saved_dir_name,
                                      job_data["instance_name"],
                                      job_data["frontend_user"], datetime_name)

        if db.needs_update(job_data):
            # Create the directory if it does not exist
            if not os.path.exists(final_dir_name):
                os.makedirs(final_dir_name)

            # Check if the file has certain logs within it
            found_logs = {
                "MasterLog": False,
                "StartdLog": False,
                "StarterLog": False,
                "StartdHistoryLog": False,
                "glidein_activity": False
            }
            if job_data['err_file_size'] != 0:
                with open(job_data["err_file_path"], 'rb', 0) as file, \
                        mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as s:
                    if s.find(b'MasterLog\n========') != -1:
                        found_logs["MasterLog"] = True
                    if s.find(b'StartdLog\n========') != -1:
                        found_logs["StartdLog"] = True
                    if s.find(b'StarterLog\n========') != -1:
                        found_logs["StarterLog"] = True
                    if s.find(b'StartdHistoryLog\n========') != -1:
                        found_logs["StartdHistoryLog"] = True
                    if s.find(b'=== Encoded XML description of glidein activity ===') != -1:
                        found_logs["glidein_activity"] = True

            # Tar the output and error file
            curr_job_path = os.path.join(
                final_dir_name,
                job_name[0] + "_" + job_name[1] + "_" + job_name[2] + ".tar.gz")
            with tarfile.open(curr_job_path, "w:gz") as tar:
                tar.add(job_data["out_file_path"],
                        arcname=os.path.basename(job_data["out_file_path"]))
                tar.add(job_data["err_file_path"],
                        arcname=os.path.basename(job_data["err_file_path"]))

            # Add/Update it in the database
            db.add_job(job_data, curr_job_path, found_logs)

            # Job added/updated
            jobs_updated += 1

    # Indexing complete
    db.commit()

    # Delete the lock file
    os.remove(lock_location)

    log("INFO", "Jobs added/updated " + str(jobs_updated))
    log("INFO", "Indexing Complete")
Example #9
def verify_password(username, password):
    # Compare the MD5 hex digest of the supplied password against the
    # digest stored for this user in the config
    if username in Config.get('Users'):
        return Config.get('Users').get(username) == hashlib.md5(password.encode()).hexdigest()
    return False
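
A hedged variant of the same check: hmac.compare_digest compares the two digests in constant time, keeping the stored hex-MD5 format unchanged (the function name is hypothetical):

import hashlib
import hmac

def verify_password_ct(stored_digest, password):
    # Hypothetical variant: same MD5 hex digest as above, but compared
    # with hmac.compare_digest to avoid timing side channels.
    supplied = hashlib.md5(password.encode()).hexdigest()
    return hmac.compare_digest(stored_digest, supplied)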
Example #10
def determine_indexing(db):
    # Entry point for indexing
    jobs_updated = 0

    log("INFO", "Begin Indexing")

    # Get a dictionary of jobs from the GWMS_Log_Dir directory
    tree = directory_jobs(Config.get('GWMS_Log_Dir'))

    log("INFO", "Directory Listing Completion")

    # List to be exported
    job_index_list = []

    # Iterate through each job checking the database if it needs to be updated
    for job_name, job_data in tree.items():
        # Skip entries that are missing an .err/.out file
        if "err_file_path" not in job_data or "out_file_path" not in job_data:
            log("INFO", "Missing ERR/OUT file for entry " +
                job_data["entry_name"] + " - jobID: " + str(job_data["job_id"]))
            continue

        if db.needs_update(job_data):
            # Check if the file has certain logs within it
            found_logs = {
                "MasterLog": False,
                "StartdLog": False,
                "StarterLog": False,
                "StartdHistoryLog": False,
                "glidein_activity": False
            }
            if job_data['err_file_size'] != 0:
                with open(job_data["err_file_path"], 'rb', 0) as file, \
                        mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as s:
                    if s.find(b'MasterLog\n========') != -1:
                        found_logs["MasterLog"] = True
                    if s.find(b'StartdLog\n========') != -1:
                        found_logs["StartdLog"] = True
                    if s.find(b'StarterLog\n========') != -1:
                        found_logs["StarterLog"] = True
                    if s.find(b'StartdHistoryLog\n========') != -1:
                        found_logs["StartdHistoryLog"] = True
                    if s.find(b'=== Encoded XML description of glidein activity ===') != -1:
                        found_logs["glidein_activity"] = True

            # Add found logs into the job data
            job_data.update(found_logs)

            # Add the job to list to be indexed
            job_index_list.append(job_data)

            # Job added/updated
            jobs_updated += 1

    log("INFO", "Jobs to be added/updated " + str(jobs_updated))

    return job_index_list
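
For illustration, the shape of one job_index_list entry after job_data.update(found_logs); the keys come from the fields referenced in this section, while the values are made up:

job_data = {
    "instance_name": "instance-a",        # hypothetical values throughout
    "entry_name": "entry-b",
    "job_id": "12345",
    "frontend_user": "frontend",
    "out_file_path": "/var/log/gwms/job.12345.out",
    "err_file_path": "/var/log/gwms/job.12345.err",
    "err_file_size": 2048,
    # merged in by job_data.update(found_logs)
    "MasterLog": True,
    "StartdLog": False,
    "StarterLog": True,
    "StartdHistoryLog": False,
    "glidein_activity": True,
}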
Example #11
    def __init__(self):
        # Connect to SQLite unless specified otherwise in the config file
        if Config.db("type") == "sqlite":
            # SQLite Database
            try:
                os.mkdir(Config.db("dir"))
                log("INFO", "Creating new directory for SQLite DB")
            except FileExistsError:
                pass
            self.conn = sqlite3.connect(os.path.join(Config.db("dir"), "%s.sqlite" % Config.db("db_name")))

            # Check if index table exists
            db_cursor = self.conn.cursor()
            db_cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='file_index';")

            if db_cursor.fetchone() is None:
                # It doesn't, create it
                log("INFO", "Creating new SQLite database")

                script_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                           "sqliteTableCreation.sql")
                with open(script_path, 'r') as script_file:
                    script = script_file.read()
                db_cursor.executescript(script)
        elif Config.db("type") == "mysql":
            # MySQL Database
            if not MYSQL_AVAILABLE:
                log("ERROR", "MySQL database selected but there is no MySQL connector")
                raise ImportError("Module not found: mysql.connector")
            try:
                self.conn = mysql.connector.connect(
                    host=Config.db("host"),
                    user=Config.db("user"),
                    passwd=Config.db("pass"),
                    database=Config.db("db_name")
                )

                mycursor = self.conn.cursor()
            except mysql.connector.errors.ProgrammingError:
                # Create the database
                log("INFO", "Creating new MySQL Database")
                mydb = mysql.connector.connect(
                    host=Config.db("host"),
                    user=Config.db("user"),
                    passwd=Config.db("pass")
                )

                mycursor = mydb.cursor()
                mycursor.execute("CREATE DATABASE " + Config.db("db_name"))

                self.conn = mysql.connector.connect(
                    host=Config.db("host"),
                    user=Config.db("user"),
                    passwd=Config.db("pass"),
                    database=Config.db("db_name")
                )

                mycursor = self.conn.cursor()

            # Check if the file_index table exists (iterating the cursor
            # yields the row tuples returned by SHOW TABLES)
            mycursor.execute("SHOW TABLES")

            if ('file_index',) not in mycursor:
                # Create table
                log("INFO", "Creating MySQL File Index table")
                script_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                           "mysqlTableCreation.sql")
                with open(script_path, 'r') as script_file:
                    script = script_file.read()
                mycursor.execute(script)
        else:
            log("ERROR", "No valid database selected (%s)" % Config.db("type"))
            raise ImportError("Invalid ")