def main():
    """Parse mongod log files, store the results, and launch the visualizer.

    Configuration comes from the command line: one or more log file names
    plus the optional --host, --port, --db, --collection and -v/--verbose
    settings.  Each log line is passed through ``date_parser`` and
    ``traffic_control``; every resulting document is tagged with the number
    of the server it came from and inserted into the ``entries`` collection.
    Afterwards the stored data goes through address matchup (only when more
    than one file name was given), event matchup and frame generation, and
    the frames are handed to ``send_to_js``.

    Returns:
        None.  Problems (missing arguments, unreachable database, unreadable
        log file, nothing to display) are reported and end the run early.
    """
    global LOGGER

    if len(sys.argv) < 2:
        print("Missing argument: please provide a filename")
        return

    # argparse methods
    parser = argparse.ArgumentParser(
        description='Process and visualize log files from mongo servers')
    parser.add_argument('--port')
    parser.add_argument('--host')
    parser.add_argument('--verbose', '-v', action='count')
    parser.add_argument('--version', action='version',
                        version="Running edda version {0}".format(__version__))
    parser.add_argument('--db', '-d')
    parser.add_argument('--collection', '-c')
    parser.add_argument('filename', nargs='+')
    namespace = parser.parse_args()

    # handle captured arguments
    # These options are stored by argparse as plain strings, so they are used
    # whole; indexing them with [0] would keep only their first character.
    port = namespace.port or '27017'
    if namespace.host:
        host = namespace.host
        # a host written as "name:port" overrides any --port value
        place = host.find(":")
        if place >= 0:
            port = host[place + 1:]
            host = host[:place]
    else:
        host = 'localhost'
    uri = "mongodb://" + host + ":" + port

    # generate a unique collection name, if not specified by user
    coll_name = namespace.collection or str(objectid.ObjectId())
    # for easier debugging:
    print("edda is storing data under collection name {0}".format(coll_name))

    # configure logger: every additional -v lowers the logging threshold
    if not namespace.verbose:
        logging.basicConfig(level=logging.ERROR)
    elif namespace.verbose == 1:
        logging.basicConfig(level=logging.WARNING)
    elif namespace.verbose == 2:
        logging.basicConfig(level=logging.INFO)
    elif namespace.verbose >= 3:
        logging.basicConfig(level=logging.DEBUG)
    LOGGER = logging.getLogger(__name__)

    # exit gracefully if no server is running
    try:
        connection = Connection(uri)
    except Exception:
        LOGGER.critical("Unable to connect to {0}, exiting".format(uri))
        return

    if namespace.db:
        db = connection[namespace.db]
    else:
        db = connection.edda
    entries = db[coll_name].entries
    servers = db[coll_name].servers

    # some verbose comments
    LOGGER.info('Connection opened with edda mongod, using {0} on port {1}'
                .format(host, port))
    # report the collection that is actually written to
    LOGGER.debug('Writing to db edda, collection {0}\nPreparing to parse log files'
                 .format(coll_name))

    # read in from each log file
    file_names = []
    for arg in namespace.filename:
        if arg in file_names:
            LOGGER.warning("Skipping duplicate file {0}".format(arg))
            continue
        try:
            log_file = open(arg, 'r')
        except IOError as e:
            print("Error: Unable to read file {0}".format(arg))
            print(e)
            return
        file_names.append(arg)
        counter = 0
        stored = 0
        server_num = -1  # -1: no server number known for this file yet

        LOGGER.warning('Reading from logfile {0}...'.format(arg))
        previous = "none"
        # the with-block closes the log file once its lines are consumed
        with log_file:
            for line in log_file:
                counter += 1
                # handle restart lines
                if '******' in line:
                    LOGGER.debug("Skipping restart message")
                    continue
                # skip blank lines
                if len(line) <= 1:
                    continue
                date = date_parser(line)
                if not date:
                    LOGGER.warning("Line {0} has a malformatted date, skipping"
                                   .format(counter))
                    continue
                doc = traffic_control(line, date)
                if not doc:
                    continue

                # see if we have captured a new server address
                if (doc["type"] == "init" and
                        doc["info"]["subtype"] == "startup"):
                    LOGGER.debug("Found addr {0} for server {1} from startup msg"
                                 .format(doc["info"]["addr"], server_num))
                    # if we don't yet have a server number:
                    if server_num == -1:
                        server_num = get_server_num(
                            str(doc["info"]["addr"]), True, servers)
                    else:
                        assign_address(
                            server_num, str(doc["info"]["addr"]), True, servers)
                if doc["type"] == "status" and "addr" in doc["info"]:
                    LOGGER.debug("Found addr {0} for server {1} from rs_status msg"
                                 .format(doc["info"]["addr"], server_num))
                    if server_num == -1:
                        server_num = get_server_num(
                            str(doc["info"]["server"]), False, servers)
                    else:
                        assign_address(
                            server_num, str(doc["info"]["server"]), False, servers)
                # is there a server number for us yet?  If not, get one
                if server_num == -1:
                    server_num = get_server_num("unknown", False, servers)
                # skip repetitive 'exit' messages
                if doc["type"] == "exit" and previous == "exit":
                    continue
                doc["origin_server"] = server_num
                entries.insert(doc)
                LOGGER.debug('Stored line {0} of {1} to db'.format(counter, arg))
                stored += 1
                previous = doc["type"]
        LOGGER.warning('-' * 64)
        LOGGER.warning('Finished running on {0}'.format(arg))
        LOGGER.info('Stored {0} of {1} log lines to db'.format(stored, counter))
        LOGGER.warning('=' * 64)

    # if no servers or meaningful events were found, exit
    if servers.count() == 0:
        LOGGER.critical("No servers were found, exiting.")
        return
    if entries.count() == 0:
        LOGGER.critical("No meaningful events were found, exiting.")
        return

    LOGGER.info("Finished reading from log files, performing post processing")
    LOGGER.info('-' * 64)
    if len(namespace.filename) > 1:
        LOGGER.info("Attempting to resolve server names")
        result = address_matchup(db, coll_name)
        if result == 1:
            LOGGER.info("Server names successfully resolved")
        else:
            LOGGER.warning("Could not resolve server names")
        LOGGER.info('-' * 64)

    # event matchup
    LOGGER.info("Matching events across documents and logs...")
    events = event_matchup(db, coll_name)
    LOGGER.info("Completed event matchup")
    LOGGER.info('-' * 64)

    # generate frames
    LOGGER.info("Converting events into frames...")
    frames = generate_frames(events, db, coll_name)
    LOGGER.info("Completed frame conversion")
    LOGGER.info('-' * 64)

    # send to server
    LOGGER.info("Sending frames to server...")
    send_to_js(frames, get_server_names(db, coll_name),
               get_admin_info(file_names))
    LOGGER.info('-' * 64)
    LOGGER.info('=' * 64)
    LOGGER.warning('Completed post processing.\nExiting.')
def main():
    """Parse mongod log files (or load a saved .json) and launch the visualizer.

    Configuration comes from the command line: one or more file names plus
    the optional --host, --port, --http_port, --db, --collection, --json and
    -v/--verbose settings.

    File names are handled in order.  A name containing ".json" is loaded as
    previously saved data and stops the scan; a name containing ".gz" is read
    through ``gzip``; anything else is read as a plain text log.  Each log
    line is passed through ``date_parser`` and ``traffic_control`` and the
    resulting documents are inserted into the ``entries`` collection while a
    progress bar is drawn on stdout.

    When no .json file was supplied, frames are generated from the stored
    events and written to "<collection name>.json".  Either way the frames
    are passed to ``send_to_js`` and the two working collections are dropped.

    Returns:
        None.  Problems (missing arguments, unreachable database, nothing to
        display, --json without a .json file) are reported and end the run
        early.
    """
    global LOGGER

    if len(sys.argv) < 2:
        print("Missing argument: please provide a filename")
        return
    mongo_version = ""

    # argparse methods
    parser = argparse.ArgumentParser(
        description='Process and visualize log files from mongo servers')
    parser.add_argument('--port', help="Specify the MongoDb port to use")
    parser.add_argument('--http_port', help="Specify the HTTP Port")
    parser.add_argument('--host', help="Specify host")
    parser.add_argument('--verbose', '-v', action='count')
    parser.add_argument('--json', help="json file")
    parser.add_argument('--version', action='version',
                        version="Running edda version {0}".format(__version__))
    parser.add_argument('--db', '-d', help="Specify DB name")
    parser.add_argument('--collection', '-c')
    parser.add_argument('filename', nargs='+')
    namespace = parser.parse_args()

    # handle captured arguments
    has_json = bool(namespace.json)
    http_port = namespace.http_port or '28000'
    port = namespace.port or '27017'
    if namespace.host:
        host = namespace.host
        # a host written as "name:port" overrides any --port value
        place = host.find(":")
        if place >= 0:
            port = host[place + 1:]
            host = host[:place]
    else:
        host = 'localhost'
    uri = "mongodb://" + host + ":" + port

    # generate a unique collection name, if not specified by user
    coll_name = namespace.collection or str(objectid.ObjectId())

    # configure logger: every additional -v lowers the logging threshold
    if not namespace.verbose:
        logging.basicConfig(level=logging.ERROR)
    elif namespace.verbose == 1:
        logging.basicConfig(level=logging.WARNING)
    elif namespace.verbose == 2:
        logging.basicConfig(level=logging.INFO)
    elif namespace.verbose >= 3:
        logging.basicConfig(level=logging.DEBUG)
    LOGGER = logging.getLogger(__name__)

    # exit gracefully if no server is running
    try:
        connection = Connection(uri)
    except Exception:
        LOGGER.critical("Unable to connect to {0}, exiting".format(uri))
        return

    # --db is a plain string; use it whole rather than its first character
    if namespace.db:
        db = connection[namespace.db]
    else:
        db = connection.edda
    entries = db[coll_name].entries
    servers = db[coll_name].servers

    now = datetime.now()  # start time, used only for timing messages

    # some verbose comments
    LOGGER.info('Connection opened with edda mongod, using {0} on port {1}'
                .format(host, port))

    # read in from each log file
    file_names = []
    json_obj = None
    previous_version = False
    version_change = False
    first = True
    for arg in namespace.filename:
        if ".json" in arg:
            print("\n\nFound file {}, of type 'json'".format(arg))
            if not first:
                print("Ignoring previously processed files"
                      " and loading configuration found in '.json' file.")
            with open(arg, "r") as json_file:
                json_obj = json.load(json_file)
            has_json = True
            break
        first = False

        if arg in file_names:
            LOGGER.warning("\nSkipping duplicate file {0}".format(arg))
            continue

        gzipped = ".gz" in arg
        log_file = None
        total_chars = 0
        try:
            if gzipped:
                # gzipped logs are decompressed fully and split into lines
                with gzip.open(arg, 'r') as gz_file:
                    text = gz_file.read()
                total_chars = len(text)
                file_lines = text.split('\n')
            else:
                log_file = open(arg, 'r')
                file_lines = log_file
        except IOError as e:
            print("\nError: Unable to read file {0}".format(arg))
            print(e)
            # skip this file instead of iterating a stale or missing handle
            continue

        file_names.append(arg)
        counter = 0
        stored = 0
        server_num = -1  # -1: no server number known for this file yet

        LOGGER.warning('Reading from logfile {0}...'.format(arg))
        previous = "none"
        print("\nCurrently parsing log-file: {}".format(arg))
        total_characters = 0

        LOGGER.debug(("Finished processing gzipped with a time of: "
                      + str(datetime.now() - now)))

        # Make sure the progress bar works with gzipped file.
        if gzipped:
            total = int(total_chars * .98)
        else:
            total = os.stat(arg).st_size
        # One bar cell per 1% of the input, but never fewer than one
        # character per cell: inputs under 100 bytes must not divide by zero.
        step = max(1, total // 100)
        last_ratio = -1

        try:
            for line in file_lines:
                ratio = total_characters // step
                total_characters += len(line)
                if ratio >= 99:
                    percent_string = "100"
                else:
                    percent_string = str(total_characters // step)
                # redraw only when the percentage moved, or near the end
                if ratio != last_ratio or ratio >= 99:
                    sys.stdout.flush()
                    sys.stdout.write(
                        "\r[" + "=" * (total_characters // step)
                        + " " * ((total - total_characters) // step)
                        + "]" + percent_string + "%")
                    last_ratio = ratio

                counter += 1
                # handle restart lines
                if '******' in line:
                    LOGGER.debug("Skipping restart message")
                    continue
                # skip blank lines
                if len(line) <= 1:
                    continue
                date = date_parser(line)
                if not date:
                    LOGGER.warning("Line {0} has a malformatted date, skipping"
                                   .format(counter))
                    continue
                doc = traffic_control(line, date)
                if not doc:
                    continue

                # see if we have captured a new server address
                if (doc["type"] == "init" and
                        doc["info"]["subtype"] == "startup"):
                    LOGGER.debug("Found addr {0} for server {1} from startup msg"
                                 .format(doc["info"]["addr"], server_num))
                    # if we don't yet have a server number:
                    if server_num == -1:
                        server_num = get_server_num(
                            str(doc["info"]["addr"]), True, servers)
                    else:
                        assign_address(
                            server_num, str(doc["info"]["addr"]), True, servers)
                if doc["type"] == "status" and "addr" in doc["info"]:
                    LOGGER.debug("Found addr {0} for server {1} from rs_status msg"
                                 .format(doc["info"]["addr"], server_num))
                    if server_num == -1:
                        server_num = get_server_num(
                            str(doc["info"]["server"]), False, servers)
                    else:
                        assign_address(
                            server_num, str(doc["info"]["server"]), False, servers)
                # is there a server number for us yet?  If not, get one
                if server_num == -1:
                    server_num = get_server_num("unknown", False, servers)

                if doc["type"] == "version":
                    update_mongo_version(doc["version"], server_num, servers)
                    # a change is any version differing from the last one seen
                    if previous_version and doc["version"] != mongo_version:
                        version_change = True
                    mongo_version = doc["version"]
                    previous_version = True

                # skip repetitive 'exit' messages
                if doc["type"] == "exit" and previous == "exit":
                    continue
                doc["origin_server"] = server_num
                entries.insert(doc)
                LOGGER.debug('Stored line {0} of {1} to db'.format(counter, arg))
                stored += 1
                previous = doc["type"]
        finally:
            if log_file is not None:
                log_file.close()

        LOGGER.warning('-' * 64)
        LOGGER.warning('Finished running on {0}'.format(arg))
        LOGGER.info('Stored {0} of {1} log lines to db'.format(stored, counter))
        LOGGER.warning('=' * 64)

    LOGGER.debug(("Finished processing everything with a time of: "
                  + str(datetime.now() - now)))
    if version_change:
        print("\n VERSION CHANGE DETECTED!!")
        print(mongo_version)

    # --json promises saved data; without a loaded .json file there is
    # nothing to hand to json_to_dicts below
    if has_json and json_obj is None:
        LOGGER.critical("--json option used, but no .json file given")
        return

    # if no servers or meaningful events were found, exit
    if servers.count() == 0 and not has_json:
        LOGGER.critical("No servers were found, exiting.")
        return
    if entries.count() == 0 and not has_json:
        LOGGER.critical("No meaningful events were found, exiting.")
        return

    LOGGER.info("Finished reading from log files, performing post processing")
    LOGGER.info('-' * 64)
    LOGGER.debug("\nTotal processing time for log files: "
                 + str(datetime.now() - now))

    # Perform address matchup
    if len(namespace.filename) > 1:
        LOGGER.info("Attempting to resolve server names")
        result = address_matchup(db, coll_name)
        if result == 1:
            LOGGER.info("Server names successfully resolved")
        else:
            LOGGER.warning("Could not resolve server names")
        LOGGER.info('-' * 64)

    # Event matchup
    LOGGER.info("Matching events across documents and logs...")
    events = event_matchup(db, coll_name)
    LOGGER.info("Completed event matchup")
    LOGGER.info('-' * 64)

    if not has_json:
        # Create json file
        print("\nEdda is storing data under collection name {0}"
              .format(coll_name))
        frames = generate_frames(events, db, coll_name)
        names = get_server_names(db, coll_name)
        admin = get_admin_info(file_names)
        # closing the file guarantees the dump is flushed to disk
        with open(coll_name + ".json", "w") as large_json:
            json.dump(dicts_to_json(frames, names, admin), large_json)
    else:
        # No need to create json, one already provided.
        frames, names, admin = json_to_dicts(json_obj)
    send_to_js(frames, names, admin, http_port)
    LOGGER.info('-' * 64)
    LOGGER.info('=' * 64)
    LOGGER.warning('Completed post processing.\nExiting.')

    # Drop the collections created for this run.
    db.drop_collection(coll_name + ".servers")
    db.drop_collection(coll_name + ".entries")
def main():
    """Process mongod log files (or replay a saved .json) and show the result.

    Configuration comes from the command line: one or more file names plus
    the optional --host, --port, --http_port, --db, --collection, --json,
    --hint, --ignore_unclaimed and -v/--verbose settings.

    If any file name contains ".json", that file is loaded and its "frames"
    and "admin" entries are sent straight to ``send_to_js``; no log
    processing happens.  Otherwise every distinct file name is read with
    ``extract_log_lines`` and fed to ``process_log``, after which addresses
    and events are matched up, frames are generated and updated with the
    server configuration, the result is written to "<collection name>.json"
    and passed to ``send_to_js``.  ``edda_cleanup`` is called after either
    kind of successful run.

    The --hint value is forwarded to ``address_matchup`` and the
    --ignore_unclaimed value to ``get_server_config``.

    Returns:
        None.  Problems (missing arguments, unreachable database, --json
        without a .json file, nothing to display) are reported and end the
        run early.
    """
    global LOGGER

    if len(sys.argv) < 2:
        print("Missing argument: please provide a filename")
        return

    # parse command-line arguments
    parser = argparse.ArgumentParser(
        description='Process and visualize log files from mongo servers')
    parser.add_argument('--port', help="Specify the MongoDb port to use")
    parser.add_argument('--http_port', help="Specify the HTTP Port")
    parser.add_argument('--hint', help="Provide self-name to network-name"
                        " translations for the servers in this cluster. "
                        "Hint should be provided as a string of the form "
                        "'<self-name1>/<network-name1>,<self-name2>/<network-name2>,...")
    parser.add_argument('--ignore_unclaimed')
    parser.add_argument('--host', help="Specify host")
    parser.add_argument('--json', help="json file")
    parser.add_argument('--verbose', '-v', action='count')
    parser.add_argument('--version', action='version',
                        version="Running edda version {0}".format(__version__))
    parser.add_argument('--db', '-d', help="Specify DB name")
    parser.add_argument('--collection', '-c')
    parser.add_argument('filename', nargs='+')
    namespace = parser.parse_args()

    has_json = namespace.json or False
    http_port = namespace.http_port or '28000'
    port = namespace.port or '27017'
    hint = namespace.hint or ""
    ignore_unclaimed = namespace.ignore_unclaimed or False
    coll_name = namespace.collection or str(objectid.ObjectId())
    if namespace.host:
        host = namespace.host
        # a host written as "name:port" overrides any --port value
        m = host.find(":")
        if m > -1:
            # slice to the end: the port is everything after the colon,
            # not just the first character
            port = host[m + 1:]
            host = host[:m]
    else:
        host = 'localhost'
    uri = "mongodb://" + host + ":" + port

    # configure logging: every additional -v lowers the logging threshold
    if not namespace.verbose:
        logging.basicConfig(level=logging.ERROR)
    elif namespace.verbose == 1:
        logging.basicConfig(level=logging.WARNING)
    elif namespace.verbose == 2:
        logging.basicConfig(level=logging.INFO)
    elif namespace.verbose >= 3:
        logging.basicConfig(level=logging.DEBUG)
    LOGGER = logging.getLogger(__name__)

    # exit gracefully if no server is running
    try:
        connection = MongoClient(uri)
    except Exception:
        LOGGER.critical("Unable to connect to {0}, exiting".format(uri))
        return

    # --db is a plain string; use it whole rather than its first character
    if namespace.db:
        db = connection[namespace.db]
    else:
        db = connection.edda
    entries = db[coll_name].entries
    servers = db[coll_name].servers
    config = db[coll_name].config

    # first, see if we've gotten any .json files
    for filename in namespace.filename:
        if ".json" in filename:
            LOGGER.debug("Loading in edda data from {0}".format(filename))
            with open(filename, "r") as json_file:
                data = json.load(json_file)
            send_to_js(data["frames"], data["admin"], http_port)
            edda_cleanup(db, coll_name)
            return

    # were we supposed to have a .json file?
    if has_json:
        LOGGER.critical("--json option used, but no .json file given")
        return

    # run full log processing, skipping file names given more than once
    processed_files = []
    for filename in namespace.filename:
        if filename in processed_files:
            continue
        logs = extract_log_lines(filename)
        process_log(logs, servers, entries, config)
        processed_files.append(filename)

    # anything to show?
    if servers.count() == 0:
        LOGGER.critical("No servers were found, exiting")
        return
    if entries.count() == 0:
        LOGGER.critical("No meaningful events were found, exiting")
        return

    # match up addresses; failure is reported but does not stop the run
    if len(namespace.filename) > 1:
        if address_matchup(db, coll_name, hint) != 1:
            LOGGER.critical("Could not resolve server names. "
                            "Edda may work better if you provide a hint.")

    # match up events
    events = event_matchup(db, coll_name)

    frames = generate_frames(events, db, coll_name)
    server_config = get_server_config(servers, config, ignore_unclaimed)

    update_frames_with_config(frames, server_config)
    admin = get_admin_info(processed_files)

    LOGGER.critical("\nEdda is storing data under collection name {0}"
                    .format(coll_name))
    # closing the file guarantees the dump is flushed to disk
    with open(coll_name + ".json", "w") as edda_json:
        json.dump(format_json(frames, admin), edda_json)

    send_to_js(frames, admin, http_port)
    edda_cleanup(db, coll_name)
def main():
    """Process mongod log files (or replay a saved .json) and show the result.

    Configuration comes from the command line: one or more file names plus
    the optional --host, --port, --http_port, --db, --collection, --json and
    -v/--verbose settings.

    If any file name contains ".json", that file is loaded and its "frames"
    and "admin" entries are sent straight to ``send_to_js``; no log
    processing happens.  Otherwise every distinct file name is read with
    ``extract_log_lines`` and fed to ``process_log``, after which addresses
    and events are matched up, frames are generated and updated with the
    server configuration, the result is written to "<collection name>.json"
    and passed to ``send_to_js``.  ``edda_cleanup`` is called after either
    kind of successful run.

    Returns:
        None.  Problems (missing arguments, unreachable database, --json
        without a .json file, nothing to display) are reported and end the
        run early.
    """
    global LOGGER

    if len(sys.argv) < 2:
        print("Missing argument: please provide a filename")
        return

    # parse command-line arguments
    parser = argparse.ArgumentParser(
        description='Process and visualize log files from mongo servers')
    parser.add_argument('--port', help="Specify the MongoDb port to use")
    parser.add_argument('--http_port', help="Specify the HTTP Port")
    parser.add_argument('--host', help="Specify host")
    parser.add_argument('--json', help="json file")
    parser.add_argument('--verbose', '-v', action='count')
    parser.add_argument('--version', action='version',
                        version="Running edda version {0}".format(__version__))
    parser.add_argument('--db', '-d', help="Specify DB name")
    parser.add_argument('--collection', '-c')
    parser.add_argument('filename', nargs='+')
    namespace = parser.parse_args()

    has_json = namespace.json or False
    http_port = namespace.http_port or '28000'
    port = namespace.port or '27017'
    coll_name = namespace.collection or str(objectid.ObjectId())
    if namespace.host:
        host = namespace.host
        # a host written as "name:port" overrides any --port value
        m = host.find(":")
        if m > -1:
            # slice to the end: the port is everything after the colon,
            # not just the first character
            port = host[m + 1:]
            host = host[:m]
    else:
        host = 'localhost'
    uri = "mongodb://" + host + ":" + port

    # configure logging: every additional -v lowers the logging threshold
    if not namespace.verbose:
        logging.basicConfig(level=logging.ERROR)
    elif namespace.verbose == 1:
        logging.basicConfig(level=logging.WARNING)
    elif namespace.verbose == 2:
        logging.basicConfig(level=logging.INFO)
    elif namespace.verbose >= 3:
        logging.basicConfig(level=logging.DEBUG)
    LOGGER = logging.getLogger(__name__)

    # exit gracefully if no server is running
    try:
        connection = Connection(uri)
    except Exception:
        LOGGER.critical("Unable to connect to {0}, exiting".format(uri))
        return

    # --db is a plain string; use it whole rather than its first character
    if namespace.db:
        db = connection[namespace.db]
    else:
        db = connection.edda
    entries = db[coll_name].entries
    servers = db[coll_name].servers
    config = db[coll_name].config

    # first, see if we've gotten any .json files
    for filename in namespace.filename:
        if ".json" in filename:
            LOGGER.debug("Loading in edda data from {0}".format(filename))
            with open(filename, "r") as json_file:
                data = json.load(json_file)
            send_to_js(data["frames"], data["admin"], http_port)
            edda_cleanup(db, coll_name)
            return

    # were we supposed to have a .json file?
    if has_json:
        LOGGER.critical("--json option used, but no .json file given")
        return

    # run full log processing, skipping file names given more than once
    processed_files = []
    for filename in namespace.filename:
        if filename in processed_files:
            continue
        logs = extract_log_lines(filename)
        process_log(logs, servers, entries, config)
        processed_files.append(filename)

    # anything to show?
    if servers.count() == 0:
        LOGGER.critical("No servers were found, exiting")
        return
    if entries.count() == 0:
        LOGGER.critical("No meaningful events were found, exiting")
        return

    # match up addresses; failure is reported but does not stop the run
    if len(namespace.filename) > 1:
        if address_matchup(db, coll_name) != 1:
            LOGGER.warning("Could not resolve server names")

    # match up events
    events = event_matchup(db, coll_name)

    frames = generate_frames(events, db, coll_name)
    server_config = get_server_config(servers, config)

    update_frames_with_config(frames, server_config)
    admin = get_admin_info(processed_files)

    LOGGER.critical(
        "\nEdda is storing data under collection name {0}".format(coll_name))
    # closing the file guarantees the dump is flushed to disk
    with open(coll_name + ".json", "w") as edda_json:
        json.dump(format_json(frames, admin), edda_json)

    send_to_js(frames, admin, http_port)
    edda_cleanup(db, coll_name)