Example #1
0
def main():
    """Parse mongod log files, store their events in MongoDB and display them.

    Command-line arguments are read from ``sys.argv``: one or more log
    file names plus the optional ``--host``, ``--port``, ``--db``,
    ``--collection`` and ``--verbose`` flags.  Every line of every log is
    run through ``date_parser`` and ``traffic_control``; the documents
    they produce are inserted into ``<db>.<collection>.entries`` and the
    server each log belongs to is tracked in ``<db>.<collection>.servers``.
    The stored data then goes through address matchup (only when more
    than one file name was given), event matchup and frame generation
    before being handed to ``send_to_js``.

    Returns None.  The function returns early when no argument is given,
    when the database connection fails, when a log file cannot be opened,
    or when no servers or no events were found.
    """
    if len(sys.argv) < 2:
        print("Missing argument: please provide a filename")
        return

    # argparse methods
    parser = argparse.ArgumentParser(
        description='Process and visualize log files from mongo servers')
    parser.add_argument('--port')
    parser.add_argument('--host')
    parser.add_argument('--verbose', '-v', action='count')
    parser.add_argument('--version', action='version',
                        version="Running edda version {0}".format(__version__))
    parser.add_argument('--db', '-d')
    parser.add_argument('--collection', '-c')
    parser.add_argument('filename', nargs='+')
    namespace = parser.parse_args()

    # These options are declared without nargs, so argparse stores each one
    # as a single string.  Use the whole value: indexing it with [0] would
    # keep only its first character.
    port = namespace.port or '27017'
    if namespace.host:
        # a "host:port" value overrides --port
        host, colon, host_port = namespace.host.partition(":")
        if colon:
            port = host_port
    else:
        host = 'localhost'
    uri = "mongodb://" + host + ":" + port

    # generate a unique collection name, if not specified by user
    coll_name = namespace.collection or str(objectid.ObjectId())
    # for easier debugging:
    print("edda is storing data under collection name {0}".format(coll_name))

    # configure logger: each -v lowers the threshold by one level
    verbosity = namespace.verbose or 0
    if verbosity >= 3:
        logging.basicConfig(level=logging.DEBUG)
    elif verbosity == 2:
        logging.basicConfig(level=logging.INFO)
    elif verbosity == 1:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.ERROR)

    global LOGGER
    LOGGER = logging.getLogger(__name__)

    # exit gracefully if no server is running
    try:
        connection = Connection(uri)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not swallowed here.
        LOGGER.critical("Unable to connect to {0}, exiting".format(uri))
        return

    if namespace.db:
        db = connection[namespace.db]
    else:
        db = connection.edda
    entries = db[coll_name].entries
    servers = db[coll_name].servers

    # some verbose comments
    LOGGER.info('Connection opened with edda mongod, using {0} on port {1}'
                .format(host, port))
    # report the collection that is actually written to
    LOGGER.debug('Writing to db edda, collection {0}\nPreparing to parse log files'
                 .format(coll_name))

    # read in from each log file
    file_names = []
    for arg in namespace.filename:
        if arg in file_names:
            LOGGER.warning("Skipping duplicate file {0}".format(arg))
            continue
        try:
            log_file = open(arg, 'r')
        except IOError as e:
            print("Error: Unable to read file {0}".format(arg))
            print(e)
            return
        file_names.append(arg)
        counter = 0         # lines read from this file
        stored = 0          # documents inserted for this file
        server_num = -1     # -1 means "no server number assigned yet"

        LOGGER.warning('Reading from logfile {0}...'.format(arg))
        previous = "none"   # type of the last stored document
        # the with-block guarantees the handle is closed after parsing
        with log_file:
            for line in log_file:
                counter += 1
                # handle restart lines
                if '******' in line:
                    LOGGER.debug("Skipping restart message")
                    continue
                # skip blank lines
                if len(line) <= 1:
                    continue
                date = date_parser(line)
                if not date:
                    LOGGER.warning("Line {0} has a malformatted date, skipping"
                                   .format(counter))
                    continue
                doc = traffic_control(line, date)
                if not doc:
                    continue

                # see if we have captured a new server address
                # if server_num is at -1, this is a new server
                if (doc["type"] == "init" and
                        doc["info"]["subtype"] == "startup"):
                    LOGGER.debug("Found addr {0} for server {1} from startup msg"
                                 .format(doc["info"]["addr"], server_num))
                    # if we don't yet have a server number:
                    if server_num == -1:
                        server_num = get_server_num(
                            str(doc["info"]["addr"]), True, servers)
                    else:
                        assign_address(server_num,
                                       str(doc["info"]["addr"]), True, servers)
                if (doc["type"] == "status" and
                        "addr" in doc["info"]):
                    LOGGER.debug("Found addr {0} for server {1} from rs_status msg"
                                 .format(doc["info"]["addr"], server_num))
                    if server_num == -1:
                        server_num = get_server_num(
                            str(doc["info"]["server"]), False, servers)
                    else:
                        assign_address(server_num,
                                       str(doc["info"]["server"]), False, servers)
                # is there a server number for us yet?  If not, get one
                if server_num == -1:
                    server_num = get_server_num("unknown", False, servers)
                # skip repetitive 'exit' messages
                if doc["type"] == "exit" and previous == "exit":
                    continue
                doc["origin_server"] = server_num
                entries.insert(doc)
                LOGGER.debug('Stored line {0} of {1} to db'.format(counter, arg))
                stored += 1
                previous = doc["type"]
        LOGGER.warning('-' * 64)
        LOGGER.warning('Finished running on {0}'.format(arg))
        LOGGER.info('Stored {0} of {1} log lines to db'.format(stored, counter))
        LOGGER.warning('=' * 64)

    # if no servers or meaningful events were found, exit
    if servers.count() == 0:
        LOGGER.critical("No servers were found, exiting.")
        return
    if entries.count() == 0:
        LOGGER.critical("No meaningful events were found, exiting.")
        return
    LOGGER.info("Finished reading from log files, performing post processing")
    LOGGER.info('-' * 64)

    if len(namespace.filename) > 1:
        LOGGER.info("Attempting to resolve server names")
        result = address_matchup(db, coll_name)
        if result == 1:
            LOGGER.info("Server names successfully resolved")
        else:
            LOGGER.warning("Could not resolve server names")
        LOGGER.info('-' * 64)

    # event matchup
    LOGGER.info("Matching events across documents and logs...")
    events = event_matchup(db, coll_name)
    LOGGER.info("Completed event matchup")
    LOGGER.info('-' * 64)

    # generate frames
    LOGGER.info("Converting events into frames...")
    frames = generate_frames(events, db, coll_name)
    LOGGER.info("Completed frame conversion")
    LOGGER.info('-' * 64)

    # send to server
    LOGGER.info("Sending frames to server...")
    send_to_js(frames, get_server_names(db, coll_name),
               get_admin_info(file_names))
    LOGGER.info('-' * 64)
    LOGGER.info('=' * 64)
    LOGGER.warning('Completed post processing.\nExiting.')
Example #2
0
def main():
    """Parse mongod logs (plain or gzipped) or load a saved .json, then display.

    Command-line arguments are read from ``sys.argv``.  File names are
    handled in order:

    * a name containing ``.json`` is loaded as previously saved edda
      output and ends the scan of the remaining names;
    * a name containing ``.gz`` is read through ``gzip``;
    * anything else is read as a plain-text log.

    Each log line is run through ``date_parser`` and ``traffic_control``
    and the resulting documents are inserted into
    ``<db>.<collection>.entries``; a progress bar is drawn on stdout while
    a file is read.  When no .json file was loaded, the frames are
    generated from the stored data and also written to
    ``<collection>.json``.  The frames are passed to ``send_to_js`` and
    the ``.servers`` / ``.entries`` collections of this run are dropped
    afterwards.

    Returns None.  The function returns early when no argument is given,
    when ``--json`` is used without a .json file name, when the database
    connection fails, or (without a .json file) when no servers or no
    events were found.  A log file that cannot be opened is reported and
    skipped.
    """
    if len(sys.argv) < 2:
        print("Missing argument: please provide a filename")
        return

    # argparse methods
    parser = argparse.ArgumentParser(
        description='Process and visualize log files from mongo servers')
    parser.add_argument('--port', help="Specify the MongoDb port to use")
    parser.add_argument('--http_port', help="Specify the HTTP Port")
    parser.add_argument('--host', help="Specify host")
    parser.add_argument('--verbose', '-v', action='count')
    parser.add_argument('--json', help="json file")
    parser.add_argument('--version', action='version',
                        version="Running edda version {0}".format(__version__))
    parser.add_argument('--db', '-d', help="Specify DB name")
    parser.add_argument('--collection', '-c')
    parser.add_argument('filename', nargs='+')
    namespace = parser.parse_args()

    # handle captured arguments
    has_json = bool(namespace.json)
    http_port = namespace.http_port or '28000'
    port = namespace.port or '27017'
    if namespace.host:
        # a "host:port" value overrides --port
        host, colon, host_port = namespace.host.partition(":")
        if colon:
            port = host_port
    else:
        host = 'localhost'
    uri = "mongodb://" + host + ":" + port

    # generate a unique collection name, if not specified by user
    coll_name = namespace.collection or str(objectid.ObjectId())

    # configure logger: each -v lowers the threshold by one level
    verbosity = namespace.verbose or 0
    if verbosity >= 3:
        logging.basicConfig(level=logging.DEBUG)
    elif verbosity == 2:
        logging.basicConfig(level=logging.INFO)
    elif verbosity == 1:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.ERROR)

    global LOGGER
    LOGGER = logging.getLogger(__name__)

    # --json without any .json file name would leave nothing to load;
    # stop here with a clear message before touching the database.
    if has_json and not any(".json" in arg for arg in namespace.filename):
        LOGGER.critical("--json option used, but no .json file given")
        return

    # exit gracefully if no server is running
    try:
        connection = Connection(uri)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not swallowed here.
        LOGGER.critical("Unable to connect to {0}, exiting".format(uri))
        return

    # --db is stored by argparse as one string; use the whole name
    if namespace.db:
        db = connection[namespace.db]
    else:
        db = connection.edda
    entries = db[coll_name].entries
    servers = db[coll_name].servers

    now = datetime.now()    # start time, used only for the timing messages

    # some verbose comments
    LOGGER.info('Connection opened with edda mongod, using {0} on port {1}'
                .format(host, port))

    # read in from each log file
    file_names = []
    json_obj = None
    mongo_version = ""
    previous_version = False    # True once any version document was seen
    version_change = False
    first = True
    for arg in namespace.filename:
        if ".json" in arg:
            print("\n\nFound file {}, of type 'json'".format(arg))
            if not first:
                print("Ignoring previously processed files"
                      " and loading configuration found in '.json' file.")
            with open(arg, "r") as json_file:
                json_obj = json.load(json_file)
            has_json = True
            break
        first = False
        if arg in file_names:
            LOGGER.warning("\nSkipping duplicate file {0}".format(arg))
            continue

        gzipped = ".gz" in arg
        try:
            if gzipped:
                log_file = gzip.open(arg, 'r')
            else:
                log_file = open(arg, 'r')
        except IOError as e:
            # nothing was opened for this name: report it and move on
            print("\nError: Unable to read file {0}".format(arg))
            print(e)
            continue
        file_names.append(arg)
        counter = 0         # lines read from this file
        stored = 0          # documents inserted for this file
        server_num = -1     # -1 means "no server number assigned yet"

        LOGGER.warning('Reading from logfile {0}...'.format(arg))
        previous = "none"   # type of the last stored document
        print("\nCurrently parsing log-file: {}".format(arg))
        total_characters = 0

        try:
            if gzipped:
                text = log_file.read()
                file_lines = text.split('\n')
                # Make sure the progress bar works with gzipped file.
                total = int(len(text) * .98)
            else:
                file_lines = log_file
                total = os.stat(arg).st_size

            LOGGER.debug(("Finished processing gzipped with a time of: " + str(datetime.now() - now)))

            # One progress-bar cell per 1% of the input; never less than 1
            # so that inputs under 100 bytes do not divide by zero.
            step = max(1, total // 100)
            last_ratio = -1
            for line in file_lines:
                ratio = total_characters // step
                total_characters += len(line)
                if ratio >= 99:
                    percent_string = "100"
                else:
                    percent_string = str(total_characters // step)

                # redraw only when the bar would actually change
                if ratio != last_ratio or ratio >= 99:
                    sys.stdout.flush()
                    sys.stdout.write("\r[" + "=" * (
                        total_characters // step) + " " * (
                        (total - total_characters) // step) + "]" + percent_string + "%")
                    last_ratio = ratio

                counter += 1
                # handle restart lines
                if '******' in line:
                    LOGGER.debug("Skipping restart message")
                    continue
                # skip blank lines
                if len(line) <= 1:
                    continue
                date = date_parser(line)
                if not date:
                    LOGGER.warning("Line {0} has a malformatted date, skipping"
                                   .format(counter))
                    continue
                doc = traffic_control(line, date)
                if not doc:
                    continue

                # see if we have captured a new server address
                # if server_num is at -1, this is a new server
                if (doc["type"] == "init" and
                        doc["info"]["subtype"] == "startup"):
                    LOGGER.debug("Found addr {0} for server {1} from startup msg"
                                 .format(doc["info"]["addr"], server_num))
                    # if we don't yet have a server number:
                    if server_num == -1:
                        server_num = get_server_num(
                            str(doc["info"]["addr"]), True, servers)
                    else:
                        assign_address(server_num,
                                       str(doc["info"]["addr"]), True, servers)
                if (doc["type"] == "status" and
                        "addr" in doc["info"]):
                    LOGGER.debug("Found addr {0} for server {1} from rs_status msg"
                                 .format(doc["info"]["addr"], server_num))
                    if server_num == -1:
                        server_num = get_server_num(
                            str(doc["info"]["server"]), False, servers)
                    else:
                        assign_address(server_num,
                                       str(doc["info"]["server"]), False, servers)
                # is there a server number for us yet?  If not, get one
                if server_num == -1:
                    server_num = get_server_num("unknown", False, servers)

                if doc["type"] == "version":
                    update_mongo_version(doc["version"], server_num, servers)
                    if not previous_version:
                        mongo_version = doc["version"]
                        previous_version = True
                    elif doc["version"] != mongo_version:
                        version_change = True
                        mongo_version = doc["version"]

                # skip repetitive 'exit' messages
                if doc["type"] == "exit" and previous == "exit":
                    continue
                doc["origin_server"] = server_num
                entries.insert(doc)
                LOGGER.debug('Stored line {0} of {1} to db'.format(counter, arg))
                stored += 1
                previous = doc["type"]
        finally:
            log_file.close()
        LOGGER.warning('-' * 64)
        LOGGER.warning('Finished running on {0}'.format(arg))
        LOGGER.info('Stored {0} of {1} log lines to db'.format(stored, counter))
        LOGGER.warning('=' * 64)
    LOGGER.debug(("Finished processing everything with a time of: " + str(datetime.now() - now)))
    if version_change:
        print("\n VERSION CHANGE DETECTED!!")
        print(mongo_version)

    # if no servers or meaningful events were found, exit
    # (not applicable when the data comes from a .json file)
    if not has_json:
        if servers.count() == 0:
            LOGGER.critical("No servers were found, exiting.")
            return
        if entries.count() == 0:
            LOGGER.critical("No meaningful events were found, exiting.")
            return

    LOGGER.info("Finished reading from log files, performing post processing")
    LOGGER.info('-' * 64)

    LOGGER.debug("\nTotal processing time for log files: " + str(datetime.now() - now))

    # Perform address matchup
    if len(namespace.filename) > 1:
        LOGGER.info("Attempting to resolve server names")
        result = address_matchup(db, coll_name)
        if result == 1:
            LOGGER.info("Server names successfully resolved")
        else:
            LOGGER.warning("Could not resolve server names")
        LOGGER.info('-' * 64)

    # Event matchup
    LOGGER.info("Matching events across documents and logs...")
    events = event_matchup(db, coll_name)
    LOGGER.info("Completed event matchup")
    LOGGER.info('-' * 64)

    if has_json:
        # No need to create json, one already provided.
        frames, names, admin = json_to_dicts(json_obj)
    else:
        # Create json file
        print("\nEdda is storing data under collection name {0}".format(coll_name))
        frames = generate_frames(events, db, coll_name)
        names = get_server_names(db, coll_name)
        admin = get_admin_info(file_names)
        # the with-block flushes and closes the file before the data is served
        with open(coll_name + ".json", "w") as large_json:
            json.dump(dicts_to_json(frames, names, admin), large_json)
    send_to_js(frames, names, admin, http_port)
    LOGGER.info('-' * 64)
    LOGGER.info('=' * 64)
    LOGGER.warning('Completed post processing.\nExiting.')

    # Drop the collections created for this run.
    db.drop_collection(coll_name + ".servers")
    db.drop_collection(coll_name + ".entries")
Example #3
0
def main():
    """Process mongod log files (or a saved .json) and serve the visualization.

    Command-line arguments are read from ``sys.argv``.  If any file name
    contains ``.json`` it is loaded and its ``"frames"`` and ``"admin"``
    entries are passed straight to ``send_to_js``; no logs are parsed in
    that case.  Otherwise every distinct file name is run through
    ``extract_log_lines`` and ``process_log``, the stored data goes
    through address matchup (only when more than one file name was given,
    using ``--hint``), event matchup and frame generation, the result is
    written to ``<collection>.json`` and passed to ``send_to_js``.
    ``edda_cleanup`` is called after the frames have been served.

    Returns None.  The function returns early when no argument is given,
    when the database connection fails, when ``--json`` is used without a
    .json file name, or when no servers or no events were found.
    """
    if len(sys.argv) < 2:
        print("Missing argument: please provide a filename")
        return

    # parse command-line arguments
    parser = argparse.ArgumentParser(
        description='Process and visualize log files from mongo servers')
    parser.add_argument('--port', help="Specify the MongoDb port to use")
    parser.add_argument('--http_port', help="Specify the HTTP Port")
    parser.add_argument('--hint', help="Provide self-name to network-name"
                        " translations for the servers in this cluster. "
                        "Hint should be provided as a string of the form "
                        "'<self-name1>/<network-name1>,<self-name2>/<network-name2>,...")
    parser.add_argument('--ignore_unclaimed')
    parser.add_argument('--host', help="Specify host")
    parser.add_argument('--json', help="json file")
    parser.add_argument('--verbose', '-v', action='count')
    parser.add_argument('--version', action='version',
                        version="Running edda version {0}".format(__version__))
    parser.add_argument('--db', '-d', help="Specify DB name")
    parser.add_argument('--collection', '-c')
    parser.add_argument('filename', nargs='+')
    namespace = parser.parse_args()

    has_json = namespace.json or False
    http_port = namespace.http_port or '28000'
    port = namespace.port or '27017'
    hint = namespace.hint or ""
    ignore_unclaimed = namespace.ignore_unclaimed or False
    coll_name = namespace.collection or str(objectid.ObjectId())
    if namespace.host:
        # A "host:port" value overrides --port.  Everything after the
        # first colon is the port, not just the first character.
        host, colon, host_port = namespace.host.partition(":")
        if colon:
            port = host_port
    else:
        host = 'localhost'
    uri = "mongodb://" + host + ":" + port

    # configure logging: each -v lowers the threshold by one level
    verbosity = namespace.verbose or 0
    if verbosity >= 3:
        logging.basicConfig(level=logging.DEBUG)
    elif verbosity == 2:
        logging.basicConfig(level=logging.INFO)
    elif verbosity == 1:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.ERROR)
    global LOGGER
    LOGGER = logging.getLogger(__name__)

    # exit gracefully if no server is running
    try:
        connection = MongoClient(uri)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not swallowed here.
        LOGGER.critical("Unable to connect to {0}, exiting".format(uri))
        return

    # --db is stored by argparse as one string; use the whole name
    if namespace.db:
        db = connection[namespace.db]
    else:
        db = connection.edda
    entries = db[coll_name].entries
    servers = db[coll_name].servers
    config = db[coll_name].config

    # first, see if we've gotten any .json files
    for candidate in namespace.filename:
        if ".json" not in candidate:
            continue
        LOGGER.debug("Loading in edda data from {0}".format(candidate))
        with open(candidate, "r") as json_file:
            data = json.load(json_file)
        send_to_js(data["frames"],
                   data["admin"],
                   http_port)
        edda_cleanup(db, coll_name)
        return

    # were we supposed to have a .json file?
    if has_json:
        LOGGER.critical("--json option used, but no .json file given")
        return

    # run full log processing, each distinct file name once
    processed_files = []
    for filename in namespace.filename:
        if filename in processed_files:
            continue
        logs = extract_log_lines(filename)
        process_log(logs, servers, entries, config)
        processed_files.append(filename)

    # anything to show?
    if servers.count() == 0:
        LOGGER.critical("No servers were found, exiting")
        return
    if entries.count() == 0:
        LOGGER.critical("No meaningful events were found, exiting")
        return

    # match up addresses; a failure is reported but is not fatal
    if len(namespace.filename) > 1:
        if address_matchup(db, coll_name, hint) != 1:
            LOGGER.critical("Could not resolve server names. Edda may work better if you provide a hint.")

    # match up events
    events = event_matchup(db, coll_name)

    frames = generate_frames(events, db, coll_name)
    server_config = get_server_config(servers, config, ignore_unclaimed)
    update_frames_with_config(frames, server_config)
    admin = get_admin_info(processed_files)

    LOGGER.critical("\nEdda is storing data under collection name {0}"
                    .format(coll_name))
    # the with-block flushes and closes the file before the data is served
    with open(coll_name + ".json", "w") as edda_json:
        json.dump(format_json(frames, admin), edda_json)

    send_to_js(frames, admin, http_port)
    edda_cleanup(db, coll_name)
Example #4
0
def main():
    """Process mongod log files (or a saved .json) and serve the visualization.

    Command-line arguments are read from ``sys.argv``.  If any file name
    contains ``.json`` it is loaded and its ``"frames"`` and ``"admin"``
    entries are passed straight to ``send_to_js``; no logs are parsed in
    that case.  Otherwise every distinct file name is run through
    ``extract_log_lines`` and ``process_log``, the stored data goes
    through address matchup (only when more than one file name was
    given), event matchup and frame generation, the result is written to
    ``<collection>.json`` and passed to ``send_to_js``.  ``edda_cleanup``
    is called after the frames have been served.

    Returns None.  The function returns early when no argument is given,
    when the database connection fails, when ``--json`` is used without a
    .json file name, or when no servers or no events were found.
    """
    if len(sys.argv) < 2:
        print("Missing argument: please provide a filename")
        return

    # parse command-line arguments
    parser = argparse.ArgumentParser(
        description='Process and visualize log files from mongo servers')
    parser.add_argument('--port', help="Specify the MongoDb port to use")
    parser.add_argument('--http_port', help="Specify the HTTP Port")
    parser.add_argument('--host', help="Specify host")
    parser.add_argument('--json', help="json file")
    parser.add_argument('--verbose', '-v', action='count')
    parser.add_argument('--version',
                        action='version',
                        version="Running edda version {0}".format(__version__))
    parser.add_argument('--db', '-d', help="Specify DB name")
    parser.add_argument('--collection', '-c')
    parser.add_argument('filename', nargs='+')
    namespace = parser.parse_args()

    has_json = namespace.json or False
    http_port = namespace.http_port or '28000'
    port = namespace.port or '27017'
    coll_name = namespace.collection or str(objectid.ObjectId())
    if namespace.host:
        # A "host:port" value overrides --port.  Everything after the
        # first colon is the port, not just the first character.
        host, colon, host_port = namespace.host.partition(":")
        if colon:
            port = host_port
    else:
        host = 'localhost'
    uri = "mongodb://" + host + ":" + port

    # configure logging: each -v lowers the threshold by one level
    verbosity = namespace.verbose or 0
    if verbosity >= 3:
        logging.basicConfig(level=logging.DEBUG)
    elif verbosity == 2:
        logging.basicConfig(level=logging.INFO)
    elif verbosity == 1:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.ERROR)
    global LOGGER
    LOGGER = logging.getLogger(__name__)

    # exit gracefully if no server is running
    try:
        connection = Connection(uri)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not swallowed here.
        LOGGER.critical("Unable to connect to {0}, exiting".format(uri))
        return

    # --db is stored by argparse as one string; use the whole name
    if namespace.db:
        db = connection[namespace.db]
    else:
        db = connection.edda
    entries = db[coll_name].entries
    servers = db[coll_name].servers
    config = db[coll_name].config

    # first, see if we've gotten any .json files
    for candidate in namespace.filename:
        if ".json" not in candidate:
            continue
        LOGGER.debug("Loading in edda data from {0}".format(candidate))
        with open(candidate, "r") as json_file:
            data = json.load(json_file)
        send_to_js(data["frames"], data["admin"], http_port)
        edda_cleanup(db, coll_name)
        return

    # were we supposed to have a .json file?
    if has_json:
        LOGGER.critical("--json option used, but no .json file given")
        return

    # run full log processing, each distinct file name once
    processed_files = []
    for filename in namespace.filename:
        if filename in processed_files:
            continue
        logs = extract_log_lines(filename)
        process_log(logs, servers, entries, config)
        processed_files.append(filename)

    # anything to show?
    if servers.count() == 0:
        LOGGER.critical("No servers were found, exiting")
        return
    if entries.count() == 0:
        LOGGER.critical("No meaningful events were found, exiting")
        return

    # match up addresses; a failure is reported but is not fatal
    if len(namespace.filename) > 1:
        if address_matchup(db, coll_name) != 1:
            LOGGER.warning("Could not resolve server names")

    # match up events
    events = event_matchup(db, coll_name)

    frames = generate_frames(events, db, coll_name)
    server_config = get_server_config(servers, config)
    update_frames_with_config(frames, server_config)
    admin = get_admin_info(processed_files)

    LOGGER.critical(
        "\nEdda is storing data under collection name {0}".format(coll_name))
    # the with-block flushes and closes the file before the data is served
    with open(coll_name + ".json", "w") as edda_json:
        json.dump(format_json(frames, admin), edda_json)

    send_to_js(frames, admin, http_port)
    edda_cleanup(db, coll_name)