Example #1
def main():
    json_points = []
    client = InfluxDBClient(host=args.influxdb_host,
                            ssl=args.ssl,
                            verify_ssl=False,
                            port=8086,
                            database=args.database)
    logger = configure_logging('parse_mms_metrics')

    extracted_metrics = extract_metrics_from_mms_dump(args.input_file)

    json_points = []
    for tagset, metrics_for_all_timestamps in extracted_metrics.items():
        for timestamp, metrics_for_one_timestamp in metrics_for_all_timestamps.items(
        ):
            json_points.append({
                "timestamp": timestamp,
                "measurement": "cloudmanager_data",
                "tags": {
                    "project": tagset[0],  # Magic number - not great
                    "hostname": tagset[1]
                },
                "fields": metrics_for_one_timestamp
            })
            if len(json_points) >= args.batch_size:
                print(len(json_points))
                write_points(logger, client, json_points, "N/A")
                json_points = []

    write_points(logger, client, json_points, "N/A")
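
Most of the parser examples here hand their batches to a write_points(logger, client, json_points, position) helper that is not included in the snippets. A minimal sketch of such a helper, assuming the standard influxdb-python InfluxDBClient and a simple bounded retry (the TODO comments in later examples suggest the original uses a retry decorator instead):

import time

def write_points(logger, client, json_points, position):
    """Write one batch of points to InfluxDB, retrying a few times on failure.

    Assumed helper, not shown in the snippets. `position` is only used for
    logging (a line number, or "N/A").
    """
    for attempt in range(1, 4):
        try:
            client.write_points(json_points)  # InfluxDBClient accepts a list of point dicts
            logger.info("Wrote {} points (around {})".format(len(json_points), position))
            return
        except Exception as e:
            logger.warning("Write attempt {} failed: {}".format(attempt, e))
            time.sleep(attempt)
    raise Exception("Retries exceeded writing batch at {}".format(position))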
Example #2
def main():
    json_points = []
    client = InfluxDBClient(host=args.influxdb_host, ssl=args.ssl, verify_ssl=False, port=8086, database=args.database)
    logger = configure_logging('parse_mms_metrics')

    extracted_metrics = extract_metrics_from_mms_dump(args.input_file)


    json_points = []
    for tagset, metrics_for_all_timestamps in extracted_metrics.items():
        for timestamp, metrics_for_one_timestamp in metrics_for_all_timestamps.items():
            json_points.append({
                "timestamp": timestamp,
                "measurement": "cloudmanager_data",
                "tags": {
                    "project": tagset[0], # Magic number - not great
                    "hostname": tagset[1]
                },
                "fields": metrics_for_one_timestamp
            })
            if len(json_points) >= args.batch_size:
                print(len(json_points))
                write_points(logger, client, json_points, "N/A")
                json_points = []

    write_points(logger, client, json_points, "N/A")
Example #3
def main():
    logger = configure_logging('parse_serverstatus')
    client = InfluxDBClient(host=args.influxdb_host, ssl=args.ssl, verify_ssl=False, port=8086, database=args.database)
    with open(args.input_file, 'r') as f:
        for line_number, chunk in enumerate(grouper(f, args.batch_size)):
            # print(line_number)
            json_points = []
            for line in chunk:
                # zip_longest will backfill any missing values with None, so we need to handle this, otherwise we'll miss the last batch
                if line:
                    try:
                        server_status_json = json.loads(line)
                        # print((line_number + 0) * _BATCH_SIZE)
                        # print((line_number + 1) * _BATCH_SIZE)
                        common_metric_data = get_metrics("serverstatus", server_status_json, common_metrics, line_number)
                        json_points.append(create_point(*common_metric_data))
                        wiredtiger_metric_data = get_metrics("serverstatus_wiredtiger", server_status_json, wiredtiger_metrics, line_number)
                        json_points.append(create_point(*wiredtiger_metric_data))
                        # for metric_data in get_metrics(server_status_json, common_metrics, line_number):
                        #     import ipdb; ipdb.set_trace()
                        #     print(json_points)
                        #     json_points.append(create_point(*metric_data))
                        # # for metric in get_metrics(server_status_json, wiredtiger_metrics, line_number):
                        #     json_points.append(create_point(*metric))
                        # for metric in get_metrics(server_status_json, mmapv1_metrics, line_number):
                        #     json_points.append(create_point(*metric))
                    except ValueError:
                        logger.error("Line {} does not appear to be valid JSON - \"{}\"".format(line_number, line.strip()))
            write_points(logger, client, json_points, line_number)
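
Several of these examples batch their input with a grouper(iterable, n) helper. The inline comments about zip_longest backfilling missing values with None indicate it is the classic itertools grouper recipe, roughly:

from itertools import zip_longest

def grouper(iterable, n, fillvalue=None):
    """Collect data into fixed-length chunks: grouper('ABCDEFG', 3) -> ABC DEF Gxx."""
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)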
Example #4
def main(argv):
    user = None
    if argv:
        user = argv[0]
    total, largest_process, largest_process_name = get_memory_usage(user)
    series_name = 'default.{0}.memory.usage'.format(settings.SERVER_NAME)
    data = [{
        'measurement': series_name,
        'columns': ['value', 'largest_process', 'largest_process_name', ],
        'points': [[total, largest_process, largest_process_name]], }]
    write_points(data)
Example #5
def main():
    client = InfluxDBClient(host=args.influxdb_host, ssl=args.ssl, verify_ssl=False, port=8086, database=args.database)
    logger = configure_logging('parse_sar_disk')
    sar_timezone = timezone(args.timezone)
    with open(args.input_file, 'r') as f:
        header_split = f.readline().split()
        hostname = header_split[2].strip("()")
        logger.info("Found hostname {}".format(hostname))
        date = header_split[3]
        logger.info("Found date {} (MM/DD/YYYY)".format(date))
        json_points = []
        for line_number, line in enumerate(f):
            if line.strip() and 'Average:' not in line: # We skip any empty lines, and also the "Average:" lines at the end
                if all(header_keyword in line for header_keyword in ['DEV', 'tps', 'rd_sec/s', 'wr_sec/s']):
                    # Skip the header lines - if a device name contains all four of the keywords above, I will eat my hat
                    pass
                else:
                    disk_stats = dict(zip(SAR_DISK_HEADERS, line.split()))
                    values = {}
                    local_timestamp = datetime.strptime("{} {} {}".format(date, disk_stats['timestamp'], disk_stats['AM_OR_PM']), "%m/%d/%Y %I:%M:%S %p")
                    timestamp = sar_timezone.localize(local_timestamp)
                    for metric_name, value in disk_stats.items():
                        if metric_name == 'device':
                            disk_name = value
                        elif metric_name in ['AM_OR_PM', 'timestamp']:
                            pass
                        else:
                            values[metric_name] = float(value)
                    json_points.append({
                        "measurement": "sar_disk",
                        "tags": {
                            "project": args.project,
                            "hostname": hostname,
                            "device": disk_name,
                        },
                        "time": timestamp.isoformat(),
                        "fields": values
                    })
            if len(json_points) >= args.batch_size:
                write_points(logger, client, json_points, line_number)
                json_points = []
        write_points(logger, client, json_points, line_number)
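
Example #5 depends on a SAR_DISK_HEADERS constant that is not shown. Given the keys the loop expects ('timestamp', 'AM_OR_PM', 'device', plus numeric columns) and the `sar -d` header keywords it skips, a plausible definition would look like the following; the real constant may use different field names:

# Assumed column layout for `sar -d` output with 12-hour timestamps.
SAR_DISK_HEADERS = ('timestamp', 'AM_OR_PM', 'device', 'tps',
                    'rd_sec/s', 'wr_sec/s', 'avgrq-sz', 'avgqu-sz',
                    'await', 'svctm', 'util_percent')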
Example #6
def main():
    client = InfluxDBClient(host=args.influxdb_host,
                            ssl=args.ssl,
                            verify_ssl=False,
                            port=8086,
                            database=args.database)
    logger = configure_logging('parse_operations')
    with open(args.input_file, 'r', encoding="latin-1") as f:
        line_count = 0
        for chunk in grouper(f, args.batch_size):
            json_points = []
            for line in chunk:
                # zip_longest will backfill any missing values with None, so we need to handle this, otherwise we'll miss the last batch
                line_count += 1
                if line and line.strip().endswith("ms"):
                    values = {}
                    tags = {
                        'project': args.project,
                        'hostname': args.hostname,
                    }
                    try:
                        tags['operation'] = line.split("] ", 1)[1].split()[0]
                    except IndexError as e:
                        logger.error(
                            "Unable to get operation type - {} - {}".format(
                                e, line))
                        break
                    if tags['operation'] in [
                            'command', 'query', 'getmore', 'insert', 'update',
                            'remove', 'aggregate', 'mapreduce'
                    ]:
                        thread = line.split("[", 1)[1].split("]")[0]
                        # Alternately - print(split_line[3])
                        if tags['operation'] == 'command':
                            tags['command'] = line.split(
                                "command: ")[1].split()[0]
                        if "conn" in thread:
                            tags['connection_id'] = thread
                        split_line = line.split()
                        values['duration_in_milliseconds'] = int(
                            split_line[-1].rstrip('ms'))
                        # TODO 2.4.x timestamps have spaces
                        timestamp = parse(split_line[0])
                        if split_line[1].startswith("["):
                            # TODO - Parse locks from 2.6 style loglines
                            # 2.4 Logline:
                            tags['namespace'] = split_line[3]
                            for stat in reversed(split_line):
                                if "ms" in stat:
                                    pass
                                elif ":" in stat:
                                    key, value = stat.split(":", 1)
                                    values[key] = int(value)
                                elif stat == "locks(micros)":
                                    pass
                                else:
                                    break
                        else:
                            # 3.x logline:
                            tags['namespace'] = split_line[5]
                            # TODO - Should we be splitting on "locks:{" instead?
                            pre_locks, locks = line.rsplit("locks:", 1)
                            # Strip duration from locks
                            locks = locks.rsplit(" ", 1)[0]
                            # Add quotation marks around string, so that it is valid JSON
                            locks = re.sub(r"(\w+):", r'"\g<1>":', locks)
                            locks_document = flatdict.FlatDict(
                                json.loads(locks), delimiter="_")
                            for key, value in locks_document.iteritems():
                                values["locks_{}".format(key)] = int(value)

                            # We work backwards from the end, until we run out of key:value pairs
                            # TODO - Can we assume these are always integers?
                            for stat in reversed(pre_locks.split()):
                                if ":" in stat:
                                    key, value = stat.split(":", 1)
                                    values[key] = int(value)
                                else:
                                    break
                            # TODO - Parse the full query plan for IXSCAN
                            if 'planSummary: ' in line:
                                tags['plan_summary'] = (line.split(
                                    'planSummary: ', 1)[1].split()[0])
                        json_points.append(
                            create_point(timestamp, "operations", values,
                                         tags))
                    else:
                        logger.info(
                            "'{}' is not a recognised operation type - not parsing this line ({})"
                            .format(tags['operation'], line))
            if json_points:
                # TODO - We shouldn't need to wrap this in try/except - should be handled by retry decorator
                try:
                    # TODO - Have a dry-run mode
                    write_points(logger, client, json_points, line_count)
                    pass
                except Exception as e:
                    logger.error("Retries exceeded. Giving up on this point.")
Example #7
                              w_local_phero,
                              w_history,
                              c_greed,
                              cost_func=ants.graph_distance)

    LOGN(
        "\tTransform the resulting nodes permutation into a path on the graph")
    # by finding the shortest path between two cities.
    traj = []
    for start, end in utils.tour(best["permutation"]):
        p, c = shortpath.astar(G, start, end)
        traj += p
    trajs.append(traj)

    with open("d%i_tour.points" % depth, "w") as fd:
        utils.write_points(traj, fd)

    with open("d%i_pheromones.mat" % depth, "w") as fd:
        utils.write_matrix(phero, fd)

########################################################################
# TRIANGULATION
########################################################################

triangulated = []

if ask_for.triangulation:
    with open(ask_for.triangulation) as fd:
        triangulated = triangulation.load(fd)

else:
Example #8
def main():
    client = InfluxDBClient(host=args.influxdb_host, ssl=args.ssl, verify_ssl=False, port=8086, database=args.database)
    logger = configure_logging('parse_operations')
    with open(args.input_file, 'r', encoding="latin-1") as f:
        line_count = 0
        for chunk in grouper(f, args.batch_size):
            json_points = []
            for line in chunk:
                # zip_longest will backfill any missing values with None, so we need to handle this, otherwise we'll miss the last batch
                line_count += 1
                if line and line.strip().endswith("ms"):
                    values = {}
                    tags = {
                        'project': args.project,
                        'hostname': args.hostname,
                    }
                    try:
                        tags['operation'] = line.split("] ", 1)[1].split()[0]
                    except IndexError as e:
                        logger.error("Unable to get operation type - {} - {}".format(e, line))
                        break
                    if tags['operation'] in ['command', 'query', 'getmore', 'insert', 'update', 'remove', 'aggregate', 'mapreduce']:
                        thread = line.split("[", 1)[1].split("]")[0]
                        # Alternately - print(split_line[3])
                        if tags['operation'] == 'command':
                            tags['command'] = line.split("command: ")[1].split()[0]
                        if "conn" in thread:
                            tags['connection_id'] = thread
                        split_line = line.split()
                        values['duration_in_milliseconds'] = int(split_line[-1].rstrip('ms'))
                        # TODO 2.4.x timestamps have spaces
                        timestamp = parse(split_line[0])
                        if split_line[1].startswith("["):
                            # TODO - Parse locks from 2.6 style loglines
                            # 2.4 Logline:
                            tags['namespace'] = split_line[3]
                            for stat in reversed(split_line):
                                if "ms" in stat:
                                    pass
                                elif ":" in stat:
                                    key, value = stat.split(":", 1)
                                    values[key] = int(value)
                                elif stat == "locks(micros)":
                                    pass
                                else:
                                    break
                        else:
                            # 3.x logline:
                            tags['namespace'] = split_line[5]
                            # TODO - Should we be splitting on "locks:{" instead?
                            pre_locks, locks = line.rsplit("locks:", 1)
                            # Strip duration from locks
                            locks = locks.rsplit(" ", 1)[0]
                            # Add quotation marks around string, so that it is valid JSON
                            locks = re.sub(r"(\w+):", r'"\g<1>":', locks)
                            locks_document = flatdict.FlatDict(json.loads(locks), delimiter="_")
                            for key, value in locks_document.iteritems():
                                values["locks_{}".format(key)] = int(value)



                            # We work backwards from the end, until we run out of key:value pairs
                            # TODO - Can we assume these are always integers?
                            for stat in reversed(pre_locks.split()):
                                if ":" in stat:
                                    key, value = stat.split(":", 1)
                                    values[key] = int(value)
                                else:
                                    break
                            # TODO - Parse the full query plan for IXSCAN
                            if 'planSummary: ' in line:
                                tags['plan_summary'] = (line.split('planSummary: ', 1)[1].split()[0])
                        json_points.append(create_point(timestamp, "operations", values, tags))
                    else:
                        logger.info("'{}' is not a recognised operation type - not parsing this line ({})".format(tags['operation'], line))
            if json_points:
                # TODO - We shouldn't need to wrap this in try/except - should be handled by retry decorator
                try:
                    # TODO - Have a dry-run mode
                    write_points(logger, client, json_points, line_count)
                    pass
                except Exception as e:
                    logger.error("Retries exceeded. Giving up on this point.")
Example #9
def main():
    client = InfluxDBClient(host=args.influxdb_host, ssl=args.ssl, verify_ssl=False, port=8086, database=args.database)
    logger = configure_logging('parse_iostat')
    iostat_timezone = timezone(args.timezone)
    with open(args.input_file, 'r') as f:
        if args.hostname:
            hostname = args.hostname
            f.__next__() # Skip the "Linux..." line
        else:
            hostname = re.split(r'[()]', f.readline())[1]
        logger.info("Found hostname {}".format(hostname))
        f.__next__() # Skip the blank line
        line_counter = 2
        for chunk_index, chunk in enumerate(grouper(parse_iostat(f), args.batch_size)):
            json_points = []
            for block in chunk:
                if block:
                    try:
                        for i, line in enumerate(block):
                            line_counter += 1
                            if i == 0:
                                timestamp = iostat_timezone.localize(line)
                                # print(timestamp)
                                # import ipdb;ipdb.set_trace()
                                # print("timestamp is {}".format(timestamp))
                                # TODO: Timezone?
                                # TODO: Better way of storing timestamp
                            elif i == 1: # CPU Metric Headings
                                pass
                            elif i==2:
                                system_stats = dict(zip(system_stat_headers, line.split()))
                                values = {}
                                for metric_name, value in system_stats.items():
                                    values[metric_name] = float(value)
                                json_points.append({
                                    "measurement": "iostat",
                                    "tags": {
                                        "project": args.project,
                                        "hostname": hostname
                                    },
                                    "time": timestamp.isoformat(),
                                    "fields": values
                                })
                            elif i==4: # Disk metric headings
                                pass
                            elif i >= 5 and line:
                                disk_stats = {}
                                device = line.split()[0]
                                disk_stats[device] = dict(zip(disk_stat_headers, line.split()[1:]))

                                for disk_name, metrics in disk_stats.items():
                                    values = {}
                                    for metric_name, value in metrics.items():
                                        # Nasty hack to deal with bad data from Morgan Stanley
                                        # if disk_name not in ['sda', 'sdb', 'dm-0', 'dm-1', 'dm-2']:
                                        #     print(block)
                                        #     raise ValueError
                                        values[metric_name] = float(value)
                                    json_points.append({
                                        "measurement": "iostat",
                                        "tags": {
                                            "project": args.project,
                                            "hostname": hostname,
                                            "device": disk_name,
                                        },
                                        "time": timestamp.isoformat(),
                                        "fields": values
                                    })

                    except ValueError as e:
                        print("Bad output seen - skipping")
                        print(e)
                        print(block)
            write_points(logger, client, json_points, line_counter)
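
The iostat examples (#9 and #15) iterate over parse_iostat(f), which is not shown. From the way each block is consumed (element 0 is localized as a naive datetime, element 1 is the CPU heading, element 2 the CPU values, element 4 the device heading, elements 5 and up the per-device rows), it is presumably a generator that splits the file on timestamp lines. A sketch, assuming `iostat -t` output with MM/DD/YYYY timestamps:

from datetime import datetime

def parse_iostat(lines):
    """Yield one block per iostat sample: [datetime, line, line, ...] (assumed helper)."""
    block = None
    for line in lines:
        try:
            # `iostat -t` prints a timestamp line such as "06/20/2023 01:23:45 PM"
            sample_time = datetime.strptime(line.strip(), "%m/%d/%Y %I:%M:%S %p")
            if block:
                yield block  # emit the previous sample's lines
            block = [sample_time]
        except ValueError:
            if block is not None:
                block.append(line)
    if block:
        yield block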
Example #10
                except ValueError as e:
                    logger.error("Error parsing line - {} - {}".format(e, line))
                    break
                if ' connections now open)' in line:
                    connection_count = line.split("(")[1].split()[0]
                    # TODO - We should be sending an int, not a float - connection counters are integral values
                    json_points.append(create_generic_point('connection_counters', connection_count, timestamp, base_tags))
                if '[initandlisten] connection accepted from' in line:
                    event = OpenConnectionEvent(timestamp, logline)
                    json_points.append(event.get_json())
                elif '] end connection ' in line:
                    event = CloseConnectionEvent(timestamp, logline)
                    json_points.append(event.get_json())
        if json_points:
            # We need to deal with 500: timeout - some kind of retry behaviour
            # TODO - We shouldn't need to wrap this in try/except - should be handled by retry decorator
            try:
                write_points(logger, client, json_points, line_counter)
            except Exception as e:
                logger.error("Retries exceeded. Giving up on this point.")
        else:
            print("empty points!!!")

print("Number of connections: {}".format(len(connections)))
i = 0
for connection in connections.items():
    if i < 10:
        print(connection)
        i += 1
    else:
        break
Example #11
    w_local_phero = 0.1
    c_greed = 0.9
    w_history = 1.0

    best,phero = ants.search( G, max_it, num_ants, decay, w_heur, w_local_phero, w_history, c_greed, cost_func = ants.graph_distance )

    LOGN( "\tTransform the resulting nodes permutation into a path on the graph" )
    # by finding the shortest path between two cities.
    traj = []
    for start,end in utils.tour(best["permutation"]):
        p,c = shortpath.astar( G, start, end )
        traj += p
    trajs.append(traj)

    with open("d%i_tour.points" % depth, "w") as fd:
        utils.write_points( traj, fd )

    with open("d%i_pheromones.mat" % depth, "w") as fd:
        utils.write_matrix( phero, fd )


########################################################################
# TRIANGULATION
########################################################################

triangulated = []

if ask_for.triangulation:
    with open(ask_for.triangulation) as fd:
        triangulated = triangulation.load(fd)
Example #12
def main(argv):
    folder = argv[0]
    total = get_disk_usage(folder)
    series_name = 'default.{0}.disk.usage'.format(settings.SERVER_NAME)
    write_points(series_name, total)
Example #13
# points_path and label_path are assumed (standard KITTI training layout);
# adjust these if your dataset is laid out differently.
points_path = 'kitti/training/velodyne/{}.bin'.format(args.idx)
label_path = 'kitti/training/label_2/{}.txt'.format(args.idx)
calib_path = 'kitti/training/calib/{}.txt'.format(args.idx)

calib = Calibration(calib_path)
points = utils.load_point_clouds(points_path)
bboxes = utils.load_3d_boxes(label_path, args.category)
bboxes = calib.bbox_rect_to_lidar(bboxes)

corners3d = utils.boxes_to_corners_3d(bboxes)
points_flag = utils.is_within_3d_box(points, corners3d)

points_is_within_3d_box = []
for i in range(len(points_flag)):
    p = points[points_flag[i]]
    if len(p)>0:
        points_is_within_3d_box.append(p)
        box = bboxes[i]
        points_canonical, box_canonical = utils.points_to_canonical(p, box)
        points_canonical, box_canonical = utils.lidar_to_shapenet(points_canonical, box_canonical)
        pts_name = 'output/{}_{}_point_{}'.format(args.idx, args.category, i)
        box_name = 'output/{}_{}_bbox_{}'.format(args.idx, args.category, i)
        utils.write_points(points_canonical, pts_name)
        utils.write_bboxes(box_canonical, box_name)

points_is_within_3d_box = np.concatenate(points_is_within_3d_box, axis=0)
points = points_is_within_3d_box

utils.write_points(points, 'output/points')
utils.write_bboxes(bboxes, 'output/bboxes')


Example #14
def main():
    client = InfluxDBClient(host=args.influxdb_host, ssl=args.ssl, verify_ssl=False, port=8086, database=args.database)
    logger = configure_logging('parse_operations')
    with open(args.input_file, 'r') as f:
        line_count = 0
        for chunk in grouper(f, args.batch_size):
            json_points = []
            for line in chunk:
                # zip_longest will backfill any missing values with None, so we need to handle this, otherwise we'll miss the last batch
                line_count += 1
                if line and line.strip().endswith("ms"):
                    values = {}
                    tags = {
                        'project': args.project,
                        'hostname': args.hostname,
                    }
                    try:
                        tags['operation'] = line.split("] ", 1)[1].split()[0]
                    except IndexError as e:
                        logger.error("Unable to parse line - {} - {}".format(e, line))
                        break
                    if tags['operation'] in ['command', 'query', 'getmore', 'insert', 'update', 'remove', 'aggregate', 'mapreduce']:
                        # print(line.strip())
                        thread = line.split("[", 1)[1].split("]")[0]
                        # Alternately - print(split_line[3])
                        if tags['operation'] == 'command':
                            tags['command'] = line.split("command: ")[1].split()[0]
                        if "conn" in thread:
                            tags['connection_id'] = thread
                        split_line = line.split()
                        values['duration_in_milliseconds'] = int(split_line[-1].rstrip('ms'))
                        # TODO 2.4.x timestamps have spaces
                        timestamp = parse(split_line[0])
                        if split_line[1].startswith("["):
                            # 2.4 Logline:
                            tags['namespace'] = split_line[3]
                            for stat in reversed(split_line):
                                if "ms" in stat:
                                    pass
                                elif ":" in stat:
                                    key, value = stat.split(":", 1)
                                    values[key] = int(value)
                                elif stat == "locks(micros)":
                                    pass
                                else:
                                    break
                        else:
                            # 3.x logline:
                            tags['namespace'] = split_line[5]
                            # TODO - Parse locks
                            pre_locks, locks = line.split("locks:{", 1)
                            # We work backwards from the end, until we run out of key:value pairs
                            # TODO - Can we assume these are always integers?
                            for stat in reversed(pre_locks.split()):
                                if ":" in stat:
                                    key, value = stat.split(":", 1)
                                    values[key] = int(value)
                                else:
                                    break
                            # TODO - Parse the full query plan for IXSCAN
                            if 'planSummary: ' in line:
                                tags['plan_summary'] = (line.split('planSummary: ', 1)[1].split()[0])
                        json_points.append(create_point(timestamp, "operations", values, tags))
            if json_points:
                # TODO - We shouldn't need to wrap this in try/except - should be handled by retry decorator
                try:
                    write_points(logger, client, json_points, line_count)
                except Exception as e:
                    logger.error("Retries exceeded. Giving up on this point.")
Example #15
def main():
    client = InfluxDBClient(host=args.influxdb_host,
                            ssl=args.ssl,
                            verify_ssl=False,
                            port=8086,
                            database=args.database)
    logger = configure_logging('parse_iostat')
    iostat_timezone = timezone(args.timezone)
    with open(args.input_file, 'r') as f:
        if args.hostname:
            hostname = args.hostname
            f.__next__()  # Skip the "Linux..." line
        else:
            hostname = re.split(r'[()]', f.readline())[1]
        logger.info("Found hostname {}".format(hostname))
        f.__next__()  # Skip the blank line
        line_counter = 2
        for chunk_index, chunk in enumerate(
                grouper(parse_iostat(f), args.batch_size)):
            json_points = []
            for block in chunk:
                if block:
                    try:
                        for i, line in enumerate(block):
                            line_counter += 1
                            if i == 0:
                                timestamp = iostat_timezone.localize(line)
                                # print(timestamp)
                                # import ipdb;ipdb.set_trace()
                                # print("timestamp is {}".format(timestamp))
                                # TODO: Timezone?
                                # TODO: Better way of storing timestamp
                            elif i == 1:  # CPU Metric Headings
                                pass
                            elif i == 2:
                                system_stats = dict(
                                    zip(system_stat_headers, line.split()))
                                values = {}
                                for metric_name, value in system_stats.items():
                                    values[metric_name] = float(value)
                                json_points.append({
                                    "measurement":
                                    "iostat",
                                    "tags": {
                                        "project": args.project,
                                        "hostname": hostname
                                    },
                                    "time":
                                    timestamp.isoformat(),
                                    "fields":
                                    values
                                })
                            elif i == 4:  # Disk metric headings
                                pass
                            elif i >= 5 and line:
                                disk_stats = {}
                                device = line.split()[0]
                                disk_stats[device] = dict(
                                    zip(disk_stat_headers,
                                        line.split()[1:]))

                                for disk_name, metrics in disk_stats.items():
                                    values = {}
                                    for metric_name, value in metrics.items():
                                        # Nasty hack to deal with bad data from Morgan Stanley
                                        # if disk_name not in ['sda', 'sdb', 'dm-0', 'dm-1', 'dm-2']:
                                        #     print(block)
                                        #     raise ValueError
                                        values[metric_name] = float(value)
                                    json_points.append({
                                        "measurement":
                                        "iostat",
                                        "tags": {
                                            "project": args.project,
                                            "hostname": hostname,
                                            "device": disk_name,
                                        },
                                        "time":
                                        timestamp.isoformat(),
                                        "fields":
                                        values
                                    })

                    except ValueError as e:
                        print("Bad output seen - skipping")
                        print(e)
                        print(block)
            write_points(logger, client, json_points, line_counter)
Example #16
def main():
    client = InfluxDBClient(host=args.influxdb_host,
                            ssl=args.ssl,
                            verify_ssl=False,
                            port=8086,
                            database=args.database)
    logger = configure_logging('parse_operations')
    with open(args.input_file, 'r') as f:
        line_count = 0
        for chunk in grouper(f, args.batch_size):
            json_points = []
            for line in chunk:
                # zip_longest will backfill any missing values with None, so we need to handle this, otherwise we'll miss the last batch
                line_count += 1
                if line and line.strip().endswith("ms"):
                    values = {}
                    tags = {
                        'project': args.project,
                        'hostname': args.hostname,
                    }
                    try:
                        tags['operation'] = line.split("] ", 1)[1].split()[0]
                    except IndexError as e:
                        logger.error("Unable to parse line - {} - {}".format(
                            e, line))
                        break
                    if tags['operation'] in [
                            'command', 'query', 'getmore', 'insert', 'update',
                            'remove', 'aggregate', 'mapreduce'
                    ]:
                        # print(line.strip())
                        thread = line.split("[", 1)[1].split("]")[0]
                        # Alternately - print(split_line[3])
                        if tags['operation'] == 'command':
                            tags['command'] = line.split(
                                "command: ")[1].split()[0]
                        if "conn" in thread:
                            tags['connection_id'] = thread
                        split_line = line.split()
                        values['duration_in_milliseconds'] = int(
                            split_line[-1].rstrip('ms'))
                        # TODO 2.4.x timestamps have spaces
                        timestamp = parse(split_line[0])
                        if split_line[1].startswith("["):
                            # 2.4 Logline:
                            tags['namespace'] = split_line[3]
                            for stat in reversed(split_line):
                                if "ms" in stat:
                                    pass
                                elif ":" in stat:
                                    key, value = stat.split(":", 1)
                                    values[key] = int(value)
                                elif stat == "locks(micros)":
                                    pass
                                else:
                                    break
                        else:
                            # 3.x logline:
                            tags['namespace'] = split_line[5]
                            # TODO - Parse locks
                            pre_locks, locks = line.split("locks:{", 1)
                            # We work backwards from the end, until we run out of key:value pairs
                            # TODO - Can we assume these are always integers?
                            for stat in reversed(pre_locks.split()):
                                if ":" in stat:
                                    key, value = stat.split(":", 1)
                                    values[key] = int(value)
                                else:
                                    break
                            # TODO - Parse the full query plan for IXSCAN
                            if 'planSummary: ' in line:
                                tags['plan_summary'] = (line.split(
                                    'planSummary: ', 1)[1].split()[0])
                        json_points.append(
                            create_point(timestamp, "operations", values,
                                         tags))
            if json_points:
                # TODO - We shouldn't need to wrap this in try/except - should be handled by retry decorator
                try:
                    write_points(logger, client, json_points, line_count)
                except Exception as e:
                    logger.error("Retries exceeded. Giving up on this point.")