def run(self, accessor):
    """Compute per-node and cluster-wide growth trends for a counter.

    For each bucket, fits a regression line (value = a*t + b) per node
    over the sampled window and reports the percentage growth rate
    (end value relative to the intercept).

    Returns a dict: bucket -> [(node, rate_string_or_0), ...], plus a
    "cluster" entry aggregating all nodes when data was available.
    """
    result = {}
    start_cluster = 0
    end_cluster = 0
    for bucket, stats_info in stats_buffer.buckets.iteritems():
        trend = []
        values = stats_info[accessor["scale"]][accessor["counter"]]
        timestamps = values["timestamp"]
        # Normalize timestamps so the regression starts at t = 0.
        timestamps = [x - timestamps[0] for x in timestamps]
        nodeStats = values["nodeStats"]
        samplesCount = values["samplesCount"]
        for node, vals in nodeStats.iteritems():
            a, b = util.linreg(timestamps, vals)
            if b < 1:
                # Intercept too small for a meaningful ratio; report flat.
                trend.append((node, 0))
            else:
                start_cluster += b
                end_val = a * timestamps[-1] + b
                end_cluster += end_val
                rate = (end_val * 1.0 / b - 1.0) * 100
                trend.append((node, util.pretty_float(rate) + "%"))
        result[bucket] = trend
    # BUGFIX: if every node was skipped (all intercepts < 1), start_cluster
    # stays 0 and the original code raised ZeroDivisionError here.
    if len(stats_buffer.buckets) > 0 and start_cluster > 0:
        rate = (end_cluster * 1.0 / start_cluster - 1.0) * 100
        result["cluster"] = util.pretty_float(rate) + "%"
    return result
def run(self, accessor, scale, threshold=None):
    """Flag nodes whose disk-queue counter grows faster than allowed.

    Fits a regression line per node; the slope ``a`` is compared to the
    configured high/low thresholds to emit red (error) or yellow (warn)
    symptoms.

    Returns a dict: bucket -> [(node, slope), ..., ("error"/"warn", [...])].
    """
    result = {}
    # BUGFIX: threshold defaults to None, so the original
    # threshold.has_key(...) raised AttributeError whenever no override
    # dict was supplied.  has_key() is also deprecated/removed in Py3.
    if threshold and "DiskQueueDiagnosis" in threshold:
        threshold_val = threshold["DiskQueueDiagnosis"][accessor["name"]]
    else:
        threshold_val = accessor["threshold"]
    for bucket, stats_info in stats_buffer.buckets.iteritems():
        trend_error = []
        trend_warn = []
        res = []
        values = stats_info[scale][accessor["counter"]]
        timestamps = values["timestamp"]
        timestamps = [x - timestamps[0] for x in timestamps]
        nodeStats = values["nodeStats"]
        samplesCount = values["samplesCount"]
        for node, vals in nodeStats.iteritems():
            a, b = util.linreg(timestamps, vals)
            if a > threshold_val["high"]:
                symptom = accessor["symptom"] % (util.pretty_float(a, 3), threshold_val["high"])
                trend_error.append({"node":node, "level":"red", "value":symptom})
                res.append((node, util.pretty_float(a)))
            elif a > threshold_val["low"]:
                symptom = accessor["symptom"] % (util.pretty_float(a, 3), threshold_val["low"])
                trend_warn.append({"node":node, "level":"yellow", "value":symptom})
                res.append((node, util.pretty_float(a)))
        if len(trend_error) > 0:
            res.append(("error", trend_error))
        if len(trend_warn) > 0:
            res.append(("warn", trend_warn))
        result[bucket] = res
    return result
def run(self, accessor):
    """Average a counter per node and bucket, flagging over-threshold values.

    Returns a dict: bucket -> [(node, avg), ..., ("total", avg),
    ("variance", var), optional ("error", [...])], plus a "cluster"
    average across all buckets.
    """
    result = {}
    cluster = 0
    for bucket, stats_info in stats_buffer.buckets.iteritems():
        values = stats_info[accessor["scale"]][accessor["counter"]]
        timestamps = values["timestamp"]
        timestamps = [x - timestamps[0] for x in timestamps]
        nodeStats = values["nodeStats"]
        samplesCount = values["samplesCount"]
        trend = []
        total = 0
        data = []
        num_error = []
        for node, vals in nodeStats.iteritems():
            #a, b = util.linreg(timestamps, vals)
            # BUGFIX: guard against ZeroDivisionError when no samples exist.
            if samplesCount > 0:
                value = sum(vals) / samplesCount
            else:
                value = 0
            total += value
            if value > accessor["threshold"]:
                num_error.append({"node":node, "value":value})
            trend.append((node, util.pretty_float(value)))
            data.append(value)
        # BUGFIX: skip the per-bucket average when the bucket has no nodes.
        if len(nodeStats) > 0:
            total /= len(nodeStats)
        trend.append(("total", util.pretty_float(total)))
        trend.append(("variance", util.two_pass_variance(data)))
        if len(num_error) > 0:
            trend.append(("error", num_error))
        cluster += total
        result[bucket] = trend
    if len(stats_buffer.buckets) > 0:
        result["cluster"] = util.pretty_float(cluster / len(stats_buffer.buckets))
    return result
def run(self, accessor, scale, threshold=None):
    """Detect disk write queues that drain too slowly while staying long.

    A node is flagged when its average drain rate is below the configured
    drainRate AND its average queue length exceeds diskLength.

    Returns a dict: bucket -> [(node, (drain_avg, len_avg)), ...,
    optional ("error", [...])].
    """
    result = {}
    # BUGFIX: threshold defaults to None; the original called has_key()
    # on it unconditionally and crashed with AttributeError.
    if threshold and "DiskQueueDrainingAnalysis" in threshold:
        threshold_val = threshold["DiskQueueDrainingAnalysis"][accessor["name"]]
    else:
        threshold_val = accessor["threshold"]
    for bucket, stats_info in stats_buffer.buckets.iteritems():
        res = []
        disk_queue_avg_error = []
        drain_values = stats_info[scale][accessor["counter"][0]]
        len_values = stats_info[scale][accessor["counter"][1]]
        nodeStats = drain_values["nodeStats"]
        samplesCount = drain_values["samplesCount"]
        for node, vals in nodeStats.iteritems():
            if samplesCount > 0:
                avg = sum(vals) / samplesCount
            else:
                avg = 0
            # Skip nodes that have no queue-length samples at all.
            if node in len_values["nodeStats"]:
                disk_len_vals = len_values["nodeStats"][node]
            else:
                continue
            if samplesCount > 0:
                len_avg = sum(disk_len_vals) / samplesCount
            else:
                len_avg = 0
            if avg < threshold_val["drainRate"] and len_avg > threshold_val["diskLength"]:
                symptom = accessor["symptom"] % (util.pretty_float(avg),
                                                 threshold_val["drainRate"],
                                                 int(len_avg),
                                                 threshold_val["diskLength"])
                disk_queue_avg_error.append({"node":node, "level":"red", "value":symptom})
            res.append((node, (util.pretty_float(avg), int(len_avg))))
        if len(disk_queue_avg_error) > 0:
            res.append(("error", disk_queue_avg_error))
        result[bucket] = res
    return result
def run(self, accessor, threshold=None):
    """Check active vs replica resident-item ratios per bucket.

    Per node: reports 100 * active / replica (or a textual marker when
    either side is empty).  Per bucket: flags the total when it deviates
    from 100% by more than the threshold.  A "cluster" average over all
    buckets is appended.
    """
    result = {}
    cluster = 0
    # BUGFIX: threshold defaults to None; the original called has_key()
    # on it unconditionally and crashed with AttributeError.
    if threshold and "ActiveReplicaResidentRatio" in threshold:
        threshold_val = threshold["ActiveReplicaResidentRatio"]["activeReplicaResidentRatio"]
    else:
        threshold_val = accessor["threshold"]
    for bucket, stats_info in stats_buffer.buckets.iteritems():
        item_avg = {
            "curr_items": [],
            "vb_replica_curr_items": [],
        }
        num_error = []
        for counter in accessor["counter"]:
            values = stats_info[accessor["scale"]][counter]
            nodeStats = values["nodeStats"]
            samplesCount = values["samplesCount"]
            for node, vals in nodeStats.iteritems():
                if samplesCount > 0:
                    avg = sum(vals) / samplesCount
                else:
                    avg = 0
                item_avg[counter].append((node, avg))
        res = []
        active_total = replica_total = 0
        for active, replica in zip(item_avg['curr_items'], item_avg['vb_replica_curr_items']):
            if replica[1] == 0:
                if active[1] == 0:
                    res.append((active[0], "No active items"))
                else:
                    res.append((active[0], "No replica"))
            else:
                ratio = 100.0 * active[1] / replica[1]
                res.append((active[0], util.pretty_float(ratio) + "%"))
            active_total += active[1]
            replica_total += replica[1]
        if active_total == 0:
            res.append(("total", "no active items"))
        elif replica_total == 0:
            res.append(("total", "no replica items"))
            # memcached buckets have no replicas by design; only flag
            # persistent bucket types.
            if stats_buffer.bucket_info[bucket]["bucketType"] != 'memcached':
                num_error.append({"node":"total", "value": "No replica items"})
        else:
            ratio = active_total * 100.0 / replica_total
            cluster += ratio
            res.append(("total", util.pretty_float(ratio) + "%"))
            delta = abs(100 - ratio)
            if delta > threshold_val:
                symptom = accessor["symptom"].format(util.pretty_float(delta), util.pretty_float(threshold_val))
                num_error.append({"node":"total", "value": symptom})
        if len(num_error) > 0:
            res.append(("error", num_error))
        result[bucket] = res
    if len(stats_buffer.buckets) > 0:
        result["cluster"] = util.pretty_float(cluster / len(stats_buffer.buckets)) + "%"
    return result
def run(self, accessor, scale, threshold=None):
    """Report the most recent sample of a counter for each node/bucket.

    The last sample is formatted according to accessor["unit"]
    ("size", "number" or "time"); when no "unit" key exists the raw
    value is pretty-printed as a float.

    Returns a dict: node -> [(bucket, formatted_value), ...].
    """
    result = {}
    for bucket, stats_info in stats_buffer.buckets.iteritems():
        values = stats_info[scale][accessor["counter"]]
        timestamps = values["timestamp"]
        timestamps = [x - timestamps[0] for x in timestamps]
        nodeStats = values["nodeStats"]
        for node, vals in nodeStats.iteritems():
            # Use the latest sample; 0 when nothing was collected.
            if len(vals):
                avg = vals[-1]
            else:
                avg = 0
            # has_key() is deprecated (removed in Python 3); "in" works
            # identically on both Python 2 and 3.
            if "unit" in accessor:
                if accessor["unit"] == "size":
                    avg = util.size_label(avg)
                elif accessor["unit"] == "number":
                    avg = util.number_label(avg)
                elif accessor["unit"] == "time":
                    avg = util.time_label(avg)
            else:
                avg = util.pretty_float(avg)
            # setdefault avoids the has_key()/double-lookup insert pattern.
            result.setdefault(node, []).append((bucket, avg))
    return result
def run(self, accessor, scale, threshold=None):
    """Report the per-bucket average of a counter for every node.

    Returns a dict: node -> [(bucket, pretty_avg), ...].
    """
    result = {}
    for bucket, stats_info in stats_buffer.buckets.iteritems():
        values = stats_info[scale][accessor["counter"]]
        timestamps = values["timestamp"]
        timestamps = [x - timestamps[0] for x in timestamps]
        nodeStats = values["nodeStats"]
        for node, vals in nodeStats.iteritems():
            if len(vals):
                avg = sum(vals) / len(vals)
            else:
                avg = 0
            # setdefault replaces the deprecated has_key()/double-lookup
            # insert pattern; behavior is identical.
            result.setdefault(node, []).append((bucket, util.pretty_float(avg)))
    return result
def run(self, accessor):
    """Compare active vs replica item counts per node and cluster-wide.

    Per node: reports active/replica as a plain ratio (or "No replica").
    Per bucket: appends a "total" ratio; a "cluster" average over all
    buckets is added at the end.
    """
    result = {}
    cluster = 0
    for bucket, stats_info in stats_buffer.buckets.iteritems():
        item_avg = {
            "curr_items": [],
            "vb_replica_curr_items": [],
        }
        num_error = []
        for counter in accessor["counter"]:
            values = stats_info[accessor["scale"]][counter]
            nodeStats = values["nodeStats"]
            samplesCount = values["samplesCount"]
            for node, vals in nodeStats.iteritems():
                # BUGFIX: guard against ZeroDivisionError when a counter
                # has no samples.
                if samplesCount > 0:
                    avg = sum(vals) / samplesCount
                else:
                    avg = 0
                item_avg[counter].append((node, avg))
        res = []
        active_total = replica_total = 0
        for active, replica in zip(item_avg['curr_items'], item_avg['vb_replica_curr_items']):
            if replica[1] == 0:
                res.append((active[0], "No replica"))
            else:
                ratio = 1.0 * active[1] / replica[1]
                res.append((active[0], util.pretty_float(ratio)))
                if ratio < accessor["threshold"]:
                    num_error.append({"node":active[0], "value": ratio})
            active_total += active[1]
            replica_total += replica[1]
        if replica_total == 0:
            res.append(("total", "no replica"))
        else:
            ratio = active_total * 1.0 / replica_total
            cluster += ratio
            res.append(("total", util.pretty_float(ratio)))
            # BUGFIX: the original compared with "!=", which flags nearly
            # every float ratio; "<" matches the per-node check above.
            # num_error is currently only consumed by the commented-out
            # reporting below, so output is unchanged.
            if ratio < accessor["threshold"]:
                num_error.append({"node":"total", "value": ratio})
        #if len(num_error) > 0:
        #    result[bucket] = {"error" : num_error}
        #else:
        result[bucket] = res
    if len(stats_buffer.buckets) > 0:
        result["cluster"] = util.pretty_float(cluster / len(stats_buffer.buckets))
    return result
def run(self, accessor, threshold=None):
    """Compute per-node cache miss ratios and flag nodes over threshold.

    Returns a dict: bucket -> [(node, pct), ..., ("total", pct),
    ("variance", var), optional ("error", [...])], plus a "cluster"
    average across buckets.
    """
    result = {}
    cluster = 0
    thresholdval = accessor["threshold"]
    # BUGFIX: threshold defaults to None; the original called has_key()
    # on it unconditionally and crashed with AttributeError.
    if threshold and "CacheMissRatio" in threshold:
        thresholdval = threshold["CacheMissRatio"]
    for bucket, stats_info in stats_buffer.buckets.iteritems():
        values = stats_info[accessor["scale"]][accessor["counter"]]
        timestamps = values["timestamp"]
        timestamps = [x - timestamps[0] for x in timestamps]
        nodeStats = values["nodeStats"]
        samplesCount = values["samplesCount"]
        trend = []
        total = 0
        data = []
        num_error = []
        for node, vals in nodeStats.iteritems():
            #a, b = util.linreg(timestamps, vals)
            if samplesCount > 0:
                value = sum(vals) / samplesCount
            else:
                value = 0
            total += value
            if value > thresholdval:
                symptom = accessor["symptom"].format(value, thresholdval)
                num_error.append({"node":node, "value":symptom})
            trend.append((node, util.pretty_float(value) + "%"))
            data.append(value)
        if len(nodeStats) > 0:
            total /= len(nodeStats)
        trend.append(("total", util.pretty_float(total) + "%"))
        trend.append(("variance", util.two_pass_variance(data)))
        if len(num_error) > 0:
            trend.append(("error", num_error))
        cluster += total
        result[bucket] = trend
    if len(stats_buffer.buckets) > 0:
        result["cluster"] = util.pretty_float(cluster / len(stats_buffer.buckets)) + "%"
    return result
def run(self, accessor):
    """Collect a system-statistics snapshot for every known node.

    Returns a list of dicts, one per node, containing host/port plus
    formatted CPU, swap, and item-count figures.
    """
    snapshots = []
    for node, info in stats_buffer.nodes.iteritems():
        system = info["systemStats"]
        snapshots.append({
            "ip": info["host"],
            "port": info["port"],
            "cpuUtilization": util.pretty_float(system["cpu_utilization_rate"]),
            "swapTotal": util.size_label(system["swap_total"]),
            "swapUsed": util.size_label(system["swap_used"]),
            "currentItems": system["currentItems"],
            "currentItemsTotal": system["currentItemsTotal"],
            "replicaCurrentItems": system["replicaCurrentItems"],
        })
    return snapshots
def run(self, accessor, scale, threshold=None):
    """Map each healthy node to its system-statistics snapshot.

    Nodes whose status is anything other than "healthy" are skipped.
    Returns a dict: node -> {ip, port, cpu/swap/item figures}.
    """
    healthy_nodes = {}
    for node, info in stats_buffer.nodes.iteritems():
        if info["status"] != "healthy":
            continue  # ignore nodes that are down or warming up
        system = info["systemStats"]
        healthy_nodes[node] = {
            "ip": info["host"],
            "port": info["port"],
            "cpuUtilization": util.pretty_float(system["cpu_utilization_rate"]),
            "swapTotal": util.size_label(system["swap_total"]),
            "swapUsed": util.size_label(system["swap_used"]),
            "currentItems": system["currentItems"],
            "currentItemsTotal": system["currentItemsTotal"],
            "replicaCurrentItems": system["replicaCurrentItems"],
        }
    return healthy_nodes
def run(self, accessor, threshold=None):
    """Compute the cluster-wide disk-data to RAM-data usage ratio.

    Sums usedByData over all healthy nodes for both hdd and ram and
    returns 100 * hdd / ram as a pretty-printed percentage string
    ("0.0%" when no RAM usage was reported).
    """
    # Removed unused local "result = []" from the original.
    hdd_total = 0
    ram_total = 0
    for node, nodeinfo in stats_buffer.nodes.iteritems():
        if nodeinfo["status"] != "healthy":
            continue
        storage = nodeinfo["StorageInfo"]
        # has_key() is deprecated (removed in Python 3); "in" is the
        # equivalent membership test on both Python 2 and 3.
        if "hdd" in storage:
            hdd_total += storage['hdd']['usedByData']
        if "ram" in storage:
            ram_total += storage['ram']['usedByData']
    if ram_total > 0:
        ratio = hdd_total * 100.0 / ram_total
    else:
        ratio = 0
    return util.pretty_float(ratio) + "%"
def run(self, accessor, scale, threshold=None):
    """Report the per-node average of a counter for every bucket.

    Returns a dict: bucket -> [(node, pretty_avg), ...].
    """
    result = {}
    for bucket, stats_info in stats_buffer.buckets.iteritems():
        values = stats_info[scale][accessor["counter"]]
        # Normalize timestamps relative to the first sample (kept for
        # parity with sibling analyzers; not used below).
        base = values["timestamp"][0]
        rel_times = [t - base for t in values["timestamp"]]
        samples = values["samplesCount"]
        per_node = []
        for node, vals in values["nodeStats"].iteritems():
            avg = sum(vals) / samples if samples > 0 else 0
            per_node.append((node, util.pretty_float(avg)))
        result[bucket] = per_node
    return result
def run(self, accessor):
    """Summarize system-level statistics for every node in the cluster.

    Returns a list of per-node dicts with host/port and formatted
    CPU, swap, and item-count values.
    """
    def _describe(info):
        # Build one report row from a node's stats record.
        stats = info["systemStats"]
        return {
            "ip": info["host"],
            "port": info["port"],
            "cpuUtilization": util.pretty_float(stats["cpu_utilization_rate"]),
            "swapTotal": util.size_label(stats["swap_total"]),
            "swapUsed": util.size_label(stats["swap_used"]),
            "currentItems": stats["currentItems"],
            "currentItemsTotal": stats["currentItemsTotal"],
            "replicaCurrentItems": stats["replicaCurrentItems"],
        }
    return [_describe(info) for node, info in stats_buffer.nodes.iteritems()]
def run(self, accessor, scale, threshold=None):
    """Diagnose read-heavy performance problems from disk-read spikes.

    For each node, extracts abnormal segments of the background-fetch
    counter and warns when a segment also shows a high cache miss rate,
    low active resident ratio, and above-average memory/items/sets —
    i.e. the working set no longer fits in memory.

    Returns a dict: bucket -> [(node, {"value", "raw"}), ...,
    optional ("warn", [...])].
    """
    result = {}
    thresholdval = accessor["threshold"]
    # BUGFIX: threshold defaults to None; the original called has_key()
    # on it unconditionally and crashed with AttributeError.
    if threshold and "PerformanceDiagnosis_diskread" in threshold:
        thresholdval = threshold["PerformanceDiagnosis_diskread"]
    for bucket, stats_info in stats_buffer.buckets.iteritems():
        # Bail out entirely if the primary counter is absent.
        if stats_info[scale].get(accessor["counter"][0], None) is None:
            return result
        diskRead_values = stats_info[scale][accessor["counter"][0]]
        cacheMissRate_values = stats_info[scale][accessor["counter"][1]]
        arr_values = stats_info[scale][accessor["counter"][2]]
        memUsed_values = stats_info[scale][accessor["counter"][3]]
        curr_values = stats_info[scale][accessor["counter"][4]]
        cmdSet_values = stats_info[scale][accessor["counter"][5]]
        timestamps = diskRead_values["timestamp"]
        samplesCount = diskRead_values["samplesCount"]
        trend = []
        num_warn = []
        for node, vals in diskRead_values["nodeStats"].iteritems():
            diskRead_vals = diskRead_values["nodeStats"][node]
            cacheMissRate_vals = cacheMissRate_values["nodeStats"][node]
            arr_vals = arr_values["nodeStats"][node]
            memUsed_vals = memUsed_values["nodeStats"][node]
            curr_vals = curr_values["nodeStats"][node]
            cmdSet_vals = cmdSet_values["nodeStats"][node]
            if samplesCount > 0:
                node_avg_mem = sum(memUsed_vals) / samplesCount
                node_avg_curr = sum(curr_vals) / samplesCount
                node_avg_cmdset = sum(cmdSet_vals) / samplesCount
            else:
                # BUGFIX: the original only zeroed node_avg_curr here,
                # leaving node_avg_mem and node_avg_cmdset undefined and
                # raising NameError in the comparison below.
                node_avg_mem = 0
                node_avg_curr = 0
                node_avg_cmdset = 0
            # Fine grained analysis: inspect each abnormal segment of the
            # disk-read series independently.
            abnormal_segs = util.abnormal_extract(diskRead_vals, thresholdval["ep_bg_fetched"])
            abnormal_vals = []
            for seg in abnormal_segs:
                begin_index = seg[0]
                seg_total = seg[1]
                # Ignore blips shorter than the recurrence threshold.
                if seg_total < thresholdval["recurrence"]:
                    continue
                end_index = begin_index + seg_total
                diskread_avg = sum(diskRead_vals[begin_index : end_index]) / seg_total
                cmr_avg = sum(cacheMissRate_vals[begin_index : end_index]) / seg_total
                arr_avg = sum(arr_vals[begin_index : end_index]) / seg_total
                mem_avg = sum(memUsed_vals[begin_index : end_index]) / seg_total
                curr_avg = sum(curr_vals[begin_index : end_index]) / seg_total
                cmdSet_avg = sum(cmdSet_vals[begin_index : end_index]) / seg_total
                if cmr_avg > thresholdval["ep_cache_miss_rate"] and \
                   arr_avg < thresholdval["vb_active_resident_items_ratio"] and \
                   mem_avg > node_avg_mem and \
                   curr_avg > node_avg_curr and \
                   cmdSet_avg > node_avg_cmdset:
                    symptom = accessor["symptom"] % (util.pretty_datetime(timestamps[begin_index]),
                                                     util.pretty_datetime(timestamps[end_index-1]),
                                                     util.number_label(int(curr_avg)),
                                                     util.size_label(int(mem_avg)),
                                                     util.pretty_float(cmr_avg),
                                                     util.pretty_float(arr_avg),
                                                     util.number_label(int(diskread_avg)))
                    num_warn.append({"node":node, "value":symptom})
                    abnormal_vals.append(diskread_avg)
            if len(abnormal_vals) > 0:
                trend.append((node, {"value" : util.pretty_float(sum(abnormal_vals)/len(abnormal_vals)),
                                     "raw" : abnormal_vals}))
        if len(num_warn) > 0:
            trend.append(("warn", num_warn))
        result[bucket] = trend
    return result
def run(self, accessor, scale, threshold=None):
    """Compare replication backlog size against active item counts.

    Depending on accessor["type"], each node's backlog is evaluated as a
    percentage of active items or as an absolute number against high/low
    thresholds, producing error/warn symptoms.  A per-bucket "total" and
    a cross-bucket "cluster" figure are appended.
    """
    result = {}
    cluster = 0
    # BUGFIX: threshold defaults to None; the original called has_key()
    # on it unconditionally and crashed with AttributeError.
    if threshold and accessor["name"] in threshold:
        threshold_val = threshold[accessor["name"]]
    else:
        threshold_val = accessor["threshold"]
    for bucket, stats_info in stats_buffer.buckets.iteritems():
        item_avg = {
            "curr_items": [],
            "ep_tap_total_total_backlog_size": [],
        }
        num_error = []
        num_warn = []
        for counter in accessor["counter"]:
            values = stats_info[scale][counter]
            nodeStats = values["nodeStats"]
            samplesCount = values["samplesCount"]
            for node, vals in nodeStats.iteritems():
                if samplesCount > 0:
                    avg = sum(vals) / samplesCount
                else:
                    avg = 0
                item_avg[counter].append((node, avg))
        res = []
        active_total = replica_total = 0
        for active, replica in zip(item_avg['curr_items'], item_avg['ep_tap_total_total_backlog_size']):
            if active[1] == 0:
                res.append((active[0], 0))
            else:
                ratio = 100.0 * replica[1] / active[1]
                delta = int(replica[1])
                if accessor["type"] == "percentage" and ratio > threshold_val["percentage"]["high"]:
                    symptom = accessor["symptom"] % (util.pretty_float(ratio), threshold_val["percentage"]["high"])
                    num_error.append({"node":active[0], "value": symptom})
                    res.append((active[0], util.pretty_float(ratio) + "%"))
                elif accessor["type"] == "number" and delta > threshold_val["number"]["high"]:
                    symptom = accessor["symptom"] % (util.number_label(delta), util.number_label(threshold_val["number"]["high"]))
                    num_error.append({"node":active[0], "value": symptom})
                    res.append((active[0], util.number_label(delta)))
                elif accessor["type"] == "percentage" and ratio > threshold_val["percentage"]["low"]:
                    symptom = accessor["symptom"] % (util.pretty_float(ratio), threshold_val["percentage"]["low"])
                    num_warn.append({"node":active[0], "value": symptom})
                    res.append((active[0], util.pretty_float(ratio) + "%"))
                elif accessor["type"] == "number" and delta > threshold_val["number"]["low"]:
                    symptom = accessor["symptom"] % (util.number_label(delta), util.number_label(threshold_val["number"]["low"]))
                    num_warn.append({"node":active[0], "value": symptom})
                    res.append((active[0], util.number_label(delta)))
            active_total += active[1]
            replica_total += replica[1]
        if active_total > 0:
            ratio = replica_total * 100.0 / active_total
            cluster += ratio
            if accessor["type"] == "percentage" and ratio > threshold_val["percentage"]["high"]:
                symptom = accessor["symptom"] % (util.pretty_float(ratio), threshold_val["percentage"]["high"])
                num_error.append({"node":"total", "value": symptom})
                res.append(("total", util.pretty_float(ratio) + "%"))
            elif accessor["type"] == "percentage" and ratio > threshold_val["percentage"]["low"]:
                symptom = accessor["symptom"] % (util.pretty_float(ratio), threshold_val["percentage"]["low"])
                num_warn.append({"node":"total", "value": symptom})
                res.append(("total", util.pretty_float(ratio) + "%"))
        if len(num_error) > 0:
            res.append(("error", num_error))
        if len(num_warn) > 0:
            res.append(("warn", num_warn))
        result[bucket] = res
    if len(stats_buffer.buckets) > 0:
        result["cluster"] = util.pretty_float(cluster / len(stats_buffer.buckets))
    return result