def main(argv):
    """Print 'RESTART' when the /healthz check on the reactor port fails.

    Arguments:
      argv: sequence of strings. argv[0] is the version; argv[1] is the
            test-version flag, given as "True", "False" or an integer literal.

    Exits with the module docstring as the message when fewer than two
    arguments are supplied or when the flag cannot be parsed.
    """
    if len(argv) < 2:
        sys.exit(__doc__)

    # TODO(con): get python2.4 working and remove this
    flag = argv[1]
    named_flags = {"True": 1, "False": 0}
    if flag in named_flags:
        is_testver = named_flags[flag]
    else:
        try:
            is_testver = int(flag)
        except ValueError:
            sys.exit(__doc__)

    ver = argv[0]
    if not is_testver:
        borgmon_mode = borgmon_util.ACTIVE
    else:
        borgmon_mode = borgmon_util.TEST
    util = borgmon_util.BorgmonUtil(ver, mode=borgmon_mode)

    # Now check /healthz; nothing is printed when it is fine.
    reactor_port = util.GetReactorPort()
    if check_healthz.CheckHealthz(reactor_port):
        return
    print('RESTART')
def getGraphURL(self, graph_name, auto_resize, wide_lines):
    """Build the Borgmon graph URL for the graph called graph_name.

    Arguments:
      graph_name: key into self.graphs; that entry's 'vars' become the
                  ';'-separated 'rules' field of the URL.
      auto_resize: when true the 'yrange' field is left empty, otherwise it
                   is set to 10.
      wide_lines: when true the line 'width' field is '6', otherwise '3'.

    Returns:
      GRAPH_URL with the host, port, rules, yrange, width, yformat and gmt
      fields filled in.
    """
    # Fall back to ACTIVE when the install state has no mapped mode.
    mode = borgmon_util.INSTALL_STATE_TO_MODE_MAP.get(
        self.install_state, borgmon_util.ACTIVE)
    util = borgmon_util.BorgmonUtil(self.getGlobalParam("VERSION"), mode=mode)

    yrange = 10
    if auto_resize:
        yrange = ''

    width = '3'
    if wide_lines:
        width = '6'

    url_fields = {
        'borgmon_host': util.GetBorgmonHostname(),
        'borgmon_port': util.GetBorgmonPort(),
        'yrange': yrange,
        'width': width,
        'yformat': "%.1s%c",
    }
    # strftime('%z') yields the local UTC offset as +HHMM / -HHMM; dividing
    # the parsed integer by 100 drops the minutes digits.
    url_fields['gmt'] = int(time.strftime('%z')) / 100
    url_fields['rules'] = ';'.join(self.graphs[graph_name].vars)
    return GRAPH_URL % url_fields
def GetBorgmonVarValueCached(var, expr):
    """Return the value of a Borgmon expression, going through the cache.

    The cache is consulted first under the key 'var'; Borgmon is only asked
    when the cache has nothing for it. A fresh Borgmon reply is written back
    to the cache before it is returned.

    Arguments:
      var: string: cache key to use
      expr: string: borgmon expression

    Returns:
      string: the cached value or the Borgmon reply converted with str(),
      or None if the active version is unknown or Borgmon gave no reply
    """
    result = ReadDataFromCache(var)
    if not result:
        # Cache miss: anything short of a successful fetch yields None.
        result = None
        version = GetActiveVersion()
        if version is not None:
            # Assume we are in active mode (borgmon port 4911)
            # There could be a problem here for SNMP in test mode
            util = borgmon_util.BorgmonUtil(version, mode=borgmon_util.ACTIVE)
            reply = util.GetAndEvalBorgmonExpr(expr)
            if reply is not None:
                result = str(reply)
                WriteDataToCache(var, result)
    return result
def _getuservars(self):
    """Return the user-facing variables that we monitor, as a 2-tuple.

    Returns:
      (text, var_map) where
        text: '0\n' followed by one 'name=value' line per variable, each
              terminated by a newline.
        var_map: dict mapping the name (e.g. 'num_urls_error') to the value
                 Borgmon returned for it.
      Variables for which Borgmon returns a false value are left out of both.
    """
    logging.info('borgmon_handler :: getuservars')

    # Sample the clock once so the range window and the min() baseline of the
    # "since midnight" expressions all cover the same number of seconds.
    secs = secsSinceMidnight()

    expressions = {
        'num_urls_in_index_total': 'num_urls_in_index_total',
        'num_urls_available_total': 'num_urls_available_total',
        'interval_pages_per_sec_total': 'interval_pages_per_sec_total',
        'num_urls_error':
            'num_urls_error[%ds] - min(num_urls_error[%ds])' % (secs, secs),
        'num_urls_error_now': 'num_urls_error',
        'gws_searches_per_minute': 'gws_searches_per_minute',
        'num_urls_crawled_today':
            'num_urls_crawled[%ds] - min(num_urls_crawled[%ds])' % (secs, secs),
        'num_urls_crawled_now': 'num_urls_crawled',
        'num_urls_served': 'num_urls_served',
        'scheduled_crawl_start_time': 'scheduled_crawl_start_time',
        'scheduled_crawl_end_time': 'scheduled_crawl_end_time',
        'scheduled_crawl_is_running': 'scheduled_crawl_is_running',
        'doc_bytes_received': 'doc_bytes_received',
    }

    # One BorgmonUtil serves every lookup; its arguments do not change
    # between variables.
    # NOTE(review): assumes a BorgmonUtil instance can evaluate several
    # expressions in a row -- confirm against borgmon_util.
    bu = borgmon_util.BorgmonUtil(self.version, mode=self.borgmon_mode)

    var_map = {}
    for name, expr in expressions.items():
        value = bu.GetAndEvalBorgmonExpr(expr)
        # NOTE(review): a false reply (None, but also 0 or '') is dropped, as
        # it always has been -- confirm that hiding zero values is intended.
        if value:
            var_map[name] = value

    lines = ['%s=%s' % (name, str(value)) for name, value in var_map.items()]
    ret = '0\n' + '\n'.join(lines) + '\n'
    logging.info('returning ' + ret)
    return (ret, var_map)
def makegraph(self, graph_name):
    """Generates the sum urls crawled and queries per minute graph.

    The graph is fetched from the URL built by self.cfg.getGraphURL() and
    stored with self.cfg.setGraph().

    For queries per minute graph - if the max QPM is less than 10 then we
    don't autoresize. This is to show foo bar queries at the bottom of the
    graph.

    Arguments:
      graph_name: name of the graph to generate, e.g. 'QUERIES_PER_MINUTE'.

    Returns:
      0 if the graph was stored, and also when Borgmon answered with its
        known error image (an error is logged and the stored graph is left
        untouched);
      1 if no graph URL could be built or the fetch returned no data.

    TODO(meghna) Should we use GRAPH_DELAY_SECONDS somewhere here ?
    """
    logging.info('borgmon makegraph ' + graph_name)

    auto_resize = 1
    if graph_name in ('QUERIES_PER_MINUTE', 'QUERIES_PER_MINUTE_THUMBNAIL'):
        bu = borgmon_util.BorgmonUtil(self.version, mode=self.borgmon_mode)
        value = bu.GetAndEvalBorgmonExpr('max(gws_searches_per_minute[24h])')
        if not value or value < 10:
            auto_resize = 0

    wide_lines = 0
    if graph_name in ('QUERIES_PER_MINUTE_THUMBNAIL',
                      'SUM_URLS_TOTAL_THUMBNAIL'):
        wide_lines = 1

    graph_url = self.cfg.getGraphURL(graph_name, auto_resize, wide_lines)
    # Let logging do the formatting: concatenating here would raise a
    # TypeError on a None URL before the guard below could return 1.
    logging.info('graph_url=%s', graph_url)
    if not graph_url:
        return 1

    graph_pipe = urllib.urlopen(graph_url)
    try:
        graph = graph_pipe.read()
    finally:
        # Release the connection even when the read fails.
        graph_pipe.close()
    if not graph:
        return 1

    # Checking for "Plot Error" here by comparing the md5sums:
    # make sure the graph doesn't have the same md5sum as known error text,
    # if it does log an error but don't overwrite the graph.
    # This is a very terrible hack.
    tmp_graph = 'TMP_GRAPH'
    self.cfg.setGraph(tmp_graph, graph)
    cmd = 'md5sum ' + self.cfg.getGraphFileName(tmp_graph)
    md5sum_graph_info = commands.getoutput(cmd)
    # md5sum prints "<digest>  <file name>"; only the digest is compared.
    md5sum_graph = md5sum_graph_info.split()[0]
    if MD5SUM_ERROR_GRAPH == md5sum_graph:
        logging.error('Borgmon not yet ready to display graph ' + graph_name)
        return 0

    self.cfg.setGraph(graph_name, graph)
    return 0
def CheckNoMasterBorgmonAlert(ver, testver):
    """Report whether the borgmon alert 'GFSMaster_NoMaster' is on.

    Arguments:
      ver: version string, e.g. '4.6.5'
      testver: 0 - not a test version.
               1 - test version.

    Returns:
      true (1) - the alert summary mentions GFSMaster_NoMaster.
      0 - otherwise, including when there is no alert summary at all.
    """
    if not testver:
        mode = borgmon_util.ACTIVE
    else:
        mode = borgmon_util.TEST

    summary = borgmon_util.BorgmonUtil(ver, mode=mode).GetBorgmonAlertSummary()
    if not summary:
        return 0
    # find() gives -1 when the alert name is absent, an index >= 0 otherwise.
    return summary.find('GFSMaster_NoMaster') >= 0
def main(argv):
    """Print 'RESTART' when this is the borgmon master and /healthz fails.

    Arguments:
      argv: sequence of strings. argv[0] is the version; argv[1] is the
            test-version flag, given as "True", "False" or an integer literal.

    Exits with the module docstring as the message on bad arguments, and with
    status 0 when this node is not the borgmon master of a multi-node cluster
    or when the /healthz check passes.
    """
    if len(argv) < 2:
        sys.exit(__doc__)

    # TODO(con): get python2.4 working and remove this
    flag = argv[1]
    named_flags = {"True": 1, "False": 0}
    if flag in named_flags:
        is_testver = named_flags[flag]
    else:
        try:
            is_testver = int(flag)
        except ValueError:
            sys.exit(__doc__)

    ver = argv[0]

    # Create a borgmon_util object to do work for us
    if not is_testver:
        mode = borgmon_util.ACTIVE
    else:
        mode = borgmon_util.TEST
    util = borgmon_util.BorgmonUtil(ver, mode=mode)

    # First check if we are a cluster and this is not borgmon master node.
    # If so, exit. Host names are only resolved on a multi-node install.
    if core_utils.GetTotalNodes() > 1:
        master_addr = socket.gethostbyname(util.GetBorgmonHostname())
        local_addr = socket.gethostbyname(socket.gethostname())
        if master_addr != local_addr:
            sys.exit(0)

    # Now check that /healthz is ok. If so, exit
    borgmon_port = util.GetBorgmonPort()
    if check_healthz.CheckHealthz(borgmon_port):
        sys.exit(0)

    # This is the borgmon master and /healthz failed. print RESTART
    print('RESTART')