def main(argv):
    """Print 'RESTART' when the reactor's /healthz check fails.

    Arguments:
      argv: sequence of strings.  argv[0] is the version; argv[1] is the
            test-version flag, given as "True", "False" or an integer
            literal.

    Exits through sys.exit(__doc__) (the module usage text) when fewer than
    two arguments are supplied or the flag cannot be interpreted.
    """
    if len(argv) < 2:
        sys.exit(__doc__)

    #TODO(con): get python2.4 working and remove this
    flag = argv[1]
    if flag == "True":
        is_testver = 1
    elif flag == "False":
        is_testver = 0
    else:
        try:
            is_testver = int(flag)
        except ValueError:
            sys.exit(__doc__)

    ver = argv[0]

    if is_testver:
        borgmon_mode = borgmon_util.TEST
    else:
        borgmon_mode = borgmon_util.ACTIVE
    bu = borgmon_util.BorgmonUtil(ver, mode=borgmon_mode)

    # Now check /healthz
    if not check_healthz.CheckHealthz(bu.GetReactorPort()):
        # Parenthesized single-argument form: prints the same text under the
        # Python 2 print statement and is also valid Python 3 syntax.
        print('RESTART')
Example #2
0
 def getGraphURL(self, graph_name, auto_resize, wide_lines):
     """Build the Borgmon URL used to render the graph named graph_name.

     Arguments:
       graph_name:  key into self.graphs; that entry's 'vars' become the
                    ';'-separated 'rules' field of the URL.
       auto_resize: true value - leave the y range empty; false value -
                    use a y range of 10.
       wide_lines:  true value - line width '6'; false value - '3'.

     Returns:
       GRAPH_URL filled in with the Borgmon host/port, rules, y range,
       line width, y format and the local GMT offset.
     """
     mode = borgmon_util.INSTALL_STATE_TO_MODE_MAP.get(
         self.install_state, borgmon_util.ACTIVE)  # default to ACTIVE
     bu = borgmon_util.BorgmonUtil(self.getGlobalParam("VERSION"),
                                   mode=mode)
     host = bu.GetBorgmonHostname()
     port = bu.GetBorgmonPort()
     if auto_resize:
         yrange = ''
     else:
         yrange = 10
     if wide_lines:
         width = '6'
     else:
         width = '3'
     yformat = "%.1s%c"
     # '%z' yields e.g. '-0800'; floor-dividing by 100 keeps the hour part.
     # '//' is the explicit spelling of what '/' does to two ints on
     # Python 2.
     # NOTE(review): offsets with a minutes component (e.g. '+0530') lose
     # the minutes here - confirm that whole hours are all GRAPH_URL needs.
     gmt_offset = int(time.strftime('%z')) // 100
     return GRAPH_URL % {
         'borgmon_host': host,
         'borgmon_port': port,
         'rules': ';'.join(self.graphs[graph_name].vars),
         'yrange': yrange,
         'width': width,
         'yformat': yformat,
         'gmt': gmt_offset,
     }
def GetBorgmonVarValueCached(var, expr):
  """Return the value of a Borgmon expression, consulting the cache first.

  The cache is looked up under 'var'.  On a hit Borgmon is not contacted at
  all; on a miss 'expr' is evaluated by Borgmon and the answer is stored in
  the cache under 'var' before being returned.

  Arguments:
    var: string: cache key to use
    expr: string: borgmon expression

  Returns:
    string: the cached value, or the Borgmon reply converted with str();
            None if there is no active version or Borgmon gave no reply
  """
  cached = ReadDataFromCache(var)
  if cached:
    return cached

  active_version = GetActiveVersion()
  if active_version is None:
    return None

  # Assume we are in active mode (borgmon port 4911)
  # There could be a problem here for SNMP in test mode
  util = borgmon_util.BorgmonUtil(active_version, mode=borgmon_util.ACTIVE)
  answer = util.GetAndEvalBorgmonExpr(expr)
  if answer is not None:
    answer_text = str(answer)
    WriteDataToCache(var, answer_text)
    return answer_text
  return None
    def _getuservars(self):
        """Return the monitored user-facing variables in two forms, as a 2-tuple.

        The first entry is a string: a '0' line followed by one 'name=value'
        line per variable.  The second is a dict mapping the name
        (e.g. 'num_urls_error') to the value.  A variable whose Borgmon
        lookup yields a false value is left out of both.
        """

        logging.info('borgmon_handler :: getuservars')
        # Sample the clock once so that every "since midnight" window built
        # below has the same width, and call the helper directly instead of
        # going through eval() on a constant string.
        secs = secsSinceMidnight()
        expressions = {
            'num_urls_in_index_total':
            'num_urls_in_index_total',
            'num_urls_available_total':
            'num_urls_available_total',
            'interval_pages_per_sec_total':
            'interval_pages_per_sec_total',
            'num_urls_error':
            'num_urls_error[%ds] - min(num_urls_error[%ds])' % (secs, secs),
            'num_urls_error_now':
            'num_urls_error',
            'gws_searches_per_minute':
            'gws_searches_per_minute',
            'num_urls_crawled_today':
            'num_urls_crawled[%ds] - min(num_urls_crawled[%ds])' %
            (secs, secs),
            'num_urls_crawled_now':
            'num_urls_crawled',
            'num_urls_served':
            'num_urls_served',
            'scheduled_crawl_start_time':
            'scheduled_crawl_start_time',
            'scheduled_crawl_end_time':
            'scheduled_crawl_end_time',
            'scheduled_crawl_is_running':
            'scheduled_crawl_is_running',
            'doc_bytes_received':
            'doc_bytes_received',
        }

        # The helper depends only on self, so build it once rather than once
        # per variable.
        bu = borgmon_util.BorgmonUtil(self.version, mode=self.borgmon_mode)
        var_map = {}
        for name in expressions.keys():
            value = bu.GetAndEvalBorgmonExpr(expressions[name])
            # NOTE(review): this truthiness test also drops a reading that
            # is legitimately 0 - confirm whether 'is not None' was intended.
            if value:
                var_map[name] = value

        lines = ["%s=%s" % (name, str(var_map[name]))
                 for name in var_map.keys()]
        ret = '0\n' + '\n'.join(lines) + '\n'
        logging.info('returning ' + ret)
        return (ret, var_map)
    def makegraph(self, graph_name):
        """Fetch the graph called graph_name from Borgmon and store it.

        Generates the sum urls crawled and queries per minute graphs.

        Returns 1 if no graph URL could be built or the fetch returned no
        data.  Returns 0 otherwise, including the case where the fetched
        image matches the known Borgmon error graph: that case is logged as
        an error and the stored graph is left untouched.

        For the queries per minute graphs - if the max QPM is less than 10
        then we dont autoresize. This is to show foo bar queries at the
        bottom of the graph.
        TODO(meghna) Should we use GRAPH_DELAY_SECONDS somewhere here ?
        """
        logging.info('borgmon makegraph ' + graph_name)
        auto_resize = 1
        if graph_name in ('QUERIES_PER_MINUTE',
                          'QUERIES_PER_MINUTE_THUMBNAIL'):
            bu = borgmon_util.BorgmonUtil(self.version, mode=self.borgmon_mode)
            value = bu.GetAndEvalBorgmonExpr(
                'max(gws_searches_per_minute[24h])')
            if not value or value < 10:
                auto_resize = 0
        wide_lines = 0
        if graph_name in ('QUERIES_PER_MINUTE_THUMBNAIL',
                          'SUM_URLS_TOTAL_THUMBNAIL'):
            wide_lines = 1
        graph_url = self.cfg.getGraphURL(graph_name, auto_resize, wide_lines)
        # %-style logging arguments instead of '+' so that a None URL is
        # logged and then rejected below rather than raising TypeError here.
        logging.info('graph_url=%s', graph_url)
        if not graph_url:
            return 1

        graph_pipe = urllib.urlopen(graph_url)
        try:
            graph = graph_pipe.read()
        finally:
            # Release the connection even when read() fails.
            graph_pipe.close()
        if not graph:
            return 1

        # Checking for "Plot Error" here by comparing the md5sums:
        # make sure the graph doesn't have the same md5sum as known error text,
        # if it does log an error but don't overwrite the graph.
        # This is a very terrible hack.
        tmp_graph = 'TMP_GRAPH'
        self.cfg.setGraph(tmp_graph, graph)
        cmd = 'md5sum ' + self.cfg.getGraphFileName(tmp_graph)
        md5sum_graph_info = commands.getoutput(cmd)
        # The first whitespace-separated field of the command output is
        # taken as the digest.
        md5sum_graph = md5sum_graph_info.split()[0]
        if (MD5SUM_ERROR_GRAPH == md5sum_graph):
            logging.error('Borgmon not yet ready to display graph ' +
                          graph_name)
            return 0
        self.cfg.setGraph(graph_name, graph)
        return 0
Example #6
0
def CheckNoMasterBorgmonAlert(ver, testver):
    """Report whether the Borgmon alert 'GFSMaster_NoMaster' is firing.

    Arguments:
      ver:     version string, e.g. '4.6.5'
      testver: 0 - not a test version. 1 - test version.

    Returns:
      A true value if the alert summary mentions GFSMaster_NoMaster;
      a false value otherwise (0 when there is no alert summary at all).
    """
    # Start from ACTIVE and switch to TEST only for a test version.
    selected_mode = borgmon_util.ACTIVE
    if testver:
        selected_mode = borgmon_util.TEST

    util = borgmon_util.BorgmonUtil(ver, mode=selected_mode)
    summary = util.GetBorgmonAlertSummary()
    if not summary:
        return 0
    return summary.find('GFSMaster_NoMaster') != -1
Example #7
0
def main(argv):
    """Print 'RESTART' when this node's Borgmon fails its /healthz check.

    Arguments:
      argv: sequence of strings.  argv[0] is the version; argv[1] is the
            test-version flag, given as "True", "False" or an integer
            literal.

    Exits through sys.exit(__doc__) (the module usage text) when fewer than
    two arguments are supplied or the flag cannot be interpreted.  Exits
    with status 0, printing nothing, when this is a multi-node install and
    the Borgmon host resolves to a different address than this host, or
    when /healthz is ok.
    """
    if len(argv) < 2:
        sys.exit(__doc__)

    #TODO(con): get python2.4 working and remove this
    flag = argv[1]
    if flag == "True":
        is_testver = 1
    elif flag == "False":
        is_testver = 0
    else:
        try:
            is_testver = int(flag)
        except ValueError:
            sys.exit(__doc__)

    ver = argv[0]

    # Create a borgmon_util object to do work for us
    if is_testver:
        mode = borgmon_util.TEST
    else:
        mode = borgmon_util.ACTIVE
    borgmon = borgmon_util.BorgmonUtil(ver, mode=mode)

    # First check if we are a cluster and this is not borgmon master node.
    # If so, exit.
    if (core_utils.GetTotalNodes() > 1
            and (socket.gethostbyname(borgmon.GetBorgmonHostname()) !=
                 socket.gethostbyname(socket.gethostname()))):
        sys.exit(0)

    # Now check that /healthz is ok. If so, exit
    if check_healthz.CheckHealthz(borgmon.GetBorgmonPort()):
        sys.exit(0)

    # This is the borgmon master and /healthz failed. print RESTART
    # Parenthesized single-argument form: prints the same text under the
    # Python 2 print statement and is also valid Python 3 syntax.
    print('RESTART')