def restart_instance(self, server_instance):
  """Restart a server instance. For example, instance = 'ent1:7882'

  Arguments:
    server_instance: string, "host:port" of the instance to restart.

  Returns:
    True on success.  False if the server instance is not in the SERVERS
    list, or if server_instance could not be parsed into host:port format.
  """
  # Parse server_instance into host and numeric port.
  try:
    host, port = server_instance.split(':')
    port = int(port)
  except (ValueError, IndexError):
    logging.warn("Could not parse %s into host:port format" % server_instance)
    # Bug fix: the original returned the undefined name 'false' (a NameError
    # at runtime); use the boolean constant False.
    return False
  # Check the server is in SERVERS
  if host not in self.cfg.getGlobalParam('SERVERS').get(port, []):
    logging.warn("Could not find %s:%s in SERVERS map, "
                 "ignoring restart_instance request" % (host, port))
    return False
  # Restart it
  logging.info("Restarting server %s:%d" % (host, port))
  self.cfg.globalParams.WriteConfigManagerServerRestartRequest(host, port)
  return True
def _get_current_crawlsummary(self):
  """Current crawl summary from Borgmon.

  Returns:
    {'global-overall-urls-crawl-error': 0,
     'global-overall-urls-crawled': 199620.0}
  """
  # Map from the summary identifiers (e.g. 'global-overall-urls-crawled')
  # to the corresponding Borgmon expression names.
  name_to_borgmon = {
      'global-overall-urls-crawled': 'num_urls_crawled_now',
      'global-overall-urls-crawl-error': 'num_urls_error_now'}
  uservars = self._getuservars()[1]
  summary = {}
  for param in self.cfg.getGlobalParam('ENT_CRAWL_SUMMARY').keys():
    borgmon_name = '?'
    try:
      borgmon_name = name_to_borgmon[param]
      summary[param] = uservars[borgmon_name]
    except KeyError:
      # Either no Borgmon mapping for this identifier, or Borgmon did not
      # report a value; default to 0.0 so every expected key is present.
      logging.warn('problem finding value for ' + param +
                   ' aka ' + borgmon_name)
      summary[param] = 0.0
  logging.info("return: " + str(summary))
  return summary
def PollRunningCommands(self): io_happened = 0 # see if anyone finished anyone_finished = 0 # # See if any commands have written stuff back to us, and if so, # write it to the log file. # io_happened = self.ReadFromCommands() for (running_command, popen3object) in self._running_commands.items(): status = popen3object.poll() if status != -1: anyone_finished = 1 status = status >> 8 # keep only actual status running_request = running_command.GetRequest() # cleanup del self._running_commands[running_command] # The command is gone. del self._iopipes[popen3object.fromchild] # And so are its io pipes. del self._iopipes[popen3object.childerr] if status == 0: logging.info('+++ Finished: %s' % (running_request.GetFilename())) self._num_processed_success = self._num_processed_success + 1 self.AddRecentRequest(running_request, 0) self._request_mgr.MoveRequestToSuccess(running_request) self._success_handler(running_request) else: logging.error('+++ Failed (rc=%s): %s' % (status, running_command.GetCmd())) retries = self.GetRetries(running_request.GetType()) attempts = running_request.GetAttempts() if attempts <= retries: self._retry_handler(running_request) # cmd failed and user requested retries and we did not reach # the retry limit. So we dispatch it again. logging.warn('%s retrying %s more times.' % (running_request.GetFilename(), retries - attempts + 1)) running_request.AddAttempt() # Increment count self._num_retries = self._num_retries + 1 running_request.AddStatusz(">>>>> Failure. One more attempt "\ "granted [crt attempt %s]" % attempts) self.DispatchCommand(running_command) else: running_request.AddStatusz(">>>>> Failure. No more attempts") self._num_processed_failure = self._num_processed_failure + 1 self.AddRecentRequest(running_request, 2) self._request_mgr.MoveRequestToFailure(running_request) self._failure_handler(running_request) return anyone_finished or io_happened
def RemoveFile(file):
  """Delete a file with logging.

  The removal is best-effort: a failed removal is logged, not raised.

  Arguments:
    file: string: file path
  """
  if os.path.exists(file):
    logging.info('Removing %s' % file)
    try:
      os.remove(file)
    except OSError:
      # Bug fix: was a bare 'except:', which also swallowed
      # KeyboardInterrupt/SystemExit; only OS-level removal errors
      # are expected here.
      logging.warn('Error removing %s' % file)
def control_directory(self, dir, max_files, excluded_patterns=None):
  """Directory-level file-count control (used as a last resort).

  If 'dir' holds more than max_files files, the oldest ones are removed,
  except those matching excluded_patterns.
  """
  removed, _ = self.control_files([(".", max_files, 1)], dir,
                                  excluded_patterns)
  if removed > 0:
    logging.warn("Removed %d files from directory %s because it had "
                 " more than %d files. Did you miss a pattern?",
                 removed, dir, max_files)
def _distributeFiles(self, dir):
  """Distributes the files in directory named dir.

  Returns 0 on success, the return code from E.distribute() otherwise."""
  filenames = os.listdir(dir)
  try:
    filenames.remove('temp')
  except ValueError:
    # 'temp' is normally present in the directory; its absence is
    # suspicious but not fatal.
    logging.warn("expected to find file 'temp'")
  paths = [os.path.join(dir, name) for name in filenames]
  return E.distribute(self.cfg.getGlobalParam('MACHINES'),
                      ' '.join(paths), 60)
def Parse(self, message_class):
  """Upgrades the Item to a parsed one, returning true if successful."""
  # Already parsed: nothing to do.
  if self.message_class is not None:
    return 1
  try:
    self.message = message_class(self.message)
  except ProtocolBuffer.ProtocolBufferDecodeError:
    logging.warn("Parse error in message inside MessageSet. Tried "
                 "to parse as: " + message_class.__name__)
    return 0
  self.message_class = message_class
  return 1
def ValidateRequest(self, request):
  """Check that a request is registered and passes its validator.

  Arguments:
    request: request object exposing GetType() and GetFilename().

  Returns:
    1 if the request type is registered and either has no validator or
    passes it; 0 otherwise.
  """
  reqtype = request.GetType()
  # Idiom fix: test membership directly on the dict instead of
  # materializing the key list with .keys().
  if reqtype not in self._command_info:
    logging.warn('Skipping unregistered %s request from file %s' %
                 (reqtype, request.GetFilename()))
    return 0
  validator = self.GetValidator(reqtype)
  if validator:
    (validator_rc, msg) = validator(request)
    if not validator_rc:
      logging.error('%s failed validation: %s' %
                    (request.GetFilename(), msg))
    return validator_rc
  return 1  # Validate passed, or no validator found
def GetActiveVersion():
  """ Return the current version of the gsa.

  Returns None on failure.
  """
  # parse config.google.enterprise to determine the version
  cmd = 'find /export/hda3/ -name STATE -maxdepth 2 | xargs grep ACTIVE -l'
  try:
    f = os.popen(cmd, 'r')
    data = f.read()
    f.close()
    pat = re.compile(r'/export/hda3/([0-9]+\.[0-9]+\.[0-9]+)/STATE')
    match = pat.search(data)
    if match:
      return match.group(1)
  except IOError as e:
    logging.warn('IOError in GetActiveVersion: %s' % e)
  except OSError as e:
    # Consistency fix: the other copy of this function in the codebase also
    # catches OSError (os.popen/read can surface it) and returns None
    # explicitly; make this copy behave the same way.
    logging.warn('OSError in GetActiveVersion: %s' % e)
  return None
def ParseDesc(self, cnt=0):
  """Parse the initial description.

  This could be Python or C++.

  Args:
    cnt: int, line index into self.output at which to start.
      NOTE(review): the body immediately resets cnt to 0, so the argument
      is effectively ignored — confirm whether that is intended.

  Returns:
    (line_count, lang_type)
      line_count  Line to start parsing flags on (int)
      lang_type   Either 'python' or 'c'
    (-1, '') if the flags start could not be found
  """
  # A SWIG module's flags section starts with '<executable>:'.
  exec_mod_start = self.executable + ':'
  after_blank = False
  cnt = 0
  for cnt in range(cnt, len(self.output)):  # collect top description
    line = self.output[cnt].rstrip()
    # Python flags start with 'flags:\n'
    if ('flags:' == line and
        len(self.output) > cnt+1 and
        '' == self.output[cnt+1].rstrip()):
      cnt += 2
      logging.debug('Flags start (python): %s' % line)
      return (cnt, 'python')
    # SWIG flags just have the module name followed by colon.
    if exec_mod_start == line:
      logging.debug('Flags start (swig): %s' % line)
      return (cnt, 'python')
    # C++ flags begin after a blank line and with a constant string
    if after_blank and line.startswith(' Flags from '):
      logging.debug('Flags start (c): %s' % line)
      return (cnt, 'c')
    # java flags begin with a constant string
    if line == 'where flags are':
      logging.debug('Flags start (java): %s' % line)
      cnt += 2  # skip "Standard flags:"
      return (cnt, 'java')
    # Still in the description: accumulate it and remember blank lines
    # (needed to detect the C++ flags header above).
    logging.debug('Desc: %s' % line)
    self.desc.append(line)
    after_blank = (line == '')
  else:
    # Loop ran to completion without finding a flags section.
    logging.warn('Never found the start of the flags section for "%s"!' %
                 self.long_name)
    return (-1, '')
def _GetTimeseriesSize(self):
  """Return the amount of memory (in MB) to allocate to storing timeseries.

  Returns:
    Int (MB)
  """
  try:
    config_type = core_utils.GetEntConfigVar('ENT_CONFIG_TYPE')
  except AssertionError:
    # "File not Found" Assertion Error is normal during unit-testing
    logging.warn("Could not find ENT_CONFIG_TYPE, setting Borgmon timeseries"
                 " size to the oneway default")
    config_type = 'ONEBOX'
  if config_type == 'MINI':
    return 32   # 32 mb on Mini
  if config_type in ('LITE', 'FULL'):
    return 16   # 16 mb on Mini
  return 256    # 256 all other platforms
def AddDefaultNTPServer(ntp_server_list):
  """ check to see if a default ntp server needs to be added to the list

  Args:
    ntp_server_list: ['time1.corp.google.com', 'time2.corp.google.com']
  """
  # A server counts as good when ntpdate succeeds and reports a usable
  # stratum (below 15).
  any_good_ntp = 0
  for ntp_server in ntp_server_list:
    (stat, out) = network_diag.check_ntpdate_output(ntp_server)
    if stat == 0 and 'stratum' in out and int(out['stratum']) < 15:
      any_good_ntp = 1
      break
    logging.warn('Bad NTP server: %s (stat=%d, %s)' % (ntp_server, stat, out))
  if any_good_ntp:
    return
  # No reachable server in the list: fall back to the default one.
  default_ntp_server = FindDefaultNTPServer()
  if default_ntp_server not in ntp_server_list:
    ntp_server_list.append(default_ntp_server)
def TestNode(node, logging, retry=1):
  """Currently we define success a node being sshable.

  We can add more stuff, like checking for same software and os versions etc.
  A retry mechanism is used to take care of transient errors.

  Returns 0 when the node answered, a non-zero ssh status otherwise.
  """
  # Probe every 5 seconds for at most 30 seconds.
  seconds_left = 30
  status = 0
  while seconds_left > 0:
    logging.info('Testing node %s' % node)
    cmd = 'ssh %s echo \$HOSTNAME\: I am alive.' % node
    #logging.info('Executing %s' % cmd)
    status, _ = commands.getstatusoutput(cmd)
    if status == 0 or not retry:
      break
    logging.warn(
        'Node %s is down. Retrying after 5 seconds. %s seconds left.' %
        (node, seconds_left))
    seconds_left = seconds_left - 5
    time.sleep(5)
  return status
def _GetTimeseriesSize(self):
  """Return the amount of memory (in MB) to allocate to storing timeseries.

  Returns:
    Int (MB)
  """
  try:
    config_type = core_utils.GetEntConfigVar('ENT_CONFIG_TYPE')
  except AssertionError:
    # "File not Found" Assertion Error is normal during unit-testing
    logging.warn(
        "Could not find ENT_CONFIG_TYPE, setting Borgmon timeseries"
        " size to the oneway default")
    config_type = 'ONEBOX'
  # 32 mb on Mini, 16 mb on Lite/Full, 256 mb on all other platforms.
  size_by_type = {'MINI': 32, 'LITE': 16, 'FULL': 16}
  return size_by_type.get(config_type, 256)
def GetLiveNodes(logging, retry=1, active_only=1):
  """Get list of machines from ENT_CONFIG_FILE and checks which are up.

  MACHINES paramter in google_config can be wrong as it takes a while
  before a node can be removed from the list. It is not going to be very
  efficient for very large clusters.

  Arguments:
    logging - module for logging
    retry - 1 - retry when testing if a node is active. 0 - otherwise.
    active_only - 1. only check active machines. 0 -otherwise.
  Returns:
    ['ent1', 'ent2', 'ent3', 'ent4']
  """
  nodelist = GetNodes(active_only)
  if not active_only:
    nodecount = len(nodelist)
    failures = GetNodeFailures(nodecount)
    logging.info('Total nodes: %s' % nodecount)
    logging.info('Allowed failures: %s' % failures)
  logging.info('Checking node status.')
  dead = []
  alive = []
  for node in nodelist:
    if TestNode(node, logging, retry):
      logging.warn('Node %s is inaccessible.' % node)
      dead.append(node)
    else:
      logging.info('Node %s is accessible.' % node)
      alive.append(node)
  logging.info('Inaccessible nodes: %s' % dead)
  # Reachable nodes that are not running SVS are dropped as well.
  no_svs = [node for node in alive if not CheckSVSRunning(node)]
  if no_svs:
    logging.info('SVS not running on nodes: %s' % no_svs)
    alive = [node for node in alive if node not in no_svs]
  return alive
def ReadDataFromCache(cacheKey, expiry=15, cachedir=CACHEDIR):
  """ Gets the cached reply for cacheKey.

  This cache prevents SNMP request from being too slow.

  Input:
    cacheKey is a string and must not contain any characters that would
      be "bad" for a filename.
    expiry: Cached data will expire after expiry seconds.
    cachedir: directory in which to hold data.
  Result:
    cached string or None (no value or expired).
  """
  cachefile = '/%s/snmpcache-%s' % (cachedir, cacheKey)
  try:
    # Expired entries are treated as absent.
    age = time.time() - os.stat(cachefile)[stat.ST_MTIME]
    if age > expiry:
      return None
    f = open(cachefile, 'r')
    data = f.read()
    f.close()
    if len(data) > 1:  # sanity check for empty file
      return data
  except (IOError, OSError) as e:
    # Bug fix: os.stat raises OSError (not IOError) when the cache file is
    # missing; catch both so a cache miss is handled instead of propagating.
    logging.warn('IOError in ReadDataFromCache: %s' % e)
  return None
def run(self):
  """Worker thread body: handle every num_threads-th machine.

  Starting at index self.n, runs self.command (if any) over ssh and pushes
  self.files (if any) via rsync to each machine this thread owns.  The
  first non-recoverable error stops this thread and is stored in self.err.
  """
  i = self.n
  while i < len(self.machines):
    machine = self.machines[i]
    # Threads stride through the machine list to partition the work.
    i = i + self.num_threads
    cmds = []
    if self.command:
      cmds.append("ssh %s -n %s %s" % (BATCHMODE, machine,
                                       commands.mkarg(self.command)))
    if self.files:
      # One rsync command per file; each file goes to the same path on the
      # remote machine.
      cmds.extend(
          map(lambda f, m=machine:
                "rsync -u -e \"ssh %s\" -aH %s %s:%s" % (BATCHMODE, f, m, f),
              string.split(self.files, " ")))
    for cmd in cmds:
      # ALARM is prepended to the command (presumably a timeout wrapper) —
      # TODO confirm.
      cmd = "%s%s" % (ALARM, cmd)
      # Run and get the error
      if not QUIET:
        logging.info("%s: Executing [%s]." % (self.n, cmd))
      this_err = os.system(python_exec_wrapper(cmd))
      if this_err:
        # Divide by 256 to get error code from the exit status.
        this_err = this_err >> 8
        if self.files:
          # we were doing an rsync
          if this_err == RSYNC_PARTIAL_TRANSFER_ERROR:
            # If the file went missing then we didn't need the transfer
            # anyway, so we just continue.
            logging.warn('%s: File does not exist.' % self.n)
            continue
        logging.error("%s: Error %d." % (self.n, this_err))
        self.err = this_err
        break
    if DELAY:
      time.sleep(DELAY)
def UserInOwnersFile(cls, username, owners_file):
  """Check that 'username' is in the OWNERS file 'owners_file'."""
  # mpmroot OWNERS files live in the Perforce depot view; everything else
  # is found under /home/build.
  if owners_file.startswith('google3/production/mpmroot'):
    filename = os.path.join('/google/src/files/p5/head/depot', owners_file)
  else:
    filename = os.path.join('/home/build', owners_file)
  f = None
  try:
    try:
      f = open(filename, 'r')
      for line in f:
        match = cls._NON_COMMENT_REGEX.match(line)
        if match and match.group(0).strip() == username:
          return True
      return False
    except IOError as e:
      logging.warn('Failed to read Perforce OWNERS file %s: %s',
                   owners_file, e)
      return False
  finally:
    if f:
      f.close()
def ExecuteWrapper(machines, commandLine, out, alarm, verbose=1,
                   forceRemote=0, enthome=None, num_tries=1):
  """Thin wrapper over E.execute as we need process's return code
  (parameter to exit()) and E.execute returns exit status.

  Too late to modify E.execute() method implementation as there is a lot
  of code that already calls this method. Can't confirm whether any code
  has come to rely on the fact that E.execute returns exit status code
  instead of process's return code. Refer E.execute() for its
  documentation."""
  ret = 0
  for trial in range(num_tries):
    ret = E.execute(machines, commandLine, out, alarm, verbose,
                    forceRemote, enthome)
    if os.WIFEXITED(ret):
      # child process exit codes are multiplied by 256, so undo this
      ret = os.WEXITSTATUS(ret)
    if ret == 0:
      break
    if trial + 1 == num_tries:
      # Last attempt: no point in sleeping afterwards.
      break
    logging.warn('%d: Execution of %s failed. Sleeping for 5 seconds...' %
                 (trial, commandLine))
    time.sleep(5)
  return ret
def run(self):
  """Worker thread body: handle every num_threads-th machine.

  Starting at index self.n, runs self.command (if any) over ssh and pushes
  self.files (if any) via rsync to each machine this thread owns.  The
  first non-recoverable error stops this thread and is stored in self.err.
  """
  index = self.n
  while index < len(self.machines):
    machine = self.machines[index]
    index = index + self.num_threads
    cmds = []
    if self.command:
      cmds.append("ssh %s -n %s %s" % (BATCHMODE, machine,
                                       commands.mkarg(self.command)))
    if self.files:
      for f in string.split(self.files, " "):
        cmds.append("rsync -u -e \"ssh %s\" -aH %s %s:%s" % (
            BATCHMODE, f, machine, f))
    for cmd in cmds:
      cmd = "%s%s" % (ALARM, cmd)
      # Run and get the error
      if not QUIET:
        logging.info("%s: Executing [%s]." % (self.n, cmd))
      this_err = os.system(python_exec_wrapper(cmd))
      if not this_err:
        continue
      # Divide by 256 to get error code from the exit status.
      this_err = this_err >> 8
      if self.files and this_err == RSYNC_PARTIAL_TRANSFER_ERROR:
        # If the file went missing then we didn't need the transfer
        # anyway, so we just continue.
        logging.warn('%s: File does not exist.' % self.n)
        continue
      logging.error("%s: Error %d." % (self.n, this_err))
      self.err = this_err
      break
    if DELAY:
      time.sleep(DELAY)
def check_ntpdate_output(name):
  """ run "ntpdate -q" command on a server, and return the result.

  Args:
    name - 'time1.corp.google.com'
  Returns:
    (0, {'delay': '0.02591', 'stratum': '2', 'offset': '-0.030579',
         'server': '172.24.0.11'})
  """
  cmd = '/usr/sbin/ntpdate -q %s' % commands.mkarg(name)
  (stat, out) = commands.getstatusoutput(cmd)
  parsed_out = {}
  if stat == 0:
    # only interested in the attributes in the first line
    first_line = out.split('\n')[0]
    # Idiom fix: iterate the attributes directly instead of indexing with
    # range(len(...)), and don't shadow the builtin 'list'.
    for attr in first_line.split(','):
      # Each attribute looks like 'stratum 2': a key followed by a value.
      fields = attr.split()
      parsed_out[fields[0].strip()] = fields[1].strip()
  else:
    logging.warn('Command "%s" failed with exit status %d: %s' %
                 (cmd, stat, out))
  return (stat, parsed_out)
def GetAttr(name, pid=None, fallback_to_ps=1):
  """Retrieves an attribute using /proc/<pid>/stat file.

  If the kernel version mismatches or the attribute name is not supported
  then it can fallback to using ps command based on the value of
  fallback_to_ps argument.

  Returns None in case of failure. Otherwise returned attribute value is
  always a string.
  """
  if pid is None:
    pid = os.getpid()
  val = None
  # Bug fix: local was named 'id', shadowing the builtin.
  column = GetColumnId(name, KERNEL_VERSION)
  if column is not None:
    try:
      # Bug fix: the original leaked the file handle; close it explicitly.
      f = open('/proc/%d/stat' % pid, 'r')
      try:
        data = f.read()
      finally:
        f.close()
    except (IOError, OSError):
      # Bug fix: was a bare 'except:', which also swallowed
      # KeyboardInterrupt/SystemExit; only file access errors are expected.
      logging.error('Error getting stats for pid %d.' % pid)
    else:
      val = data.split()[column]
  if val is None and fallback_to_ps:
    # Fallback to using 'ps'
    logging.warn('Error retrieving value. Using \'ps\'.')
    val = GetAttrUsingPS(name, pid)
  return val
def GetExpectedArgv(self, host, port, type):
  """Get the expected argv for a server as calculated by Babysitter.

  The expected argv is adapted to suit the argv as exported by the server.
  For example, C++ binaries will include the binary name in argv, but Java
  server's will not.

  Arguments:
    host: string, "ent1"
    port: int, 7882
    type: string, "authzchecker"
  Return:
    string: "--foo --bar", or None if Babysitter does not know the server
  """
  babysitter_cmd = servertype.GetRestartCmd(type, self.cfg, host, port)
  if babysitter_cmd is None:
    logging.warn("No babysitter command found for %s:%s (%s), not able to "
                 "export this server on babysitter-argv-sum" %
                 (host, port, type))
    return None
  binary_name = servertype.GetBinaryName(type)
  args = ExtractBinaryArgs(babysitter_cmd, binary_name)
  if args is None:
    logging.warn("Could not extract binary arguments for %s:%s, not"
                 " able to export this server on babysitter-argv-sum" %
                 (host, port))
    logging.warn("Binary name was %s, babysitter command was:" % binary_name)
    logging.warn(babysitter_cmd)
    return None
  if IsJavaCommand(babysitter_cmd):
    # Java servers do not include the binary itself in argv.
    return args
  # For non-Java binaries, prepend the full binary path.
  return "%s %s" % (ExtractBinaryPath(babysitter_cmd, binary_name), args)
def KillLBService(self, service, prefix, lb_reload=0):
  """Kills all traces of a local babysitter service.

  First removes the configuration file, then pid file and then kills the
  process group.

  Note: This is a best effort method. It never returns failure unless
  there is a problem in executing a command.
  """
  # conffile: local babysitter config; pidfile: recorded process group id;
  # binary: the service's binary (used to locate stray processes).
  (conffile, pidfile, binary) = self.GetMiscFiles(prefix)
  logging.info('Killing %s', service)
  if not os.path.exists(conffile):
    logging.warn("Configuration file %s doesn't exist. Ignoring." % conffile)
  else:
    RemoveFile(conffile)
    if lb_reload:
      # Make the local babysitter notice the removed config immediately.
      self.ForceLocalBabysitterConfigReload()
  pgid = ''
  if not os.path.exists(pidfile):
    # try to guess the process id
    logging.warn(
        'PID file %s not found. Trying to find running processes.' % pidfile)
    pgids = GetProcessGroupIDs(binary, self.__ver)
    # there should be at most one process group id
    if len(pgids) > 1:
      # TODO(zsyed): we may want to kill all group IDs.? I don't even know if
      # this can happen.
      raise core_utils.GenericError, \
            "More than one instance %s found" % binary
    # NOTE(review): assumes pgids is non-empty here; confirm
    # GetProcessGroupIDs cannot return an empty list.
    pgid = pgids[0]
  else:
    pid_file = open(pidfile, 'r')
    pgid = pid_file.read().strip()
    pid_file.close()
    RemoveFile(pidfile)
  if not pgid:
    logging.warn(
        'No running processes found for %s, ver=%s. Assuming dead.' %
        (binary, self.__ver))
  else:
    logging.info('Killing all processes in group %s.' % pgid)
    # 'kill -9 -<pgid>' signals the entire process group.
    core_utils.ExecCmd('kill -9 -%s' % pgid,
                       'Killing process group %s' % pgid,
                       ignore_errors=1)
  logging.info('%s stopped.' % service)
def KillLBService(self, service, prefix, lb_reload=0):
  """Kills all traces of a local babysitter service.

  First removes the configuration file, then pid file and then kills the
  process group.

  Note: This is a best effort method. It never returns failure unless
  there is a problem in executing a command.
  """
  # conffile: local babysitter config; pidfile: recorded process group id;
  # binary: the service's binary (used to locate stray processes).
  (conffile, pidfile, binary) = self.GetMiscFiles(prefix)
  logging.info('Killing %s', service)
  if not os.path.exists(conffile):
    logging.warn("Configuration file %s doesn't exist. Ignoring." % conffile)
  else:
    RemoveFile(conffile)
    if lb_reload:
      # Make the local babysitter notice the removed config immediately.
      self.ForceLocalBabysitterConfigReload()
  pgid = ''
  if not os.path.exists(pidfile):
    # try to guess the process id
    logging.warn('PID file %s not found. Trying to find running processes.' %
                 pidfile)
    pgids = GetProcessGroupIDs(binary, self.__ver)
    # there should be at most one process group id
    if len(pgids) > 1:
      # TODO(zsyed): we may want to kill all group IDs.? I don't even know if
      # this can happen.
      raise core_utils.GenericError, \
            "More than one instance %s found" % binary
    # NOTE(review): assumes pgids is non-empty here; confirm
    # GetProcessGroupIDs cannot return an empty list.
    pgid = pgids[0]
  else:
    pid_file = open(pidfile, 'r')
    pgid = pid_file.read().strip()
    pid_file.close()
    RemoveFile(pidfile)
  if not pgid:
    logging.warn('No running processes found for %s, ver=%s. Assuming dead.' %
                 (binary, self.__ver))
  else:
    logging.info('Killing all processes in group %s.' % pgid)
    # 'kill -9 -<pgid>' signals the entire process group.
    core_utils.ExecCmd('kill -9 -%s' % pgid,
                       'Killing process group %s' % pgid,
                       ignore_errors=1)
  logging.info('%s stopped.' % service)
def main(argv): """Fetches args, reads the config file, and starts the checking. Args: argv: Arguments to the script, not used other than for checking for improper usage. """ # argv should only contain the script that was executed if len(argv) > 1: del argv[0] _PrintUsageAndExit(error='This script takes no positional arguments: %s' % str(argv)) if not FLAGS.config: if FLAGS.users: if FLAGS.users.find(',') >= 0: # More than 1 user given, make user specify configuration _PrintUsageAndExit(error='--config is a required flag.') else: user = FLAGS.users else: user = os.environ.get('USER') email_regex = re.compile(_EMAIL_PAT % re.escape(user), re.MULTILINE) config_root = ('/google/src/head/depot/google3/production/tools/' 'check_groups/conf') if _CheckForEmail(email_regex, ['sre-team'], max_depth=0): logging.info('No --config specified, defaulting to sre.cfg') FLAGS.config = os.path.join(config_root, 'sre.cfg') elif _CheckForEmail(email_regex, ['quantitative-team'], max_depth=0): logging.info('No --config specified, defaulting to quant/quant.cfg') FLAGS.config = os.path.join(config_root, 'quant/quant.cfg') else: _PrintUsageAndExit(error='--config is a required flag.') if os.path.exists(FLAGS.config): pass elif os.path.exists(_DEFAULT_CONFIG_PATH % FLAGS.config): FLAGS.config = _DEFAULT_CONFIG_PATH % FLAGS.config else: _PrintUsageAndExit(error=('Config file %s does not exist or ' 'I can\'t read it!') % FLAGS.config) config = ConfigParser.ConfigParser() config.optionxform = str # Owners require case insensitivity. config.read(FLAGS.config) if FLAGS.list_classes: _ListClasses(config) sys.exit(0) # complain if they don't give us --classes if not FLAGS.classes: _PrintUsageAndExit(error='One of --classes or --list_classes is required.') # Complain if they're running out of /home/build, since they won't get # perforce checks, but don't die since they still get some use of it. 
if os.getcwd().startswith('/home/build'): logging.warn('**** Running out of /home/build: ' 'Don\'t expect perforce checks to work. ****') # Default to $USER, but use --user if they give it if FLAGS.users: test_users = FLAGS.users.split(',') else: test_users = [os.environ.get('USER')] for each_user in test_users: logging.info('Starting analysis for user %s', each_user) cc = ConfigurationChecker(config, each_user) cc.CheckMemberships(FLAGS.classes)
def Execute(self):
  """Main dispatcher loop.

  Repeatedly pulls the next request from the request manager, validates
  and dispatches it, and reaps finished commands.  A TERMINATE request
  (or SIGTERM) puts the loop into 'draining' mode: no new requests are
  started and the loop exits once all running commands have finished.
  Control-C exits immediately.
  """
  logging.info('Autorunning dispatcher starting')
  draining = 0
  done = 0
  self._sleep_time = self._min_sleep_time
  # SIGTERM is converted into a SIGTERMInterrupt exception by the handler,
  # caught below to enter draining mode.
  signal.signal(signal.SIGTERM, SigtermHandler)
  # before starting the loop move any pending requests to the in dir
  self._request_mgr.MoveAllPendingRequests()
  try:
    while not done:
      do_continue = 0
      try:
        request = None
        if not draining:
          #
          # Update the request_list, count waiting and running requests,
          # and see if we can get a new request to start.
          #
          self._request_mgr.LookForNewRequests()
          waiting_counts = self._request_mgr.GetWaitingCounts()
          running_counts = self.GetRunningCounts()
          request = self._request_mgr.GetNextRequest(waiting_counts,
                                                     running_counts)
        #
        # See if there is a command to dispatch
        #
        if request:
          request.InitStatuszFile(self._request_mgr.GetStatuszDir())
          reqtype = request.GetType()
          logging.info('=== "%s" request found' % reqtype)
          self._request_mgr.MoveRequestToPending(request)
          request.AddStatusz("Starting process")
          if reqtype == TERMINATE:
            #
            # Terminate requests make us stop doing new commands.
            #
            draining = 1
            logging.info('=== Draining running commands.')
            request.AddStatusz("Request drained")
            self._num_processed_success = self._num_processed_success + 1
            self.AddRecentRequest(request, 0)
            self._request_mgr.MoveRequestToSuccess(request)
          else:
            #
            # Validate the command and dispatch it if it looks good,
            # continue if this request was not valid.
            #
            if not self.ValidateRequest(request):
              request.AddStatusz("Request has invalid parameters")
              self._request_mgr.MoveRequestToFailure(request)
              self._failure_handler(request)
              self._num_processed_failure = self._num_processed_failure + 1
              self.AddRecentRequest(request, 1)
              # Note: cannot add a continue here in while: try:
              do_continue = 1  # Go get another request, this one sucked.
            else:
              command = RunnableCommand(request, self.GetCmdInfo(reqtype))
              request.AddStatusz("Running request cmd [%s]" %
                                 command.GetCmd())
              self.DispatchCommand(command)
        if not do_continue:
          #
          # See if any commands have completed or any IO is waiting.
          #
          command_did_something = self.PollRunningCommands()
          if not request and not command_did_something:
            #
            # No request started, nothing read, and nothing finished,
            # so sleep briefly, and increase length of next sleep.
            #
            time.sleep(self._sleep_time)
            self._sleep_time = min(self._sleep_time*SLEEP_TIME_FACTOR,
                                   self._max_sleep_time)
          else:
            # Something happened, so reduce sleep time to minimum
            self._sleep_time = self._min_sleep_time
          #
          # If we're draining and there are no running commands, we are done
          #
          if draining and self._running_commands == {}:
            logging.info('=== No more requests, exiting')
            done = 1
          #
          # Sync the request manager directories
          #
          self._request_mgr.SyncRequestDirs()
      except SIGTERMInterrupt:
        # Catch SIGTERM
        logging.warn('SIGTERMInterrupt caught, shutting down')
        draining = 1
  except KeyboardInterrupt:
    # Catch control-C
    logging.warn('KeyboardInterrupt, exiting immediatly')
    raise
def SigtermHandler(dummy, _):
  """SIGTERM signal handler.

  Converts the signal into a SIGTERMInterrupt exception so the dispatcher
  loop can catch it and drain gracefully.

  Args:
    dummy: signal number (unused).
    _: current stack frame (unused).
  """
  logging.warn('SIGTERM received, shutting down')
  raise SIGTERMInterrupt
Returns None on failure. """ # parse config.google.enterprise to determine the version cmd = 'find /export/hda3/ -name STATE -maxdepth 2 | xargs grep ACTIVE -l' try: f = os.popen(cmd,'r') data = f.read() f.close() pat = re.compile('/export/hda3/([0-9]+\.[0-9]+\.[0-9]+)/STATE') match = pat.search(data) if match: return match.group(1) except IOError, e: logging.warn('IOError in GetActiveVersion: %s' % e) except OSError, e: logging.warn('OSError in GetActiveVersion: %s' % e) return None CACHEDIR='/var/cache/ent-snmp' def ReadDataFromCache(cacheKey, expiry=15, cachedir=CACHEDIR): """ Gets the cached reply for cacheKey. This cache prevents SNMP request from being too slow. Input: cacheKey is a string and must not contain any characters that would be "bad" for a filename. expiry: Cached data will expire after expiry seconds. cachedir: directory in which to hold data. Result: cached string or None (no value or expired). """ cachefile = '/%s/snmpcache-%s' % (cachedir, cacheKey)