def ExecCmd(cmd, info=None, ignore_errors=0, success_msg=None,
            failure_msg=None):
  """Executes cmd.

  If the command exits with code 0, returns stdout & stderr (combined).
  Otherwise raises CommandExecError, which can be suppressed with
  ignore_errors.
  """
  if info is None:
    info = cmd
  logging.info('Executing: %s' % info)
  if failure_msg is None:
    failure_msg = 'Failure: %s' % info
  if success_msg is None:
    success_msg = 'Success: %s' % info
  out = os.popen('%s 2>&1' % cmd)
  out_text = out.read()
  ret = out.close()
  if ret is not None:
    logging.error('Error executing %s:\n%s' % (cmd, out_text))
    logging.error(failure_msg)
    if ignore_errors:
      logging.warn('Ignoring error.')
    else:
      raise CommandExecError('info: %s cmd: %s: return code: %d' %
                             (info, cmd, ret))
  else:
    logging.info(success_msg)
  return out_text
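# Hypothetical usage sketch for ExecCmd (the command string and info text
# below are illustrative only): capture a command's combined output while
# tolerating a non-zero exit.
def _example_exec_cmd():
  # With ignore_errors=1 a failing command logs the error and still
  # returns whatever the command printed.
  return ExecCmd('ls /export/hda3', info='listing data dir', ignore_errors=1)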
def stop(self):
  if self.IsFederationLicensed() and self.corpus is not None:
    logging.info(' -- stopping federation network -- ')
    self.corpus.Stop()
  else:
    logging.info(' Federation network not stopped -- '
                 'No License or Invalid Configuration')
  return 1
def checkAndReplaceDevIfExistInAcc(self, dev_sn, dev_fake_sn, dev_type):
  """Check if a device exists in any account and replace it if so.

  Args:
    dev_sn: Device serial number.
    dev_fake_sn: Device fake serial number.
    dev_type: Device type: ONT, RG, STGBox or TVBox.

  Returns:
    True if the replacement succeeded, False if the replacement failed,
    and 0 if the device does not exist in any account.
  """
  succ, result = self.fiber_data_client.GetDevices(dev_sn)
  self.assertTrue(succ, 'GetDevice RPC call failed on FDS.')
  account_temp = self.fiber_data_client.ParseDeviceAccountID(result)
  if account_temp is not None:
    dev_re_status, dev_re_resp = self.fiber_data_client.ReplaceDevice(
        account_temp, dev_sn, dev_fake_sn)
    logging.info('%s Replace Status %s , %s', dev_type, dev_re_status,
                 dev_re_resp)
    if dev_re_status:
      return dev_re_status
    return False
  return 0
def getparams(self, policy_name, group, error_info=None):
  """Get the parameters for a group of scoring adjustments.

  Returned as a string. The result is an empty string if the group is
  unknown or has no data."""
  error_str = None
  policy = None
  if policy_name == ScoringAdjustHandler.DEFAULT_POLICY_NAME:
    policy = self.cfg.getGlobalParam(C.ENT_SCORING_ADJUST)
  else:
    policies = self.cfg.getGlobalParam(C.ENT_SCORING_ADDITIONAL_POLICIES)
    if policies and policy_name in policies:
      policy = policies[policy_name]
  if not policy:
    error_str = "No policy named '%s'" % policy_name
  elif group not in policy:
    error_str = "No group '%s' for policy '%s'" % (group, policy_name)
  elif not policy[group]:
    error_str = ("Empty param list for policy '%s' group '%s'" %
                 (policy_name, group))
  else:
    return policy[group]
  logging.info("No params available: %s" % error_str)
  if error_info is not None:
    # Return error string for testing purposes.
    error_info.append(error_str)
  return ""
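# Hypothetical usage sketch for getparams: callers that need the failure
# reason pass a list, and the handler appends the error string to it. The
# policy and group names below are illustrative.
def _example_getparams(handler):
  error_info = []
  params = handler.getparams('nonexistent-policy', 'web', error_info)
  if params == "" and error_info:
    logging.info('lookup failed: %s' % error_info[0])
  return params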
def InitMdb(hostname_list=None, mdb=None):
  """Init Machine Database by reading from SVS.

  Arguments:
    hostname_list: ['ent1', 'ent2']
    mdb: for unittest only. The mdb to be returned by the function.

  Returns:
    The machine database, which is a dictionary. If SVS on a machine is not
    accessible, its entry will be None.
    {'ent1': {'hdcnt': '4',
              'disk_size_GB': 'map:disk hda3:225.376007 hdb3:227.246372 '
                              'hdc3:227.246372 hdd3:227.246372 sda1:3.845303',
              ...
             },
     'ent2': None
    }
  """
  if mdb is not None:
    return mdb
  if hostname_list is None:
    hostname_list = core_utils.GetEntConfigVar('MACHINES')
  ent_config = core_utils.GetEntConfigVar('ENT_CONFIG_TYPE')
  if ent_config == 'LITE' or ent_config == 'FULL':
    mdb = get_loki_param.InitMdb()
  else:
    mdb = MachineInfo(hostname_list)
  logging.info('Init Machine Database from SVS for %s\n' % hostname_list)
  logging.info('mdb = %s\n' % mdb)
  return mdb
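# Hypothetical usage sketch for InitMdb: look up a per-machine attribute,
# guarding against nodes whose SVS was unreachable (their entry is None).
# The hostnames are illustrative.
def _example_init_mdb():
  mdb = InitMdb(['ent1', 'ent2'])
  for host, info in mdb.items():
    if info is None:
      logging.warn('SVS not reachable on %s' % host)
    else:
      logging.info('%s has %s disks' % (host, info.get('hdcnt', '?')))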
def _RunServeCmd(cfg, version, cmd, allnodes=0):
  """Run serve_service command.

  cmd: 'stop', 'start', 'activate', 'deactivate'
  allnodes: 1 to run command on all nodes
  """
  serve_service_cmd = (
      '/export/hda3/%s/local/google3/enterprise/legacy/scripts/'
      'serve_service.py %s %s' % (version,
                                  cfg.getGlobalParam('ENTERPRISE_HOME'),
                                  cmd))
  logging.info('Running: %s' % serve_service_cmd)
  if allnodes:
    machines = cfg.getGlobalParam(C.MACHINES)
  else:
    machines = [E.getCrtHostName()]
  if E.execute(machines,
               SECURE_WRAPPER_COMMAND % (cfg.getGlobalParam('ENTERPRISE_HOME'),
                                         '-p2',
                                         serve_service_cmd),
               None, 0) != E.ERR_OK:
    logging.error('%s: failed' % serve_service_cmd)
    return 1
  logging.info('%s: completed' % serve_service_cmd)
  return 0
def Start(self):
  """Connect to all slaves (corpus roots)."""
  corpus_roots = self.__super_root.GetCorpusRoots()
  # stop all connections
  self.Stop()
  status_start = 0
  message_start = 'Success'
  for corpus_root in corpus_roots:
    (status_reach, message) = self.Status(corpus_root.GetId())
    if status_reach:
      (status_connect, message) = self.Connect(corpus_root.GetId())
      if status_connect:
        logging.error('Connect to the %s appliance failed %d %s' %
                      (corpus_root.GetId(), status_connect, message))
        status_start = status_connect
        message_start = message
      else:
        logging.info('Connected to appliance %s' % corpus_root.GetId())
    else:
      logging.info('Connection existing to appliance %s' %
                   corpus_root.GetId())
  return (status_start, message_start)
def run(self):
  i = self.n
  while i < len(self.jobs):
    (cfg, gwssers, site, testwords, epochs, num) = self.jobs[i]
    i = i + NUM_THREADS
    # do the tests on all gwssers - do 2 tries, 15 seconds apart
    max_epoch_site = -1
    for (gws, port) in gwssers:
      cmd = (". %s; cd %s/local/google3/enterprise/legacy/checks && "
             "./gws_production_check.py %s %d %s %s %s %d" % (
                 cfg.getGlobalParam('ENTERPRISE_BASHRC'),
                 cfg.entHome,
                 commands.mkarg(gws),
                 port,
                 commands.mkarg(site),
                 commands.mkarg(testwords),
                 commands.mkarg(string.join(map(str, epochs), ",")),
                 num))
      logging.info("Executing %s" % cmd)
      (err, msgs) = E.getstatusoutput(cmd)
      max_epoch = None
      errors = None
      exec("(max_epoch, errors) = %s" % msgs)
      if max_epoch > max_epoch_site:
        max_epoch_site = max_epoch
      if errors:
        self.errors[site] = errors
    self.max_epochs[site] = max_epoch_site
    os.remove(testwords)
def getPasswd(self, name, ip):
  """Sets a new password for a user and mails it to the user.

  (We touch or keep it.) Returns success status (boolean).
  """
  # Refuse to change the password for username google. This is a special
  # username that we use as a back-door for controlling the box; changing
  # this password may make it inaccessible (bug#36271).
  if name == 'google':
    logging.info("Refusing to set password for user %s" % name)
    return false
  newPassword = password.createRandomPasswd(PASSWORD_LENGTH)
  if self.check_update_user(name, None, newPassword):
    SendMail.send(self.cfg, self.getEmail(name), false,
                  M.MSG_FORGOTPASSWORDSUBJECT,
                  M.MSG_FORGOTPASSWORD % (newPassword, ip), false)
    self.cfg.writeAdminRunnerOpMsg(
        "A new password has been sent to your email address")
    return true
  logging.error("couldn't set password for user %s" % name)
  return false
def restart_instance(self, server_instance):
  """Restart a server instance. For example, instance = 'ent1:7882'.

  Return false if the server instance is not in the SERVERS list, or if
  the server_instance could not be parsed into host:port format.
  """
  # Parse server_instance
  try:
    host, port = server_instance.split(':')
    port = int(port)
  except (ValueError, IndexError):
    logging.warn("Could not parse %s into host:port format" %
                 server_instance)
    return false
  # Check the server is in SERVERS
  if not host in self.cfg.getGlobalParam('SERVERS').get(port, []):
    logging.warn("Could not find %s:%s in SERVERS map, "
                 "ignoring restart_instance request" % (host, port))
    return false
  # Restart it
  logging.info("Restarting server %s:%d" % (host, port))
  self.cfg.globalParams.WriteConfigManagerServerRestartRequest(host, port)
  return true
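# Hypothetical usage sketch for restart_instance: instances are addressed
# as 'host:port' (the 'ent1:7882' example comes from the docstring), and
# the port must appear in the SERVERS map or the request is ignored.
def _example_restart_instance(handler):
  if not handler.restart_instance('ent1:7882'):
    logging.warn('restart request was ignored')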
def getpagecount(self, collection, uriAt, sort, view, partial_match,
                 flatList, logging_me=True, debugging_me=False):
  """Get page count."""
  if logging_me:
    logging.info("[diagnose_handler:getpagecount] Sitemap flatList = " +
                 flatList)
  servers = (self.cfg.globalParams.GetServerManager().Set(
      'urltracker_server').Servers())
  uriAt = self.SanitizeURI(uriAt)
  for server in servers:
    client = urltracker_client.URLTrackerClient(server.host(),
                                                int(server.port()))
    contents = client.GetPageCount(string.strip(collection),
                                   string.strip(uriAt),
                                   string.strip(sort),
                                   string.strip(view),
                                   self.GetIntValue(partial_match),
                                   self.GetIntValue(flatList))
    if contents is None:
      continue
    return 'response = %s\n' % repr(contents)
  return 'response = []\n'
def _get_current_crawlsummary(self):
  """Current crawl summary from Borgmon.

  Returns:
    {'global-overall-urls-crawl-error': 0,
     'global-overall-urls-crawled': 199620.0}
  """
  # We need to map from the identifiers ('global-overall-urls-crawled')
  # to borgmon exprs ('num_urls_crawled_today')
  NAME_TO_BORGMON = {
      'global-overall-urls-crawled': 'num_urls_crawled_now',
      'global-overall-urls-crawl-error': 'num_urls_error_now'
  }
  summary = {}
  uservars = self._getuservars()[1]
  for param in self.cfg.getGlobalParam('ENT_CRAWL_SUMMARY').keys():
    bname = '?'
    try:
      bname = NAME_TO_BORGMON[param]
      summary[param] = uservars[bname]
    except KeyError:
      logging.warn('problem finding value for ' + param + ' aka ' + bname)
      summary[param] = 0.0
  logging.info("return: " + str(summary))
  return summary
def getcertinfo(self, whichcert):
  """Returns information about the currently installed or the staging
  certificate.

  whichcert is "staging" or "installed".

  Returns, on success:
    0
    hostname
    organizational unit
    organization
    locality
    state
    country
    email
    notValidBefore date
    notValidAfter date
  or 1 on failure.
  """
  retcode, result = E.getstatusoutput(
      "%s getcertinfo %s %s" %
      (self.sslWrapperPath, whichcert,
       self.cfg.getGlobalParam("ENTERPRISE_HOME")))
  if retcode == 0:
    return "0\n%s" % result
  else:
    logging.info("Couldn't get cert info for %s: %s" % (whichcert, result))
    return "1"
def renice_svs(machine, new_priority=SVS_NICE_VALUE):
  """Renices svs on 'machine' with new_priority.

  Returns 0 on failure, 1 on success.
  """
  pid = None
  ret = 0
  # Get pid of svs.
  try:
    pid = open(SVS_PID_FILE, 'r').read()
  except:
    err = str(sys.exc_info()[:2])
    logging.error('Error reading SVS pid from %s: %s' % (SVS_PID_FILE, err))
  if pid is not None:
    # Get nice value for pid.
    old_priority = os_utils.GetAttr('nice', int(pid))
    if old_priority is not None and int(old_priority) != new_priority:
      logging.info('Renicing SVS to %d.' % new_priority)
      # Get group id from pid.
      pgid = os_utils.GetAttr('pgid', int(pid))
      # Renice the whole group.
      cmd = 'renice %d -g %d' % (new_priority, int(pgid))
      rc = E.execute([machine], cmd, None, 1)
      if rc == 0:
        ret = 1
      else:
        # Log the renice exit code (rc), not ret, which is still 0 here.
        logging.error('Error renicing SVS: %d' % rc)
  return ret
def CreateDefaultBackendFiles(self):
  """Initialize data files for backend servers to start up normally."""

  def TouchFile(global_params, filename):
    """Check whether filename exists; create it if it does not."""
    # first check if file exists
    ls_cmd = "ls %s" % filename
    err, out = E.run_fileutil_command(self.globalParams, ls_cmd)
    if err != E.ERR_OK:
      # create if not exists
      create_cmd = "truncate %s 0" % filename
      err, out = E.run_fileutil_command(self.globalParams, create_cmd)
      if err != E.ERR_OK:
        logging.fatal("Could not create file: %s" % filename)

  if not self.getGlobalParam(C.GFS_CELL):
    # touch urlmanager removedoc log file
    umremovedoc_file = ("%s-from-00000-000-of-001seq00000" %
                        self.globalParams.var(
                            'WORKSCHEDULER_REMOVEDOC_UM_LOG_PREFIX'))
    logging.info("Create empty removedoc log for urlmanager: %s" %
                 umremovedoc_file)
    TouchFile(self.globalParams, umremovedoc_file)
    # touch urlmanager urlscheduler log file
    umurlscheduler_file = ("/bigfile/%s-from-00000-000-of-001seq00000" %
                           self.globalParams.var(
                               'URLMANAGER_URLSCHEDULER_LOG_PREFIX'))
    logging.info("Create empty urlscheduler log for urlmanager: %s" %
                 umurlscheduler_file)
    TouchFile(self.globalParams, umurlscheduler_file)
def recrawl_url_patterns(self, url_patterns):
  ret = 0
  errors = self.cfg.globalParams.set_file_var_content(
      'RECRAWL_URL_PATTERNS', url_patterns, 1)
  if errors != validatorlib.VALID_OK:
    return 1
  host_port = self.cfg.globalParams.GetServerHostPorts("supergsa_main")
  if len(host_port) != 1:
    logging.error("Expected exactly 1 supergsa_main backend, found: %s" %
                  host_port)
    return 2
  # Send a request to the supergsa_main binary and timeout after 60 seconds.
  status, output = commands.getstatusoutput(
      "curl --max-time 60 -Ssi --data-binary @%s "
      "http://%s:%s/recrawlmatchingurls" %
      (self.cfg.getGlobalParam('RECRAWL_URL_PATTERNS'),
       host_port[0][0], host_port[0][1]))
  if status == 0 and output.startswith('HTTP/1.1 200'):
    logging.info("Recrawl request was successfully submitted.")
  else:
    logging.error("Recrawl request could not be submitted. "
                  "Reason (status/output):\n%s/%s" % (status, output))
    ret = 2
  return ret
def SendCommand(server_host, server_port, cr_request):
  logging.info('Sending a crawl_queue request to supergsa_main.')
  h = httplib.HTTPConnection(server_host, server_port)
  h.request('POST', '/generatecrawlqueue', cr_request.Encode())
  r = h.getresponse()
  data = r.read()
  return crawlreport_pb.CrawlQueueResponse(data)
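# Hypothetical usage sketch for SendCommand. It assumes crawlreport_pb also
# defines a CrawlQueueRequest message with an Encode() method, mirroring the
# CrawlQueueResponse constructed above; the host and port are illustrative.
def _example_send_command():
  request = crawlreport_pb.CrawlQueueRequest()  # assumed message type
  response = SendCommand('ent1', 7882, request)
  return response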
def start(self):
  if self.IsFederationLicensed() and os.path.exists(self.config_file):
    logging.info(' -- starting federation network -- ')
    # start logging only if federation is enabled
    log_file_name = ('/export/hda3/tmp/fed_network_client_%s' %
                     time.strftime('%d-%b-%y'))
    log_file = open(log_file_name, 'a+')
    logging.set_logfile(log_file)
    logging.set_verbosity(logging.DEBUG)
    sys_abstraction = stunnel_jail.GetSystemAbstraction()
    # setup the stunnel jail
    jail = stunnel_jail.StunnelJail(fed_stunnel_config.STUNNEL_CLIENT_CHROOT,
                                    sys_abstraction)
    (status_jail, message) = jail.Setup()
    if status_jail:
      logging.error('The CHROOT jail could not be set up: %s' % message)
      return 1
    try:
      fed_config = fed_network_config.FederationConfig(self.config_file,
                                                       None, sys_abstraction)
      logging.info('Federation config read successfully')
      client = fed_network_util.SuperRootStunnelService(sys_abstraction,
                                                        fed_config)
    except fed_network_config.FederationConfigException, ex:
      logging.error('Exception in configuration %s' % ex.message)
      return 1
    else:
      # Connect to all the slaves
      (status_connect, message) = client.Start()
      # Create the config root
      (status_config, message) = CreateSuperRootConfig(self.ent_home)
def _StatusFileCmd(cmd, version, out=[], extra_arg='', unittestdir=None):
  """Perform a command on the RESET_STATE status file.

  On a cluster, runs lockserv <cmd> /ls/ent4-x-x/RESET_STATE.
  On a oneway, runs cmd on /export/hda3/4.x.x/RESET_STATE.
  cmd should be cat, setcontents, or rm.

  Return: None for oneway, 0 for success, 1 for error.
  Command output is returned in out.
  """
  if unittestdir is not None or 1 == len(core_utils.GetNodes()):
    # unittest or oneway
    if unittestdir is not None:
      file = '/%s/%s/RESET_STATE' % (unittestdir, version)
    else:
      file = '/export/hda3/%s/RESET_STATE' % version
    if cmd == 'cat':
      status = _ExecuteCommand('cat %s' % file, out=out)
    elif cmd == 'setcontents':
      status = _ExecuteCommand('echo "%s" > %s' % (extra_arg, file))
    elif cmd == 'rm':
      status = _ExecuteCommand('rm -f %s' % file)
    else:
      logging.error('StatusFileCmd: bad command %s' % cmd)
      return 1
    return status
  lockserv_cmd_prefix = core_utils.GetLSClientCmd(
      version, install_utilities.is_test(version))
  chubby_file = '/ls/%s/RESET_STATE' % core_utils.GetCellName(version)
  lockserv_cmd = '%s %s %s %s' % (lockserv_cmd_prefix, cmd, chubby_file,
                                  extra_arg)
  logging.info('Reset index: executing %s' % lockserv_cmd)
  status = _ExecuteCommand(lockserv_cmd)
  return status
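# Hypothetical usage sketch for _StatusFileCmd: read the current
# RESET_STATE contents into a caller-supplied list, assuming
# _ExecuteCommand appends the command's output lines to out.
def _example_status_file_cmd(version):
  out = []
  status = _StatusFileCmd('cat', version, out=out)
  if status == 0 and out:
    logging.info('RESET_STATE contents: %s' % out[0])
  return status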
def main(argv):
  """Runs the PeriodicScript() function in a loop."""
  # global MACHINES
  # global MAIN_GOOGLE3_DIR
  if len(argv) != 1:
    sys.exit(__doc__)
  try:
    logging.info("Running periodic_script (pid = %d)..." % os.getpid())
    config = entconfig.EntConfig(argv[0])  # user config
    if not config.Load():
      sys.exit("Cannot load the config file %s" % argv[0])
    PeriodicScript(config)
    svs_utilities.CheckSvsAlive(["localhost"])
    monitorSnmp()
    EnableGFS(config)
    EnableNamed()
    DnsConfig(config)
    admin_runner_utils.SyncOneboxLog(config)
    WarmIndex(config)
    logging.info("Finished periodic_script.")
  except:
    # collect the exception traceback so we know what went wrong
    (t, v, tb) = sys.exc_info()
    logging.error("\nPeriodic script: Fatal Error:\n" +
                  "=======================\n" +
                  string.join(traceback.format_exception(t, v, tb)))
def _UnrecoverableGFSDirs(cfg):
  """Unrecoverable GFS dirs.

  Returns a list of GFS directories that cannot be removed when clearing
  the index.

  Args:
    cfg: entconfig.EntConfig(entHome, 1)
  Return:
    ['/gfs/ent/feeds', '/gfs/ent/feedstatus', '/gfs/ent/logs']
  """
  unrecoverable_dirs = []
  # FEEDS_DIR, FEED_STATUS_DIR
  unrecoverable_dirs.append(cfg.getGlobalParam('FEEDS_DIR'))
  unrecoverable_dirs.append(cfg.getGlobalParam('FEED_STATUS_DIR'))
  unrecoverable_dirs.append(cfg.getGlobalParam('LOG_REPORT_DIR'))
  unrecoverable_dirs.append(cfg.getGlobalParam('CRAWLQUEUE_DIR'))
  unrecoverable_dirs.append(cfg.getGlobalParam('SYSLOG_CHECKPOINTS_DIR'))
  namespace_prefix = cfg.getGlobalParam('NAMESPACE_PREFIX')
  unrecoverable_dirs.append(cfg.getGlobalParam('CRAWL_LOGDIR'))
  logging.info('Reset Index: Skipping Unrecoverable GFS Dirs:')
  logging.info('  %s' % unrecoverable_dirs)
  return unrecoverable_dirs
def StartBorgmon(self):
  """Start Borgmon and Borgmon Reactor via localbabysitter."""
  logging.info('Starting Borgmon')
  lb_dict = self._CreateLocalBabysitterDict()
  self.__lb_util.StartLBService('borgmon', 'borgmon', lb_dict)
  self.__lb_util.StartLBService('reactor', 'reactor', lb_dict)
  self.__lb_util.ForceLocalBabysitterConfigReload()
def doLogReport(self, jobName, jobToken, collection, reportName, reportDate,
                withResults, topCount, diagnosticTerms, update):
  """The actual work done in a worker thread to generate a summary report."""
  (html_file, valid_file) = liblog.get_report_filenames(
      self.entConfig, liblog.SUMMARY_REPORT, reportName, collection)
  liblog.MakeGoogleDir(self.entConfig, os.path.dirname(html_file))
  new_html_file = tempfile.mktemp('.report')
  new_valid_file = tempfile.mktemp('.report_valid')
  args = []
  args.append(commands.mkarg(self.entConfig.GetEntHome()))
  args.append(commands.mkarg(collection))
  args.append(commands.mkarg(reportDate))
  args.append(withResults)
  args.append(topCount)
  args.append(commands.mkarg(diagnosticTerms))
  args.append(commands.mkarg(html_file))
  args.append(commands.mkarg(valid_file))
  args.append(commands.mkarg(new_html_file))
  args.append(commands.mkarg(new_valid_file))
  cmd = ('. %s && cd %s/enterprise/legacy/logs && '
         'alarm %s nice -n 15 ./log_report.py %s' %
         (self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
          self.cfg.getGlobalParam('MAIN_GOOGLE3_DIR'),
          COMMAND_TIMEOUT_PERIOD, string.join(args, ' ')))
  logging.info('doLogReport(): CMD = %s' % cmd)
  returnCode = E.system(cmd)
  self.handleResult(jobName, jobToken, returnCode, liblog.SUMMARY_REPORT,
                    collection, reportName, update,
                    html_file, valid_file, new_html_file, new_valid_file)
def babysit(self):
  logging.info(' --babysit--')
  if self.IsFederationLicensed() and self.corpus is not None:
    (status, message) = self.corpus.Status()
    if status:
      self.corpus.Start()
  return 1
def deleteCollection(self, collection):
  """Delete all reports and logs for a particular collection."""
  self.logreplock.acquire()
  try:
    for reportType in [liblog.RAW_REPORT, liblog.SUMMARY_REPORT]:
      reports = self.getLogReports(collection, reportType)
      for report in reports:
        # stop running job if report is being (re)generated.
        if report.completeState != COMPLETE:
          self.stopRunningJob(self.jobName(report))
        # delete data files if any.
        (html_file, valid_file) = liblog.get_report_filenames(
            self.entConfig, reportType, report.reportName, collection)
        self.RemoveReportFiles(html_file, valid_file)
      self.reportCount[reportType] -= len(reports)
      logging.info('Delete total %d reports of type %s for collection %s.' %
                   (len(reports), reportType, collection))
      listfile = liblog.get_report_list_filename(self.entConfig,
                                                 reportType, collection)
      (err, out) = E.run_fileutil_command(self.entConfig,
                                          'rm -f %s' % listfile)
      if err:
        logging.error('Cannot remove list file %s.' % listfile)
    report_collection_dir = liblog.get_report_collection_dir(self.entConfig,
                                                             collection)
    (err, out) = E.run_fileutil_command(self.entConfig,
                                        'rmdir %s' % report_collection_dir)
    if err:
      logging.error('Cannot delete unused directory %s' %
                    report_collection_dir)
  finally:
    self.logreplock.release()
def parse_resegmentation_filename(filename):
  logging.info('processing: %s', filename)
  id1, id2, x, y, z = [
      int(t) for t in re.search(r'(\d+)-(\d+)_at_(\d+)_(\d+)_(\d+)',
                                filename).groups()
  ]
  return id1, id2, x, y, z
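# Usage sketch for parse_resegmentation_filename: the regex expects a name
# embedding two segment ids and a coordinate triple. The filename below is
# illustrative only.
def _example_parse_resegmentation_filename():
  id1, id2, x, y, z = parse_resegmentation_filename(
      'points_123-456_at_10_20_30.npz')
  assert (id1, id2, x, y, z) == (123, 456, 10, 20, 30)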
def kill(self, server_name):
  logging.info("Killing server %s" % server_name)
  srvrs = self.cfg.globalParams.GetServerManager().Set(server_name).Servers()
  for srvr in srvrs:
    self.cfg.globalParams.WriteConfigManagerServerKillRequest(srvr.host(),
                                                              srvr.port())
  return true
def start_crawlmanager_batch(self):
  logging.info("Sending start_batch_crawl command to urlmanager")
  if self.send_urlmanager_command('x start-batch'):
    logging.error("Error sending start_batch_crawl command to urlmanager")
    return 0  # error
  return 1  # success
def DeactivateSessionManager(ver, testver):
  """Removes the session manager conf file from localbabysitter."""
  lb_util = localbabysitter_util.LocalBabysitterUtil(ver)
  lb_util.KillLBService('sessionmanager', SESSIONMANAGER_BIN)
  lb_util.ForceLocalBabysitterConfigReload()
  logging.info("Stopped Session Manager successfully")
def start(self):
  logging.info(" -- starting web service -- ")
  if self.local_machine_is_master:
    logging.info("(%s) I am the master restarting loop_AdminConsole.py" %
                 self.master_machine)
    self._start(op="start")
  return 1
def CollectLogs(all_machines, gws_log_dir, log_collect_dir):
  # We only run this on a oneway or the master node of a cluster.
  master = find_master.FindMaster(2100, all_machines)
  crt_machine = E.getCrtHostName()
  if len(all_machines) != 1 and (len(master) != 1 or master[0] != crt_machine):
    logging.info('Not a oneway or cluster master node. Return!')
    return
  lockfile = '%s/lock' % log_collect_dir
  # waiting up to 5 minutes for the lock.
  lock = E.acquire_lock(lockfile, 30, breakLockAfterGracePeriod=0)
  if lock is None:
    logging.info('Cannot grab the lock. Return!')
    return
  try:
    for machine in all_machines:
      src_pattern = '%s/partnerlog.*' % gws_log_dir
      dest_dir = '%s/%s' % (log_collect_dir, machine)
      # If it's a oneway or the master node, we make a symlink to
      # gws_log_dir instead of rsyncing to the log_collect directory.
      if machine == crt_machine:
        # To stay backward compatible, we need to remove the old dest_dir
        # if it's an existing directory from a previous version, because
        # previous versions created a dir and rsynced files even on the
        # master node and one-ways.
        if os.path.exists(dest_dir) and not os.path.islink(dest_dir):
          if (not E.rm(master, '%s/*' % dest_dir) or
              not E.rmdir(master, dest_dir)):
            logging.error('Directory %s exists and cannot be cleaned.',
                          dest_dir)
            continue
          logging.info('Cleaned existing directory %s.', dest_dir)
        if E.ln(master, gws_log_dir, dest_dir):
          logging.info('Symlink %s to directory %s:%s for logs' %
                       (dest_dir, machine, gws_log_dir))
        else:
          logging.error('Cannot make a symlink from %s to %s' %
                        (dest_dir, gws_log_dir))
        continue
      # For non-master nodes on a cluster, we rsync the files to the
      # master node.
      logging.info('Collecting logs from %s:%s into %s' %
                   (machine, src_pattern, dest_dir))
      # make log directories if needed
      liblog.MakeDir(dest_dir)
      # rsync all files from one remote machine in one command.
      rsync_cmd = ('rsync --timeout=60 --size-only -vau '
                   ' -e ssh %s:%s %s/' % (machine, src_pattern, dest_dir))
      # rsync the logs
      (status, output) = liblog.DoCommand(rsync_cmd)
      if status != 0:
        logging.error('Failed to collect logs from %s: %s' %
                      (machine, output))
  finally:
    lock.close()
    os.unlink(lockfile)
def SyncOpLogs(all_machines, log_dir):
  """Syncs the AdminRunner.OPERATOR.* logs to all machines."""
  # We have to run this only on the master.
  master = find_master.FindMaster(2100, all_machines)
  # The name of this machine
  crt_machine = E.getCrtHostName()
  if len(master) == 1 and master[0] == crt_machine:
    for machine in all_machines:
      if machine != crt_machine:
        src_dir = '%s/AdminRunner.OPERATOR.*' % log_dir
        dest_dir = '%s:/%s' % (machine, log_dir)
        logging.info('Collecting operator logs from %s into %s' %
                     (src_dir, dest_dir))
        rsync_cmd = ('rsync --timeout=20 --size-only -vau '
                     ' -e ssh %s %s/' % (src_dir, dest_dir))
        # rsync the logs
        lockfile = '%s/syncops_lock' % log_dir
        lock = E.acquire_lock(lockfile, 1, breakLockAfterGracePeriod=0)
        if lock is None:
          logging.info('Cannot grab the lock. Return!')
          return
        try:
          (status, output) = liblog.DoCommand(rsync_cmd)
          if status != 0:
            logging.error('Failed to collect logs from %s: %s' %
                          (machine, output))
        finally:
          lock.close()
          os.unlink(lockfile)
def InitDeadNodes(ver, testver, logging):
  """Reads local ENT_CONFIG_FILE and updates chubby with dead nodes."""
  # Empty the dead node directory
  basecmd = GetLSClientCmd(ver, testver)
  dir = GetDeadNodeDir(ver)
  ls_cmd = '%s ls %s' % (basecmd, dir)
  cmd = os.popen(ls_cmd)
  for node in cmd.readlines():
    ret = RemDeadNode(ver, testver, node.strip())
    if ret:
      logging.warn('Cleaning up deadnodes results in code:%s for %s' %
                   (ret, node))
  ret_close = cmd.close()
  if ret_close:
    logging.info('No dead nodes found.')
  ret = 0
  allnodes = GetNodes()
  activenodes = GetEntConfigVar('MACHINES')
  # Dead nodes are the configured nodes that are not active.
  deadnodes = [node for node in allnodes if node not in activenodes]
  for node in deadnodes:
    logging.info('Adding dead node %s' % node)
    ret = AddDeadNode(ver, testver, node)
    if ret:
      break
  return ret
def DispatchCommand(self, command):
  reqtype = command.GetRequest().GetType()
  cmd_str = command.GetCmd()
  filename = command.GetRequest().GetFilename()
  logging.info('+++ Starting %s %s: %s' % (filename, reqtype, cmd_str))
  self._start_handler(command.GetRequest())
  popen3object = popen2.Popen3(cmd_str, 'true')  # true: "capture stderr"
  popen3object.tochild.close()  # close child's stdin

  def SetNonblocking(pipe):
    import fcntl
    fd = pipe.fileno()
    fl = fcntl.fcntl(fd, fcntl.F_GETFL)
    try:
      fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NDELAY)
    except AttributeError:
      fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.FNDELAY)

  SetNonblocking(popen3object.fromchild)
  SetNonblocking(popen3object.childerr)
  self._iopipes[popen3object.fromchild] = ("stdout", command, popen3object)
  self._iopipes[popen3object.childerr] = ("stderr", command, popen3object)
  self._running_commands[command] = popen3object
def kill_service(services, machines):
  """Kill all processes associated with specified services on the
  specified machines.

  E.execute() sends the commands concurrently when there is more than
  one node.

  Args:
    services: list of services to kill. 'adminconsole' and 'adminrunner'
      are currently supported.
    machines: list of hostnames
  """
  # Map of services to the command that kills the service
  find_service_pid_cmd = {
      'adminconsole': ("ps -e -o pid,args --width 100 | "
                       "grep loop_AdminConsole.py | grep -v grep | "
                       "awk '{print $1}' ; "
                       "%s" % python_kill.GetServicesListeningOn(['8000'])),
      'adminrunner': ("ps -e -o pid,args --width 100 | "
                      "grep loop_AdminRunner.py | grep -v grep | "
                      "awk '{print $1}' ; "
                      "%s" % python_kill.GetServicesListeningOn(['2100'])),
  }
  for service in services:
    if service not in find_service_pid_cmd:
      logging.error('kill_service: Unrecognised service "%s"' % service)
    else:
      logging.info('kill_service: Killing service "%s" on %d nodes...' %
                   (service, len(machines)))
      kill_cmd = ('sh -c "(kill `%s`; sleep 3; kill -9 `%s`; true)" '
                  '> /dev/null 2>&1' %
                  (find_service_pid_cmd[service],
                   find_service_pid_cmd[service]))
      E.execute(machines, kill_cmd, [], alarm=1, verbose=0)
      logging.info('kill_service: Done killing service "%s"' % service)
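# Hypothetical usage sketch for kill_service: stop the adminconsole and
# adminrunner loops on every node of a two-node cluster (hostnames are
# illustrative).
def _example_kill_service():
  kill_service(['adminconsole', 'adminrunner'], ['ent1', 'ent2'])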
def SetInitState(cfg, state):
  """Sets the system's initialization state.

  On a oneway, it is stored in C.ENT_SYSTEM_INIT_STATE. On clusters, it is
  stored in the chubby file /ls/ent<version>/ENT_SYSTEM_INIT_STATE.

  @param cfg - of type configurator.
  @param state - string
  """
  # oneway?
  if 1 == len(core_utils.GetNodes()):
    cfg.setGlobalParam(C.ENT_SYSTEM_INIT_STATE, state)
    return
  tmpfile = E.mktemp('/export/hda3/tmp')
  try:
    f = open(tmpfile, 'w')
    f.write(state)
    f.close()
  except IOError:
    logging.fatal('Cannot write to temp file %s' % tmpfile)
    return
  version = cfg.getGlobalParam('VERSION')
  lockserv_cmd_prefix = core_utils.GetLSClientCmd(version, is_test(version))
  chubby_root_dir = '/ls/%s' % core_utils.GetCellName(version)
  write_cmd = '%s cp %s %s/%s' % (lockserv_cmd_prefix, tmpfile,
                                  chubby_root_dir, 'ENT_SYSTEM_INIT_STATE')
  logging.info('setting system init state to: %s', state)
  E.exe_or_fail(write_cmd)
  E.exe('rm -rf %s' % tmpfile)
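# Hypothetical usage sketch for SetInitState, assuming a configurator
# object is at hand; the state string 'FRESH' is illustrative only. On a
# cluster this writes through lockserv, on a oneway it just updates the
# global param.
def _example_set_init_state(cfg):
  SetInitState(cfg, 'FRESH')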
def start_core(ver, home, nodes, ignore=0, testver=None, gfs=1):
  """Starts core services.

  Arguments:
    ver: '4.6.5'
    home: '/export/hda3/4.6.5'
    nodes: ['ent1', 'ent2']
    ignore: 1 - ignore errors; 0 - otherwise.
    testver: 1 - if this is a test version; 0 - otherwise.
    gfs: 1 - activate gfs. 0 - otherwise.
  Returns:
    1 - successful. 0 - otherwise.
  """
  start = time.time()
  # first start chubby and chubby dns on all nodes
  if gfs:
    services = 'core services'
  else:
    services = 'all core services except GFS'
  logging.info('ACTIVATE: Starting %s.' % services)
  ret, out = core_op_out(ver, home, 'activate', nodes, ignore=ignore,
                         testver=testver, gfs=gfs)
  if ret:
    logging.error('ACTIVATE: Cannot activate %s: %s' % (services, out))
    return 0
  end = time.time()
  diff = end - start
  logging.info('ACTIVATE: STAT: Start %s took %s seconds' % (services, diff))
  return 1
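# Hypothetical usage sketch for start_core, using the argument values shown
# in the docstring: bring up core services (including GFS) on two nodes of
# a test install.
def _example_start_core():
  if not start_core('4.6.5', '/export/hda3/4.6.5', ['ent1', 'ent2'],
                    ignore=0, testver=1, gfs=1):
    logging.error('core services failed to start')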
def main(argv):
  if len(argv) != 1:
    sys.exit(__doc__)
  config = entconfig.EntConfig(argv[0])
  if not config.Load():
    sys.exit(__doc__)
  # Collect syslogs only if in ACTIVE or SERVE state.
  state = install_utilities.install_state(config.var('VERSION'))
  if not state in ['ACTIVE', 'SERVE']:
    sys.exit(0)
  # Collect syslogs only from the master node.
  if not isMaster(config):
    logging.fatal('Not a oneway or cluster master node. Return!')
  pywrapbase.InitGoogleScript('', ['foo',
                                   '--gfs_aliases=%s' %
                                   config.var("GFS_ALIASES"),
                                   '--bnsresolver_use_svelte=false',
                                   '--logtostderr'], 0)
  gfile.Init()
  first_date, last_date, printable_date, file_date = \
      liblog.ParseDateRange('all', [])
  apache_main_dir = liblog.get_apache_dir(config)
  checkpoint_dir = liblog.get_syslog_checkpoint_dir(config)
  liblog.MakeGoogleDir(config, checkpoint_dir)
  if (config.var('SYSLOG_SERVER') is None or
      config.var('ENT_SYSLOG_GWS_FAC') is None):
    logging.fatal('SYSLOG logging is disabled')
  lockfile = '%s/syslog_lock' % config.var('LOGDIR')
  lock = E.acquire_lock(lockfile, 1, breakLockAfterGracePeriod=0)
  if not lock:
    return
  try:
    logger = syslog_client.SyslogClient(config.var('SYSLOG_SERVER'),
                                        config.var('EXTERNAL_WEB_IP'))
    logging.info("syslog-server = %s" % config.var('SYSLOG_SERVER'))
    for collection in os.listdir(apache_main_dir):
      apache_dir = '%s/%s' % (apache_main_dir, collection)
      checkpoint_file = "%s/%s" % (checkpoint_dir, collection)
      apache_logs = liblog.FindLogFiles(apache_dir, first_date, last_date)
      logging.info('syslog handles collection %s' % collection)
      if not SyslogLogs(apache_logs, apache_dir, checkpoint_file, logger,
                        config):
        sys.exit('Updating failed')
    logger.close()
  finally:
    lock.close()
    os.unlink(lockfile)
  sys.exit(0)