def start_crawlmanager_batch(self):
  logging.info("Sending start_batch_crawl command to urlmanager")
  if self.send_urlmanager_command('x start-batch'):
    logging.error("Error sending start_batch_crawl command to urlmanager")
    return 0  # error
  return 1  # success
def distribute(machines, files, alarm, verbose=1, retry=0, enthome=None):
  """ This will distribute files to all machines in the machines parameter.

  @param machines - on which machine(s) to execute the command
  @param files - which file(s) to distribute (from the current machine)
  @param alarm - wrap command in an alarm (for short files)
  @param retry - number of times we should retry distributing files in
                 case of error.
  @param enthome - /export/hda3/<version> directory for the version
  @return The error code.
  """
  distCmd = composeCmd(machines, files, alarm, true, verbose, false, enthome)
  # Probably we only have to distribute to the local machine - don't bother
  if not distCmd:
    return ERR_OK
  ret = system(distCmd)
  # There may be some intermittent problems, so try distribution up to
  # 'retry' times.
  for i in range(retry):
    if ret == 0:
      break
    logging.error('Failed to distribute files. Retrying in 10 seconds.')
    time.sleep(10)
    ret = system(distCmd)
  return ret
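# A minimal usage sketch for distribute() (machine names and the config path
# below are hypothetical; ERR_OK and logging come from this module):
def _distribute_config_example():
  enthome = '/export/hda3/4.6.5'
  ret = distribute(['ent1', 'ent2'], '%s/conf/google_config' % enthome,
                   alarm=1, verbose=1, retry=3, enthome=enthome)
  if ret != ERR_OK:
    logging.error('Failed to distribute config, error code %s' % ret)
  return ret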
def start_core(ver, home, nodes, ignore=0, testver=None, gfs=1):
  """Starts core services.

  Arguments:
    ver: '4.6.5'
    home: '/export/hda3/4.6.5'
    nodes: ['ent1', 'ent2']
    ignore: 1 - ignore errors; 0 - otherwise.
    testver: 1 - if this is a test version; 0 - otherwise.
    gfs: 1 - activate gfs. 0 - otherwise.
  Returns:
    1 - successful. 0 - otherwise.
  """
  start = time.time()
  # first start chubby and chubby dns on all nodes
  if gfs:
    services = 'core services'
  else:
    services = 'all core services except GFS'
  logging.info('ACTIVATE: Starting %s.' % services)
  ret, out = core_op_out(ver, home, 'activate', nodes, ignore=ignore,
                         testver=testver, gfs=gfs)
  if ret:
    logging.error('ACTIVATE: Cannot activate %s: %s' % (services, out))
    return 0
  end = time.time()
  diff = end - start
  logging.info('ACTIVATE: STAT: Start %s took %s seconds' % (services, diff))
  return 1
def distributedeletedbfile(self, filename): # Remove local database files from all the nodes. machines = self.cfg.getGlobalParam("MACHINES") if not E.rm(machines, filename): logging.error("Failed to delete %s on %s" % (filename, machines)) return "1" return "0"
def error(self, str): """Logs error message to both google log and import log. Warning: str is not necessarily a string. """ logging.error(str) self.log = "%sERROR: %s\n" % (self.log, str) self.errors.append("%s" % str)
def _execute(self):
  """ The actual execution --- executes a composed string that calls the
  corresponding functions """
  if self.error:
    return
  try:
    # build up a list of arguments to use
    # we don't pass self.prefixes because methods aren't prepared for them.
    # (prefixes can be found in self.prefixes)
    args = []
    args.extend(self.params)
    args.extend(self.lines)
    if self.expected_bytes >= 0:
      args.append(self.bytes)
    # call the command
    logging.debug("calling %s %s %s" % (
      str(self.prefixes), self.command, str(args)))
    method = self.accepted_commands[self.command].method
    self.data = apply(method, args, {})
  except Exception, e:
    (t, v, tb) = sys.exc_info()
    exc_msg = string.join(traceback.format_exception(t, v, tb))
    logging.error(exc_msg)
    self.error = str(e)
    self.data = None
def Connect(self, appliance_id):
  """Connect to a specific slave appliance.

  Args:
    appliance_id: the appliance ID of the slave to connect to.

  Returns:
    (0, message) on success and (non_zero_error, message) on failure.
  """
  try:
    secrets_file = ('%s/etc/ppp/chap-secrets' %
                    fed_stunnel_config.STUNNEL_CLIENT_CHROOT)
    (status_chap_secrets, message) = (
        self.__config.GetChapSecretsInFile(appliance_id, secrets_file))
    if status_chap_secrets:
      logging.error('Exception in getting the chap secret file created')
      return (status_chap_secrets, message)
    file_name = self.__stunnel_config.GetStunnelConfigFileName(appliance_id)
    chroot_file_name = ('%s%s') % (fed_stunnel_config.STUNNEL_CLIENT_CHROOT,
                                   file_name)
    (status_configure, message) = (
        self.__stunnel_config.GetStunnelConfigurationInFile(appliance_id,
                                                            chroot_file_name))
  except fed_network_config.FederationConfigException, ex:
    logging.error('Exception in getting configuration %s' % ex.message)
    return (-1, ex.message)
def SyncOneboxLog(config):
  """Syncs the local onebox log file with the GFS onebox log file, ONLY on
  clusters. As of 4.6.4, this is called from scripts/periodic_script.py and
  from onebox_handler.py, when the user does View Log AND the machine is a
  cluster.
  """
  onebox_port = servertype.GetPortBase('oneboxenterprise')
  onebox_node = config.SERVERS[onebox_port]
  crt_machine = E.getCrtHostName()
  ent_config_type = config.var('ENT_CONFIG_TYPE')
  # If the onebox server is not running, there is no need to sync.
  if ent_config_type != 'CLUSTER' or crt_machine != onebox_node[0]:
    return
  tmp_dir = config.var('TMPDIR')
  gfs_cell = config.var('GFS_CELL')
  local_log_name = os.path.join(tmp_dir, config.var('ENTERPRISE_ONEBOX_LOG'))
  gfs_log_name = os.path.join(os.sep, 'gfs', gfs_cell,
                              config.var('ENTERPRISE_ONEBOX_LOG'))
  equalize_command = 'equalize %s %s' % (local_log_name, gfs_log_name)
  # fileutil equalize copies only the difference of the log files.
  err, out = E.run_fileutil_command(config, equalize_command)
  if not err:
    return
  # The files didn't match in the beginning; possibly a new log file has
  # been created. Copy the whole log file in that case.
  copy_command = 'cp -f %s %s' % (local_log_name, gfs_log_name)
  err, out = E.run_fileutil_command(config, copy_command)
  if err:
    logging.error('Error while syncing onebox logs.')
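# The equalize-then-copy pattern above generalizes to any local/GFS file
# pair; a hedged sketch (the helper name is illustrative, not part of this
# module):
def _sync_file(config, src, dest):
  # fileutil equalize copies only the difference between the two files.
  err, out = E.run_fileutil_command(config, 'equalize %s %s' % (src, dest))
  if not err:
    return 0
  # Fall back to a full copy, e.g. when dest does not exist yet.
  err, out = E.run_fileutil_command(config, 'cp -f %s %s' % (src, dest))
  return err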
def drain_urlmanagers(self): """ We need to do this before advancing the epoch -- we can do it multiple times """ urlmanagers = self.cfg.globalParams.GetServerHostPorts("urlmanager") num_shards = self.cfg.globalParams.GetNumShards('urlmanager') epoch = self.cfg.getGlobalParam('RT_EPOCH') for (host, port) in urlmanagers: # We don't do it here directly because of the timeout cmd = ". %s; cd %s/local/google3/enterprise/legacy/util && "\ "./port_talker.py %s %d 'd DumpingStatusTable' %d" % ( self.cfg.getGlobalParam('ENTERPRISE_BASHRC'), self.cfg.entHome, host, port, 300) # 5 min timeout err = E.execute([E.getCrtHostName()], cmd, None, 0) if E.ERR_OK != err: logging.error("Error draining urlmanagers [%s]" % err) return 1 # Make sure that the file is out shard_num = servertype.GetPortShard(port) file = "%surlmanager_out_table_%02d_of_%02d_epoch%010d" % ( self.cfg.getGlobalParam('NAMESPACE_PREFIX'), shard_num, num_shards, epoch) err, out = E.run_fileutil_command(self.cfg.globalParams, "ls %s" % file) if E.ERR_OK != err: logging.error("The status table file [%s] is not there" % file) return 1 return 0
def LicenseDictToProtoBuf(license_dict):
  """Convert a license dictionary to a protocol buffer.

  The license is a python dictionary in google_config.

  Args:
    license_dict: license dictionary
  Returns:
    License protocol buffer (from //enterprise/license/license.proto)
  """
  pb = License()
  # key "ENT_LICENSE_FOO_BAR" has field "foo_bar" in the PB
  for key in license_dict:
    # "ENT_LICENSE_FOO_BAR" -> "set_foo_bar", with the exception of
    # "ENT_BOX_ID" -> "set_box_id"
    if key == 'ENT_BOX_ID':
      setter_name = 'set_box_id'
    else:
      setter_name = 'set_' + '_'.join(key.split('_')[2:]).lower()
    # now we have the name of the PB setter method ("set_foo_bar")
    # and can call it
    try:
      func = getattr(pb, setter_name)
    except AttributeError:
      logging.error('Undefined license field %s', setter_name)
    else:
      func(license_dict[key])
  return pb
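# Standalone restatement of the key-to-setter mapping used above, runnable
# without the License proto (the license key below is illustrative):
def _license_setter_name(key):
  # "ENT_LICENSE_FOO_BAR" -> "set_foo_bar"; "ENT_BOX_ID" -> "set_box_id"
  if key == 'ENT_BOX_ID':
    return 'set_box_id'
  return 'set_' + '_'.join(key.split('_')[2:]).lower()

assert _license_setter_name('ENT_LICENSE_MAX_PAGES') == 'set_max_pages'
assert _license_setter_name('ENT_BOX_ID') == 'set_box_id'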
def delete(self, name): if (QueryExpansionBase.applying_changes or QueryExpansionBase.uploading_dict): logging.error("Delete query exp ignored for %s.", name) # Note: In the superclass, 3 is for Creation failed. return 3 return collectionbase_handler.CollectionBaseHandler.delete(self, name)
def ReadRequestFile(self, filename):
  base_filename = os.path.basename(filename)
  # TODO: verify error handling if this throws exception (IOError)
  scope = configutil.ExecFile(filename)
  ## TODO the following should go away once we know why the ExecFile fails
  if not scope.has_key(autorunner.REQUEST):
    logging.error("Request file %s has no %s key" % (
      filename, autorunner.REQUEST))
    logging.error("%s" % commands.getstatusoutput("cat %s" % filename)[1])
  #
  # We create the request object based on the TYPE in scope and set the data
  # from the scope.
  # When registering the command info for the requests that we can process,
  # the config manager can specify a class for each TYPE (a child of
  # Request). If not, we instantiate a standard Request.
  #
  req_class = None
  if self._dispatcher != None:
    req_class = self._dispatcher.GetRequestClass(
        scope[autorunner.REQUEST].get(autorunner.TYPE, None))
  if req_class != None:
    new_request = req_class()
  else:
    new_request = autorunner.Request()
  # TODO: verify error handling if this value is not in scope (KeyError)
  new_request.SetData(scope[autorunner.REQUEST])
  new_request.SetFilename(base_filename)
  return new_request
def GetSystemStatusData(systemstatusDict, systemstatusVar):
  """ use the "borgmon getsystemstatus" command to get the value of
  a variable in a system status dictionary.

  Arguments:
    systemstatusDict: 'SystemStatusValues'
    systemstatusVar: 'Disks'
  Returns:
    the value of the variable, or 0 on error.
  """
  getsystemstatusCmd = 'getsystemstatus'
  response = ReadDataFromCache(getsystemstatusCmd)
  if not response:
    response = GetHttpResponse('borgmon %s' % getsystemstatusCmd)
    WriteDataToCache(getsystemstatusCmd, response)
  # remove the last two lines (ACKgoogle trailer)
  lines = string.split(response, '\n')
  if len(lines) < 2:
    return 0
  if 'NACKgoogle' == string.strip(lines[-2]):
    # some error must have occurred..
    return 0
  response = string.join(lines[:-2], '\n')
  dict = {}
  exec(response, dict)
  if dict.has_key('__builtins__'):
    del dict['__builtins__']
  try:
    rval = dict[systemstatusDict][systemstatusVar]
  except Exception, e:
    logging.error('Error in GetSystemStatusData: %s' % e)
    rval = 0
  return rval
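# Shape of the getsystemstatus response assumed by the parser above (names
# and values are illustrative; the trailer is stripped before exec'ing):
#
#   SystemStatusValues = {'Disks': 0, 'Temperatures': 0, 'Machines': 0}
#
#   ACKgoogle
#
# A 'NACKgoogle' on the second-to-last line signals an error instead.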
def SyncOpLogs(all_machines, log_dir): """ This will sync the AdminRunner.OPERATOR.* logs to all machines """ # We have to run this only on master master = find_master.FindMaster(2100, all_machines) # The name of this machine crt_machine = E.getCrtHostName() if len(master) == 1 and master[0] == crt_machine: for machine in all_machines: if machine != crt_machine: src_dir = '%s/AdminRunner.OPERATOR.*' % (log_dir) dest_dir = '%s:/%s' % (machine, log_dir) logging.info('Collecting operator logs from %s into %s' % ( src_dir, dest_dir)) rsync_cmd = 'rsync --timeout=20 --size-only -vau ' \ ' -e ssh %s %s/' % (src_dir, dest_dir) # rsync the logs lockfile = '%s/syncops_lock' % log_dir lock = E.acquire_lock(lockfile, 1, breakLockAfterGracePeriod = 0) if lock == None: logging.info('Cannot grab the lock. Return!') return try: (status, output) = liblog.DoCommand(rsync_cmd) if status != 0: logging.error('Failed to collect logs from %s: %s' % ( machine, output)) finally: lock.close() os.unlink(lockfile)
def main(argv): """ runs PeriodicScript() function in a loop""" # global MACHINES # global MAIN_GOOGLE3_DIR if len(argv) != 1: sys.exit(__doc__) try: logging.info("Running periodic_script (pid = %d)..." % os.getpid()) config = entconfig.EntConfig(argv[0]) # user config if not config.Load(): sys.exit("Cannot load the config file %s" % argv[0]) PeriodicScript(config) svs_utilities.CheckSvsAlive(["localhost"]) monitorSnmp() EnableGFS(config) EnableNamed() DnsConfig(config) admin_runner_utils.SyncOneboxLog(config) WarmIndex(config) logging.info("Finished periodic_script.") except: # collect the exception traceback so we know what went wrong (t, v, tb) = sys.exc_info() logging.error( "\nPeriodic script: Fatal Error:\n" + "=======================\n" + string.join(traceback.format_exception(t, v, tb)))
def gencert(self, hostname, orgunit, organization, locality, state, country, emailaddr): """ Generates a self-signed SSL certificate returns: 0 on success, or 1 on failure """ self.updatelock.acquire() try: retcode, result = E.getstatusoutput( "secure_script_wrapper -p2 %s gencert %s %s %s %s %s %s %s %s" % ( self.sslWrapperPath, self.cfg.getGlobalParam("ENTERPRISE_HOME"), # orgunit always starts with an X because it can be empty commands.mkarg(hostname), commands.mkarg(orgunit[1:]), commands.mkarg(organization), commands.mkarg(locality), commands.mkarg(state), commands.mkarg(country), commands.mkarg(emailaddr), ) ) finally: self.updatelock.release() if retcode != 0: logging.error("Couldn't generate certificate for host %s: %s" % (hostname, result)) return retcode != 0
def setcert(self, certBody): """ Takes a cert file body as the input, and saves it as the staging certificate returns 0 on success, or 1 on failure """ retval = 0 self.updatelock.acquire() try: try: open(ssl_cert.STAGINGCERT_FILENAME % self.cfg.getGlobalParam("ENTERPRISE_HOME"), "w").write(certBody) except IOError: retval = 1 logging.error( "Couldn't save certificate to [%s]" % (ssl_cert.STAGINGCERT_FILENAME % self.cfg.getGlobalParam("ENTERPRISE_HOME")) ) if retval == 0: verifycmd = "secure_script_wrapper -p2 %s verifystagingcert %s" % ( self.sslWrapperPath, self.cfg.getGlobalParam("ENTERPRISE_HOME"), ) outputList = [] verifycode = E.execute(["localhost"], verifycmd, outputList, 60) if verifycode != 0: retval = 1 E.rm(["localhost"], ssl_cert.STAGINGCERT_FILENAME % self.cfg.getGlobalParam("ENTERPRISE_HOME")) logging.error("Couldn't verify certificate [%s]; error code: %d" % (str(outputList), verifycode)) finally: self.updatelock.release() return "%d" % retval
def deleteCollection(self, collection): """Delete all reports and logs for a particular collection.""" self.logreplock.acquire() try: for reportType in [liblog.RAW_REPORT, liblog.SUMMARY_REPORT]: reports = self.getLogReports(collection, reportType) for report in reports: # stop running job if report is being (re)generated. if report.completeState != COMPLETE: self.stopRunningJob(self.jobName(report)) # delete data files if any. (html_file, valid_file) = liblog.get_report_filenames(self.entConfig, reportType, report.reportName, collection) self.RemoveReportFiles(html_file, valid_file) self.reportCount[reportType] -= len(reports) logging.info('Delete total %d reports of type %s for collection %s.' % ( len(reports), reportType, collection)) listfile = liblog.get_report_list_filename(self.entConfig, reportType, collection) (err, out) = E.run_fileutil_command(self.entConfig, 'rm -f %s' % listfile) if err: logging.error('Cannot remove list file %s.' % listfile) report_collection_dir = liblog.get_report_collection_dir(self.entConfig, collection) (err, out) = E.run_fileutil_command(self.entConfig, 'rmdir %s' % report_collection_dir) if err: logging.error('Cannot delete unused directory %s' % \ report_collection_dir) finally: self.logreplock.release()
def remove_lock(lockfile, owner=''):
  """Subroutine to remove a lock file, provided we own it or it does not
  belong to a live process."""
  try:
    errmsg = ''
    fileowner = string.strip(open(lockfile, 'r').read())  # may raise IOError
    if fileowner != owner:
      # We don't own it. Is the owner a pid?
      # (Note: we assume any number of digits is a pid.)
      if re.search(r'^\d+$', fileowner):
        if os.path.exists('/proc/%s' % fileowner):
          # It's owned by an existing process. Get its command line for msg.
          cmdline = open('/proc/%s/cmdline' % fileowner).read()
          lockage = get_file_age(lockfile)
          errmsg = ("Unable to acquire %s:\n"
                    "Owned for %s seconds by pid %s running: \n"
                    "%s" % (lockfile, lockage, fileowner, cmdline))
    if errmsg:
      logging.error(errmsg)
      return 1
  except IOError:
    pass  # the lock file or /proc entry doesn't exist anymore
  try:
    os.unlink(lockfile)
  except OSError:
    pass
  return 0
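# Companion sketch of the acquire side that remove_lock() expects: the lock
# file simply holds the owner string, typically a pid (the helper name is
# hypothetical).
def _try_acquire_lock(lockfile):
  if os.path.exists(lockfile) and remove_lock(lockfile):
    return 0  # still held by a live process
  open(lockfile, 'w').write('%d' % os.getpid())
  return 1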
def _RunServeCmd(cfg, version, cmd, allnodes=0): """Run serve_service command. cmd: 'stop', 'start', 'activate', 'deactivate' allnodes: 1 to run command on all nodes """ serve_service_cmd = ( '/export/hda3/%s/local/google3/enterprise/legacy/scripts/' 'serve_service.py %s %s' % (version, cfg.getGlobalParam('ENTERPRISE_HOME'), cmd)) logging.info('Running: %s' % serve_service_cmd) if allnodes: machines = cfg.getGlobalParam(C.MACHINES) else: machines = [E.getCrtHostName()] if E.execute(machines, SECURE_WRAPPER_COMMAND % ( \ cfg.getGlobalParam('ENTERPRISE_HOME'), '-p2', serve_service_cmd), None, 0) != E.ERR_OK: logging.error('%s: failed' % serve_service_cmd) return 1 logging.info('%s: completed' % serve_service_cmd) return 0
def recrawl_url_patterns(self, url_patterns):
  ret = 0
  errors = self.cfg.globalParams.set_file_var_content('RECRAWL_URL_PATTERNS',
                                                      url_patterns, 1)
  if errors != validatorlib.VALID_OK:
    return 1
  host_port = self.cfg.globalParams.GetServerHostPorts("supergsa_main")
  if len(host_port) != 1:
    logging.error("Expected exactly one supergsa_main backend, found: %s"
                  % host_port)
    return 2
  # Send a request to the supergsa_main binary and time out after 60 seconds.
  status, output = commands.getstatusoutput(
      "curl --max-time 60 -Ssi --data-binary @%s "
      "http://%s:%s/recrawlmatchingurls"
      % (self.cfg.getGlobalParam('RECRAWL_URL_PATTERNS'),
         host_port[0][0], host_port[0][1]))
  if status == 0 and output.startswith('HTTP/1.1 200'):
    logging.info("Recrawl request was successfully submitted.")
  else:
    logging.error("Recrawl request could not be submitted. "
                  "Reason (status/output):\n%s/%s" % (status, output))
    ret = 2
  return ret
def kill_service(services, machines): """Kill all processes associated with specified services on the specified machines. E.execute() sends the commands concurrently when there is more than one node. Args: services: list of services to kill. 'adminconsole' and 'adminrunner' are currently supported. machines: list of hostnames """ # Map of services to the command that kills the service find_service_pid_cmd = { 'adminconsole': ("ps -e -o pid,args --width 100 | " "grep loop_AdminConsole.py | grep -v grep | " "awk '{print $1}' ; " "%s" % python_kill.GetServicesListeningOn(['8000'])), 'adminrunner': ("ps -e -o pid,args --width 100 | " "grep loop_AdminRunner.py | grep -v grep | " "awk '{print $1}' ; " "%s" % python_kill.GetServicesListeningOn(['2100'])), } for service in services: if service not in find_service_pid_cmd: logging.error('kill_service: Unrecognised service "%s"' % service) else: logging.info('kill_service: Killing service "%s" on %d nodes...' % (service, len(machines))) kill_cmd = ('sh -c "(kill `%s`; sleep 3; kill -9 `%s`; true)" ' '> /dev/null 2>&1' % (find_service_pid_cmd[service], find_service_pid_cmd[service])) E.execute(machines, kill_cmd, [], alarm=1, verbose=0) logging.info('kill_service: Done killing service "%s"' % service)
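# Usage sketch for kill_service() (node names are hypothetical): stop both
# supported admin services everywhere, e.g. before changing install state.
def _kill_admin_services_example():
  kill_service(['adminrunner', 'adminconsole'], ['ent1', 'ent2', 'ent3'])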
def WritePidFile(pidfile):
  """ Write the pid of the current process to pidfile.

  If an exception happens, it may fail to write the pid to pidfile.

  Args:
    pidfile: 'pid_file_name'
  Returns:
    0 - successful; 1 - an exception occurred
  """
  if pidfile == None:
    return 0
  pid = 0
  try:
    pid = os.getpid()
    fd = open(pidfile, "w")
    fd.write('%s' % pid)
    fd.close()
  except (IOError, OSError):
    logging.error("Error: could not write pid %s to pidfile %s" %
                  (pid, pidfile))
    return 1
  return 0
def _StatusFileCmd(cmd, version, out=[], extra_arg='', unittestdir=None):
  """Perform a command on the RESET_STATE status file.

  On a cluster, runs lockserv <cmd> /ls/ent4-x-x/RESET_STATE.
  On a oneway, runs cmd on /export/hda3/4.x.x/RESET_STATE.
  cmd should be cat, setcontents, or rm.
  Return: 0 for success, 1 for error.
  Command output is returned in out.
  """
  if unittestdir != None or 1 == len(core_utils.GetNodes()):
    # unittest or oneway
    if unittestdir != None:
      file = '/%s/%s/RESET_STATE' % (unittestdir, version)
    else:
      file = '/export/hda3/%s/RESET_STATE' % version
    if cmd == 'cat':
      status = _ExecuteCommand('cat %s' % file, out=out)
    elif cmd == 'setcontents':
      status = _ExecuteCommand('echo "%s" > %s' % (extra_arg, file))
    elif cmd == 'rm':
      status = _ExecuteCommand('rm -f %s' % file)
    else:
      logging.error('StatusFileCmd: bad command %s' % cmd)
      return 1
    return status
  lockserv_cmd_prefix = core_utils.GetLSClientCmd(
      version, install_utilities.is_test(version))
  chubby_file = '/ls/%s/RESET_STATE' % core_utils.GetCellName(version)
  lockserv_cmd = '%s %s %s %s' % (
      lockserv_cmd_prefix, cmd, chubby_file, extra_arg)
  logging.info('Reset index: executing %s' % lockserv_cmd)
  status = _ExecuteCommand(lockserv_cmd)
  return status
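# Usage sketch for _StatusFileCmd() (the version string and file contents
# are hypothetical): read, set, then clear the RESET_STATE file through the
# oneway/cluster abstraction above.
def _reset_state_example():
  out = []
  if _StatusFileCmd('cat', '4.6.5', out=out) == 0:
    logging.info('RESET_STATE: %s' % ''.join(out))
  _StatusFileCmd('setcontents', '4.6.5', extra_arg='RESET IN PROGRESS')
  _StatusFileCmd('rm', '4.6.5')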
def update_vmanage_password(self, name, hashedPasswd, salt):
  """ Updates a password in the vmanager file.
  returns success status (boolean)
  """
  if name != 'admin' and name != 'google':
    return true
  passFile = self.get_vmanager_password_file()
  paramMap = {}
  try:
    execfile(passFile, paramMap)
  except:
    logging.error("Error reading vmanager passwords from %s" % passFile)
    return false
  # update PASSWD_MAP
  if paramMap.has_key("PASSWD_MAP"):
    passwd_map = paramMap["PASSWD_MAP"]
  else:
    passwd_map = {}
  passwd_map[name] = (hashedPasswd, salt)
  try:
    open(passFile, "w").write("PASSWD_MAP = %s" % repr(passwd_map))
  except IOError, e:
    logging.error("Error writing vmanager passwords to %s [%s]" % (
      passFile, e))
    return false
  return true
def do_core_op(op_name, ver, home, nodes, ignore=0, testver=None): """ Execute an ent_core operation on all nodes concurrently Arguments: op_name: 'clear_gfs' ver: '4.6.5' home: '/export/hda3/4.6.5' nodes: ['ent1', 'ent2'] ignore: 1 - ignore errors; 0 - otherwise. testver: 1 - if this is a test version; 0 - otherwise. Returns: 1 - successful. 0 - otherwise. """ start = time.time() logging.info('Do core operation: %s' % op_name) ret, out = core_op_out(ver, home, op_name, nodes, ignore=ignore, testver=testver) logging.info('Out:\n%s' % out) if ret: logging.error('core operation %s failed' % op_name) return 0 end = time.time() diff = end - start logging.info('core operation %s took %s seconds' % (op_name, diff)) return 1
def ListRequestFiles(self, dir=None):
  if dir == None:
    dir = self._request_dir
  if string.find(dir, ':') != -1:
    # we are looking at a remote dir
    (host, remote_dir) = string.split(dir, ':')
    cmd = 'ls -d %s/*' % remote_dir
    (status, sig, output) = prodlib.RunAlarmRemoteCmd(host, cmd, 30)
    if status:
      if string.find(output, 'No such file') != -1:
        return []
      else:
        logging.error('Error listing request directory: %s' % output)
        return []
    if string.find(output, 'No such file') != -1:
      return []
    files = string.split(output, '\n')
    request_files = []
    for file in files:
      request_files.append(host + ':' + file)
    return request_files
  else:
    # match the remote 'ls -d %s/*' pattern
    return glob.glob(dir + '/*')
def start(self): if self.IsFederationLicensed() and os.path.exists(self.config_file): logging.info(' -- starting federation network -- ') # start logging only if federation is enabled log_file_name = ('/export/hda3/tmp/fed_network_client_%s' % time.strftime('%d-%b-%y')) log_file = open(log_file_name, 'a+') logging.set_logfile(log_file) logging.set_verbosity(logging.DEBUG) sys_abstraction = stunnel_jail.GetSystemAbstraction() # setup the stunnel jail jail = stunnel_jail.StunnelJail(fed_stunnel_config.STUNNEL_CLIENT_CHROOT, sys_abstraction) (status_jail, message) = jail.Setup() if status_jail: logging.error('The CHROOT Jail could not be setup %s' % message) return 1 try: fed_config = fed_network_config.FederationConfig(self.config_file, None, sys_abstraction) logging.info('Federation config read successfully') client = fed_network_util.SuperRootStunnelService(sys_abstraction, fed_config) except fed_network_config.FederationConfigException, ex: logging.error('Exception in configuration %s' % ex.message) return 1 else: # Connect to all the slaves (status_connect, message) = client.Start() # Create the config root (status_config, message) = CreateSuperRootConfig(self.ent_home)
def core_op_out(ver, home, op, nodes, ignore=0, testver=None, gfs=1): """Executes ent_core command and returns the result. ent_core is running at the same time on all the nodes in different threads. Arguments: ver: '4.6.5' home: '/export/hda3/4.6.5' op: 'info' nodes: ['ent1', 'ent2'] ignore: 1 - ignore errors; 0 - otherwise. testver: 1 - if this is a test version; 0 - otherwise. gfs: 1 - start gfs during activation; 0 - otherwise. Returns: error code and output from all nodes. (0, 'state=RUNNING\nnodes=5\nfailures=1\nstate=RUNNING\nnodes=5\nfailures=1') """ if testver == None: testver = is_test(ver) out = [] core_base = ('/export/hda3/%s/local/google/bin/secure_script_wrapper ' '-e /export/hda3/%s/bin/ent_core --ver=%s' % (ver, ver, ver)) if testver: core_base = '%s --testver' % core_base if gfs == 0: core_base = '%s --gfs=0' % core_base cmd = '%s --%s' % (core_base, op) ret = E.execute(nodes, cmd, out, 0, 0, 0, home, ignore=ignore) if ret: logging.error(''.join(out)) return ret, ''.join(out)
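# Usage sketch for core_op_out() (version, home and node names are
# hypothetical): query core state directly, without the do_core_op wrapper.
def _core_info_example():
  ret, out = core_op_out('4.6.5', '/export/hda3/4.6.5', 'info',
                         ['ent1', 'ent2'])
  if ret:
    logging.error('ent_core info failed: %s' % out)
  return out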
def getPasswd(self, name, ip):
  """ This sets a new password for a user and mails it to the user.
  returns success status (boolean)
  """
  # Refuse to change the password for username google. This is a special
  # username that we use as a back-door for controlling the box. Changing
  # this password may make it inaccessible (bug#36271).
  if name == 'google':
    logging.info("Refusing to set password for user %s" % name)
    return false
  newPassword = password.createRandomPasswd(PASSWORD_LENGTH)
  if self.check_update_user(name, None, newPassword):
    SendMail.send(self.cfg, self.getEmail(name), false,
                  M.MSG_FORGOTPASSWORDSUBJECT,
                  M.MSG_FORGOTPASSWORD % (newPassword, ip), false)
    self.cfg.writeAdminRunnerOpMsg(
        "A new password has been sent to your email address")
    return true
  logging.error("couldn't set password for user %s" % name)
  return false
def installkey(self): """ installs the staging key as the currently installed private key returns: 0 on success, 1 on empty install key (not an error) 2 when the private key is invalid 3 when the private key could not be distributed """ self.updatelock.acquire() try: # first verify if the staging key is empty (not an error) if (not os.path.exists(ssl_cert.STAGINGKEY_FILENAME % \ self.cfg.getGlobalParam("ENTERPRISE_HOME"))) or \ 0 == len(open(ssl_cert.STAGINGKEY_FILENAME % \ self.cfg.getGlobalParam("ENTERPRISE_HOME"), "r").read()): return "1" # next verify that the staging key is a valid file verifycmd = "secure_script_wrapper -p2 %s verifystagingkey %s" % ( self.sslWrapperPath, self.cfg.getGlobalParam("ENTERPRISE_HOME") ) outputList = [] verifycode = E.execute(['localhost'], verifycmd, outputList, 60) if verifycode != 0: E.rm(['localhost'], ssl_cert.STAGINGKEY_FILENAME % self.cfg.getGlobalParam("ENTERPRISE_HOME")) logging.error("Verify failed for key [%s]; error code: %d" % (str(outputList), verifycode) ) return "2" # distribute the staging key retcode = E.distribute(self.cfg.getGlobalParam("MACHINES"), ssl_cert.STAGINGKEY_FILENAME % self.cfg.getGlobalParam("ENTERPRISE_HOME"), 60) if retcode != 0: logging.error("Couldn't distribute private key, error %d" % retcode) return "3" # next, copy the key on all machines cmd = "secure_script_wrapper -p2 %s installkey %s" % ( self.sslWrapperPath, self.cfg.getGlobalParam("ENTERPRISE_HOME")) outputList = [] retcode = E.execute(self.cfg.getGlobalParam("MACHINES"), cmd, outputList, 60) if retcode != 0: logging.error("Couldn't install cert: %s" % str(outputList)) return "3" self.writeAdminRunnerOpMsg(M.MSG_LOG_SSL_KEY_INSTALLED) finally: self.updatelock.release() return "0"
def HandleStatusz(self, uri, _): try: (_, _, _, _, query, _) = urlparse.urlparse(uri) params = cgi.parse_qs(query) if params.has_key("req") and params.has_key("dir"): return self.ShowRequests(params["req"], params["dir"], params.get("options", ["0"])[0]) # endif if params.has_key("show_all"): return self.ShowAll(params.get("reqtype", ["Success"])[0], params.get("start", ["0"])[0], params.get("num", ["25"])[0], params.get("filter", [""])[0], params.get("options", ["0"])[0]) # endif return self.ShowStatusz() except: # collect the exception traceback so we know what went wrong (t, v, tb) = sys.exc_info() exc_msg = string.join(traceback.format_exception(t, v, tb)) logging.error(exc_msg) return "<pre>%s</pre>" % exc_msg
def KillPid(pid, kill_signal=15):
  """ Kill a process given the pid and the kill signal.

  If an exception happens, it may fail to kill the process.
  No-op if pid is 0.

  Args:
    pid: process_id
    kill_signal: signal_to_kill_the_process
  Returns:
    0 - successful; 1 - an exception occurred
  """
  if pid == 0:
    return 0
  try:
    os.kill(pid, kill_signal)
  except OSError:
    logging.error("Error: could not kill pid %s with signal %s" %
                  (pid, kill_signal))
    return 1
  return 0
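# Sketch of the pidfile round trip using WritePidFile() and KillPid() above
# (the path is hypothetical):
def _pidfile_roundtrip_example():
  pidfile = '/export/hda3/tmp/example_service.pid'
  if WritePidFile(pidfile):
    return 1
  pid = int(open(pidfile, 'r').read().strip())
  return KillPid(pid, kill_signal=15)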
def generatekey(self): """ creates a randomly generated staging key; returns 0 on success, 1 on failure """ self.updatelock.acquire() try: cmd = "secure_script_wrapper -p2 %s generatekey %s" % ( self.sslWrapperPath, self.cfg.getGlobalParam("ENTERPRISE_HOME")) outputList = [] retcode = E.execute(['localhost'], cmd, outputList, 60) # if the command failed, we don't want to leave a malformed key around if retcode != 0: E.rm(['localhost'], ssl_cert.STAGINGKEY_FILENAME % self.cfg.getGlobalParam("ENTERPRISE_HOME")) logging.error("Couldn't generate private key: %s" %str(outputList)) finally: self.updatelock.release() return "%d" % retcode
def exportconfig(self, password, exportInfo):
  """Exports configuration as XML, given a password and exportInfo.

  exportInfo is not used currently, but in the future it will provide
  information about what needs to be exported. We return the file name of
  the config file.
  """
  cfgpackager = self.createPackager(password)
  # The EnterpriseHandler.writeOutputFile() method, which streams
  # this file back to the user, will delete the file once the file
  # has been streamed.
  export_file = '/export/hda3/tmp/config.%s.xml' % time.time()
  try:
    # TODO(hareesh): The whole damn configuration is stored in the
    # string result. We really need to improve upon this, because we
    # could get OverflowErrors. A single python string can only hold
    # so much. *SIGH*
    result = ('<?xml version="1.0" encoding="%s" ?>%s' %
              (self.getencoding(), cfgpackager.encode(level=0)))
    outf = open(export_file, 'w')
    # Replace the uam_dir tag, because all we have inside the <uam_dir>
    # tag is a file name.
    m = re.findall(impexp_secmgr.SecMgr.UAM_RE, result)
    if len(m) > 0 and m[0][1] != 'None':
      uam_filename = m[0][1]
      self.exportconfig_uam_inserter(result, outf, uam_filename)
      os.unlink(uam_filename)
    else:
      outf.write(result)
    outf.close()
    logging.info('Wrote config.xml out: %s' % export_file)
  except config_filters.ConfigExportError, e:
    logging.error(e)
    result = 'Error exporting configuration.'
def Upload(self, coll_obj, patch, params, max_errors, contents):
  """Upload (make) an entry, provided the contents pass validation.

  coll_obj is a collection object for this entry.
  patch is 1 if we are to patch an existing entry (see Create for the
  collection object for details).
  params is a dictionary of additional parameters, also passed to Create.
  It must contain the entry type, but everything else is optional. The
  entry count will be filled in.
  max_errors is the maximum number of errors allowed in validation.
  contents is the contents of the entry.
  Returns either VALID_OK, VALID_SHORT_CIRCUIT or a list of validation
  errors.
  """
  name = coll_obj.name
  logging.info("Uploading dictionary %s" % name)
  contents = entconfig.RepairUTF8(contents)
  entry_type = params[C.ENTRY_TYPE]
  validator = None
  if entry_type == C.QUERY_EXP_FILETYPE_SYNONYMS:
    validator = SynonymsValidator()
  elif entry_type == C.QUERY_EXP_FILETYPE_BLACKLIST:
    validator = BlacklistValidator()
  else:
    logging.error("Unknown entry_type: %s" % entry_type)
    return validatorlib.VALID_SHORT_CIRCUIT
  entry_count, errors = validator.validate(contents, int(max_errors))
  if errors != validatorlib.VALID_OK:
    logging.error("Errors validating query exp upload for %s" % name)
    return errors
  logging.info("Successful validation for query exp entry %s" % name)
  params[C.ENTRY_COUNT] = entry_count
  # Setting "needs apply" could be overzealous if the next stage fails,
  # but we prefer to err on the side of caution.
  self.cfg.setGlobalParam(C.QUERY_EXP_STATUS,
                          int(C.QUERY_EXP_STATUS_NEEDS_APPLY))
  # Now we can actually create the object.
  try:
    if not coll_obj.Create(patch, params):
      return validatorlib.ValidationError(
          "Unable to create query exp entry",
          QUERYEXP_UNABLE_TO_CREATE_ENTRY)
  except Exception, e:
    t, v, tb = sys.exc_info()
    exc_msg = string.join(traceback.format_exception(t, v, tb))
    logging.error(exc_msg)
    return validatorlib.ValidationError(
        "Unable to create query exp entry",
        QUERYEXP_UNABLE_TO_CREATE_ENTRY)
def removeca(self, hash):
  '''Remove a trusted CA, given its subject hash'''
  self.updatelock.acquire()
  retval = 0
  try:
    name = os.path.join(self.cfg.getGlobalParam('TRUSTED_CA_DIRNAME'),
                        hash + ssl_cert.CA_CERT_EXT)
    retval = E.rm(self.cfg.getGlobalParam('MACHINES'), name)
    if not retval:
      logging.error('error trying to remove %s: %d' % (name, retval))
    name = os.path.join(self.cfg.getGlobalParam('CRL_DIRNAME'),
                        hash + ssl_cert.CRL_EXT)
    retval = E.rm(self.cfg.getGlobalParam('MACHINES'), name)
    if not retval:
      logging.error('error trying to remove %s: %d' % (name, retval))
  finally:
    self.updatelock.release()
  return '%d' % retval
def GetAndEvalBorgmonExpr(self, expr):
  """ return the value of a borgmon expression

  Arguments:
    expr: 'job:overall-urls-crawled:sum'
  Returns:
    None if Borgmon does not reply, if Borgmon does not have the value of
    the expression, or if the value is not valid. Otherwise, return the
    value, e.g. 1765.0 (a float).

  Note: TODO(wanli) use google3.borg.monitoring.borgmon.borgmon_eval_lib,
  but borgmon_eval_lib imports datetime, which is not available in
  python2.2. On the other hand, we cannot just change adminrunner to use
  python2.4: xreadlines is only available in python2.2, not in python2.4.
  """
  reply = self.GetBorgmonVarValue(expr)
  if reply is None:
    logging.error('no reply from Borgmon for ' + expr)
    return None
  value = self._EvalBorgmonReply(expr, reply)
  if value == None or value == 'NaN':
    logging.error('failed to get value for ' + expr)
    return None
  return value
def CreateLogReport(config, date_str, logs, main_google3_dir, withResults,
                    topCount, diagnosticTerms, html_file, valid_file,
                    new_html_file, new_valid_file):
  """This method generates an aggregate report on search queries over a
  period of days."""
  logging.info('Creating log report for %s' % date_str)
  # see if the report is already valid
  if (liblog.checkValid(html_file, valid_file, logs) and
      gfile.Exists(html_file)):
    logging.info('%s is already valid' % html_file)
    return liblog.STILL_VALID
  # build the list of args
  args = [date_str, new_html_file, withResults, topCount, diagnosticTerms]
  args.extend(map(lambda (x): x.file, logs))
  arg_str = string.join(map(commands.mkarg, args))
  stats_cmd = ('cd %s/enterprise/legacy/analyzelogs/scripts; '
               './enterprise_stats.py %s' % (main_google3_dir, arg_str))
  (status, output) = liblog.DoCommand(stats_cmd)
  if status != 0:
    logging.error('Error running enterprise_stats: %s' % output)
    return liblog.FAILURE
  # make valid file
  if not liblog.makeValid(new_valid_file, logs):
    logging.error('Error making valid file %s' % new_valid_file)
    return liblog.FAILURE
  logging.info('Done log_report for %s' % new_html_file)
  return liblog.SUCCESS
def GetStylesheet(self, language): """ This gets the local/test stylesheet for the given frontend and language. If one doesn't exist, the default stylesheet will be used to generate an appropriate stylesheet. input: 'language' specifies the language to use """ # Read the stylesheet for the specified language stylesheetFile = self.config.var_copy( ['ENT_FRONTENDS', self.name, 'STYLESHEET.%s' % language]) if stylesheetFile: try: lines = open(stylesheetFile, "r").read() return lines except IOError: pass # The stylesheet doesn't exist, so read the default one stylesheetFile = self.config.var_copy( ['ENT_FRONTENDS', self.name, 'STYLESHEET']) if stylesheetFile: try: lines = open(stylesheetFile, "r").read() default_language = self.config.var( ['ENT_FRONTENDS', self.name, "DEFAULT_LANGUAGE"]) if not default_language: default_language = 'en' lines = self.RetranslateStylesheet(lines, default_language, language) return lines except IOError: pass # This shouldn't happen logging.error('Error fetching stylesheet - file %s missing' % stylesheetFile) return ''
def receiveResponse(self, server_host, response): """Receive a CrawlQueueResponse from a crawlmanager. This method is thread-safe.""" self.lock.acquire() try: self.numExpectedResponses_ -= 1 if response == None: logging.error('Get None as result from server: %s' % server_host) else: self.captionTime_ = max(self.captionTime_, response.captiontime()) for hostqueue in response.hostqueue_list(): if not self.perHostUrlSorters.has_key(hostqueue.host()): self.perHostUrlSorters[ hostqueue.host()] = UrlInfoSorter() self.perHostUrlSorters[hostqueue.host()].addAll( hostqueue.urls_list()) if response.has_futurequeue(): self.futureUrlInfoSorter.addAll( response.futurequeue().urls_list()) finally: self.lock.release()
def main(argv):
  FLAGS(argv)
  if FLAGS.deb:
    logging.set_verbosity(logging.DEBUG)
  # start a service if the command is the default specified in flags
  if FLAGS.command == 'DEFAULT':
    fed_network_client = FederationNetworkClientService()
    logging.debug('Launched as a service. Start the service.')
    fed_network_client.execute(argv)
    return
  ec = fed_stunnel_config.GetEnterpriseConfiguration()
  file_path = FEDERATION_NETWORK_CONFIG % ec.ENTERPRISE_HOME
  sys_abstraction = stunnel_jail.GetSystemAbstraction()
  try:
    fed_config = fed_network_config.FederationConfig(file_path, None,
                                                     sys_abstraction)
    logging.info('Federation config read successfully')
    client = fed_network_util.SuperRootStunnelService(sys_abstraction,
                                                      fed_config)
  except fed_network_config.FederationConfigException, ex:
    print ex.message
    logging.error('Exception in configuration %s' % ex.message)
    sys.exit(-1)
def updatedisk(self, machine, disks, do_add): error = 0 if machine not in self.cfg.getGlobalParam('MACHINES'): logging.error("%s doesn't exist" % machine) return 1 for disk in disks: if len(disk) != len('hdXX'): logging.error("The disk format should be hdXX") return 1 disks = map(lambda x: '/export/%s' % x, disks) diskmap = self.cfg.getGlobalParam('DATACHUNKDISKS') if not diskmap.has_key(machine): crtDisks = [] else: crtDisks = diskmap.get(machine) if None == crtDisks: crtDisks = [] for disk in disks: if do_add: if disk not in crtDisks: crtDisks.append(disk) else: logging.error("%s already present in %s" % (disk, machine)) error = 1 else: if disk in crtDisks: crtDisks.remove(disk) else: logging.error("%s already removed from %s" % (disk, machine)) error = 1 if error: return 1 diskmap[machine] = crtDisks if not self.cfg.setGlobalParam('DATACHUNKDISKS', diskmap): return 1 return 0
def create(self, name, params=None): coll_obj = self.construct_collection_object(name) # see if the collection/frontend/restrict exists if coll_obj.Exists(): logging.error("Can't create %s [%s]; already exists" % (coll_obj.print_name, name)) return 1 # validate the collection/frontend/restrict name if not entconfig.IsNameValid(name): logging.error("Invalid %s name %s -- cannot create" % (coll_obj.print_name, name)) return 2 # check license if not self.check_max_license(): return 5 ok = 0 try: # create the collection/frontend/restrict object try: if not coll_obj.Create(params=params): return 3 ok = 1 except Exception, e: (t, v, tb) = sys.exc_info() exc_msg = string.join(traceback.format_exception(t, v, tb)) logging.error(exc_msg) ok = 0 finally: if not ok: # cleanup logging.error("Failed to create %s [%s]" % (coll_obj.print_name, name)) coll_obj.Delete() return 4 # log this creation msg = M.MSG_LOG_CREATE_COLLECTION % (coll_obj.print_name, name) self.writeAdminRunnerOpMsg(msg) return 0
def removefileifexists(self, filename): '''Remove a file if it exists, return 0 if removal succeeded''' self.updatelock.acquire() try: try: # The caller may want to remove GFS file or localfile, # call fileutil to take care of all of them. rm_cmd = "rm -f %s" % filename err, out = E.run_fileutil_command(self.cfg.globalParams, rm_cmd) if err != E.ERR_OK: logging.error("Failed to remove file %s" % filename) logging.error("fileutil output: %s" % out) return "1" else: return "0" except IOError, e: logging.error("Failed to remove file %s" % filename) logging.error(str(e)) return "1" finally: self.updatelock.release()
def checkFiles(self): for dir in [self.working_dir]: if not os.path.isdir(dir): logging.error("directory %s doesn't exist" % dir) return 0 if not os.path.isabs(dir): logging.error("directory %s is not an absolute path" % dir) return 0 for file in [ self.box_private_keyring, self.box_public_keyring, self.license_public_keyring, self.license_private_keyring ]: if not os.path.isfile(file): logging.error("key file %s doesn't exist" % file) return 0 if not os.path.isabs(file): logging.error("file %s is not an absolute path" % file) return 0 return 1
def installcert(self):
  """ installs the staging certificate as the currently installed certificate
  returns: 0 on success, and 1 on failure
  """
  self.updatelock.acquire()
  try:
    # first verify that the staging certificate is a valid file
    verifycmd = "secure_script_wrapper -p2 %s verifystagingcert %s" % (
        self.sslWrapperPath, self.cfg.getGlobalParam("ENTERPRISE_HOME"))
    outputList = []
    verifycode = E.execute(['localhost'], verifycmd, outputList, 60)
    if verifycode != 0:
      E.rm(['localhost'], ssl_cert.STAGINGCERT_FILENAME %
           self.cfg.getGlobalParam("ENTERPRISE_HOME"))
      logging.error("Verify failed for certificate [%s]; error code: %d" %
                    (str(outputList), verifycode))
      return "1"
    # distribute the staging certificate
    retcode = E.distribute(self.cfg.getGlobalParam("MACHINES"),
                           ssl_cert.STAGINGCERT_FILENAME %
                           self.cfg.getGlobalParam("ENTERPRISE_HOME"), 60)
    if retcode != 0:
      logging.error("Couldn't distribute apache cert, error %d" % retcode)
    # next, install the certificate on all machines
    cmd = "secure_script_wrapper -p2 %s installcert %s" % (
        self.sslWrapperPath, self.cfg.getGlobalParam("ENTERPRISE_HOME"))
    outputList = []
    retcode = E.execute(self.cfg.getGlobalParam("MACHINES"), cmd,
                        outputList, 60)
    if retcode != 0:
      logging.error("Couldn't install cert: %s" % str(outputList))
      return "1"
    self.writeAdminRunnerOpMsg(M.MSG_LOG_SSL_CERT_INSTALLED)
  finally:
    self.updatelock.release()
  return "0"
def Create(self, patchExisting=true, params=None): # If patchExisting is true, it indicates this is a version manager patch # upgrade so only newly added config files will be created in patched # version. # If params is not None, then after setting up the object with the # defaults, we override values from params (must be a dictionary). This # happens before patching values. # start with the defaults collection = self.config.var_copy(self.default_value_var) # override with any values from params, but only if the key from params # exists in the collection. if params: for var in params.keys(): if collection.has_key(var): collection[var] = params[var] # give it the correct name collection[self.name_var] = self.name if patchExisting: # update with values in current object. for var in collection.keys(): if self.has_var(var): collection[var] = self.get_var(var) # set the collection (so that derived vars get set) self.config.set_var((self.base_map_var, self.name), collection, validate=0) if not self.CreateDirs() and not patchExisting: logging.error("error creating %s dirs: %s" % (self.print_name, self.name)) return false if not self.InitDefaultFiles(patchExisting): logging.error("error creating %s files; %s" % (self.print_name, self.name)) return false if not self.config.Save(): logging.error("error saving config file") return false self.config.DistributeAll() return true
def _EvalBorgmonReply(self, expr, reply): """ evaluate the reply from Borgmon to a /jseval request. Args: expr: 'job:overall-urls-crawled:sum' reply: '[\n15709\n]' Return 15709.0 (float), or None if failed to eval. If the result is NaN, it returns 'NaN' since caller may wish to take special action in that case. """ reply_stripped = reply.replace('\n', '') if reply_stripped.lower().find('nan') >= 0: logging.error('NaN value for ' + expr) return 'NaN' try: reply_list = eval(reply_stripped) except (SyntaxError, NameError): logging.error('failed to evaluate ' + reply) return None if len(reply_list) != 1: logging.error('non-scalar return: ' + reply) return None val = reply_list[0] # sometimes we get something like {'':0}, so: if type(val).__name__ == 'dict': return val.values()[0] else: try: # just try to make it a float return float(val) except ValueError: logging.error('failed to parse ' + str(val)) return None
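# Contract of _EvalBorgmonReply() on the reply shapes it handles (example
# values are made up):
#   '[\n15709\n]'    -> 15709.0   (scalar, coerced to float)
#   '[\nNaN\n]'      -> 'NaN'     (callers treat NaN specially)
#   "[\n{'': 0}\n]"  -> 0         (dict-wrapped scalar, value extracted)
#   '[\n1, 2\n]'     -> None      (non-scalar replies are rejected)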
def setCounterFile(self, filename, count):
  """ Tries to set the file with the given counter, and verifies the
  value after writing. """
  # if the file already has the right value, we're done
  logging.info('Initial check to see if correct value is there')
  if self.getCounterFromFile(filename) == count:
    return True
  try:
    f = open(filename, "w")
    f.write(repr(long(count)))
    f.flush()
    # force data to be written to disk; failure raises OSError
    os.fdatasync(f.fileno())
    assert (f.close() == None)  # make sure close is successful
  except ValueError:
    logging.error("Invalid value for counter: %s" % repr(count))
    return False
  except (IOError, OSError), e:
    logging.error("Error setting count on %s -- %s" % (filename, e))
    return False
  # check after writing, as promised in the docstring
  if self.getCounterFromFile(filename) != count:
    logging.error("File %s not written to disk correctly" % filename)
    return False
  return True
def run(self):
  # Stop all services on all machines
  ret = None
  for service in self.services_to_stop_:
    logging.info("STATUS: STOP %s service %s on %s" %
                 (service, self.version_, self.machine_))
    func = lambda: self.stop_service(service)
    ret = try_repeatedly(func, success=0)
    if ret:
      self.msg_ = "Error stopping %s on %s." % (service, self.machine_)
      logging.error("STATUS: %s" % self.msg_)
      self.err_ = ret
      return
    self.stopped_.extend([service])
  # Change the STATE file
  # TODO(zsyed): Move STATE file to chubby.
  logging.info("STATE: version %s on %s: %s" %
               (self.version_, self.machine_, self.target_state_))
  if not install_utilities.set_install_state(
      self.machine_, E.getEnterpriseHome(), self.target_state_):
    # It is not clear that we want to exit halfway through here ..
    logging.error("ERROR changing state on machine %s. "
                  "Please rollback and try again" % self.machine_)
    self.err_ = ret
    self.msg_ = "Cannot change STATE file."
    return
  # Start all services to be started
  for service in self.services_to_start_:
    logging.info("STATUS: START %s service %s on %s" %
                 (service, self.version_, self.machine_))
    func = lambda: self.start_service(service)
    ret = try_repeatedly(func, success=0)
    if ret:
      self.msg_ = "Error starting %s on %s" % (service, self.machine_)
      logging.error("STATUS: %s" % self.msg_)
      self.err_ = ret
      return
    self.started_.extend([service])
def ExecuteHTTPGet(self, handler, timeout=120):
  """executes the given http GET command on admin-runner and returns a
  tuple (succeeded_flag, response)
  """
  logging.info("Executing get command: %s" % handler)
  ok = False
  response = None
  signal.signal(signal.SIGALRM, AlarmHandler)
  cmd_url = 'http://%s:%s/%s' % (self.machine, self.port, handler)
  try:
    try:
      signal.alarm(timeout)
      response = urllib.urlopen(cmd_url).read()
      ok = True
    finally:
      signal.alarm(0)
  except IOError:
    pass
  if not ok:
    logging.error("execution of %s resulted in : %s" % (cmd_url, response))
    return (ok, response)
  # remove the last two lines (ACKgoogle trailer)
  lines = string.split(response, "\n")
  if len(lines) < 2:
    logging.error("Bad admin runner response: %s" % response)
    return (0, response)
  if "NACKgoogle" == string.strip(lines[-2]):
    # some error must have occurred..
    logging.error("AdminRunner error: %s" % lines[-2])
    return (0, lines[-1])
  return (ok, string.join(lines[:-2], "\n"))
def Destroy(self): """Unmount the mounted directories and remove the dirs under chroot.""" retry_count = 3 while retry_count: (in_use, process_list) = self.IsUsed() if in_use: logging.info('Chroot in use, stop processes using it') status_stop = self.StopProcesses(process_list) retry_count -= 1 time.sleep(1) else: break (in_use, process_list) = self.IsUsed() if in_use: logging.error('Unable to umount the directories') return (-1, 'Failed to unmount directories') return_status = 0 return_message = 'Success' for mount in JAIL_MOUNT_RW_BINDS: root_mount = ('%s%s') % (self.__root, mount) umount_command = ('%s %s') % (SYS_UMOUNT, root_mount) (status_umount, result) = self.__os.Execute(umount_command) if status_umount: return_status = status_umount return_message = result logging.error('Umount the bind mount %s failed with %s' % (root_mount, result)) for mount in JAIL_MOUNT_BINDS: root_mount = ('%s%s') % (self.__root, mount) umount_command = ('%s %s') % (SYS_UMOUNT, root_mount) (status_umount, result) = self.__os.Execute(umount_command) if status_umount: return_status = status_umount return_message = result logging.error('Umount the bind mount %s failed with %s' % (root_mount, result)) return (return_status, return_message)
def deleteQueue(self, encQueueName): """Delete a complete crawl queue or cancel a pending queue.""" self.cqueuelock.acquire() try: found = false queues = self.listCrawlQueues() queueName = urllib.unquote(encQueueName) for queue in queues: if queueName == queue.queueName: found = true break if not found: logging.error('Queue not found') return C.CRAWLQUEUE_NAME_NOT_FOUND if queue.completeState == C.CRAWLQUEUE_STATUS_PENDING: self.joblock.acquire() try: if self.runningJob.has_key(encQueueName): logging.info('About to stop the running job.') self.runningJob[encQueueName].join(1) del self.runningJob[encQueueName] else: logging.error('Found a pending crawl queue with no running thread.') finally: self.joblock.release() logging.info('Queue %s incomplete. Canceling.' % encQueueName) queues.remove(queue) self.RemoveOldQueue(encQueueName) if not self.setCrawlQueuesLocked(queues): logging.error('Failed to update queue list.') return C.CRAWLQUEUE_INTERNAL_ERROR finally: self.cqueuelock.release() return C.CRAWLQUEUE_OK
def _ExecuteCommand(cmd, machines=['localhost'], out=None, timeout=15 * 60,
                    num_tries=2, error_filter=None):
  """ Helper function to execute a command multiple times until it succeeds.

  Input:
    cmd: command to run
    machines: machine list on which to execute
    out: list of output lines
    timeout: seconds to allow the command to run
    num_tries: number of times to retry the command
    error_filter: a function that is called if the cmd execution failed.
      It takes the list of output lines as input, can filter the errors,
      and decides whether the execution counts as successful.

  Returns:
    0 for success, 1 for failure.
  """
  if out == None:
    out = []
  for i in range(0, num_tries):
    if E.execute(machines, cmd, out, timeout) == 0:
      # Command was successful
      return 0
    # Execution failed
    if error_filter and error_filter(out) == 0:
      # Error filter says the error is ok; execution counts as success
      logging.error('Cmd %s ignoring error: %s' % (cmd, ''.join(out)))
      return 0
    if i < num_tries - 1:
      logging.error('Cmd %s error: %s, retrying.' % (cmd, ''.join(out)))
    else:
      logging.error('Cmd %s error: %s, failing.' % (cmd, ''.join(out)))
  return 1
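# Sketch of an error_filter as consumed by _ExecuteCommand() above (the
# 'File exists' text is an assumption, not a known fileutil message):
def _ignore_file_exists(out_lines):
  if string.find(''.join(out_lines), 'File exists') != -1:
    return 0  # benign: treat the failed execution as success
  return 1    # a real failure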
def _stop(self, op):
  """ Called by both babysit and stop operations.

  Args:
    op: On the master: if op == "stop", kill the adminconsole also.
        If op is babysit, just restart the loop. The loop will overwrite
        the pid file.
        On non-masters: if the pid file exists, kill the entire process
        group (including the loop and admin console) and remove the pid
        file.
  Returns:
    1 always (legacy reasons. TODO(vish): fix later)
  """
  # kill the loop_AdminConsole process and remove the pid file, if
  # the pid in loop_AdminConsole.pid is a pid of a loop_AdminConsole
  # process
  try:
    loopAdminConsole_pid_str = open(self.loopAdminConsole_pid_file,
                                    "r").read()
    if not self.local_machine_is_master:
      logging.info('Killing admin console and loop')
      try:
        while 1:
          pid_cmdline = open(
              '/proc/%s/cmdline' % loopAdminConsole_pid_str, 'r').read()
          if (pid_cmdline.find('loop_AdminConsole.py') != -1 and
              len(loopAdminConsole_pid_str) > 0):
            kill_cmd = "kill -9 %s" % loopAdminConsole_pid_str
            logging.info("Running: %s" % kill_cmd)
            os.system(kill_cmd)
            time.sleep(1)
          else:
            break
      except IOError:
        # the process does not exist
        pass
      os.system("rm -f %s" % self.loopAdminConsole_pid_file)
  except IOError:
    # the pid file does not exist
    pass
  if self.local_machine_is_master:
    try:
      if op == 'stop':
        self.find_and_kill_loop_adminconsole()
        self.kill_adminconsole()
      return 1
    except:
      logging.error(
          "Stopping loop admin console failed. Exception: %s" %
          str(sys.exc_info()[:2]))
      return 1
  else:
    # this machine is not the master.
    # Just to be doubly sure, kill any rogue loops and the admin console
    # using ps, even though we have the pid.
    # Make really, really sure that the admin console is dead; check every 5s.
    while 1:
      self.find_and_kill_loop_adminconsole()
      self.kill_adminconsole()
      time.sleep(5)
      # check if the admin console is alive; give it 1s to respond
      ac_alive = self.is_admin_console_alive(1)
      if ac_alive:
        logging.info('admin console is not dead. Trying again after 5s.')
      else:
        # Ok, now we are really sure admin console and loop are dead.
        break
    return 1
prev_prev_name = prev_prev_frame.f_globals.get('__name__', None) if (prev_prev_name != '__main__' and not prev_prev_name.endswith('.appcommands')): return # just in case there's non-trivial stuff happening in __main__ del tb try: CallWithExitFix(really_start, is_old_style) except SystemExit, e: raise except Exception, e: # If we are using swigged logging, make sure the exception that # killed us actually gets logged in the INFO log. if logging.is_using_cpp_logging(): logging.error('Top-level exception: %s' % e) logging.error(''.join(traceback.format_exception(*sys.exc_info()))) raise def usage(shorthelp=0, writeto_stdout=0, detailed_error=None, exitcode=None): """ Extracts the __doc__ string from the __main__ module and writes it to stderr. If the argument writeto_stdout=1 it is written to stdout. Args: shorthelp: print only flags from this module, rather than all flags. writeto_stdout: write help message to stdout, rather than to stderr. detailed_error: additional detail about why usage info was presented. exitcode: if set, exit with this status code after writing help. """
def main(argv): argc = len(argv) if argc < 10: sys.exit(__doc__) config = entconfig.EntConfig(argv[0]) if not config.Load(): sys.exit(__doc__) pywrapbase.InitGoogleScript('', [ 'foo', '--gfs_aliases=%s' % config.var("GFS_ALIASES"), '--bnsresolver_use_svelte=false', '--logtostderr' ], 0) gfile.Init() client = argv[1] date_fields = string.split(argv[2], '_') date_range = liblog.ParseDateRange(date_fields[0], date_fields[1:]) withResults = argv[3] topCount = argv[4] diagnosticTerms = argv[5] html_file = argv[6] valid_file = argv[7] new_html_file = argv[8] new_valid_file = argv[9] if not date_range: sys.exit(__doc__) first_date, last_date, printable_date, file_date = date_range if last_date.as_int() < first_date.as_int(): logging.fatal('invalid date range') gws_log_dir = liblog.get_gws_log_dir(config) collect_dir = liblog.get_collect_dir(config) partition_dir = liblog.get_partition_dir(config) directory_map_file = liblog.get_directory_map_file(config) # Collect logs first from all gws nodes and preprocess # logs to make sure logs are up to date. all_machines = config.var('MACHINES') collect_logs.CollectLogs(all_machines, gws_log_dir, collect_dir) preprocess_logs.PartitionLogs(config) gws_logs = liblog.FindClientLogFiles(partition_dir, directory_map_file, client, first_date, last_date) # note that collection (client) has been factored into gwslog_dir. result = CreateLogReport(config, printable_date, gws_logs, config.var('MAIN_GOOGLE3_DIR'), withResults, topCount, diagnosticTerms, html_file, valid_file, new_html_file, new_valid_file) if result == liblog.FAILURE: logging.error('CreateLogReport Failed') sys.exit(result)
def CreateDefaultQueryExpEntry(self):
  """ create the default query exp entries unconditionally on first startup
  """
  logging.info("Entering CreateDefaultQueryExpEntry")
  names = [
      ("EN", "Google_English_stems"),
      ("FR", "Google_French_stems"),
      ("DE", "Google_German_stems"),
      ("IT", "Google_Italian_stems"),
      ("PT", "Google_Portuguese_stems"),
      ("ES", "Google_Spanish_stems"),
      ("NL", "Google_Dutch_stems"),
  ]
  # Do as much as possible through the query expansion base, so we are
  # consistent with creating a new entry via the AdminConsole.
  qe_base = query_expansion_handler.QueryExpansionBase(self)
  # This accounts for ENT_STEMS_EN_SOURCE, ... ENT_STEMS_PT_SOURCE
  for (lang, name) in names:
    filename = self.globalParams.var("ENT_STEMS_%s_SOURCE" % lang)
    if filename:
      logging.info(
          "Copy source stemming file %s to query expansion entry %s" %
          (filename, name))
      try:
        contents = open(filename, "r").read()
      except IOError, e:
        logging.error("Failed to read stems from %s" % filename)
        return false
      entry = qe_base.ConstructCollectionObject(name)
      params = {}
      # Set default params, and then override them with existing values
      if lang == 'EN':
        params[C.ENABLED] = 1
      else:
        params[C.ENABLED] = 0
      params[C.DELETABLE] = 0
      params[C.DOWNLOADABLE] = 1
      params[C.ENTRY_TYPE] = C.QUERY_EXP_FILETYPE_SYNONYMS
      if entry.Exists():
        for c in (C.ENABLED, C.DELETABLE, C.DOWNLOADABLE, C.ENTRY_TYPE):
          if entry.has_var(c):
            params[c] = entry.get_var(c)
      errors = qe_base.Upload(entry, 1, params, 1, contents)
      if errors != validatorlib.VALID_OK:
        logging.error(
            "Creating or updating query exp data, errors: " + repr(errors))
        return false
    else:
      logging.info(
          "Additional stems for %s not created - no filename in config." %
          lang)