Ejemplo n.º 1
0
  def drain_urlmanagers(self):
    """Make every urlmanager dump its status table and check the dump exists.

    The note left by the original author says this has to happen before the
    epoch is advanced and that running it several times is fine.

    Returns:
      0 when every urlmanager accepted the dump command and its table file
      could be listed; 1 as soon as one of those steps fails.
    """
    host_ports = self.cfg.globalParams.GetServerHostPorts("urlmanager")
    total_shards = self.cfg.globalParams.GetNumShards('urlmanager')
    current_epoch = self.cfg.getGlobalParam('RT_EPOCH')

    # Last argument handed to port_talker.py: 300, i.e. a 5 minute timeout.
    talker_timeout = 300

    for host, port in host_ports:
      # The command goes through port_talker.py rather than being sent from
      # here directly because of the timeout.
      dump_cmd = (". %s; cd %s/local/google3/enterprise/legacy/util && "
                  "./port_talker.py %s %d 'd DumpingStatusTable' %d" % (
                      self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
                      self.cfg.entHome,
                      host, port, talker_timeout))
      status = E.execute([E.getCrtHostName()], dump_cmd, None, 0)
      if status != E.ERR_OK:
        logging.error("Error draining urlmanagers [%s]" % status)
        return 1

      # The dump must have produced the per-shard table file for this epoch.
      shard = servertype.GetPortShard(port)
      table_file = "%surlmanager_out_table_%02d_of_%02d_epoch%010d" % (
          self.cfg.getGlobalParam('NAMESPACE_PREFIX'),
          shard, total_shards, current_epoch)
      ls_status, unused_out = E.run_fileutil_command(self.cfg.globalParams,
                                                     "ls %s" % table_file)
      if ls_status != E.ERR_OK:
        logging.error("The status table file [%s] is not there" % table_file)
        return 1

    return 0
Ejemplo n.º 2
0
  def browse(self, clientName, virtualFile, fromLine, toLine,
             grepString, fileArgs):
    """Return the lines fromLine..toLine of a virtual file that match a filter.

    The filter is stripped and URL-quoted (quote_plus), then used both to
    build the physical file and as a fixed-string, case-insensitive grep
    pattern over the selected line window.

    Args:
      clientName, virtualFile, fileArgs: handed to self.makePhysicalFile.
      fromLine, toLine: decimal strings giving the inclusive line window.
      grepString: text the returned lines must contain.

    Returns:
      "0" when the shell pipeline does not report E.ERR_OK, otherwise
      "<length>\n<text>" where <text> is the first element of the output.

    Raises:
      ar_exception.ARException: when no physical file could be produced.
    """
    grepString = urllib.quote_plus(grepString.strip())
    firstLine = string.atoi(fromLine)
    lastLine = string.atoi(toLine)

    physicalPath = self.makePhysicalFile(virtualFile, clientName, grepString,
                                         fileArgs)
    if not physicalPath:
      raise ar_exception.ARException((
        ar_exception.LOADPARAMS, M.ERR_INVALIDFILESPECIFICATION))

    # Clamp the window so that an inverted range selects zero lines.
    lineCount = max(lastLine - firstLine + 1, 0)
    lastLine = firstLine + lineCount - 1

    output = []
    quotedFilter = commands.mkarg(grepString)
    pipeline = "head -n %s %s | tail -n %s | grep -i -F -- %s" % (
        lastLine, physicalPath, lineCount, quotedFilter)
    status = E.execute([E.getCrtHostName()], pipeline, output, false)
    if status != E.ERR_OK:
      return "0"

    text = output[0]
    return "%s\n%s" % (len(text), text)
def SyncLogsWithCanonical(ver, canonical):
  """Replace the GFS master logs on the other master nodes with the canonical ones.

  On every GFS master node except the canonical one, old backups of the log
  directory are deleted, the current log directory is moved to a timestamped
  backup, an empty one is recreated and the canonical node's log directory is
  rsynced over ssh into the gfs_master directory.

  Arguments:
    ver: version string, e.g. '4.6.5'
    canonical: host name of the canonical node, e.g. 'ent1'
  """
  target_nodes, unused_rest = core_utils.GFSMasterNodes()
  # The canonical node is the source of the sync, so it is not a target.
  target_nodes.remove(canonical)

  master_dir = '/export/hda3/%s/data/gfs_master' % ver
  master_log_dir = '%s/ent.gfsmaster' % master_dir
  backup_dir = '%s.backup.%d' % (master_log_dir, int(time.time()))

  sync_template = (
      'rm -rf %(log_dir)s.backup*; '
      'mv %(log_dir)s %(backup_log_dir)s; '
      'mkdir -p %(log_dir)s; chown nobody:nobody %(log_dir)s; '
      'rsync -c -e ssh -aHpogt %(canonical)s:%(log_dir)s %(gfs_master_dir)s')
  sync_cmd = sync_template % {
      'gfs_master_dir': master_dir,
      'log_dir': master_log_dir,
      'backup_log_dir': backup_dir,
      'canonical': canonical,
  }

  captured = []
  ent_home = '/export/hda3/%s' % ver
  E.execute(target_nodes, sync_cmd, captured, 1200, 1, 0, ent_home)
Ejemplo n.º 4
0
def KillProcessIfNotMaster(config):
    """Kill the babysitter and disable snmpd on this node.

    No mastership check is made here; presumably callers only invoke this
    on nodes that are not the master (verify against callers).
    """
    # The babysitter is located through the install's legacy util directory.
    legacy_util_dir = ("%s/local/google3/enterprise/legacy/util"
                       % config.ENTERPRISE_HOME)
    config_file = config.GetConfigFileName()
    E.killBabysitter(legacy_util_dir, config_file, config.VERSION)

    # snmpd is stopped when the node is not a master.
    disableSnmp()
Ejemplo n.º 5
0
  def deleteCollection(self, collection):
    """Remove every raw and summary report, and their lists, of a collection.

    While holding self.logreplock: running report jobs are stopped, report
    data files and per-type list files are removed, the per-type report
    counters are decreased, and finally the collection's report directory
    is removed with rmdir. Failures of the fileutil commands are logged only.
    """
    self.logreplock.acquire()
    try:
      for kind in (liblog.RAW_REPORT, liblog.SUMMARY_REPORT):
        doomed = self.getLogReports(collection, kind)
        for rep in doomed:
          # A report that is still being (re)generated has a job to stop.
          if rep.completeState != COMPLETE:
            self.stopRunningJob(self.jobName(rep))

          # Remove whatever data files the report has.
          html_path, valid_path = liblog.get_report_filenames(
              self.entConfig, kind, rep.reportName, collection)
          self.RemoveReportFiles(html_path, valid_path)

        removed = len(doomed)
        self.reportCount[kind] -= removed
        logging.info('Delete total %d reports of type %s for collection %s.' % (
          removed, kind, collection))

        list_path = liblog.get_report_list_filename(self.entConfig,
                                                    kind, collection)
        rm_err, unused_out = E.run_fileutil_command(self.entConfig,
                                                    'rm -f %s' % list_path)
        if rm_err:
          logging.error('Cannot remove list file %s.' % list_path)

      collection_dir = liblog.get_report_collection_dir(self.entConfig,
                                                        collection)
      rmdir_err, unused_out = E.run_fileutil_command(
          self.entConfig, 'rmdir %s' % collection_dir)
      if rmdir_err:
        logging.error('Cannot delete unused directory %s' % collection_dir)
    finally:
      self.logreplock.release()
Ejemplo n.º 6
0
 def replicateConfig(self, machines):
     """Run replicate_config.py on each of the given machines.

     The command sources ENTERPRISE_BASHRC, changes into the legacy scripts
     directory under self.entHome and invokes replicate_config.py with the
     current host name and the global config file name.

     Returns:
       Whatever E.execute returns for the command.
     """
     bashrc = self.getGlobalParam("ENTERPRISE_BASHRC")
     ent_home = self.entHome
     source_host = E.getCrtHostName()
     config_file = self.globalParams.GetConfigFileName()
     replicate_cmd = (
         ". %s && cd %s/local/google3/enterprise/legacy/scripts && "
         "./replicate_config.py %s %s" %
         (bashrc, ent_home, source_host, config_file))
     return E.execute(machines, replicate_cmd, None, true)
Ejemplo n.º 7
0
def SyncOpLogs(all_machines, log_dir):
  """Rsync the AdminRunner.OPERATOR.* logs from the master to the other nodes.

  Nothing happens unless find_master.FindMaster (port 2100) reports exactly
  one master and that master is the current host. For each other machine the
  local files <log_dir>/AdminRunner.OPERATOR.* are rsynced over ssh to
  <machine>:/<log_dir>/ while holding the lock file <log_dir>/syncops_lock.
  If the lock cannot be taken the function returns immediately.

  Args:
    all_machines: list of host names in the installation.
    log_dir: directory holding the operator logs.
  """
  master = find_master.FindMaster(2100, all_machines)
  this_host = E.getCrtHostName()

  # Only the (single) master is allowed to run the sync.
  if len(master) != 1 or master[0] != this_host:
    return

  for peer in all_machines:
    if peer == this_host:
      continue

    local_pattern = '%s/AdminRunner.OPERATOR.*' % (log_dir)
    remote_target = '%s:/%s' % (peer, log_dir)

    logging.info('Collecting operator logs from %s into %s' % (
      local_pattern, remote_target))

    sync_cmd = ('rsync --timeout=20 --size-only -vau '
                ' -e ssh %s %s/' % (local_pattern, remote_target))

    lock_path = '%s/syncops_lock' % log_dir
    lock = E.acquire_lock(lock_path, 1, breakLockAfterGracePeriod = 0)
    if lock is None:
      logging.info('Cannot grab the lock. Return!')
      return

    try:
      status, output = liblog.DoCommand(sync_cmd)
      if status != 0:
        logging.error('Failed to collect logs from %s: %s' % (
          peer, output))
    finally:
      lock.close()
      os.unlink(lock_path)
Ejemplo n.º 8
0
def CollectLogs(all_machines, gws_log_dir, log_collect_dir):
  """Collect the partnerlog.* files of every node under log_collect_dir.

  Only runs on a one-node install or on the cluster master (as reported by
  find_master.FindMaster on port 2100). For the current machine a symlink
  <log_collect_dir>/<machine> -> gws_log_dir is created; for every other
  machine the files are rsynced over ssh into <log_collect_dir>/<machine>/.
  The whole run holds the lock file <log_collect_dir>/lock.

  Args:
    all_machines: list of host names in the installation.
    gws_log_dir: directory holding the partnerlog.* files on each node.
    log_collect_dir: local directory that receives one entry per machine.
  """
  master = find_master.FindMaster(2100, all_machines)
  this_host = E.getCrtHostName()
  runs_here = (len(all_machines) == 1 or
               (len(master) == 1 and master[0] == this_host))
  if not runs_here:
    logging.info('Not a oneway or cluster master node. Return!')
    return

  lock_path = '%s/lock' % log_collect_dir
  # Original note: waiting up to 5 minutes for the lock.
  lock = E.acquire_lock(lock_path, 30, breakLockAfterGracePeriod = 0)
  if lock is None:
    logging.info('Cannot grab the lock. Return!')
    return

  try:
    for machine in all_machines:
      remote_pattern = '%s/partnerlog.*' % gws_log_dir
      machine_dir = '%s/%s' % (log_collect_dir, machine)

      if machine != this_host:
        # Other cluster nodes: pull their files onto this node with rsync.
        logging.info('Collecting logs from %s:%s into %s' % (
          machine, remote_pattern, machine_dir))

        # Create the destination directory if needed.
        liblog.MakeDir(machine_dir)

        # One rsync invocation fetches all files of one remote machine.
        pull_cmd = ('rsync --timeout=60 --size-only -vau '
                    ' -e ssh %s:%s %s/' % (machine, remote_pattern,
                                           machine_dir))
        status, output = liblog.DoCommand(pull_cmd)
        if status != 0:
          logging.error('Failed to collect logs from %s: %s' % (
            machine, output))
        continue

      # This node: link to gws_log_dir instead of copying. Earlier versions
      # created a real directory here and rsynced into it, so such a leftover
      # directory has to be removed first.
      if os.path.exists(machine_dir) and not os.path.islink(machine_dir):
        cleaned = (E.rm(master, '%s/*' % machine_dir) and
                   E.rmdir(master, machine_dir))
        if not cleaned:
          logging.error('Directory %s exists and cannot be cleaned.',
                        machine_dir)
          continue
        logging.info('Cleaned existing directory %s.', machine_dir)

      if E.ln(master, gws_log_dir, machine_dir):
        logging.info('Symlink %s to directory %s:%s for logs' %
                     (machine_dir, machine, gws_log_dir))
      else:
        logging.error('Cannot make a symlink from %s to %s' %
                      (machine_dir, gws_log_dir))
  finally:
    lock.close()
    os.unlink(lock_path)
Ejemplo n.º 9
0
def SyncOneboxLog(config):
  """Sync the local Onebox log file to its GFS copy, on clusters only.

  The original note says that, as of 4.6.4, this is called from
  scripts/periodic_script.py and from onebox_handler.py (View Log on a
  cluster).

  Nothing is done unless ENT_CONFIG_TYPE is 'CLUSTER' and the current host is
  the first host listed for the oneboxenterprise port. A fileutil 'equalize'
  is tried first; if it reports an error the whole file is copied with
  'cp -f', and a failure of that copy is logged.
  """
  onebox_hosts = config.SERVERS[servertype.GetPortBase('oneboxenterprise')]
  this_host = E.getCrtHostName()
  config_type = config.var('ENT_CONFIG_TYPE')

  # Without a onebox server on this cluster node there is nothing to sync.
  if config_type != 'CLUSTER' or this_host != onebox_hosts[0]:
    return

  tmp_dir = config.var('TMPDIR')
  gfs_cell = config.var('GFS_CELL')
  local_log = os.path.join(tmp_dir, config.var('ENTERPRISE_ONEBOX_LOG'))
  gfs_log = os.path.join(os.sep, 'gfs', gfs_cell,
                         config.var('ENTERPRISE_ONEBOX_LOG'))

  # equalize copies only the difference between the two log files.
  equalize_err, unused_out = E.run_fileutil_command(
      config, 'equalize %s %s' % (local_log, gfs_log))
  if equalize_err:
    # The files did not share a common beginning (possibly a new log file
    # was created), so fall back to copying the whole file.
    copy_err, unused_out = E.run_fileutil_command(
        config, 'cp -f %s %s' % (local_log, gfs_log))
    if copy_err:
      logging.error('Error while syncing onebox logs.')
Ejemplo n.º 10
0
def kill_service(services, machines):
  """Kill the processes of the named services on the given machines.

  For each known service a shell command is run through E.execute that sends
  a plain kill to the service's pids, sleeps 3 seconds and then sends
  kill -9 to whatever pids are found again. According to the original note,
  E.execute sends the commands concurrently when there is more than one node.

  Args:
    services: list of service names; 'adminconsole' and 'adminrunner' are
              the only ones recognised, anything else is logged as an error.
    machines: list of hostnames
  """
  # Shell snippet printing the pids of each service: its loop script plus
  # whatever python_kill reports as listening on the service port.
  pid_listing_cmds = {
      'adminconsole': ("ps -e -o pid,args --width 100 | "
                       "grep loop_AdminConsole.py | grep -v grep | "
                       "awk '{print $1}' ; "
                       "%s" % python_kill.GetServicesListeningOn(['8000'])),
      'adminrunner': ("ps -e -o pid,args --width 100 | "
                      "grep loop_AdminRunner.py | grep -v grep | "
                      "awk '{print $1}' ; "
                      "%s" % python_kill.GetServicesListeningOn(['2100'])),
  }

  for name in services:
    if name not in pid_listing_cmds:
      logging.error('kill_service: Unrecognised service "%s"' % name)
      continue

    logging.info('kill_service: Killing service "%s" on %d nodes...' %
                 (name, len(machines)))
    list_pids = pid_listing_cmds[name]
    shell_cmd = ('sh -c "(kill `%s`; sleep 3; kill -9 `%s`; true)" '
                 '> /dev/null 2>&1' % (list_pids, list_pids))
    E.execute(machines, shell_cmd, [], alarm=1, verbose=0)
    logging.info('kill_service: Done killing service "%s"' % name)
Ejemplo n.º 11
0
def main(unused_args):
  """Validate the command-line flags and create the AdminRunner server.

  Exits through sys.exit with a message when --enthome, --box_keys_dir,
  --license_keys_dir or --installstate is invalid. Otherwise the reset-status
  cache timeout is configured, the AdminRunnerServer is constructed, and a
  fatal error is logged when the server has no valid config file and is not
  in the 'INSTALL' state.

  Args:
    unused_args: ignored.
  """
  if not E.access([E.LOCALHOST], FLAGS.enthome, 'rd'):
    sys.exit("Invalid enthome %s" % FLAGS.enthome)

  if ( not FLAGS.box_keys_dir or
       not E.access([E.LOCALHOST], FLAGS.box_keys_dir, 'rwd') ):
    sys.exit("Invalid box_keys_dir %s" % FLAGS.box_keys_dir)
  if ( not FLAGS.license_keys_dir or
       not E.access([E.LOCALHOST], FLAGS.license_keys_dir, 'rwd') ):
    sys.exit("Invalid license_keys_dir %s" % FLAGS.license_keys_dir)

  if FLAGS.installstate not in ["INSTALL", "ACTIVE", "SERVE", "TEST"]:
    sys.exit("Invalid --installstate %s" % FLAGS.installstate)

  reset_index.SetResetStatusCacheTimeout(FLAGS.reset_status_cache_timeout)

  # NOTE: this local used to be called `as`, which is a reserved keyword
  # since Python 2.6 and made the whole module a SyntaxError there.
  server = adminrunner_server.AdminRunnerServer(FLAGS.enthome,
                                                FLAGS.installstate,
                                                FLAGS.port,
                                                FLAGS.box_keys_dir,
                                                FLAGS.license_keys_dir)

  # make sure we have been given a config file
  if (not server.cfg.getGlobalParam(C.ENT_SYSTEM_HAS_VALID_CONFIG) and
      server.cfg.getInstallState() != 'INSTALL'):
    logging.fatal("adminrunner doesn't have a config file; you must "\
                  "install a conf rpm or run it with "\
                  "--installstate=INSTALL (for migration)")
    return # just in case fatal doesn't exit
Ejemplo n.º 12
0
    def setcert(self, certBody):
        """Save certBody as the staging certificate and verify it.

        The write and the verification both happen while holding
        self.updatelock. A certificate that fails verification is removed
        again.

        Args:
          certBody: contents of the certificate file.

        Returns:
          The string "0" on success, or "1" when the certificate could not
          be written or did not pass verification.
        """
        retval = 0
        self.updatelock.acquire()
        try:
            enthome = self.cfg.getGlobalParam("ENTERPRISE_HOME")
            staging_cert = ssl_cert.STAGINGCERT_FILENAME % enthome

            try:
                cert_file = open(staging_cert, "w")
                # Close explicitly so the data is flushed to disk before the
                # external verify command reads the file.
                try:
                    cert_file.write(certBody)
                finally:
                    cert_file.close()
            except IOError:
                retval = 1
                logging.error("Couldn't save certificate to [%s]" % staging_cert)

            if retval == 0:
                verifycmd = "secure_script_wrapper -p2 %s verifystagingcert %s" % (
                    self.sslWrapperPath,
                    enthome,
                )
                outputList = []
                verifycode = E.execute(["localhost"], verifycmd, outputList, 60)

                if verifycode != 0:
                    retval = 1
                    # Do not leave an unverifiable certificate in staging.
                    E.rm(["localhost"], staging_cert)
                    logging.error("Couldn't verify certificate [%s]; error code: %d" % (str(outputList), verifycode))

        finally:
            self.updatelock.release()

        return "%d" % retval
Ejemplo n.º 13
0
def SetInitState(cfg, state):
  """Sets system's initialization state.

  For a oneway (single node) the state is stored in the global parameter
  C.ENT_SYSTEM_INIT_STATE. For clusters the state is written to a temporary
  file that is then copied to the chubby file
  /ls/<cell name>/ENT_SYSTEM_INIT_STATE with the lockserver client command.

  Args:
    cfg: of type configurator.
    state: string holding the new state.
  """
  # oneway?
  if 1 == len(core_utils.GetNodes()):
    cfg.setGlobalParam(C.ENT_SYSTEM_INIT_STATE, state)
    return

  tmpfile = E.mktemp('/export/hda3/tmp')
  try:
    f = open(tmpfile, 'w')
    # Always release the file handle, even when the write itself fails.
    try:
      f.write(state)
    finally:
      f.close()
  except IOError:
    logging.fatal('Cannot write to temp file %s' % tmpfile)
    return
  version = cfg.getGlobalParam('VERSION')
  lockserv_cmd_prefix = core_utils.GetLSClientCmd(version, is_test(version))
  chubby_root_dir = '/ls/%s' % core_utils.GetCellName(version)
  write_cmd =  '%s cp %s %s/%s' % (lockserv_cmd_prefix,
      tmpfile, chubby_root_dir, 'ENT_SYSTEM_INIT_STATE')
  logging.info('setting system init state to: %s', state)
  # Remove the temp file even when the copy fails by raising or exiting.
  try:
    E.exe_or_fail(write_cmd)
  finally:
    E.exe('rm -rf %s' % tmpfile)
Ejemplo n.º 14
0
    def browse(self, clientName, virtualFile, fromLine, toLine, grepString,
               fileArgs):
        """Return the lines fromLine..toLine of a virtual file matching a filter.

        The filter is stripped and URL-quoted (quote_plus), then used both to
        build the physical file and as a fixed-string, case-insensitive grep
        pattern over the selected line window.

        Args:
          clientName, virtualFile, fileArgs: handed to self.makePhysicalFile.
          fromLine, toLine: decimal strings giving the inclusive line window.
          grepString: text the returned lines must contain.

        Returns:
          "0" when the shell pipeline does not report E.ERR_OK, otherwise
          "<length>\n<text>" where <text> is the first element of the output.

        Raises:
          ar_exception.ARException: when no physical file could be produced.
        """
        grepString = urllib.quote_plus(grepString.strip())
        firstLine = string.atoi(fromLine)
        lastLine = string.atoi(toLine)

        physicalPath = self.makePhysicalFile(virtualFile, clientName,
                                             grepString, fileArgs)
        if not physicalPath:
            raise ar_exception.ARException(
                (ar_exception.LOADPARAMS, M.ERR_INVALIDFILESPECIFICATION))

        # Clamp the window so that an inverted range selects zero lines.
        lineCount = max(lastLine - firstLine + 1, 0)
        lastLine = firstLine + lineCount - 1

        output = []
        quotedFilter = commands.mkarg(grepString)
        pipeline = "head -n %s %s | tail -n %s | grep -i -F -- %s" % (
            lastLine, physicalPath, lineCount, quotedFilter)
        status = E.execute([E.getCrtHostName()], pipeline, output, false)
        if status != E.ERR_OK:
            return "0"

        text = output[0]
        return "%s\n%s" % (len(text), text)
Ejemplo n.º 15
0
def _RunServeCmd(cfg, version, cmd, allnodes=0):
    """Run a serve_service.py command through the secure wrapper.

    Args:
      cfg: configuration object providing getGlobalParam.
      version: version string used to locate serve_service.py under
        /export/hda3/<version>.
      cmd: 'stop', 'start', 'activate', 'deactivate'
      allnodes: 1 to run the command on all machines, otherwise only the
        current host is used.

    Returns:
      0 when E.execute reports E.ERR_OK, 1 otherwise.
    """
    serve_cmd = (
        '/export/hda3/%s/local/google3/enterprise/legacy/scripts/'
        'serve_service.py %s %s' %
        (version, cfg.getGlobalParam('ENTERPRISE_HOME'), cmd))
    logging.info('Running: %s' % serve_cmd)

    if allnodes:
        targets = cfg.getGlobalParam(C.MACHINES)
    else:
        targets = [E.getCrtHostName()]

    wrapped_cmd = SECURE_WRAPPER_COMMAND % (
        cfg.getGlobalParam('ENTERPRISE_HOME'), '-p2', serve_cmd)
    status = E.execute(targets, wrapped_cmd, None, 0)
    if status == E.ERR_OK:
        logging.info('%s: completed' % serve_cmd)
        return 0

    logging.error('%s: failed' % serve_cmd)
    return 1
Ejemplo n.º 16
0
def SyncLogsWithCanonical(ver, canonical):
    """Replace the GFS master logs on the other master nodes with the canonical ones.

    On every GFS master node except the canonical one, old backups of the log
    directory are deleted, the current log directory is moved to a timestamped
    backup, an empty one is recreated and the canonical node's log directory
    is rsynced over ssh into the gfs_master directory.

    Arguments:
      ver: version string, e.g. '4.6.5'
      canonical: host name of the canonical node, e.g. 'ent1'
    """
    target_nodes, unused_rest = core_utils.GFSMasterNodes()
    # The canonical node is the source of the sync, so it is not a target.
    target_nodes.remove(canonical)

    master_dir = '/export/hda3/%s/data/gfs_master' % ver
    master_log_dir = '%s/ent.gfsmaster' % master_dir
    backup_dir = '%s.backup.%d' % (master_log_dir, int(time.time()))

    sync_template = (
        'rm -rf %(log_dir)s.backup*; '
        'mv %(log_dir)s %(backup_log_dir)s; '
        'mkdir -p %(log_dir)s; chown nobody:nobody %(log_dir)s; '
        'rsync -c -e ssh -aHpogt %(canonical)s:%(log_dir)s %(gfs_master_dir)s')
    sync_cmd = sync_template % {
        'gfs_master_dir': master_dir,
        'log_dir': master_log_dir,
        'backup_log_dir': backup_dir,
        'canonical': canonical,
    }

    captured = []
    ent_home = '/export/hda3/%s' % ver
    E.execute(target_nodes, sync_cmd, captured, 1200, 1, 0, ent_home)
Ejemplo n.º 17
0
def _RunServeCmd(cfg, version, cmd, allnodes=0):
  """Run a serve_service.py command through the secure wrapper.

  Args:
    cfg: configuration object providing getGlobalParam.
    version: version string used to locate serve_service.py under
      /export/hda3/<version>.
    cmd: 'stop', 'start', 'activate', 'deactivate'
    allnodes: 1 to run the command on all machines, otherwise only the
      current host is used.

  Returns:
    0 when E.execute reports E.ERR_OK, 1 otherwise.
  """
  serve_cmd = (
      '/export/hda3/%s/local/google3/enterprise/legacy/scripts/'
      'serve_service.py %s %s' % (version,
                                  cfg.getGlobalParam('ENTERPRISE_HOME'),
                                  cmd))
  logging.info('Running: %s' % serve_cmd)

  if allnodes:
    targets = cfg.getGlobalParam(C.MACHINES)
  else:
    targets = [E.getCrtHostName()]

  wrapped_cmd = SECURE_WRAPPER_COMMAND % (
      cfg.getGlobalParam('ENTERPRISE_HOME'), '-p2', serve_cmd)
  status = E.execute(targets, wrapped_cmd, None, 0)
  if status == E.ERR_OK:
    logging.info('%s: completed' % serve_cmd)
    return 0

  logging.error('%s: failed' % serve_cmd)
  return 1
Ejemplo n.º 18
0
 def replicateConfig(self, machines):
   """Run replicate_config.py on each of the given machines.

   The command sources ENTERPRISE_BASHRC, changes into the legacy scripts
   directory under self.entHome and invokes replicate_config.py with the
   current host name and the global config file name.

   Returns:
     Whatever E.execute returns for the command.
   """
   bashrc = self.getGlobalParam("ENTERPRISE_BASHRC")
   ent_home = self.entHome
   source_host = E.getCrtHostName()
   config_file = self.globalParams.GetConfigFileName()
   replicate_cmd = (
       ". %s && cd %s/local/google3/enterprise/legacy/scripts && "
       "./replicate_config.py %s %s" %
       (bashrc, ent_home, source_host, config_file))
   return E.execute(machines, replicate_cmd, None, true)
Ejemplo n.º 19
0
def KillProcessIfNotMaster(config):
  """Kill the babysitter and disable snmpd on this node.

  No mastership check is made here; presumably callers only invoke this
  on nodes that are not the master (verify against callers).
  """
  # The babysitter is located through the install's legacy util directory.
  legacy_util_dir = ("%s/local/google3/enterprise/legacy/util"
                     % config.ENTERPRISE_HOME)
  config_file = config.GetConfigFileName()
  E.killBabysitter(legacy_util_dir, config_file, config.VERSION)

  # snmpd is stopped when the node is not a master.
  disableSnmp()
Ejemplo n.º 20
0
    def installkey(self):
        """Install the staging key as the currently installed private key.

        The whole operation runs while holding self.updatelock.

        Returns (as a string):
          "0" on success,
          "1" when the staging key is missing or empty (not an error),
          "2" when the staging key is invalid (it is then removed),
          "3" when the key could not be distributed or installed.
        """
        self.updatelock.acquire()
        try:
            enthome = self.cfg.getGlobalParam("ENTERPRISE_HOME")
            machines = self.cfg.getGlobalParam("MACHINES")
            staging_key = ssl_cert.STAGINGKEY_FILENAME % enthome

            # first verify if the staging key is empty (not an error)
            if not os.path.exists(staging_key):
                return "1"
            key_file = open(staging_key, "r")
            # Close the handle explicitly instead of leaking it.
            try:
                key_is_empty = 0 == len(key_file.read())
            finally:
                key_file.close()
            if key_is_empty:
                return "1"

            # next verify that the staging key is a valid file
            verifycmd = "secure_script_wrapper -p2 %s verifystagingkey %s" % (
                self.sslWrapperPath,
                enthome,
            )
            outputList = []
            verifycode = E.execute(["localhost"], verifycmd, outputList, 60)

            if verifycode != 0:
                E.rm(["localhost"], staging_key)
                logging.error("Verify failed for key [%s]; error code: %d" % (str(outputList), verifycode))
                return "2"

            # distribute the staging key
            retcode = E.distribute(machines, staging_key, 60)
            if retcode != 0:
                logging.error("Couldn't distribute private key, error %d" % retcode)
                return "3"

            # next, copy the key on all machines
            cmd = "secure_script_wrapper -p2 %s installkey %s" % (
                self.sslWrapperPath,
                enthome,
            )

            outputList = []
            retcode = E.execute(machines, cmd, outputList, 60)

            if retcode != 0:
                # This path installs the key; the message used to say "cert".
                logging.error("Couldn't install key: %s" % str(outputList))
                return "3"

            self.writeAdminRunnerOpMsg(M.MSG_LOG_SSL_KEY_INSTALLED)
        finally:
            self.updatelock.release()

        return "0"
Ejemplo n.º 21
0
 def GetDirList(self):
     """Return the three directories that belong to this object's name.

     The paths are <GOOGLEDATA>/gws/clients/<name>,
     <GOOGLEDATA>/gws/p4clientinfo/<name> and <CONFIGDIR>/frontends/<name>,
     each assembled with E.joinpaths.
     """
     layout = (
         ('GOOGLEDATA', 'gws/clients'),
         ('GOOGLEDATA', 'gws/p4clientinfo'),
         ('CONFIGDIR', 'frontends'),
     )
     directories = []
     for base_var, subdir in layout:
         directories.append(
             E.joinpaths([self.config.var(base_var), subdir, self.name]))
     return directories
Ejemplo n.º 22
0
 def GetDirList(self):
   """Return the three directories that belong to this object's name.

   The paths are <GOOGLEDATA>/gws/clients/<name>,
   <GOOGLEDATA>/gws/p4clientinfo/<name> and <CONFIGDIR>/frontends/<name>,
   each assembled with E.joinpaths.
   """
   layout = (
       ('GOOGLEDATA', 'gws/clients'),
       ('GOOGLEDATA', 'gws/p4clientinfo'),
       ('CONFIGDIR', 'frontends'),
   )
   return [E.joinpaths([self.config.var(base_var), subdir, self.name])
           for base_var, subdir in layout]
Ejemplo n.º 23
0
 def deactivate(self):
   """Deactivate this service and make its cron script non-executable.

   The service <service_name>_<version> is removed from chkconfig and the
   matching script under /etc/cron.<service_cron_time>/ is set to mode 644.
   Both commands go through E.exe_or_fail.
   """
   unregister_cmd = "/sbin/chkconfig --del %s_%s" % (
       self.service_name, self.version)
   E.exe_or_fail(unregister_cmd)

   disable_cron_cmd = "chmod 644 /etc/cron.%s/cron_%s_%s" % (
       self.service_cron_time, self.service_name, self.version)
   E.exe_or_fail(disable_cron_cmd)
Ejemplo n.º 24
0
 def activate(self):
   """Activate this service and make its periodic cron script executable.

   The service <service_name>_<version> is added to chkconfig and the
   matching script under /etc/cron.<service_cron_time>/ is set to mode 755.
   Both commands go through E.exe_or_fail.
   """
   register_cmd = "/sbin/chkconfig --add %s_%s" % (
       self.service_name, self.version)
   E.exe_or_fail(register_cmd)

   enable_cron_cmd = "chmod 755 /etc/cron.%s/cron_%s_%s" % (
       self.service_cron_time, self.service_name, self.version)
   E.exe_or_fail(enable_cron_cmd)
Ejemplo n.º 25
0
  def installkey(self):
    """Install the staging key as the currently installed private key.

    The whole operation runs while holding self.updatelock.

    Returns (as a string):
      "0" on success,
      "1" when the staging key is missing or empty (not an error),
      "2" when the staging key is invalid (it is then removed),
      "3" when the key could not be distributed or installed.
    """
    self.updatelock.acquire()
    try:
      enthome = self.cfg.getGlobalParam("ENTERPRISE_HOME")
      machines = self.cfg.getGlobalParam("MACHINES")
      staging_key = ssl_cert.STAGINGKEY_FILENAME % enthome

      # first verify if the staging key is empty (not an error)
      if not os.path.exists(staging_key):
        return "1"
      key_file = open(staging_key, "r")
      # Close the handle explicitly instead of leaking it.
      try:
        key_is_empty = 0 == len(key_file.read())
      finally:
        key_file.close()
      if key_is_empty:
        return "1"

      # next verify that the staging key is a valid file
      verifycmd = "secure_script_wrapper -p2 %s verifystagingkey %s" % (
        self.sslWrapperPath, enthome)
      outputList = []
      verifycode = E.execute(['localhost'], verifycmd, outputList, 60)

      if verifycode != 0:
        E.rm(['localhost'], staging_key)
        logging.error("Verify failed for key [%s]; error code: %d" %
                      (str(outputList), verifycode))
        return "2"

      # distribute the staging key
      retcode = E.distribute(machines, staging_key, 60)
      if retcode != 0:
        logging.error("Couldn't distribute private key, error %d" % retcode)
        return "3"

      # next, copy the key on all machines
      cmd = "secure_script_wrapper -p2 %s installkey %s" % (
        self.sslWrapperPath, enthome)

      outputList = []
      retcode = E.execute(machines, cmd, outputList, 60)

      if retcode != 0:
        # This path installs the key; the message used to say "cert".
        logging.error("Couldn't install key: %s" % str(outputList))
        return "3"

      self.writeAdminRunnerOpMsg(M.MSG_LOG_SSL_KEY_INSTALLED)
    finally:
      self.updatelock.release()

    return "0"
Ejemplo n.º 26
0
    def remove(self, machine):
        """Remove a machine from the configuration.

        The machine's core services are stopped, it is halted through
        self.halt, dropped from the SERVERS and MACHINES parameters, added to
        the lockserver's dead nodes, its GFS chunkserver and data disks are
        removed, machine allocation is redone, the babysitter and crawl
        processes are restarted and a notification mail is sent unless one
        was already sent.

        Args:
          machine: host name of the machine to remove.

        Returns:
          1 when the machine is unknown, is the current host, or the
          configuration could not be updated; otherwise the value returned
          by self.halt(machine).
        """

        if machine not in self.cfg.getGlobalParam('MACHINES'):
            logging.error("%s doesn't exist" % machine)
            return 1

        # Reject self-removal before any side effect: previously the core
        # services of this very host were stopped before the request was
        # refused.
        if machine == E.getCrtHostName():
            logging.error("Cannot remove self")
            return 1

        ver = self.cfg.getGlobalParam('VERSION')
        home = self.cfg.getGlobalParam('ENTERPRISE_HOME')
        testver = install_utilities.is_test(ver)
        # if possible stop the core services, ignore return code
        install_utilities.stop_core(ver, home, [machine])

        # Halt the machine if APC is used.
        error = self.halt(machine)

        self.cfg.globalParams.ReplaceVarInParam("SERVERS", None, machine)
        self.cfg.globalParams.ReplaceVarInParam("MACHINES", None, machine)
        ret = core_utils.AddDeadNode(ver, testver, machine)
        # remove the chunkserver running on the node
        gfs_utils.DeleteGFSChunkservers(ver, testver, [machine])
        if ret:
            logging.error('Cannot add dead node to the lockserver.')
            # we ignore this error for now

        # now we need to remove the data disks that were on this machine
        data_disks = self.cfg.globalParams.var_copy('DATACHUNKDISKS')
        if machine in data_disks:
            del data_disks[machine]
            if not self.cfg.setGlobalParam('DATACHUNKDISKS', data_disks):
                return 1

        # This also saves the config file
        if not self.cfg.DoMachineAllocation():
            return 1

        # Now we need to restart babysitter because the old one
        # is out of sync after this
        serve_service_cmd = (
            ". %s && "
            "cd %s/local/google3/enterprise/legacy/scripts && "
            "./serve_service.py %s" %
            (self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
             self.cfg.getGlobalParam('ENTERPRISE_HOME'),
             self.cfg.getGlobalParam('ENTERPRISE_HOME')))
        E.exe("%s %s" % (serve_service_cmd, "babysit"))

        self.restart_crawl_processes(serve_service_cmd)

        if not mail_already_sent(M.MSG_MACHINEREMOVED % machine):
            SendMail.send(self.cfg, None, false,
                          M.MSG_MACHINEREMOVED % machine, "", true)

        return error
Ejemplo n.º 27
0
def StartAdminLoop(config, op='start'):
  """Start the AdminRunner loop and the webserver_config loop.

  Args:
    config: object providing VERSION, ENTERPRISE_HOME, ENTERPRISE_BASHRC,
      ENTERPRISE_USER, LOGDIR, MAIN_GOOGLE3_DIR and GetConfigFileName().
    op: 'start' launches the loops unconditionally; 'babysit' launches a
      loop only when no process attribute is found for the pid recorded in
      that loop's pid file. Any other value launches nothing.

  The webserver_config loop is never launched while the install state is
  "INSTALL". Each launch first kills (kill -9) matching processes and then
  starts the loop script in the background as config.ENTERPRISE_USER.
  """

  install_state = install_utilities.install_state(config.VERSION)

  # NOTE: this message is logged even when the loop ends up not being started.
  logging.info("Starting the AdminRunner")
  # Flags handed to loop_AdminRunner.py.
  ar_args = []
  ar_args.append("--port=2100")
  ar_args.append("--enthome=%s" % config.ENTERPRISE_HOME)
  ar_args.append("--installstate=%s" % install_state)
  ar_args.append("--reset_status_cache_timeout=60")

  # In babysit mode, restart only if the pid from the pid file yields no
  # 'pid' attribute (GetAttr returned None).
  restart_loop_AdminRunner = 0
  if op == 'babysit':
    pidfile = E.GetPidFileName('loop_AdminRunner')
    pid = E.ReadPidFile(pidfile)
    if os_utils.GetAttr('pid', pid=pid, fallback_to_ps=0) == None:
      restart_loop_AdminRunner = 1

  if op == 'start' or restart_loop_AdminRunner:
    # Kill every process whose ps line mentions AdminRunner, then start
    # loop_AdminRunner.py in the background; its output is appended to
    # /<LOGDIR>/loop_AdminOut_<user>. Each flag is shell-quoted with mkarg.
    E.su_exe_or_fail(
      config.ENTERPRISE_USER,
      """ ps axwwwww | fgrep AdminRunner | fgrep -v fgrep | \
      colrm 7 | xargs kill -9 2> /dev/null; \
      . %(eb)s; \
      cd %(eh)s/local/google3/enterprise/legacy/scripts/ &&  \
      ENT_ID=%(v)s_crawl ./loop_AdminRunner.py \
      %(eh)s %(args)s >> \
      /%(ld)s/loop_AdminOut_`whoami` 2>&1 &""" % {
      'eh' : config.ENTERPRISE_HOME,
      'eb' : config.ENTERPRISE_BASHRC,
      'v' : config.VERSION,
      'ld' : config.LOGDIR,
      'args' : string.join(map(commands.mkarg, ar_args))
      })

  # Same babysit check as above, for the webserver_config loop.
  restart_loop_webserver_config = 0
  if op == 'babysit':
    pidfile = E.GetPidFileName('loop_webserver_config')
    pid = E.ReadPidFile(pidfile)
    if os_utils.GetAttr('pid', pid=pid, fallback_to_ps=0) == None:
      restart_loop_webserver_config = 1

  # The webserver_config loop is skipped entirely in the INSTALL state.
  if (install_state != "INSTALL" and
      (op == 'start' or restart_loop_webserver_config)):
    logging.info("Starting webserver_config")
    # Kill every process whose ps line mentions webserver_config.py, then
    # start loop_webserver_config.py in the background; its output is
    # appended to /<LOGDIR>/loop_WebserverConfig_<user>.
    E.su_exe_or_fail(
      config.ENTERPRISE_USER,
      """ ps axwwwww | fgrep webserver_config.py | fgrep -v fgrep \
      | colrm 7 | xargs kill -9 2> /dev/null; \
      . %s; \
      cd %s/enterprise/legacy/scripts/ && \
      ENT_ID=%s_crawl ./loop_webserver_config.py %s \
      >> /%s/loop_WebserverConfig_`whoami` 2>&1 &""" % (
      config.ENTERPRISE_BASHRC,
      config.MAIN_GOOGLE3_DIR, config.VERSION,
      config.GetConfigFileName(), config.LOGDIR))
Ejemplo n.º 28
0
def HaltMachines(enthome, machines):
  """Stop and power down the given machines.

  A background '/sbin/shutdown -h now' is issued on all machines, then,
  after waiting 60 seconds, each machine is sent the APC_OFF command.

  Args:
    enthome: passed through to SendAPCCommand.
    machines: list of host names to halt.

  Returns:
    Always 0.
  """
  machine_list = ",".join(machines)
  logging.info("Halting machines: %s" % machine_list)

  E.execute(machines, '/sbin/shutdown -h now &', None, 1)
  # Wait a minute after issuing the shutdown before switching the power off.
  time.sleep(60)

  for host in machines:
    SendAPCCommand(enthome, host, APC_OFF)
  return 0
Ejemplo n.º 29
0
def HaltMachines(enthome, machines):
    """Stop the listed machines, then power them down one by one.

    A shutdown command is executed on all machines at once; after a fixed
    60 second wait, SendAPCCommand is called with APC_OFF for each machine.

    Args:
      enthome: forwarded as-is to SendAPCCommand.
      machines: list of host names.

    Returns:
      0, unconditionally.
    """
    names = ",".join(machines)
    logging.info("Halting machines: %s" % names)
    E.execute(machines, '/sbin/shutdown -h now &', None, 1)

    # Fixed wait between the shutdown request and the APC_OFF commands.
    time.sleep(60)

    for target in machines:
        SendAPCCommand(enthome, target, APC_OFF)
    return 0
Ejemplo n.º 30
0
  def remove(self, machine):
    """Removes a machine from the configuration.

    The request is validated first (the machine must be configured and must
    not be the current host).  Then the core services on the machine are
    stopped, the machine is halted, it is dropped from the SERVERS and
    MACHINES parameters, registered as a dead node, its GFS chunkserver and
    data disks are removed, machines are re-allocated, and the babysitter
    and crawl processes are restarted.

    Args:
      machine: host name of the machine to remove.

    Returns:
      1 if the machine is unknown, is the current host, or a configuration
      update fails; otherwise the value returned by self.halt(machine).
    """

    if machine not in self.cfg.getGlobalParam('MACHINES'):
      logging.error("%s doesn't exist" % machine)
      return 1

    # Refuse to remove the current host *before* touching anything.  This
    # check used to run after stop_core(), so a rejected request still
    # stopped the core services on the local node.
    if machine == E.getCrtHostName():
      logging.error("Cannot remove self")
      return 1

    ver = self.cfg.getGlobalParam('VERSION')
    home = self.cfg.getGlobalParam('ENTERPRISE_HOME')
    testver = install_utilities.is_test(ver)
    # if possible stop the core services, ignore return code
    install_utilities.stop_core(ver, home, [machine])

    # Halt the machine if APC is used.
    error = self.halt(machine)

    self.cfg.globalParams.ReplaceVarInParam("SERVERS", None, machine)
    self.cfg.globalParams.ReplaceVarInParam("MACHINES", None, machine)
    ret = core_utils.AddDeadNode(ver, testver, machine)
    # remove the chunkserver running on the node
    gfs_utils.DeleteGFSChunkservers(ver, testver, [machine])
    if ret:
      logging.error('Cannot add dead node to the lockserver.')
      # we ignore this error for now

    # now we need to remove the data disks that were on this machine
    data_disks = self.cfg.globalParams.var_copy('DATACHUNKDISKS')
    if machine in data_disks:
      del data_disks[machine]
      if not self.cfg.setGlobalParam('DATACHUNKDISKS', data_disks):
        return 1

    # This also saves the config file
    if not self.cfg.DoMachineAllocation():
      return 1

    # Now we need to restart babysitter because the old one
    # is out of sync after this
    serve_service_cmd = (". %s && "
        "cd %s/local/google3/enterprise/legacy/scripts && "
        "./serve_service.py %s" % (
          self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
          self.cfg.getGlobalParam('ENTERPRISE_HOME'),
          self.cfg.getGlobalParam('ENTERPRISE_HOME')))
    E.exe("%s %s" % (serve_service_cmd, "babysit"))

    self.restart_crawl_processes(serve_service_cmd)

    # Only send the notification if an identical one was not sent already.
    if not mail_already_sent(M.MSG_MACHINEREMOVED % machine):
      SendMail.send(self.cfg, None, false,
                 M.MSG_MACHINEREMOVED % machine, "", true)

    return error
Ejemplo n.º 31
0
def ResyncWithMaster(google_config_file, enterprise_home, enterprise_user,
                     master):
    """Runs replicate_config.py as enterprise_user, then SetNonMasterNTP.

    Args:
      google_config_file: passed as the second argument to
        replicate_config.py.
      enterprise_home: root under which ent_bashrc and the scripts
        directory are located.
      enterprise_user: user the replication command is executed as.
      master: passed to replicate_config.py and to SetNonMasterNTP.
    """
    enterprise_bashrc = "%s/local/conf/ent_bashrc" % enterprise_home
    scripts_dir = ("%s/local/google3/enterprise/legacy/scripts" %
                   enterprise_home)

    # The replication script is run under `alarm 600`.
    replicate_cmd = (
        ". %s && cd %s && alarm 600 ./replicate_config.py %s %s" %
        (enterprise_bashrc, scripts_dir, master, google_config_file))
    E.su_exe_or_fail(enterprise_user, replicate_cmd)

    SetNonMasterNTP(master)
Ejemplo n.º 32
0
def remove_unused_from(dirname, fileutil, grace_seconds):
    '''
    Deletes every file in the given directory that is neither open nor
    recently accessed.

    dirname - directory to clean up; nothing is done when it is empty
    fileutil - full path of fileutil
    grace_seconds - Even if a file isn't currently opened we consider it being
                    in-use if it has been accessed recently (less this many
                    seconds ago)

    Returns None.
    '''
    if not dirname:
        logging.error("Not given a directory to cleanup")
        return

    # lsof doesn't return 0 even on success, so its status is ignored.
    (_, output) = E.getstatusoutput("lsof +D %s -Fn" % dirname)

    # lsof returns several lines for each file because multiple threads in a
    # process could have it open.  Get a list of unique files.
    # The output must be split on line boundaries, not on whitespace:
    # otherwise an open file whose name contains a blank is cut into pieces,
    # never matches the glob result below and is deleted while in use.
    open_files = {}
    for line in output.splitlines():
        if line.startswith('n'):
            open_files[line[1:]] = 1

    # Get a list of all files in the directory - not starting with .
    all_files = glob.glob("%s/*" % dirname)

    # Delete all unused files.
    for path in all_files:
        if path in open_files:
            continue
        try:
            age = int(time.time()) - os.stat(path)[stat.ST_ATIME]
            if age <= grace_seconds:
                logging.info('Ignoring unused file %s of age %s seconds' %
                             (path, age))
                continue

            logging.info('Removing unused file %s' % path)
            E.getstatusoutput("%s rm -f %s" % (fileutil, path))
            # If fileutil can't delete it for any reason, nuke it directly
            # And its attribute file.
            if os.path.exists(path):
                os.remove(path)
                os.remove('%s/.attr.plain.%s' %
                          (os.path.dirname(path), os.path.basename(path)))
        except OSError:
            # File got deleted since we ran glob?  Ignore away.
            continue
Ejemplo n.º 33
0
 def TouchFile(global_params, filename):
   """Creates filename through fileutil if it does not exist yet.

   Args:
     global_params: parameters object handed to E.run_fileutil_command.
     filename: path of the file to look for and, if missing, create.

   Nothing is returned; a failed creation is reported via logging.fatal.
   """
   # The body used to read self.globalParams, but there is no `self` in this
   # signature; use the global_params argument that is actually passed in.

   # first check if file exists
   ls_cmd = "ls %s" % filename
   err, out = E.run_fileutil_command(global_params, ls_cmd)
   if err == E.ERR_OK:
     return

   # create if not exists
   create_cmd = "truncate %s 0" % filename
   err, out = E.run_fileutil_command(global_params, create_cmd)
   if err != E.ERR_OK:
     logging.fatal("Could not create file: %s" % filename)
Ejemplo n.º 34
0
    def deactivate(self):
        """Deactivates the service and strips its entries from the crontab.

        After delegating to ent_service.ent_service.deactivate, the crontab
        of self.ent_user is rewritten without the lines that match
        self.entid_tag (only if that user has a crontab at all).

        Returns:
          Always 1.
        """
        ent_service.ent_service.deactivate(self)

        # Remove cronjobs relating to this install version ONLY...
        tmp_cron_file = "%s/tmp/nobody_cron_crawl_" % self.ent_home
        user = self.ent_user
        cron_cleanup = (
            "if $(/usr/bin/crontab -lu %s &>/dev/null);\n"
            "    then crontab -lu %s | grep -v %s > %s; crontab -u %s %s; fi")
        E.exe_or_fail(cron_cleanup % (
            user, user, self.entid_tag, tmp_cron_file, user, tmp_cron_file))

        return 1
Ejemplo n.º 35
0
  def deactivate(self):
    """Runs the base-class deactivation plus crontab cleanup.

    The crontab of self.ent_user is filtered through `grep -v` with
    self.entid_tag into a temporary file under self.ent_home and then
    re-installed from that file.  Nothing is done to the crontab when
    listing it fails.

    Returns:
      1 in all cases.
    """
    ent_service.ent_service.deactivate(self)

    # Remove cronjobs relating to this install version ONLY...
    tmp_cron_file = "%s/tmp/nobody_cron_crawl_" % self.ent_home
    substitutions = (self.ent_user, self.ent_user, self.entid_tag,
                     tmp_cron_file, self.ent_user, tmp_cron_file)
    shell_cmd = ("if $(/usr/bin/crontab -lu %s &>/dev/null);\n"
                 "    then crontab -lu %s | grep -v %s > %s; "
                 "crontab -u %s %s; fi") % substitutions
    E.exe_or_fail(shell_cmd)

    return 1
Ejemplo n.º 36
0
def main(argv):
  """Watchdog loop that keeps adminrunner.py running.

  Writes the 'loop_AdminRunner' pid file and then loops forever: whenever
  check_healthz.CheckHealthz(2100) fails, adminrunner is either given more
  time (a bounded number of consecutive times while a process still seems
  to exist) or killed and started again.

  Args:
    argv: argv[0] is the enterprise home directory; the remaining items are
      passed on to adminrunner.py.  With an empty argv the module docstring
      is printed via sys.exit.
  """
  if len(argv) < 1:
    sys.exit(__doc__)

  pidfile = E.GetPidFileName('loop_AdminRunner')
  E.WritePidFile(pidfile)

  ENTERPRISE_HOME = commands.mkarg(argv[0])
  # ENTERPRISE_HOME are like "'/export/hda3/4.6.0.G.35'".
  # removing the "'" at the beginning and the end.
  ent_home = ENTERPRISE_HOME.split("'")[1]
  adminrunner_dir = ent_home + '/local/google3/enterprise/legacy/adminrunner'
  keys_dir = os.path.expanduser('~/.gnupg')
  # Command line used for every (re)start: caller-supplied arguments first,
  # followed by the fixed log and key directory flags.
  restart_command = ['./adminrunner.py']
  restart_command.extend(argv[1:])
  restart_command.extend(['--log_dir=/export/hda3/logs',
                          '--box_keys_dir=' + keys_dir,
                          '--license_keys_dir=' + keys_dir])
  # Number of consecutive failed health checks for which the kill was
  # postponed; reset on every restart and on every successful check.
  consecutive_kills_skipped = 0
  while True:
    # NOTE(review): 2100 is presumably the adminrunner port -- confirm.
    if not check_healthz.CheckHealthz(2100):
      # If AdminRunner process exists, give it some time
      if reset_index.IsResetIndexInProgress(ent_home):
        # Everything may be slower during reset index.
        limit = MAX_RESET_INDEX_CONSECUTIVE_KILLS_TO_SKIP
      else:
        limit = MAX_CONSECUTIVE_KILLS_TO_SKIP
      if consecutive_kills_skipped < limit:
        # Check if adminrunner process exists
        (err, pidstring) = python_kill.Kill('adminrunner.py', '2100',
                                            print_only=1)
        # if there is no err and pidstring is '', adminrunner is gone.
        # otherwise, we assume it is alive and wait for a while
        if err != 0 or pidstring != '':
          consecutive_kills_skipped += 1
          time.sleep(CHECK_RESTART_INTERVAL)
          continue

      # Start/restart adminrunner, clear ResetIndexInProgress status
      consecutive_kills_skipped = 0
      reset_index.ClearResetIndexStatus(ent_home)
      print "Killing adminrunner..."
      python_kill.Kill('adminrunner.py', '2100')
      print "Restarting adminrunner..."
      # Popen is not waited on: the new process keeps running while this
      # loop goes back to health checking.
      subprocess.Popen(restart_command, cwd=adminrunner_dir)
      # Give it some extra time after startup.  Loading and validating
      # large number of collections and frontends with all large number of
      # keymatches etc can take a long time and adminrunner doesn't respond
      # to health checks during this time.
      time.sleep(SLEEP_AFTER_RESTART)
    else:
      # Healthy: forget any postponed kills and check again later.
      consecutive_kills_skipped = 0
      time.sleep(CHECK_RESTART_INTERVAL)
Ejemplo n.º 37
0
 def TouchFile(global_params, filename):
     """Creates filename through fileutil if it does not exist yet.

     Args:
       global_params: parameters object handed to E.run_fileutil_command.
       filename: path of the file to look for and, if missing, create.

     Nothing is returned; a failed creation is reported via logging.fatal.
     """
     # The body used to read self.globalParams, but there is no `self` in
     # this signature; use the global_params argument that is passed in.

     # first check if file exists
     ls_cmd = "ls %s" % filename
     err, out = E.run_fileutil_command(global_params, ls_cmd)
     if err == E.ERR_OK:
         return

     # create if not exists
     create_cmd = "truncate %s 0" % filename
     err, out = E.run_fileutil_command(global_params, create_cmd)
     if err != E.ERR_OK:
         logging.fatal("Could not create file: %s" % filename)
Ejemplo n.º 38
0
 def send_cmd(self, server_name, cmd):
   """Sends cmd through port_talker.py to every server of a server set.

   Args:
     server_name: name of the server set to look up in the server manager.
     cmd: text that is placed, single-quoted, on the port_talker command
       line.

   Returns:
     Always `true`; a failure for an individual server is only logged.
   """
   cmd_template = (". %s; cd %s/local/google3/enterprise/legacy/util && "
                   "./port_talker.py %s %d '%s' %d")
   server_set = self.cfg.globalParams.GetServerManager().Set(server_name)
   for server in server_set.Servers():
     actual_cmd = cmd_template % (
         self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
         self.cfg.entHome,
         server.host(), server.port(), cmd, 60)  # 1 min timeout
     status = E.execute([E.getCrtHostName()], actual_cmd, None, 0)
     if status == E.ERR_OK:
       continue
     logging.error("Error talking to server at %s:%d" % (server.host(),
                                                         server.port()))
   return true
Ejemplo n.º 39
0
def ResyncWithMaster(google_config_file,
                     enterprise_home,
                     enterprise_user,
                     master):
  """Replicates the configuration from master, then calls SetNonMasterNTP.

  The replication is done by sourcing ent_bashrc and running
  replicate_config.py (under `alarm 600`) as enterprise_user; the command
  is executed with E.su_exe_or_fail.

  Args:
    google_config_file: second argument given to replicate_config.py.
    enterprise_home: root directory containing local/conf and the scripts.
    enterprise_user: user to run the command as.
    master: first argument given to replicate_config.py; also passed to
      SetNonMasterNTP.
  """
  steps = [
    ". %s/local/conf/ent_bashrc" % enterprise_home,
    "cd %s/local/google3/enterprise/legacy/scripts" % enterprise_home,
    "alarm 600 ./replicate_config.py %s %s" % (master, google_config_file),
  ]
  E.su_exe_or_fail(enterprise_user, " && ".join(steps))
  SetNonMasterNTP(master)
Ejemplo n.º 40
0
 def send_urlscheduler_command(self, command):
   """Sends a /run?Flags=... request to every urlscheduler.

   The request is issued through port_talker.py, executed on the current
   host, once per (machine, port) pair of the "urlscheduler" server type.

   Args:
     command: value substituted after "Flags=" in the request.

   Returns:
     0 if every scheduler was reached, 1 as soon as one command fails
     (remaining schedulers are then not contacted).
   """
   timeout = 60 # 1 minute is good enough
   cmd_template = (". %s; cd %s/local/google3/enterprise/legacy/util && "
                   "./port_talker.py %s %d 'GET /run?Flags=%s\r\n\r\n' %d")
   for (machine, port) in self.cfg.globalParams.GetServerHostPorts(
       "urlscheduler"):
     port_talker_cmd = cmd_template % (
         self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
         self.cfg.entHome,
         machine, port, command, timeout)
     status = E.execute([E.getCrtHostName()], port_talker_cmd, None, 0)
     if status != E.ERR_OK:
       logging.error("Error talking to urlscheduler %s:%d" % (machine, port))
       return 1
   return 0
Ejemplo n.º 41
0
def remove_unused_from(dirname, fileutil, grace_seconds):
  '''
  Deletes every file in the given directory that is neither open nor
  recently accessed.

  dirname - directory to clean up; nothing is done when it is empty
  fileutil - full path of fileutil
  grace_seconds - Even if a file isn't currently opened we consider it being
                  in-use if it has been accessed recently (less this many
                  seconds ago)

  Returns None.
  '''
  if not dirname:
    logging.error("Not given a directory to cleanup")
    return

  # lsof doesn't return 0 even on success, so its status is ignored.
  (_, output) = E.getstatusoutput("lsof +D %s -Fn" % dirname)

  # lsof returns several lines for each file because multiple threads in a
  # process could have it open.  Get a list of unique files.
  # The output must be split on line boundaries, not on whitespace:
  # otherwise an open file whose name contains a blank is cut into pieces,
  # never matches the glob result below and is deleted while in use.
  open_files = {}
  for line in output.splitlines():
    if line.startswith('n'):
      open_files[line[1:]] = 1

  # Get a list of all files in the directory - not starting with .
  all_files = glob.glob("%s/*" % dirname)

  # Delete all unused files.
  for path in all_files:
    if path in open_files:
      continue
    try:
      age = int(time.time()) - os.stat(path)[stat.ST_ATIME]
      if age <= grace_seconds:
        logging.info('Ignoring unused file %s of age %s seconds' % (path, age))
        continue

      logging.info('Removing unused file %s' % path)
      E.getstatusoutput("%s rm -f %s" % (fileutil, path))
      # If fileutil can't delete it for any reason, nuke it directly
      # And its attribute file.
      if os.path.exists(path):
        os.remove(path)
        os.remove('%s/.attr.plain.%s' %
            (os.path.dirname(path), os.path.basename(path)))
    except OSError:
      # File got deleted since we ran glob?  Ignore away.
      continue
Ejemplo n.º 42
0
 def send_cmd(self, server_name, cmd):
     """Runs port_talker.py with cmd against each server in a server set.

     Args:
       server_name: server set name, resolved through the server manager.
       cmd: text inserted, inside single quotes, into the port_talker
         command line.

     Returns:
       `true` unconditionally; per-server failures are logged, not returned.
     """
     manager = self.cfg.globalParams.GetServerManager()
     for server in manager.Set(server_name).Servers():
         pieces = (self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
                   self.cfg.entHome,
                   server.host(), server.port(), cmd, 60) # 1 min timeout
         actual_cmd = (
             ". %s; cd %s/local/google3/enterprise/legacy/util && "
             "./port_talker.py %s %d '%s' %d" % pieces)
         if E.ERR_OK != E.execute([E.getCrtHostName()], actual_cmd, None, 0):
             logging.error("Error talking to server at %s:%d" %
                           (server.host(), server.port()))
     return true
Ejemplo n.º 43
0
    def installcert(self):
        """Installs the staging certificate as the current certificate.

        While holding self.updatelock, the staging certificate is verified
        on localhost, distributed to all MACHINES and installed on each.

        Returns:
          The string "0" on success and the string "1" on failure.
        """
        self.updatelock.acquire()
        try:
            ent_home = self.cfg.getGlobalParam("ENTERPRISE_HOME")
            staging_cert = ssl_cert.STAGINGCERT_FILENAME % ent_home

            # first verify that the staging certificate is a valid file
            verify_output = []
            verifycode = E.execute(
                ["localhost"],
                "secure_script_wrapper -p2 %s verifystagingcert %s" %
                (self.sslWrapperPath, ent_home),
                verify_output, 60)
            if verifycode != 0:
                # An invalid staging certificate is removed again.
                E.rm(["localhost"], staging_cert)
                logging.error(
                    "Verify failed for certificate [%s]; error code: %d" %
                    (str(verify_output), verifycode))
                return "1"

            # distribute the staging certificate; a failure here is only
            # logged and the installation is still attempted
            retcode = E.distribute(self.cfg.getGlobalParam("MACHINES"),
                                   staging_cert, 60)
            if retcode != 0:
                logging.error(
                    "Couldn't distribute apache cert, error %d" % retcode)

            # next, generate the certificate on all machines
            install_output = []
            retcode = E.execute(
                self.cfg.getGlobalParam("MACHINES"),
                "secure_script_wrapper -p2 %s installcert %s" %
                (self.sslWrapperPath, ent_home),
                install_output, 60)
            if retcode != 0:
                logging.error(
                    "Couldn't install cert: %s" % str(install_output))
                return "1"

            self.writeAdminRunnerOpMsg(M.MSG_LOG_SSL_CERT_INSTALLED)
        finally:
            self.updatelock.release()

        return "0"
Ejemplo n.º 44
0
def RebootMachine(enthome, machine):
    """Reboots a machine, first through shutdown and then through the APC.

    Args:
      enthome: forwarded as-is to SendAPCCommand.
      machine: host name of the machine to reboot.

    Returns:
      Whatever SendAPCCommand(enthome, machine, APC_REBOOT) returns.
    """
    rebooting_self = (machine == E.getCrtHostName())
    if not rebooting_self:
        # Try shutting down cleanly first
        logging.info('Shutting down %s' % machine)
        E.execute([machine], '/sbin/shutdown -h now &', None, 1)
    else:
        # Rebooting ourself
        logging.info('Rebooting %s' % machine)
        E.execute([E.LOCALHOST], '/sbin/shutdown -r now', None, 1)
        # If we're still alive after a minute , the APC will kick in

    time.sleep(60)
    logging.info('Rebooting %s via APC' % machine)
    return SendAPCCommand(enthome, machine, APC_REBOOT)
Ejemplo n.º 45
0
def RebootMachine(enthome, machine):
  """Reboot `machine`: shutdown command first, APC_REBOOT a minute later.

  For the current host a `shutdown -r` is executed on E.LOCALHOST; for any
  other host a `shutdown -h` is executed on that host.  In both cases the
  function then sleeps 60 seconds and sends APC_REBOOT.

  Args:
    enthome: handed through to SendAPCCommand.
    machine: host name of the machine to reboot.

  Returns:
    The result of SendAPCCommand.
  """
  if machine == E.getCrtHostName():
    # Rebooting ourself
    log_msg = 'Rebooting %s' % machine
    targets = [E.LOCALHOST]
    shutdown_cmd = '/sbin/shutdown -r now'
    # If we're still alive after a minute , the APC will kick in
  else:
    # Try shutting down cleanly first
    log_msg = 'Shutting down %s' % machine
    targets = [machine]
    shutdown_cmd = '/sbin/shutdown -h now &'

  logging.info(log_msg)
  E.execute(targets, shutdown_cmd, None, 1)
  time.sleep(60)
  logging.info('Rebooting %s via APC' % machine)
  return SendAPCCommand(enthome, machine, APC_REBOOT)
Ejemplo n.º 46
0
def check_klogd_syslogd_conf(machines, enthome, unittestdir=None):
  """Babysits the klogd.conf and syslogd.conf files.

  For each of the two files a small shell snippet is run that writes the
  default content and sets mode 644, but only if the file does not exist.

  Args:
    machines: ['ent1', 'ent2', 'ent3', 'ent4', 'ent5']
    enthome: '/export/hda3/4.6.0.G.27/'
    unittestdir: '/tmp/etc/localbabysitter.d/' -- used for unittest only.
      When given, the snippet is run locally with os.system against this
      directory instead of being executed on `machines`.
  """
  # Both files share one layout; only the daemon name differs.
  conf_template = (
    "%(name)s = {\n"
    "  restart_command : '/sbin/service syslog restart',\n"
    "  timeout : 30,\n"
    "  interval : 30,\n"
    "  use_service_wrapper : 0,\n"
    "  pidfile : '/var/run/%(name)s.pid',\n"
    "}\n"
  )
  conf_by_name = {
    'klogd.conf': conf_template % {'name': 'klogd'},
    'syslogd.conf': conf_template % {'name': 'syslogd'},
  }

  check_create_cmd = (
    'if [ ! -e "%(file)s" ]\n'
    'then\n'
    '  echo "%(data)s" > %(file)s\n'
    '  chmod 644 %(file)s\n'
    'fi\n'
  )

  in_unittest = unittestdir is not None
  if in_unittest:
    conf_dir = unittestdir
  else:
    conf_dir = '/etc/localbabysitter.d/'

  for conf_name, conf_data in conf_by_name.items():
    conf_path = os.path.join(conf_dir, conf_name)
    cmd = check_create_cmd % {'data': conf_data, 'file': conf_path}
    if in_unittest:
      os.system(cmd)
    else:
      E.execute(machines, cmd, [], 0, 0, 0, enthome)
Ejemplo n.º 47
0
def MasterKillMyself(config):
    """Kills the AdminRunner / webserver_config scripts, then calls
    KillProcessIfNotMaster.

    Each script is killed by running python_kill.py (with --kill_by_group)
    as config.ENTERPRISE_USER.

    Args:
      config: object providing ENTERPRISE_HOME, ENTERPRISE_USER and
        ENTERPRISE_BASHRC; it is also passed to KillProcessIfNotMaster.
    """
    util_dir = ("%s/local/google3/enterprise/legacy/util" %
                config.ENTERPRISE_HOME)
    kill_template = (". %s && cd %s && ./python_kill.py --binname=%s "
                     "--kill_by_group")

    # Stop AdminRunner
    scripts_to_kill = (
        "loop_AdminRunner.py",
        "adminrunner.py",
        "loop_webserver_config.py",
        "webserver_config.py",
    )
    for script in scripts_to_kill:
        kill_cmd = kill_template % (config.ENTERPRISE_BASHRC, util_dir,
                                    script)
        E.su_exe_or_fail(config.ENTERPRISE_USER, kill_cmd)

    KillProcessIfNotMaster(config)
Ejemplo n.º 48
0
  def installcert(self):
    """Makes the staging certificate the currently installed certificate.

    The work is done under self.updatelock: verify the staging certificate
    on localhost, distribute it to all MACHINES, run the install step on
    all MACHINES and record an operation message.

    Returns:
      "0" (string) on success, "1" (string) on failure.
    """
    self.updatelock.acquire()
    try:
      ent_home = self.cfg.getGlobalParam("ENTERPRISE_HOME")
      wrapper_cmd = "secure_script_wrapper -p2 %s %s %s"
      staging_cert = ssl_cert.STAGINGCERT_FILENAME % ent_home

      # first verify that the staging certificate is a valid file
      outputList = []
      verifycode = E.execute(
        ['localhost'],
        wrapper_cmd % (self.sslWrapperPath, "verifystagingcert", ent_home),
        outputList, 60)
      if verifycode != 0:
        # The rejected staging certificate is deleted.
        E.rm(['localhost'], staging_cert)
        logging.error("Verify failed for certificate [%s]; error code: %d" %
                      (str(outputList), verifycode))
        return "1"

      # distribute the staging certificate (failure is logged, not fatal)
      retcode = E.distribute(self.cfg.getGlobalParam("MACHINES"),
                             staging_cert, 60)
      if retcode != 0:
        logging.error("Couldn't distribute apache cert, error %d" % retcode)

      # next, generate the certificate on all machines
      outputList = []
      retcode = E.execute(
        self.cfg.getGlobalParam("MACHINES"),
        wrapper_cmd % (self.sslWrapperPath, "installcert", ent_home),
        outputList, 60)
      if retcode != 0:
        logging.error("Couldn't install cert: %s" % str(outputList))
        return "1"

      self.writeAdminRunnerOpMsg(M.MSG_LOG_SSL_CERT_INSTALLED)
    finally:
      self.updatelock.release()

    return "0"
Ejemplo n.º 49
0
 def GetLanguageCatalog(self, language):
     '''Returns a dictionary from 'MSG_...' to translated string.

     The messages are read from the FrontendMessages_<language>.xlb file
     below the enterprise home.  If that file cannot be read, the 'en'
     catalog is returned instead (or an empty catalog when 'en' itself is
     missing).  The result also carries a 'DIR' entry ('rtl' or 'ltr') and
     the generated language list; all values are UTF-8 encoded.
     '''
     xlb_path = (
         '%s/local/google3/enterprise/i18n/FrontendMessages_%s.xlb' %
         (E.getEnterpriseHome(), language.replace('-', '_')))
     try:
         msgs = readxlb.Read(xlb_path)
     except IOError:
         logging.error('Missing language file %s' % xlb_path)
         if language != 'en':
             # Try en language so we can at least replace the tags.
             return self.GetLanguageCatalog('en')
         # Sorry, no language files
         msgs = {}

     # Handle right-to-left languages
     rtl_languages = ('ar', 'fa', 'iw', 'ku', 'sd', 'ur', 'yi')
     if language in rtl_languages:
         direction = 'rtl'
     else:
         direction = 'ltr'
     msgs['DIR'] = direction

     # Encode Unicode messages in UTF-8.  (Adminrunner expects UTF-8
     # messages.)
     for msg_id in msgs:
         msgs[msg_id] = msgs[msg_id].encode('utf-8')

     # Stick in the language list inside the advanced search page in
     # sorted order.
     language_list = self.GenerateLanguageList(msgs)
     msgs["INSERT_LANGUAGE_LIST_IN_ADV_SEARCH"] = language_list
     return msgs
Ejemplo n.º 50
0
def renice_svs(machine, new_priority=SVS_NICE_VALUE):
    """Renices svs on 'machine' with new_priority.

    The svs pid is read from SVS_PID_FILE.  If its current nice value
    differs from new_priority, the whole process group is reniced by
    executing `renice` on 'machine'.

    Args:
      machine: host on which the renice command is executed.
      new_priority: nice value to apply to the svs process group.

    Returns 0 on failure, 1 on success.  0 is also returned when nothing
    had to be done (nice value unknown or already equal to new_priority).
    """
    # Get pid of svs.  The file is closed explicitly, and an unreadable or
    # malformed pid file is reported instead of raising.
    try:
        pid_file = open(SVS_PID_FILE, 'r')
        try:
            pid = int(pid_file.read())
        finally:
            pid_file.close()
    except (IOError, OSError, ValueError):
        err = str(sys.exc_info()[:2])
        logging.error('Error reading SVS pid from %s: %s' %
                      (SVS_PID_FILE, err))
        return 0

    # Get nice value for pid
    old_priority = os_utils.GetAttr('nice', pid)
    if old_priority is None or int(old_priority) == new_priority:
        return 0

    logging.info('Renicing SVS to %d.' % new_priority)
    # Get group id from pid.
    pgid = os_utils.GetAttr('pgid', pid)
    if pgid is None:
        logging.error('Error renicing SVS: no process group for pid %d' % pid)
        return 0

    # Renice the whole group.
    cmd = 'renice %d -g %d' % (new_priority, int(pgid))
    rc = E.execute([machine], cmd, None, 1)
    if rc != 0:
        # Report the command's own status; this used to log `ret`, which is
        # always 0 on this path.
        logging.error('Error renicing SVS: %s' % rc)
        return 0
    return 1
Ejemplo n.º 51
0
 def distributedeletedbfile(self, filename):
   """Removes filename from every machine listed in MACHINES.

   Args:
     filename: path of the local database file to delete on all nodes.

   Returns:
     The string "0" if E.rm reports success, otherwise the string "1".
   """
   all_machines = self.cfg.getGlobalParam("MACHINES")
   if E.rm(all_machines, filename):
     return "0"
   logging.error("Failed to delete %s on %s" % (filename, all_machines))
   return "1"
Ejemplo n.º 52
0
def get_core_states(ver, home, nodes, ignore=0, testver=None):
  """Collects the output of "ent_core --ver=<ver> --info" from each node.

  The command is run on the nodes one after another.

  Arguments:
    ver: '4.6.5'
    home: '/export/hda3/4.6.5'
    nodes: ['ent1', 'ent2']
    ignore: 1 - ignore errors; 0 - otherwise.
    testver: 1 - if this is a test version; 0 - otherwise.  When None, it
             is determined with is_test(ver).

  Returns:
    A dictionary mapping node name to the concatenated command output,
    e.g. {'ent1': 'state=RUNNING...', 'ent2': 'state=RUNNING...'}.
    Nodes for which the command returned an error are left out.
  """
  if testver is None:
    testver = is_test(ver)

  cmd_parts = [
    '/export/hda3/%s/local/google/bin/secure_script_wrapper' % ver,
    '-e /export/hda3/%s/bin/ent_core' % ver,
    '--ver=%s' % ver,
  ]
  if testver:
    cmd_parts.append('--testver')
  cmd_parts.append('--info')
  cmd = ' '.join(cmd_parts)

  states = {}
  for node in nodes:
    node_out = []
    failed = E.execute([node], cmd, node_out, 0, 0, 0, home, ignore=ignore)
    if failed:
      continue
    states[node] = ''.join(node_out)
  return states
Ejemplo n.º 53
0
def core_op_out(ver, home, op, nodes, ignore=0, testver=None, gfs=1):
  """Runs "ent_core --<op>" on the given nodes and returns the result.

  All nodes are handed to a single E.execute call.

  Arguments:
    ver: '4.6.5'
    home: '/export/hda3/4.6.5'
    op: 'info'
    nodes: ['ent1', 'ent2']
    ignore: 1 - ignore errors; 0 - otherwise.
    testver: 1 - if this is a test version; 0 - otherwise.  When None, it
             is determined with is_test(ver).
    gfs:     1 - start gfs during activation; 0 - otherwise.

  Returns:
    A tuple (error code, concatenated output of all nodes),
    e.g. (0, 'state=RUNNING...').  On a non-zero error code the output is
    also written to the error log.
  """
  if testver is None:
    testver = is_test(ver)

  flags = ['--ver=%s' % ver]
  if testver:
    flags.append('--testver')
  if gfs == 0:
    flags.append('--gfs=0')
  flags.append('--%s' % op)
  cmd = ('/export/hda3/%s/local/google/bin/secure_script_wrapper '
         '-e /export/hda3/%s/bin/ent_core %s' % (ver, ver, ' '.join(flags)))

  out = []
  ret = E.execute(nodes, cmd, out, 0, 0, 0, home, ignore=ignore)
  combined = ''.join(out)
  if ret:
    logging.error(combined)
  return ret, combined