def _RemoveTopLevelDirs(cfg, dir_root, gfs_aliases=None, datadir=None,
                        unrecoverable_dirs=None):
  """ Remove top level dirs/files under dir_root one by one in a loop
  Parameters:
    cfg: entconfig.EntConfig(entHome, 1)
    dir_root: '/gfs/ent/'
    gfs_aliases: 'ent=master.ent.gfs.ent4-6-0-G-27.ls.google.com:3830'
                  -- for gfs only
    datadir: '/export/hda3/4.6.0.G.27/data/enterprise-data'
             -- for bigfile only
    unrecoverable_dirs: None (treated as [])
                        -- introduced for unittests.
  Returns:
    Dirs under dir_root remain to be removed
  """
  # Fixed: a mutable default argument ([]) is shared across calls, and
  # _TopLevelDirsToRemove mutates the list it is given, so the default
  # would silently accumulate entries.  Create a fresh list per call.
  if unrecoverable_dirs is None:
    unrecoverable_dirs = []

  def _RemoveDir(dir, cmd_args):
    """ using fileutil to remove a dir

    First do "rm" without the "-a" flag. If it does not
    work, try the "-a" flag.
    Args:
      dir: '/gfs/ent/base-indexer000-of-003/global-anchor000-of-003'
      cmd_args: '--gfs_aliases=ent=master.ent.gfs.ent4-6-0-G-27.ls.'
                'google.com:3830'
    Return:
      1 - removed successfully
      0 - otherwise
    """
    fileutil_args = ['', '-a']
    for fileutil_arg in fileutil_args:
      cmd = ('fileutil %s rm %s -R -f %s' % (cmd_args, fileutil_arg, dir))
      if _ExecuteCommand(cmd, error_filter=_NoMatchOk) == 0:
        return 1
    return 0

  top_level_dirs = _TopLevelDirsToRemove(cfg, dir_root,
                                         gfs_aliases=gfs_aliases,
                                         datadir=datadir,
                                         unrecoverable_dirs=unrecoverable_dirs)
  # 'fileutil ls' failed, sacrifice unrecoverable_dirs, try removing all
  if len(top_level_dirs) == 0:
    top_level_dirs = ['%s*' % dir_root]
  max_tries = 3
  count = 0
  cmd_args = _ComposeFileutilArgs(gfs_aliases, datadir)
  # retry until no dir not removed or hit max_tries
  while len(top_level_dirs) > 0 and count < max_tries:
    count += 1
    dirs_not_removed = []
    for dir in top_level_dirs:
      if _RemoveDir(dir, cmd_args) == 0:
        dirs_not_removed.append(dir)
    top_level_dirs = dirs_not_removed
    logging.flush()

  return top_level_dirs
def _Reactivate(cfg, version):
  """Bring the serve service back up and activate it everywhere.

  Returns:
    '' on success, otherwise an error string for the last failing step.
  """
  logging.info('Reset Index: Reactivate')
  logging.flush()
  error = ''
  if _RunServeCmd(cfg, version, 'start'):
    error = 'Startup failed.'
  # Activation must reach every node: a master switch may have left
  # more than one node deactivated.
  if _RunServeCmd(cfg, version, 'activate', allnodes=1):
    error = 'Activate failed.'
  return error
def RunCommand(command):
  """Run a shell command; log timing on success, log and raise on failure.

  Args:
    command: shell command string handed to os.system.
  Returns:
    None
  Raises:
    CommandError: when the command exits with a non-zero status.
  """
  started = time.time()
  status = os.system(command)
  if status:
    logging.error('Command failure:    %s' % command)
    raise CommandError('cannot complete command: %s' % command)
  elapsed = time.time() - started
  logging.info('Command successful (took %f sec): %s' % (elapsed, command))
  logging.flush()
# Exemple #4
# 0
def RunCommand(command):
    """Execute a shell command, timing it and raising on failure.

    Args:
        command: shell command string executed via os.system.
    Returns:
        None
    Raises:
        CommandError: if the command returns a non-zero exit status.
    """
    t0 = time.time()
    if os.system(command) != 0:
        logging.error('Command failure:    %s' % command)
        raise CommandError('cannot complete command: %s' % command)
    logging.info('Command successful (took %f sec): %s' %
                 ((time.time() - t0), command))
    logging.flush()
def _Reactivate(cfg, version):
    """Restart the serve service and activate it on all nodes.

    Returns '' on success, or an error string for the last step that failed.
    """
    logging.info('Reset Index: Reactivate')
    logging.flush()
    status = ''
    if _RunServeCmd(cfg, version, 'start'):
        status = 'Startup failed.'
    # A master switch can leave several nodes deactivated, so activation
    # has to run on every node, not just this one.
    if _RunServeCmd(cfg, version, 'activate', allnodes=1):
        status = 'Activate failed.'
    return status
def _Inactivate(cfg, version):
  """Deactivate and stop the serve service on every node.

  Returns:
    '' on success, or an error string naming the step that failed.
  """
  logging.info('Reset Index: Inactivate')
  logging.flush()
  # Run each serve command across all nodes, bailing out on the first
  # failure.
  for action, error in (('deactivate', 'Deactivate failure.'),
                        ('stop', 'Stop failure.')):
    if _RunServeCmd(cfg, version, action, allnodes=1):
      return error
  time.sleep(60)  # Give the servers a moment to wind down.
  return ''
def _Inactivate(cfg, version):
    """Take the serve service down: deactivate, then stop, on all nodes.

    Returns '' for success or an error string for the failing step.
    """
    logging.info('Reset Index: Inactivate')
    logging.flush()
    if _RunServeCmd(cfg, version, 'deactivate', allnodes=1):
        return 'Deactivate failure.'
    if _RunServeCmd(cfg, version, 'stop', allnodes=1):
        return 'Stop failure.'
    # Let in-flight shutdown work settle before callers proceed.
    time.sleep(60)
    return ''
def _ClearIndex(cfg, version):
    """Clear the index directories in GFS/bigfiles.

    Removes local urltracker data, shared GFS files, bigfiles, and local
    (non-gfs) spelling data, stopping at the first failure.
    Return: '' for success, error for failure
    """
    logging.info('Reset Index: ClearIndex')
    logging.flush()

    def _RemoveLocalTree(path, deleting_msg, nothing_msg):
        """Remove a local directory tree on all machines; '' or error."""
        if not os.access(path, os.R_OK):
            logging.info(nothing_msg)
            return ''
        logging.info(deleting_msg)
        if _ExecuteCommand('rm -R -f %s' % path,
                           machines=cfg.getGlobalParam(C.MACHINES)):
            return 'File removal failed.'
        return ''

    # Delete (local) urltracker data on oneway.
    urltracker_dir = cfg.getGlobalParam('URLTRACKER_DIRECTORY')
    error = _RemoveLocalTree(
        urltracker_dir,
        'Deleting local urltracker directory: %s' % (urltracker_dir),
        'No local urltracker data to delete')
    if error:
        return error

    if cfg.getGlobalParam(C.GFS_CELL):
        logging.info('Deleting GFS files')
        gfs_aliases = core_utils.GetGFSAliases(
            version, install_utilities.is_test(version))
        if _RemoveTopLevelDirs(cfg, '/gfs/ent/', gfs_aliases=gfs_aliases):
            return 'Shared file removal failed.'

    logging.info('Deleting bigfiles')
    datadir = '%s/data/enterprise-data' % cfg.getGlobalParam('ENTERPRISE_HOME')
    if _RemoveTopLevelDirs(cfg, '/bigfile/', datadir=datadir):
        return 'File removal failed.'

    # delete spelling data on oneway (path normalized without trailing '/'):
    spell_root = cfg.getGlobalParam('ENT_SPELL_ROOT_DIR')
    if spell_root[-1] == '/':
        spell_root = spell_root[:-1]
    error = _RemoveLocalTree(
        spell_root,
        'Deleting local (non-gfs) spelling data',
        'No local (non-gfs) spelling data to delete')
    if error:
        return error

    return ''
def _ClearIndex(cfg, version):
  """Clear the index directories in GFS/bigfiles.

  Deletes local urltracker data, shared GFS files, bigfiles, and local
  (non-gfs) spelling data; stops at the first failure.
  Return: '' for success, error for failure
  """
  logging.info('Reset Index: ClearIndex')
  logging.flush()

  # Delete (local) urltracker data on oneway.
  urltracker_dir = cfg.getGlobalParam('URLTRACKER_DIRECTORY')
  if os.access(urltracker_dir, os.R_OK):
    cmd = ('rm -R -f %s' % (urltracker_dir))
    logging.info('Deleting local urltracker directory: %s' % (urltracker_dir))
    if _ExecuteCommand(cmd, machines=cfg.getGlobalParam(C.MACHINES)):
      return 'File removal failed.'
  else:
    logging.info('No local urltracker data to delete')

  if cfg.getGlobalParam(C.GFS_CELL):
    logging.info('Deleting GFS files')
    gfs_aliases = core_utils.GetGFSAliases(version,
                                           install_utilities.is_test(version))
    dirs_not_removed = _RemoveTopLevelDirs(cfg, '/gfs/ent/',
                                           gfs_aliases=gfs_aliases)
    if len(dirs_not_removed) > 0:
      return 'Shared file removal failed.'

  logging.info('Deleting bigfiles')
  datadir = '%s/data/enterprise-data' % cfg.getGlobalParam('ENTERPRISE_HOME')
  dirs_not_removed = _RemoveTopLevelDirs(cfg, '/bigfile/', datadir=datadir)
  if len(dirs_not_removed) > 0:
    return 'File removal failed.'

  # delete spelling data on oneway:
  spell_root = cfg.getGlobalParam('ENT_SPELL_ROOT_DIR')
  if spell_root[-1] == '/':
    spell_root = spell_root[:-1]

  if os.access(spell_root, os.R_OK):
    # Fixed: dropped a stray trailing semicolon on this assignment.
    cmd = ('rm -R -f %s' % spell_root)
    logging.info('Deleting local (non-gfs) spelling data')
    if _ExecuteCommand(cmd, machines=cfg.getGlobalParam(C.MACHINES)):
      return 'File removal failed.'
  else:
    logging.info('No local (non-gfs) spelling data to delete')

  return ''
def _ReinitializeIndex(cfg, _):
  """Recreate the initial index state in GFS/bigfiles.

  Returns:
    '' on success, or an error string if shared file creation failed.
  """
  logging.info('Reset Index: ReinitializeIndex')
  logging.flush()
  result = ''
  if cfg.getGlobalParam(C.GFS_CELL):
    # GFS cells need their chunk subdirectories recreated for a reset index.
    logging.info('creating gfs subdirectories')
    if not cfg.createGFSChunkSubDirs(reset_index=1):
      result = 'Failed creating new shared files'

  # Seed the files the various backends expect to exist.
  logging.info('Creating default backend files ...')
  cfg.CreateDefaultBackendFiles()

  # Restore the initial spelling data.
  cfg.EnsureSpellingData(reset=1)

  return result
def _ReinitializeIndex(cfg, _):
    """Rebuild the freshly-reset index scaffolding in GFS/bigfiles.

    Returns '' or an error string.
    """
    logging.info('Reset Index: ReinitializeIndex')
    logging.flush()
    error = ''
    # Recreate the GFS chunk subdirectories first, when a GFS cell exists.
    if cfg.getGlobalParam(C.GFS_CELL):
        logging.info('creating gfs subdirectories')
        if not cfg.createGFSChunkSubDirs(reset_index=1):
            error = 'Failed creating new shared files'

    # The backends require a set of default files to be present.
    logging.info('Creating default backend files ...')
    cfg.CreateDefaultBackendFiles()

    # Make sure the initial spelling data exists again.
    cfg.EnsureSpellingData(reset=1)

    return error
def _TopLevelDirsToRemove(cfg, dir_root, gfs_aliases=None, datadir=None,
                          unrecoverable_dirs=None):
  """ top level dirs/files under dir_root

  Trying to find out all the dirs/files under dir_root to be removed, given
  unrecoverable_dirs.
  Note:
    fileutil has different behavior for gfs, bigfile, and local files.
    Sometimes, the "fileuitl ls" will give errors without the "-a" flag.
    However, with the "-a" flag, some unwanted directories are also returned.
    So the idea here is to first try without the "-a" flag. If it fails,
    try the "-a" flag. For every dir it returns, make sure it is under
    dir_root.
  Parameters:
    cfg: entconfig.EntConfig(entHome, 1)
    dir_root: '/gfs/ent/'
    gfs_aliases: 'ent=master.ent.gfs.ent4-6-0-G-27.ls.google.com:3830'
                  -- for gfs only
    datadir: '/export/hda3/4.6.0.G.27/data/enterprise-data'
             -- for bigfile only
    unrecoverable_dirs: None (treated as []); the caller's list is copied
                        and never mutated.
                        -- introduced for unittests.
  Returns:
    ['/bigfile/pr_stats_shard000', '/bigfile/pr_test_00_00', ...]
  """
  # Fixed: the old mutable default ([]) was shared across calls AND the
  # extend()/append() below mutated it (and any caller-supplied list),
  # leaking entries between invocations.  Work on a private copy.
  if unrecoverable_dirs is None:
    unrecoverable_dirs = []
  else:
    unrecoverable_dirs = list(unrecoverable_dirs)

  # get all the top level GFS dirs
  if gfs_aliases is not None:
    unrecoverable_dirs.extend(_UnrecoverableGFSDirs(cfg))
  if dir_root not in unrecoverable_dirs:
    unrecoverable_dirs.append(dir_root)
  # first try without the "-a" option. If it does not work, try the "-a" option
  cmd_args = _ComposeFileutilArgs(gfs_aliases, datadir)
  fileutil_args = ['', '-a']
  # Fixed: 'timeout' was computed and doubled for the retry but never
  # actually passed to _ExecuteCommand (which always got 15*60).  Keep the
  # original first-attempt value and make the doubling effective.
  timeout = 15 * 60
  top_level_dirs_to_remove = []
  for fileutil_arg in fileutil_args:
    # unfortunately, fileutil in 4.6.4 works differently on gfs and bigfile.
    # it has to use "/gfs/ent/" and "/bigfile/*".
    if gfs_aliases is not None:
      cmd = ('fileutil %s ls %s %s' % (cmd_args, fileutil_arg, dir_root))
    else:
      cmd = ('fileutil %s ls %s %s*' % (cmd_args, fileutil_arg, dir_root))
    out = []
    status = _ExecuteCommand(cmd, timeout=timeout, out=out,
                             error_filter=_NoMatchOk)
    if status == 0 and len(out) >= 1:
      top_level_dirs = out[0].splitlines()
      # don't remove unrecoverable dirs. Also noise may exist in the result.
      for dir in top_level_dirs:
        if not dir.endswith('..'):
          dir = E.normpath(dir)
          if (dir.startswith(dir_root) and
              _IsRecoverable(dir, unrecoverable_dirs)):
            top_level_dirs_to_remove.append(dir)
      if len(top_level_dirs_to_remove) > 0:
        break
      else:
        timeout *= 2
  logging.info('Reset Index: Top Level Dirs to Remove:')
  logging.info('      %s' % top_level_dirs_to_remove)
  logging.flush()
  return top_level_dirs_to_remove
def _RemoveTopLevelDirs(cfg,
                        dir_root,
                        gfs_aliases=None,
                        datadir=None,
                        unrecoverable_dirs=None):
    """ Remove top level dirs/files under dir_root one by one in a loop
  Parameters:
    cfg: entconfig.EntConfig(entHome, 1)
    dir_root: '/gfs/ent/'
    gfs_aliases: 'ent=master.ent.gfs.ent4-6-0-G-27.ls.google.com:3830'
                  -- for gfs only
    datadir: '/export/hda3/4.6.0.G.27/data/enterprise-data'
             -- for bigfile only
    unrecoverable_dirs: None (treated as [])
                        -- introduced for unittests.
  Returns:
    Dirs under dir_root remain to be removed
  """
    # Fixed: a mutable default argument ([]) is shared across calls, and
    # _TopLevelDirsToRemove mutates the list it is given, so the default
    # would accumulate entries.  Create a fresh list per call instead.
    if unrecoverable_dirs is None:
        unrecoverable_dirs = []

    def _RemoveDir(dir, cmd_args):
        """ using fileutil to remove a dir

    First do "rm" without the "-a" flag. If it does not
    work, try the "-a" flag.
    Args:
      dir: '/gfs/ent/base-indexer000-of-003/global-anchor000-of-003'
      cmd_args: '--gfs_aliases=ent=master.ent.gfs.ent4-6-0-G-27.ls.'
                'google.com:3830'
    Return:
      1 - removed successfully
      0 - otherwise
    """
        fileutil_args = ['', '-a']
        for fileutil_arg in fileutil_args:
            cmd = ('fileutil %s rm %s -R -f %s' %
                   (cmd_args, fileutil_arg, dir))
            if _ExecuteCommand(cmd, error_filter=_NoMatchOk) == 0:
                return 1
        return 0

    top_level_dirs = _TopLevelDirsToRemove(
        cfg,
        dir_root,
        gfs_aliases=gfs_aliases,
        datadir=datadir,
        unrecoverable_dirs=unrecoverable_dirs)
    # 'fileutil ls' failed, sacrifice unrecoverable_dirs, try removing all
    if len(top_level_dirs) == 0:
        top_level_dirs = ['%s*' % dir_root]
    max_tries = 3
    count = 0
    cmd_args = _ComposeFileutilArgs(gfs_aliases, datadir)
    # retry until no dir not removed or hit max_tries
    while len(top_level_dirs) > 0 and count < max_tries:
        count += 1
        dirs_not_removed = []
        for dir in top_level_dirs:
            if _RemoveDir(dir, cmd_args) == 0:
                dirs_not_removed.append(dir)
        top_level_dirs = dirs_not_removed
        logging.flush()

    return top_level_dirs
def _TopLevelDirsToRemove(cfg,
                          dir_root,
                          gfs_aliases=None,
                          datadir=None,
                          unrecoverable_dirs=None):
    """ top level dirs/files under dir_root

  Trying to find out all the dirs/files under dir_root to be removed, given
  unrecoverable_dirs.
  Note:
    fileutil has different behavior for gfs, bigfile, and local files.
    Sometimes, the "fileuitl ls" will give errors without the "-a" flag.
    However, with the "-a" flag, some unwanted directories are also returned.
    So the idea here is to first try without the "-a" flag. If it fails,
    try the "-a" flag. For every dir it returns, make sure it is under
    dir_root.
  Parameters:
    cfg: entconfig.EntConfig(entHome, 1)
    dir_root: '/gfs/ent/'
    gfs_aliases: 'ent=master.ent.gfs.ent4-6-0-G-27.ls.google.com:3830'
                  -- for gfs only
    datadir: '/export/hda3/4.6.0.G.27/data/enterprise-data'
             -- for bigfile only
    unrecoverable_dirs: None (treated as []); the caller's list is copied
                        and never mutated.
                        -- introduced for unittests.
  Returns:
    ['/bigfile/pr_stats_shard000', '/bigfile/pr_test_00_00', ...]
  """
    # Fixed: the old mutable default ([]) was shared across calls AND the
    # extend()/append() below mutated it (and any caller-supplied list).
    # Work on a private copy instead.
    if unrecoverable_dirs is None:
        unrecoverable_dirs = []
    else:
        unrecoverable_dirs = list(unrecoverable_dirs)

    # get all the top level GFS dirs
    if gfs_aliases is not None:
        unrecoverable_dirs.extend(_UnrecoverableGFSDirs(cfg))
    if dir_root not in unrecoverable_dirs:
        unrecoverable_dirs.append(dir_root)
    # first try without the "-a" option. If it does not work, try the "-a" option
    cmd_args = _ComposeFileutilArgs(gfs_aliases, datadir)
    fileutil_args = ['', '-a']
    # Fixed: 'timeout' was computed and doubled for the retry but never
    # passed to _ExecuteCommand (which always got 15*60).  Keep the original
    # first-attempt value and make the doubling effective.
    timeout = 15 * 60
    top_level_dirs_to_remove = []
    for fileutil_arg in fileutil_args:
        # unfortunately, fileutil in 4.6.4 works differently on gfs and bigfile.
        # it has to use "/gfs/ent/" and "/bigfile/*".
        if gfs_aliases is not None:
            cmd = ('fileutil %s ls %s %s' % (cmd_args, fileutil_arg, dir_root))
        else:
            cmd = ('fileutil %s ls %s %s*' %
                   (cmd_args, fileutil_arg, dir_root))
        out = []
        status = _ExecuteCommand(cmd,
                                 timeout=timeout,
                                 out=out,
                                 error_filter=_NoMatchOk)
        if status == 0 and len(out) >= 1:
            top_level_dirs = out[0].splitlines()
            # don't remove unrecoverable dirs. Also noise may exist in the result.
            for dir in top_level_dirs:
                if not dir.endswith('..'):
                    dir = E.normpath(dir)
                    if (dir.startswith(dir_root)
                            and _IsRecoverable(dir, unrecoverable_dirs)):
                        top_level_dirs_to_remove.append(dir)
            if len(top_level_dirs_to_remove) > 0:
                break
            else:
                timeout *= 2
    logging.info('Reset Index: Top Level Dirs to Remove:')
    logging.info('      %s' % top_level_dirs_to_remove)
    logging.flush()
    return top_level_dirs_to_remove
 def FlushLogHandler(self):
     """Flush any buffered log output; returns '' to the caller.

     NOTE(review): the empty-string return presumably signals success to
     the request-handler framework -- confirm against the dispatcher.
     """
     logging.flush()
     return ''
def StartupWork(cfg, state):
  """One-time startup/initialization sequence for the admin service.

  Checks machine memory, initializes global default files on a FRESH
  install, optionally bails out in INSTALL mode, resumes an interrupted
  index reset, claims MASTER, allocates machines, and -- when config files
  are initialized but the system is not -- performs full system
  initialization (datadirs, GFS subdirs, spelling data, default
  collection/frontend/backend files, query expansion) before starting or
  babysitting the serve service.

  Args:
    cfg: configurator object exposing globalParams and the setup helpers
      called below.
    state: startup mode string; "INSTALL" ends the work early after
      cfg.startupDone().

  Returns None.  Any exception is caught, logged, and reported via
  logging.fatal rather than propagated.
  """

  try:
    #############################################################################
    # check memory-total
    logging.info("check memory-total")
    if not cfg.CheckMachineMemory():
      logging.fatal("failed to check memory-total")
      return

    # One-time Initialization Work:
    # In fresh mode we first initialize the global default
    if install_utilities.GetInitState(cfg.globalParams) == C.FRESH:
      logging.info("initializing global default files")
      # NOTE(review): lowercase 'false' is presumably a module-level alias
      # for 0 defined elsewhere in this file -- confirm before editing.
      if not cfg.globalParams.InitGlobalDefaultFiles(overwrite=false):
        logging.fatal("failed to initialize global default files")
        return
      # Call to saveParams will also distribute the newly created global
      # default files.
      logging.info("saving params and distributing global default files.")
      logging.flush()
      # This is a best effort mechanism. We try upto 6 times to update the
      # files. We assume that we'll get the files to all alive machines. If
      # a machine can't get the files then we assume that it is dead and won't
      # become a master with missing default files.
      cfg.saveParams(retry=6)
      logging.flush()
      install_utilities.SetInitState(cfg, C.CONFIG_FILES_INITIALIZED)
      logging.info('system initialization for %s state successful' %
          C.CONFIG_FILES_INITIALIZED)

    logging.flush()
    # In install mode -- bail out now
    if state == "INSTALL":
      cfg.startupDone()
      return

    # Perform reset index if we got interrupted in the middle of it
    reset_index.ResetIndex(cfg, only_if_in_progress=1)

    try:
      localhost =  E.getCrtHostName()
      if not cfg.setGlobalParam('MASTER', localhost):
        logging.fatal("Error setting the MASTER to %s" % localhost)
    except IOError:
      logging.fatal("Couldn't set MASTER -> exiting")

    # Allocate machines for the empty slots
    logging.info("doing machine allocation")
    if not cfg.DoMachineAllocation():
      # Allocation failure is logged but deliberately non-fatal here.
      logging.info("failed to do machine allocation")

    # In the next install stage (INSTALL) mode we go and finish up the work
    if (install_utilities.GetInitState(cfg.globalParams) ==
          C.CONFIG_FILES_INITIALIZED):
      logging.info("doing system initialization")

      # datadirs are initially made (by the base rpm) to not have any data disks
      # because it doesn't know which disks to use, but the datadir is needed
      # to get things running.
      logging.info("creating datadirs")
      if not cfg.createDataDirs(cfg.getGlobalParam(C.MACHINES)):
        logging.fatal("failed to make datadirs on machines")
        return

      # Create GFS subdirectories
      if cfg.getGlobalParam(C.GFS_CELL):
        logging.info("creating gfs subdirectories")
        if not cfg.createGFSChunkSubDirs():
          logging.fatal("failed creating the gfs subdirectories")
          return

      # ensure initial spelling data is present
      cfg.EnsureSpellingData()

      # create a default collection w/content of mainCrawl / frontend
      logging.info("Creating default collection ...")
      cfg.CreateDefaultCollection()
      logging.info("Creating default frontend ...")
      cfg.CreateDefaultFrontend()
      logging.info("Assiging default collection and frontend ...")
      cfg.AssignDefaultCollAndFront()

      # create some initial files needed by various backend
      logging.info("Creating default backend files ...")
      cfg.CreateDefaultBackendFiles()

      # create built in query expansion entry
      logging.info("Creating default query expansion entry ...")
      cfg.CreateDefaultQueryExpEntry()

      install_utilities.SetInitState(cfg, C.INITIALIZED)
      logging.info("system initialization successful")
      # we start the serve_service as this is fresh install
      logging.info('Starting serve service...')
      RunServeService(cfg.globalParams, 'start')
    else:
      # we restart the babysitter because the servers map may have changed
      logging.info('Babysitting serve service after allocation...')
      RunServeService(cfg.globalParams, 'babysit')

    logging.info("Saving params")
    cfg.saveParams()

    # now we're ready to run; end startup mode
    logging.info("Done with startup")
    cfg.startupDone()

  except Exception, e:
    # Python 2 catch-all boundary: log the full traceback, then report
    # failure via logging.fatal.
    (t, v, tb) = sys.exc_info()
    exc_msg = string.join(traceback.format_exception(t, v, tb))
    logging.error(exc_msg)
    logging.fatal("StartupWork failed with exception: %s; %s" % (
      e, traceback.format_tb(tb)))
 def FlushLogHandler(self):
   """Flush any buffered log output and return '' (empty response body)."""
   logging.flush()
   return ''