Ejemplo n.º 1
0
def CollectLogs(all_machines, gws_log_dir, log_collect_dir):
  """Gather GWS partner logs from every node into log_collect_dir.

  Runs only on a oneway (single-machine) install or on the cluster
  master; any other node logs a message and returns.  For the local
  machine the GWS log directory is exposed via a symlink; for every
  other machine the partnerlog.* files are pulled over rsync/ssh into
  a per-machine subdirectory of log_collect_dir.

  Args:
    all_machines: list of hostnames in the install.
    gws_log_dir: directory holding the partnerlog.* files on each node.
    log_collect_dir: local directory the logs are collected under.

  Returns:
    None.  Per-machine failures are logged and skipped.
  """
  # We only run this on oneway or master node of cluster.
  master = find_master.FindMaster(2100, all_machines)
  crt_machine = E.getCrtHostName()
  if len(all_machines) != 1 and (len(master) != 1 or master[0] != crt_machine):
    logging.info('Not a oneway or cluster master node. Return!')
    return

  # Serialize concurrent collection runs via a lock file; give up
  # rather than wait indefinitely if another run holds it.
  lockfile = '%s/lock' % log_collect_dir
  # waiting up to 5 minutes for the lock.
  # NOTE(review): "5 minutes" assumes the second argument (30) means
  # 30 retries at ~10s each -- confirm against E.acquire_lock.
  lock = E.acquire_lock(lockfile, 30, breakLockAfterGracePeriod = 0)
  if lock == None:
    logging.info('Cannot grab the lock. Return!')
    return

  try:
    for machine in all_machines:
      src_pattern = '%s/partnerlog.*' % gws_log_dir
      dest_dir = '%s/%s' % (log_collect_dir, machine)

      # If it's a oneway or master node, we make a symlink to gws_log_dir instead
      # of rsync to log_collect directory
      if machine == crt_machine:
        # To make it backward compatible, we need to remove old dest_dir if it's
        # already an existing directory from previous version because in previous
        # versions we created a dir and rsynced files even on the master node and
        # one-ways.
        if os.path.exists(dest_dir) and not os.path.islink(dest_dir):
          if not E.rm(master, '%s/*' % dest_dir) or not E.rmdir(master, dest_dir):
            # Cannot clean up the legacy directory; skip this machine
            # rather than abort the whole collection.
            logging.error('Directory %s exists and cannot be cleaned.', dest_dir)
            continue
          logging.info('Cleaned existing directory %s.', dest_dir)

        if E.ln(master, gws_log_dir, dest_dir):
          logging.info('Symlink %s to directory %s:%s for logs' %
                       (dest_dir, machine, gws_log_dir))
        else:
          logging.error('Cannot make a symlink from %s to %s' %
                        (dest_dir, gws_log_dir))
        continue

      # For non-master nodes on cluster, we need to rsync those files to master node
      logging.info('Collecting logs from %s:%s into %s' % (
        machine, src_pattern, dest_dir))

      # make log directories if needed
      liblog.MakeDir(dest_dir)

      # rsync all files from one remote machine in one command.
      # --size-only skips mtime comparison; presumably acceptable
      # because partner logs only ever grow -- TODO confirm.
      rsync_cmd = 'rsync --timeout=60 --size-only -vau ' \
                  ' -e ssh %s:%s %s/' % (machine, src_pattern, dest_dir)

      # rsync the logs
      (status, output) = liblog.DoCommand(rsync_cmd)
      if status != 0:
        # Best effort: log the failure and continue with the next machine.
        logging.error('Failed to collect logs from %s: %s' % (
          machine, output))
  finally:
    # Always release and remove the lock, even if a step above raised.
    lock.close()
    os.unlink(lockfile)
Ejemplo n.º 2
0
def create(linkFarmDir, chunkDisksMap, chunkPrefix, chunkTermination, binDir, machines):
    """Create the data-chunk directory layout on each machine.

    For every machine this:
      1. creates the link-farm dirs (<termination>-data plus its logs/
         and runconfig subdirs) and, per chunk disk, the chunk dir and
         workqueue dirs,
      2. symlinks <termination>-data/data back to <termination>-data,
      3. writes a search.config listing every chunk disk, distributes
         it, and moves it into place,
      4. symlinks each disk's workqueue/bin/current to the
         workqueue-slave binary so the slave can checksum/update itself.

    @param linkFarmDir the directory where the link farm lives
    @param chunkDisksMap the map from machines to data disks
    @param chunkPrefix path prefix of the chunk dirs on each disk
    @param chunkTermination the dir termination
    @param binDir map from binary name to the directory containing it
    @param machines on which machines we create this datadir

    @return boolean - the success status
    """
    ok = 1

    for machine in machines:
        if not chunkDisksMap.has_key(machine):
            logging.error("ERROR: No entry for machine %s in data chunk disk map %s" % (machine, chunkDisksMap))
            ok = 0
            continue

        chunkDisks = chunkDisksMap[machine]

        # Prepare search.config content
        searchConfig = []
        searchConfig.append(
            """datapath %s
urlbuckets urlbuckets
sorttempdir .
"""
            % chunkTermination
        )

        dirs = []
        for d in ["-data", "-data/logs", "-data/%s" % C.RUNCONFIGDIR_DIRNAME]:
            dirs.append("%s/%s%s" % (linkFarmDir, chunkTermination, d))

        for d in chunkDisks:
            searchConfig.append("disk %s/%s 1000\n" % (d, chunkPrefix))
            dirs.append("%s/%s/%s" % (d, chunkPrefix, chunkTermination))
            dirs.append("%s/%s/workqueue" % (d, chunkPrefix))
            dirs.append("%s/%s/workqueue/bin" % (d, chunkPrefix))

        # NOTE(review): unlike the other failure paths (which set ok = 0
        # and keep going), a mkdir/ln failure here aborts the remaining
        # machines -- confirm this early return is intentional.
        if not E.mkdir([machine], string.join(dirs, " ")) or not E.ln(
            [machine],
            "%s/%s-data" % (linkFarmDir, chunkTermination),
            "%s/%s-data/data" % (linkFarmDir, chunkTermination),
        ):
            return false

        # create the search.config and distribute it
        fileName = "%s/%s-data/search.config" % (linkFarmDir, chunkTermination)
        # NOTE(review): predictable filename in world-writable /tmp is
        # symlink-attack prone; consider tempfile.mkstemp.
        tmpFile = "/tmp/search.config.tmp-%s" % chunkTermination
        try:
            # Close explicitly so the content is fully flushed to disk
            # before E.distribute copies the file; the previous code
            # relied on refcount GC closing the unreferenced file object,
            # which is a CPython implementation detail.
            tmp = open(tmpFile, "w")
            try:
                tmp.write(string.join(searchConfig, ""))
            finally:
                tmp.close()
            if E.ERR_OK != E.distribute([machine], tmpFile, 1) or E.ERR_OK != E.execute(
                [machine], "mv -f %s %s" % (tmpFile, fileName), None, true
            ):
                ok = 0
            E.rm([E.LOCALHOST, machine], tmpFile)
        except IOError:
            ok = 0

        # set up link in workqueue slave binary directory (bin) to workqueue-slave
        # binary so that workqueue-slave can checksum/update itself
        for d in chunkDisks:
            if not E.ln(
                [machine],
                "%s/workqueue-slave" % binDir["workqueue-slave"],
                "%s/%s/workqueue/bin/current" % (d, chunkPrefix),
            ):
                ok = 0

    return ok
Ejemplo n.º 3
0
def create(linkFarmDir,
           chunkDisksMap, chunkPrefix, chunkTermination,
           binDir,
           machines):
  """Build the chunk data directories on the given machines.

  On each machine this creates the link-farm directories and the
  per-disk chunk/workqueue directories, links <termination>-data/data
  back to <termination>-data, writes and distributes a search.config
  that lists every chunk disk, and finally points each disk's
  workqueue/bin/current at the workqueue-slave binary so the slave can
  checksum/update itself.

   @param linkFarmDir the link farm directory
   @param chunkDisksMap map: machine -> list of data disks
   @param chunkPrefix prefix of the chunk path on each disk
   @param chunkTermination the dir termination
   @param binDir map: binary name -> directory containing it
   @param machines the machines on which to create this datadir

   @return boolean - the success status
  """
  ok = 1

  for machine in machines:
    if not chunkDisksMap.has_key(machine):
      logging.error(
        "ERROR: No entry for machine %s in data chunk disk map %s" % (
        machine, chunkDisksMap))
      ok = 0
      continue

    disks = chunkDisksMap[machine]

    # search.config opens with a fixed header naming the datapath...
    configLines = ["""datapath %s
urlbuckets urlbuckets
sorttempdir .
""" % chunkTermination]

    # Link-farm directories that must exist.
    mkdirList = ["%s/%s%s" % (linkFarmDir, chunkTermination, suffix)
                 for suffix in ["-data", "-data/logs",
                                "-data/%s" % C.RUNCONFIGDIR_DIRNAME]]

    # ...followed by one "disk" line per chunk disk; each disk also
    # needs its chunk directory and the workqueue directories.
    for disk in disks:
      configLines.append("disk %s/%s 1000\n" % (disk, chunkPrefix))
      mkdirList.append("%s/%s/%s" % (disk, chunkPrefix, chunkTermination))
      mkdirList.append("%s/%s/workqueue" % (disk, chunkPrefix))
      mkdirList.append("%s/%s/workqueue/bin" % (disk, chunkPrefix))

    dataDir = "%s/%s-data" % (linkFarmDir, chunkTermination)
    if not E.mkdir([machine], string.join(mkdirList, " ")):
      return false
    if not E.ln([machine], dataDir, "%s/data" % dataDir):
      return false

    # Write search.config locally, push it out, then move it into place.
    fileName = "%s/search.config" % dataDir
    tmpFile = "/tmp/search.config.tmp-%s" % chunkTermination
    try:
      open(tmpFile, "w").write(string.join(configLines, ""))
      pushed = (E.distribute([machine], tmpFile, 1) == E.ERR_OK and
                E.execute([machine], "mv -f %s %s" % (tmpFile, fileName),
                          None, true) == E.ERR_OK)
      if not pushed:
        ok = 0
      E.rm([E.LOCALHOST, machine], tmpFile)
    except IOError:
      ok = 0

    # Point each disk's workqueue/bin/current at the workqueue-slave
    # binary so that workqueue-slave can checksum/update itself.
    slaveBinary = '%s/workqueue-slave' % binDir['workqueue-slave']
    for disk in disks:
      if not E.ln([machine],
                  slaveBinary,
                  '%s/%s/workqueue/bin/current' % (disk, chunkPrefix)):
        ok = 0

  return ok