Ejemplo n.º 1
0
def SyncOneboxLog(config):
  """Push the local Onebox log to its GFS copy; does nothing off-cluster.

  As of 4.6.4, this is called from scripts/periodic_script.py and from
  onebox_handler.py, when the user does View Log AND the machine is a cluster.

  The sync only runs when ENT_CONFIG_TYPE is 'CLUSTER' and the current host
  is the first entry of config.SERVERS for the onebox port. A fileutil
  'equalize' is attempted first; if that reports an error the whole file is
  copied with 'cp -f', and a failure of that copy is logged.

  Args:
    config: configuration object providing var() and SERVERS.
  """
  port = servertype.GetPortBase('oneboxenterprise')
  onebox_hosts = config.SERVERS[port]
  this_host = E.getCrtHostName()
  config_type = config.var('ENT_CONFIG_TYPE')

  # Nothing to sync unless this is a cluster and we are the onebox host.
  if not (config_type == 'CLUSTER' and this_host == onebox_hosts[0]):
    return

  tmp_root = config.var('TMPDIR')
  cell = config.var('GFS_CELL')
  local_log = os.path.join(tmp_root, config.var('ENTERPRISE_ONEBOX_LOG'))
  gfs_log = os.path.join(os.sep, 'gfs', cell,
                         config.var('ENTERPRISE_ONEBOX_LOG'))

  # fileutil equalize copies only the difference of the log files.
  status, _ = E.run_fileutil_command(
      config, 'equalize %s %s' % (local_log, gfs_log))
  if status:
    # The files did not match at the beginning (possibly a new log file was
    # created), so fall back to copying the whole log file.
    status, _ = E.run_fileutil_command(
        config, 'cp -f %s %s' % (local_log, gfs_log))
    if status:
      logging.error('Error while syncing onebox logs.')
Ejemplo n.º 2
0
  def deleteCollection(self, collection):
    """Remove every report, report list file and the report directory of a
    collection.

    For both raw and summary reports: any job still generating a report is
    stopped, the report's data files are removed, the per-type report count
    is reduced, and the per-type list file is deleted. Finally the
    collection's report directory is removed with rmdir. fileutil failures
    are logged and otherwise ignored. The whole operation runs while
    holding self.logreplock.
    """
    self.logreplock.acquire()
    try:
      for kind in (liblog.RAW_REPORT, liblog.SUMMARY_REPORT):
        doomed = self.getLogReports(collection, kind)
        for entry in doomed:
          # A report that is still being (re)generated has a job to stop.
          if entry.completeState != COMPLETE:
            self.stopRunningJob(self.jobName(entry))

          # Drop whatever data files the report has.
          (html_path, valid_path) = liblog.get_report_filenames(
              self.entConfig, kind, entry.reportName, collection)
          self.RemoveReportFiles(html_path, valid_path)

        removed = len(doomed)
        self.reportCount[kind] -= removed
        logging.info('Delete total %d reports of type %s for collection %s.' % (
            removed, kind, collection))

        list_path = liblog.get_report_list_filename(self.entConfig,
                                                    kind, collection)
        (status, _) = E.run_fileutil_command(self.entConfig,
                                             'rm -f %s' % list_path)
        if status:
          logging.error('Cannot remove list file %s.' % list_path)

      # With both report types gone the collection directory is unused.
      collection_dir = liblog.get_report_collection_dir(self.entConfig,
                                                        collection)
      (status, _) = E.run_fileutil_command(self.entConfig,
                                           'rmdir %s' % collection_dir)
      if status:
        logging.error('Cannot delete unused directory %s' % collection_dir)
    finally:
      self.logreplock.release()
Ejemplo n.º 3
0
 def TouchFile(global_params, filename):
   """Create filename as an empty file unless fileutil can already list it.

   Args:
     global_params: configuration object handed to E.run_fileutil_command.
     filename: path of the file to check and, if needed, create.

   A failure to create the file is reported through logging.fatal.
   """
   # The function has no 'self'; the configuration arrives as the
   # global_params argument, so that is what must be passed to fileutil.
   ls_cmd = "ls %s" % filename
   err, out = E.run_fileutil_command(global_params, ls_cmd)
   if err != E.ERR_OK:
     # 'ls' failed, which is taken to mean the file does not exist yet:
     # create it by truncating to length 0.
     create_cmd = "truncate %s 0" % filename
     err, out = E.run_fileutil_command(global_params, create_cmd)
     if err != E.ERR_OK:
       logging.fatal("Could not create file: %s" % filename)
Ejemplo n.º 4
0
 def TouchFile(global_params, filename):
     """Create filename as an empty file unless fileutil can already list it.

     Args:
       global_params: configuration object handed to
         E.run_fileutil_command.
       filename: path of the file to check and, if needed, create.

     A failure to create the file is reported through logging.fatal.
     """
     # The function has no 'self'; the configuration arrives as the
     # global_params argument, so that is what must be passed to fileutil.
     ls_cmd = "ls %s" % filename
     err, out = E.run_fileutil_command(global_params, ls_cmd)
     if err != E.ERR_OK:
         # 'ls' failed, which is taken to mean the file does not exist yet:
         # create it by truncating to length 0.
         create_cmd = "truncate %s 0" % filename
         err, out = E.run_fileutil_command(global_params, create_cmd)
         if err != E.ERR_OK:
             logging.fatal("Could not create file: %s" % filename)
Ejemplo n.º 5
0
  def drain_urlmanagers(self):
    """
    Ask every urlmanager to dump its status table and check each dump file.

    We need to do this before advancing the epoch -- we can do it
    multiple times.

    Returns 0 when every urlmanager accepted the command and its status
    table file could be listed through fileutil; returns 1 as soon as
    either step fails for one urlmanager.
    """
    params = self.cfg.globalParams
    hostports = params.GetServerHostPorts("urlmanager")
    shard_total = params.GetNumShards('urlmanager')
    current_epoch = self.cfg.getGlobalParam('RT_EPOCH')

    for (host, port) in hostports:
      # The port is not contacted directly from here because of the
      # timeout; port_talker.py is run instead (300 = 5 min timeout).
      talker_cmd = (". %s; cd %s/local/google3/enterprise/legacy/util && "
                    "./port_talker.py %s %d 'd DumpingStatusTable' %d") % (
          self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
          self.cfg.entHome,
          host, port, 300)
      status = E.execute([E.getCrtHostName()], talker_cmd, None, 0)
      if status != E.ERR_OK:
        logging.error("Error draining urlmanagers [%s]" % status)
        return 1

      # Make sure that the status table file really got written.
      shard = servertype.GetPortShard(port)
      table_file = "%surlmanager_out_table_%02d_of_%02d_epoch%010d" % (
          self.cfg.getGlobalParam('NAMESPACE_PREFIX'),
          shard, shard_total, current_epoch)
      status, _ = E.run_fileutil_command(params, "ls %s" % table_file)
      if status != E.ERR_OK:
        logging.error("The status table file [%s] is not there" % table_file)
        return 1

    return 0
Ejemplo n.º 6
0
 def RemoveReportFiles(self, html_file, valid_file):
   """Delete a report's html file together with its valid file.

   Both paths go into a single fileutil 'rm -f'; a failure is logged and
   not propagated to the caller.
   """
   doomed = (html_file, valid_file)
   (status, _) = E.run_fileutil_command(self.entConfig,
                                        'rm -f %s %s' % doomed)
   if status:
     logging.error('Failed to remove report files: %s and %s' % doomed)
  def RemoveOldQueue(self, encQueueName):
    """Delete the data file and the index file of a crawl queue.

    Both files are removed with one fileutil 'rm -f'; a failure is logged
    and not propagated to the caller.
    """
    rm_command = 'rm -f %s %s' % (
        self.getCrawlQueueFileName(encQueueName),
        self.getCrawlQueueIndexFileName(encQueueName))

    (status, _) = E.run_fileutil_command(self.entConfig, rm_command)
    if status:
      logging.error('Failed to remove crawlqueue snapshot file for %s' %
                    encQueueName)
Ejemplo n.º 8
0
 def execMultiTimes(self, to_exec, max_tries=10):
     """Run a fileutil (gfs) command, retrying until it succeeds.

     Args:
       to_exec: command string passed to E.run_fileutil_command.
       max_tries: maximum number of attempts.

     Returns:
       true as soon as one attempt returns E.ERR_OK; false when every
       attempt failed (or max_tries is not positive).
     """
     try_num = 0
     while try_num < max_tries:
         err, out = E.run_fileutil_command(self.globalParams, to_exec)
         if E.ERR_OK == err:
             return true
         # Report the failure right away instead of after the pause.
         logging.error("Error executing %s" % to_exec)
         try_num = try_num + 1
         # Only wait when another attempt follows; sleeping after the last
         # failure would just delay the caller by 10 seconds for nothing.
         if try_num < max_tries:
             time.sleep(10)
     return false
Ejemplo n.º 9
0
 def execMultiTimes(self, to_exec, max_tries=10):
   """Run a fileutil (gfs) command, retrying until it succeeds.

   Args:
     to_exec: command string passed to E.run_fileutil_command.
     max_tries: maximum number of attempts.

   Returns:
     true as soon as one attempt returns E.ERR_OK; false when every
     attempt failed (or max_tries is not positive).
   """
   try_num = 0
   while try_num < max_tries:
     err, out = E.run_fileutil_command(self.globalParams, to_exec)
     if E.ERR_OK == err:
       return true
     # Report the failure right away instead of after the pause.
     logging.error("Error executing %s" % to_exec)
     try_num = try_num + 1
     # Only wait when another attempt follows; sleeping after the last
     # failure would just delay the caller by 10 seconds for nothing.
     if try_num < max_tries:
       time.sleep(10)
   return false
Ejemplo n.º 10
0
def MakeGoogleDir(entconfig, dir):
  """Ensure a directory exists in the Google filesystem.

  The path is stat'ed through gfile first; 'mkdir -p' is issued via
  fileutil only when that probe fails or does not report a directory.
  A failing mkdir is logged, not raised.

  Args:
    entconfig: configuration object handed to E.run_fileutil_command.
    dir: directory path; one trailing '/' is dropped before use.
  """
  try:
    # gfs doesn't tolerate a trailing slash
    if dir[-1] == '/':
      dir = dir[:-1]
    already_dir = stat.S_ISDIR(gfile.File_Stat(dir)[0])
  except:
    # Any failure while probing is treated as "directory not there".
    already_dir = 0

  if already_dir:
    return

  (status, output) = E.run_fileutil_command(entconfig, 'mkdir -p %s' % dir)
  if status:
    logging.error('Failed on mkdir for %s. Error: %s' % (dir, output))
Ejemplo n.º 11
0
def MakeGoogleDir(entconfig, dir):
    """Ensure a directory exists in the Google filesystem.

    The path is stat'ed through gfile first; 'mkdir -p' is issued via
    fileutil only when that probe fails or does not report a directory.
    A failing mkdir is logged, not raised.

    Args:
      entconfig: configuration object handed to E.run_fileutil_command.
      dir: directory path; one trailing '/' is dropped before use.
    """
    try:
        # gfs doesn't tolerate a trailing slash
        if dir[-1] == '/':
            dir = dir[:-1]
        already_dir = stat.S_ISDIR(gfile.File_Stat(dir)[0])
    except:
        # Any failure while probing is treated as "directory not there".
        already_dir = 0

    if already_dir:
        return

    (status, output) = E.run_fileutil_command(entconfig,
                                              'mkdir -p %s' % dir)
    if status:
        logging.error('Failed on mkdir for %s. Error: %s' % (dir, output))
Ejemplo n.º 12
0
 def CopyRawReportFromGfsToLocal(self, reportName, collection):
   """Copy a raw report to its local file name so file_handler can use it.

   The local target directory is created first, then the report is copied
   with fileutil 'copy -f' under COMMAND_TIMEOUT_PERIOD.

   Returns:
     true when the copy succeeded, false (after logging) when it failed.
   """
   (remote_path, _) = liblog.get_report_filenames(
       self.entConfig, liblog.RAW_REPORT, reportName, collection)
   local_path = liblog.get_local_raw_report_filename(
       self.entConfig, reportName, collection)
   liblog.MakeDir(os.path.dirname(local_path))

   copy_cmd = 'copy -f %s %s' % (remote_path, local_path)
   (status, output) = E.run_fileutil_command(self.entConfig, copy_cmd,
                                             COMMAND_TIMEOUT_PERIOD)
   if not status:
     return true

   logging.error('Failed to make copy from gfs for %s. Error: %s' %
                 (local_path, output))
   return false
Ejemplo n.º 13
0
 def removefileifexists(self, filename):
   '''Remove a file if it exists, return 0 if removal succeeded'''
   self.updatelock.acquire()
   try:
     try:
       # The caller may want to remove GFS file or localfile,
       # call fileutil to take care of all of them.
       rm_cmd = "rm -f %s" % filename
       err, out = E.run_fileutil_command(self.cfg.globalParams, rm_cmd)
       if err != E.ERR_OK:
         logging.error("Failed to remove file %s" % filename)
         logging.error("fileutil output: %s" % out)
         return "1"
       else:
         return "0"
     except IOError, e:
       logging.error("Failed to remove file %s" % filename)
       logging.error(str(e))
       return "1"
   finally:
     self.updatelock.release()
Ejemplo n.º 14
0
    def makePhysicalFile(self, virtualFile, clientName, grepString, fileArg):
        """
        Makes a physical file from a virtual one.

        Looks up virtualFile in FILE_TABLE, then builds and runs a shell
        command (tail/tac/cat piped through optional grep and cut steps)
        whose output is redirected to the entry's output path.

        Args:
          virtualFile: key into the global FILE_TABLE.
          clientName: passed to the entry's getPathIn()/getPathOut(); for
            WEB_LOG it is also passed to CopyRawReportFromGfsToLocal().
          grepString: optional text; when set, lines are filtered with a
            case-insensitive fixed-string grep.
          fileArg: file argument; stripped of every character that is not
            an ASCII letter, a digit, '_', '-' or '%'.

        Returns:
          The output path after the command has been handed to E.execute,
          the input path itself when no processing command is needed, or
          None on error (unknown virtualFile, or a failed copy from GFS).
        """

        # Sanitize fileArg.
        fileArg = ''.join([
            x for x in fileArg
            if x in string.ascii_letters + string.digits + '_-%'
        ])

        # Translate from String to fileId
        if not virtualFile or not FILE_TABLE.has_key(virtualFile):
            return None

        # For each file that we can export we have to have an entry in the
        # global FILE_TABLE
        fe = FILE_TABLE[virtualFile]

        pathIn = fe.getPathIn(self.cfg.globalParams, clientName, fileArg,
                              grepString)
        pathOut = fe.getPathOut(self.cfg.globalParams, clientName, fileArg,
                                grepString)
        auxGrepString = fe.aux_grep
        auxCutString = fe.aux_cut
        machine = E.getCrtHostName()

        # Set only when a temporary copy is fetched from GFS (FEED_LOG
        # branch below); it is removed again after the command has run.
        tmpPath = None

        # Copy web log from GFS to the log directory if necessary.
        if virtualFile == 'WEB_LOG' and self.cfg.getGlobalParam('GFS_ALIASES') and \
            not os.path.exists(pathIn):
            ok = self.cfg.logmanager.CopyRawReportFromGfsToLocal(
                fileArg, clientName)
            if not ok or not os.path.exists(pathIn):
                logging.error('Failed on CopyRawReportFromGfsToLocal()')
                return None

        # Copy the feed log from GFS to the log directory if necessary.
        elif virtualFile == 'FEED_LOG' and self.cfg.getGlobalParam('GFS_ALIASES') and \
             not os.path.exists(pathIn):
            tmpPath = pathOut + "_fromGFS"
            (status, output) = E.run_fileutil_command(
                self.cfg.globalParams, "cat %s > %s" % (pathIn, tmpPath), 5)
            if E.ERR_OK != status:
                logging.error("Failed to copy %s to %s" % (pathIn, tmpPath))
                return None
            # From here on, read from the temporary copy.
            pathIn = tmpPath

        # Count the files that we can get
        files = E.ls([machine], pathIn)
        if not files:
            numFiles = 0
        else:
            numFiles = len(files)

        # If we need only one file, and there are more than one, use the
        # last one.
        if numFiles > 0 and not fe.multi_files:
            pathIn = files[-1]

        # Create the auxiliary command to create the actual file
        command = None
        if numFiles == 0:
            # No files available: write a file holding just an empty line.
            command = "echo -e '' > %s" % pathOut
        else:
            if virtualFile == 'FEED_LOG':
                # tail -n +2 outputs the file starting at its second line.
                command = "tail -n +2 %s " % pathIn
                if fe.do_tac:
                    command = command + " | tac "
            # If we reverse the files before displaying
            elif fe.do_tac:
                command = "tac `ls -r %s` " % pathIn

            # Each filter below starts the pipeline with a plain cat of the
            # input files when no earlier step has produced a command yet.

            # Grep the lines we want
            if auxGrepString:
                if not command:
                    command = "cat `ls -r %s`" % pathIn
                command = command + " | grep -- %s" % commands.mkarg(
                    auxGrepString)

            # Maybe we need another grep
            if grepString:
                if not command:
                    command = "cat `ls -r %s`" % pathIn
                # mkarg quotes the caller-supplied text for the shell.
                parsedGrepString = commands.mkarg(grepString)
                command = command + " | grep -i -F -- %s" % parsedGrepString

            # Maybe a cut as well
            if auxCutString:
                if not command:
                    command = "cat `ls -r %s`" % pathIn
                command = command + " | cut %s" % auxCutString

            if command:
                command = command + " > " + pathOut

            # execute the command  "file+operation > temp_filename"
            # if the command is null just use the path we have.
            if not command:
                return pathIn

        # The result of E.execute is not checked; pathOut is returned
        # regardless.
        E.execute([machine], command, None, false)
        if tmpPath:
            E.rm([machine], tmpPath)

        return pathOut
Ejemplo n.º 15
0
  def handleResult(self, jobName, jobToken, returnCode, reportType,
                   collection, reportName, update,
                   html_file, valid_file, new_html_file, new_valid_file):
    """This is like a callback method to take care of the result of
    doLogDump() or doLogReport(). This method is executed in a worker thread.

    Args:
      jobName: key of the job in self.runningJobs.
      jobToken: token the job was registered with; a different or missing
        entry in self.runningJobs means the job was abandoned.
      returnCode: wait status of the report command (decoded below with
        os.WIFEXITED / os.WEXITSTATUS).
      reportType: report type, e.g. liblog.RAW_REPORT.
      collection: collection the report belongs to.
      reportName: name of the report.
      update: true when an existing report was being regenerated; on
        failure the old record is then kept.
      html_file, valid_file: final locations of the report and its valid
        file.
      new_html_file, new_valid_file: freshly generated files that are
        copied over the final ones on success and removed on failure.
    """

    # Unregister the job under joblock; an abandoned job only gets its
    # freshly generated files cleaned up.
    self.joblock.acquire()
    try:
      # if abandoned, don't report.
      if (not self.runningJobs.has_key(jobName) or
          self.runningJobs[jobName] != jobToken):
        logging.info('Running job for report %s complete, '
                     'but it was abandoned.' % reportName)
        self.RemoveReportFiles(new_html_file, new_valid_file)
        return
      else:
        del self.runningJobs[jobName]
    finally:
      self.joblock.release()

    # Turn the wait status into the command's exit code when it exited
    # normally; otherwise returnCode keeps the raw status.
    exited = os.WIFEXITED(returnCode)
    if exited:
      returnCode = os.WEXITSTATUS(returnCode)

    # good path.
    if (exited and (returnCode == liblog.STILL_VALID or
                    returnCode == liblog.SUCCESS)):
      logging.info('Log report %s for collection %s generated correctly' % \
                   (reportName, collection))
      if returnCode == liblog.SUCCESS:
        # New files were generated: copy them over the final locations.
        (err, _) = E.run_fileutil_command(self.entConfig, 'copy %s %s' % \
                                          (new_valid_file, valid_file),
                                          COMMAND_TIMEOUT_PERIOD)
        if err:
          # This may make the report invalid next time we try to update,
          # but it's ok to use.
          logging.error('Failed to copy report valid file %s to %s' % \
                        (new_valid_file, valid_file))

        (err, _) = E.run_fileutil_command(self.entConfig, 'copy %s %s' % \
                                          (new_html_file, html_file),
                                          COMMAND_TIMEOUT_PERIOD)
        if err:
          self.RemoveReportFiles(new_html_file, new_valid_file)
          logging.error('Failed to copy complete report %s to %s' % \
                       (new_html_file, html_file))
          # change returnCode to execute the failure path.
          returnCode = liblog.FAILURE
        else:
          self.setReportCompleteState(reportType, collection,
                                      reportName, COMPLETE)

          # With GFS in use, also keep a local copy of a raw report.
          if self.entConfig.var('GFS_ALIASES') and \
                 reportType == liblog.RAW_REPORT:
            self.CopyRawReportFromGfsToLocal(reportName, collection)
      else:
        # STILL_VALID: nothing is copied, the report is just marked
        # complete.
        self.setReportCompleteState(reportType, collection,
                                      reportName, COMPLETE)


    # failure path.
    # Also reached when the html copy above failed and set returnCode to
    # liblog.FAILURE.
    if not exited or returnCode == liblog.FAILURE:
      logging.error('Error running log report command for report %s' % \
                    reportName)
      self.RemoveReportFiles(new_html_file, new_valid_file)

      if update:
        # if we fail to update, leave the old one untouched.
        self.setReportCompleteState(reportType, collection,
                                    reportName, COMPLETE,
                                    takeOldRecord=true)
      else:
        self.setReportCompleteState(reportType, collection,
                                    reportName, FAILURE)
        # NOTE(review): the count is always decremented for
        # liblog.RAW_REPORT, even though reportType is available here --
        # confirm this is intended for summary reports too.
        self.logreplock.acquire()
        try:
          self.reportCount[liblog.RAW_REPORT] -= 1
        finally:
          self.logreplock.release()
Ejemplo n.º 16
0
  def makePhysicalFile(self, virtualFile, clientName, grepString, fileArg):
    """
    Materialize a virtual file as a real file on the current machine.

    The FILE_TABLE entry for virtualFile supplies the input and output
    paths plus optional tac/grep/cut steps; together with grepString they
    are chained into one shell pipeline whose output is redirected to the
    output path.

    Returns: the output path after the pipeline has been handed to
    E.execute, the input path itself when no processing step applies, or
    None on error (unknown virtualFile, or a failed copy from GFS).
    """
    # Sanitize fileArg: keep only letters, digits, '_', '-' and '%'.
    allowed = string.ascii_letters + string.digits + '_-%'
    fileArg = ''.join([ch for ch in fileArg if ch in allowed])

    # Every file that can be exported needs an entry in the global
    # FILE_TABLE.
    if not virtualFile or not FILE_TABLE.has_key(virtualFile):
      return None
    entry = FILE_TABLE[virtualFile]

    params = self.cfg.globalParams
    pathIn = entry.getPathIn(params, clientName, fileArg, grepString)
    pathOut = entry.getPathOut(params, clientName, fileArg, grepString)
    extraGrep = entry.aux_grep
    extraCut = entry.aux_cut
    host = E.getCrtHostName()

    # Temporary copy fetched from GFS, if any; removed once the command ran.
    scratch = None

    # Web and feed logs may have to be fetched from GFS first.
    needs_gfs_copy = (virtualFile in ('WEB_LOG', 'FEED_LOG') and
                      self.cfg.getGlobalParam('GFS_ALIASES') and
                      not os.path.exists(pathIn))
    if needs_gfs_copy:
      if virtualFile == 'WEB_LOG':
        # Copy web log from GFS to the log directory.
        copied = self.cfg.logmanager.CopyRawReportFromGfsToLocal(fileArg,
                                                                 clientName)
        if not (copied and os.path.exists(pathIn)):
          logging.error('Failed on CopyRawReportFromGfsToLocal()')
          return None
      else:
        # Copy the feed log from GFS into a temporary file.
        scratch = pathOut + "_fromGFS"
        (status, _) = E.run_fileutil_command(
            params, "cat %s > %s" % (pathIn, scratch), 5)
        if status != E.ERR_OK:
          logging.error("Failed to copy %s to %s" % (pathIn, scratch))
          return None
        pathIn = scratch

    # Count the files that we can get.
    files = E.ls([host], pathIn)
    numFiles = 0
    if files:
      numFiles = len(files)

    if numFiles == 0:
      # No files available.
      command = "echo -e '' > %s" % pathOut
    else:
      # If only one file is wanted and there are several, use the last one.
      if not entry.multi_files:
        pathIn = files[-1]

      # First stage of the pipeline, when the file type dictates one.
      source = None
      if virtualFile == 'FEED_LOG':
        source = "tail -n +2 %s " % pathIn
        if entry.do_tac:
          source = source + " | tac "
      elif entry.do_tac:
        # Reverse the files before displaying.
        source = "tac `ls -r %s` " % pathIn

      # Filter stages: the entry's grep, the caller's grep, then a cut.
      filters = ''
      if extraGrep:
        filters = filters + " | grep -- %s" % commands.mkarg(extraGrep)
      if grepString:
        quoted = commands.mkarg(grepString)
        filters = filters + " | grep -i -F -- %s" % quoted
      if extraCut:
        filters = filters + " | cut %s" % extraCut

      # With nothing to do, just use the path we have.
      if not (source or filters):
        return pathIn

      # Filters without a dedicated first stage read a plain cat.
      if not source:
        source = "cat `ls -r %s`" % pathIn
      command = source + filters + " > " + pathOut

    # Execute the command "file+operation > temp_filename".
    E.execute([host], command, None, false)
    if scratch:
      E.rm([host], scratch)

    return pathOut