def getPasswd(self, name, ip):
    """Give user `name` a freshly generated password and mail it out.

    Args:
      name: user whose password is replaced.
      ip: value interpolated, together with the new password, into the
        MSG_FORGOTPASSWORD mail body.

    Returns:
      true when check_update_user accepted the new password (the mail is
      then handed to SendMail.send); false when the user is 'google' or
      the update was rejected.
    """
    # 'google' is the special back-door account used to control the box;
    # changing its password may make the box inaccessible (bug#36271).
    if name == 'google':
        logging.info("Refusing to set password for user %s" % name)
        return false

    generated = password.createRandomPasswd(PASSWORD_LENGTH)
    if not self.check_update_user(name, None, generated):
        logging.error("couldn't set password to user %s" % name)
        return false

    recipient = self.getEmail(name)
    mail_body = M.MSG_FORGOTPASSWORD % (generated, ip)
    SendMail.send(self.cfg, recipient, false,
                  M.MSG_FORGOTPASSWORDSUBJECT, mail_body, false)
    self.cfg.writeAdminRunnerOpMsg(
        "A new password has been sent to your email address")
    return true
    def getPasswd(self, name, ip):
        """Give user `name` a freshly generated password and mail it out.

        Args:
          name: user whose password is replaced.
          ip: value interpolated, together with the new password, into the
            MSG_FORGOTPASSWORD mail body.

        Returns:
          true when check_update_user accepted the new password (the mail
          is then handed to SendMail.send); false when the user is
          'google' or the update was rejected.
        """
        # 'google' is the special back-door account used to control the
        # box; changing its password may make the box inaccessible
        # (bug#36271).
        if name == 'google':
            logging.info("Refusing to set password for user %s" % name)
            return false

        generated = password.createRandomPasswd(PASSWORD_LENGTH)
        if not self.check_update_user(name, None, generated):
            logging.error("couldn't set password to user %s" % name)
            return false

        recipient = self.getEmail(name)
        mail_body = M.MSG_FORGOTPASSWORD % (generated, ip)
        SendMail.send(self.cfg, recipient, false,
                      M.MSG_FORGOTPASSWORDSUBJECT, mail_body, false)
        self.cfg.writeAdminRunnerOpMsg(
            "A new password has been sent to your email address")
        return true
    def genstatusreport(self):
        """Mail the system status report (if enabled) and refresh the crawl summary.

        When the SEND_ENTERPRISE_STATUS_REPORT global parameter is set, a
        plain-text report is assembled from
        self.gsa_status_logic.GetSystemStatusMap() and
        self._get_crawl_summary() and handed to SendMail.send.

        Regardless of that flag, every key of the stored ENT_CRAWL_SUMMARY
        snapshot that also appears in self._get_current_crawlsummary() is
        overwritten with the current value, and the parameters are saved.

        Returns:
          0, always.
        """
        if self.cfg.getGlobalParam('SEND_ENTERPRISE_STATUS_REPORT'):
            # Indexed by the numeric code stored as the first element of
            # each status entry.
            STATUS_STRING = [
                "OK",
                "CAUTION",
                "WARNING",
            ]
            SUMMARY_STRING = {
                "global-overall-urls-crawled":
                M.MSG_URL_CRAWLED_SINCE_YESTERDAY,
                "global-overall-urls-crawl-error":
                M.MSG_URL_ERROR_SINCE_YESTERDAY,
            }

            # get system status
            system_status = self.gsa_status_logic.GetSystemStatusMap()
            cur_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                     time.localtime(time.time()))
            subject = M.MSG_SYSTEM_STATUS_REPORT % cur_time
            # Overall health is the highest per-parameter status code.
            # (A comprehension replaces the tuple-parameter lambda, which
            # is not valid syntax on Python 3.)
            health = max([entry[0] for (_, entry) in system_status.items()])
            report = ["System Status: %s" % STATUS_STRING[health]]

            for param in self.gsa_status_logic.status_params_:
                if param in system_status:
                    status = system_status[param]
                    report.append("%s Status: %s. %s" %
                                  (param, STATUS_STRING[status[0]], status[1]))

            # get crawl summary
            report.append("\nCrawl Summary:")
            summary = self._get_crawl_summary()
            for param in summary.keys():
                report.append("%s: %s" %
                              (SUMMARY_STRING[param], summary[param]))

            # notify administrator
            SendMail.send(self.cfg, None, 0, subject, "\n".join(report), 0)

        # refresh the value of crawl summary; keys absent from the stored
        # snapshot are deliberately not added.
        snapshot = self.cfg.getGlobalParam('ENT_CRAWL_SUMMARY')
        current = self._get_current_crawlsummary()
        for param in current:
            if param in snapshot:
                snapshot[param] = current[param]
        self.cfg.setGlobalParam('ENT_CRAWL_SUMMARY', snapshot)
        self.cfg.saveParams()

        return 0
  def removedisk(self, machine, disk):
    """Call updatedisk(machine, [disk], 0) and mail a notice when it succeeds.

    The MSG_DISKREMOVED notice is only sent when updatedisk reported no
    error and mail_already_sent() says it has not gone out before.

    Returns:
      The value returned by self.updatedisk (a false value means success).
    """
    status = self.updatedisk(machine, [disk], 0)
    if status:
      return status

    notice = M.MSG_DISKREMOVED % (disk, machine)
    if not mail_already_sent(notice):
      SendMail.send(self.cfg, None, false, notice, "", true)
    return status
# Example #5
# 0
    def removedisk(self, machine, disk):
        """Call updatedisk(machine, [disk], 0) and mail a notice when it succeeds.

        The MSG_DISKREMOVED notice is only sent when updatedisk reported no
        error and mail_already_sent() says it has not gone out before.

        Returns:
          The value returned by self.updatedisk (a false value means
          success).
        """
        status = self.updatedisk(machine, [disk], 0)
        if status:
            return status

        notice = M.MSG_DISKREMOVED % (disk, machine)
        if not mail_already_sent(notice):
            SendMail.send(self.cfg, None, false, notice, "", true)
        return status
# Example #6
# 0
    def remove(self, machine):
        """Remove `machine` from the cluster configuration.

        After validating the request, this stops the core services on the
        machine, halts it, drops it from SERVERS and MACHINES, registers it
        as a dead node, deletes its GFS chunkserver, removes its
        DATACHUNKDISKS entry, redoes machine allocation, restarts the
        babysitter and the crawl processes, and finally mails a
        MSG_MACHINEREMOVED notice (unless already sent).

        Args:
          machine: host name of the machine to remove.

        Returns:
          1 when the machine is unknown, is the current host, or a
          configuration update fails; otherwise the value returned by
          self.halt(machine).
        """
        if machine not in self.cfg.getGlobalParam('MACHINES'):
            logging.error("%s doesn't exist" % machine)
            return 1

        # Validate before acting: this check used to run only after
        # stop_core(), so asking to remove the local host stopped its own
        # core services and then refused the request.
        if machine == E.getCrtHostName():
            logging.error("Cannot remove self")
            return 1

        ver = self.cfg.getGlobalParam('VERSION')
        home = self.cfg.getGlobalParam('ENTERPRISE_HOME')
        testver = install_utilities.is_test(ver)
        # if possible stop the core services, ignore return code
        install_utilities.stop_core(ver, home, [machine])

        # Halt the machine if APC is used.
        error = self.halt(machine)

        self.cfg.globalParams.ReplaceVarInParam("SERVERS", None, machine)
        self.cfg.globalParams.ReplaceVarInParam("MACHINES", None, machine)
        ret = core_utils.AddDeadNode(ver, testver, machine)
        # remove the chunkserver running on the node
        gfs_utils.DeleteGFSChunkservers(ver, testver, [machine])
        if ret:
            logging.error('Cannot add dead node to the lockserver.')
            # we ignore this error for now

        # now we need to remove the data disks that were on this machine
        data_disks = self.cfg.globalParams.var_copy('DATACHUNKDISKS')
        if machine in data_disks:
            del data_disks[machine]
            if not self.cfg.setGlobalParam('DATACHUNKDISKS', data_disks):
                return 1

        # This also saves the config file
        if not self.cfg.DoMachineAllocation():
            return 1

        # Now we need to restart babysitter because the old one
        # is out of sync after this
        serve_service_cmd = (
            ". %s && "
            "cd %s/local/google3/enterprise/legacy/scripts && "
            "./serve_service.py %s" %
            (self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
             self.cfg.getGlobalParam('ENTERPRISE_HOME'),
             self.cfg.getGlobalParam('ENTERPRISE_HOME')))
        E.exe("%s %s" % (serve_service_cmd, "babysit"))

        self.restart_crawl_processes(serve_service_cmd)

        removed_notice = M.MSG_MACHINEREMOVED % machine
        if not mail_already_sent(removed_notice):
            SendMail.send(self.cfg, None, false, removed_notice, "", true)

        return error
  def remove(self, machine):
    """Remove `machine` from the cluster configuration.

    After validating the request, this stops the core services on the
    machine, halts it, drops it from SERVERS and MACHINES, registers it as
    a dead node, deletes its GFS chunkserver, removes its DATACHUNKDISKS
    entry, redoes machine allocation, restarts the babysitter and the
    crawl processes, and finally mails a MSG_MACHINEREMOVED notice (unless
    already sent).

    Args:
      machine: host name of the machine to remove.

    Returns:
      1 when the machine is unknown, is the current host, or a
      configuration update fails; otherwise the value returned by
      self.halt(machine).
    """
    if machine not in self.cfg.getGlobalParam('MACHINES'):
      logging.error("%s doesn't exist" % machine)
      return 1

    # Validate before acting: this check used to run only after
    # stop_core(), so asking to remove the local host stopped its own
    # core services and then refused the request.
    if machine == E.getCrtHostName():
      logging.error("Cannot remove self")
      return 1

    ver = self.cfg.getGlobalParam('VERSION')
    home = self.cfg.getGlobalParam('ENTERPRISE_HOME')
    testver = install_utilities.is_test(ver)
    # if possible stop the core services, ignore return code
    install_utilities.stop_core(ver, home, [machine])

    # Halt the machine if APC is used.
    error = self.halt(machine)

    self.cfg.globalParams.ReplaceVarInParam("SERVERS", None, machine)
    self.cfg.globalParams.ReplaceVarInParam("MACHINES", None, machine)
    ret = core_utils.AddDeadNode(ver, testver, machine)
    # remove the chunkserver running on the node
    gfs_utils.DeleteGFSChunkservers(ver, testver, [machine])
    if ret:
      logging.error('Cannot add dead node to the lockserver.')
      # we ignore this error for now

    # now we need to remove the data disks that were on this machine
    data_disks = self.cfg.globalParams.var_copy('DATACHUNKDISKS')
    if machine in data_disks:
      del data_disks[machine]
      if not self.cfg.setGlobalParam('DATACHUNKDISKS', data_disks):
        return 1

    # This also saves the config file
    if not self.cfg.DoMachineAllocation():
      return 1

    # Now we need to restart babysitter because the old one
    # is out of sync after this
    serve_service_cmd = (
        ". %s && "
        "cd %s/local/google3/enterprise/legacy/scripts && "
        "./serve_service.py %s" % (
            self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
            self.cfg.getGlobalParam('ENTERPRISE_HOME'),
            self.cfg.getGlobalParam('ENTERPRISE_HOME')))
    E.exe("%s %s" % (serve_service_cmd, "babysit"))

    self.restart_crawl_processes(serve_service_cmd)

    removed_notice = M.MSG_MACHINEREMOVED % machine
    if not mail_already_sent(removed_notice):
      SendMail.send(self.cfg, None, false, removed_notice, "", true)

    return error
  def genstatusreport(self):
    """Mail the system status report (if enabled) and refresh the crawl summary.

    When the SEND_ENTERPRISE_STATUS_REPORT global parameter is set, a
    plain-text report is assembled from
    self.gsa_status_logic.GetSystemStatusMap() and
    self._get_crawl_summary() and handed to SendMail.send.

    Regardless of that flag, every key of the stored ENT_CRAWL_SUMMARY
    snapshot that also appears in self._get_current_crawlsummary() is
    overwritten with the current value, and the parameters are saved.

    Returns:
      0, always.
    """
    if self.cfg.getGlobalParam('SEND_ENTERPRISE_STATUS_REPORT'):
      # Indexed by the numeric code stored as the first element of each
      # status entry.
      STATUS_STRING = [
          "OK",
          "CAUTION",
          "WARNING",
      ]
      SUMMARY_STRING = {
          "global-overall-urls-crawled": M.MSG_URL_CRAWLED_SINCE_YESTERDAY,
          "global-overall-urls-crawl-error": M.MSG_URL_ERROR_SINCE_YESTERDAY,
      }

      # get system status
      system_status = self.gsa_status_logic.GetSystemStatusMap()
      cur_time = time.strftime("%Y-%m-%d %H:%M:%S",
                               time.localtime(time.time()))
      subject = M.MSG_SYSTEM_STATUS_REPORT % cur_time
      # Overall health is the highest per-parameter status code.
      # (A comprehension replaces the tuple-parameter lambda, which is not
      # valid syntax on Python 3.)
      health = max([entry[0] for (_, entry) in system_status.items()])
      report = ["System Status: %s" % STATUS_STRING[health]]

      for param in self.gsa_status_logic.status_params_:
        if param in system_status:
          status = system_status[param]
          report.append("%s Status: %s. %s" %
                        (param, STATUS_STRING[status[0]], status[1]))

      # get crawl summary
      report.append("\nCrawl Summary:")
      summary = self._get_crawl_summary()
      for param in summary.keys():
        report.append("%s: %s" %
                      (SUMMARY_STRING[param], summary[param]))

      # notify administrator
      SendMail.send(self.cfg, None, 0, subject, "\n".join(report), 0)

    # refresh the value of crawl summary; keys absent from the stored
    # snapshot are deliberately not added.
    snapshot = self.cfg.getGlobalParam('ENT_CRAWL_SUMMARY')
    current = self._get_current_crawlsummary()
    for param in current:
      if param in snapshot:
        snapshot[param] = current[param]
    self.cfg.setGlobalParam('ENT_CRAWL_SUMMARY', snapshot)
    self.cfg.saveParams()

    return 0
# Example #9
# 0
    def halt(self, machine):
        """Ask rebooter to halt `machine` and notify by mail about the outcome.

        A true result from rebooter.HaltMachines is treated as failure: a
        MSG_MACHINENEEDSHALT mail is sent. Otherwise a MSG_MACHINEHALTED
        mail is sent and the operation is recorded through
        writeAdminRunnerOpMsg. Each mail is skipped when
        mail_already_sent() reports it went out before.

        Returns:
          0 when the halt call reported success, 1 when it reported
          failure.
        """
        ent_home = self.cfg.globalParams.GetEntHome()
        halt_failed = rebooter.HaltMachines(ent_home, [machine])

        if not halt_failed:
            halted_notice = M.MSG_MACHINEHALTED % machine
            if not mail_already_sent(halted_notice):
                SendMail.send(self.cfg, None, false, halted_notice, "", true)
            self.writeAdminRunnerOpMsg(M.MSG_LOG_HALT_MACHINE % (machine))
            return 0

        # we failed just send email
        logging.info("Halt machine of %s failed." % machine)
        needs_halt_notice = M.MSG_MACHINENEEDSHALT % machine
        if not mail_already_sent(needs_halt_notice):
            logging.info("Sending mail to halt %s" % machine)
            SendMail.send(self.cfg, None, false, needs_halt_notice, "", true)
        return 1
  def halt(self, machine):
    """Ask rebooter to halt `machine` and notify by mail about the outcome.

    A true result from rebooter.HaltMachines is treated as failure: a
    MSG_MACHINENEEDSHALT mail is sent. Otherwise a MSG_MACHINEHALTED mail
    is sent and the operation is recorded through writeAdminRunnerOpMsg.
    Each mail is skipped when mail_already_sent() reports it went out
    before.

    Returns:
      0 when the halt call reported success, 1 when it reported failure.
    """
    ent_home = self.cfg.globalParams.GetEntHome()
    halt_failed = rebooter.HaltMachines(ent_home, [machine])

    if not halt_failed:
      halted_notice = M.MSG_MACHINEHALTED % machine
      if not mail_already_sent(halted_notice):
        SendMail.send(self.cfg, None, false, halted_notice, "", true)
      self.writeAdminRunnerOpMsg(M.MSG_LOG_HALT_MACHINE % (machine))
      return 0

    # we failed just send email
    logging.info("Halt machine of %s failed." % machine)
    needs_halt_notice = M.MSG_MACHINENEEDSHALT % machine
    if not mail_already_sent(needs_halt_notice):
      logging.info("Sending mail to halt %s" % machine)
      SendMail.send(self.cfg, None, false, needs_halt_notice, "", true)
    return 1
  def haltcluster(self):
    """Halt every other machine listed in MACHINES, then the local host.

    The shutdown is recorded via writeAdminRunnerOpMsg first. If halting
    the other machines reports failure, a MSG_MACHINENEEDSHALT mail naming
    them is sent (unless already sent). When there are other machines the
    local halt is delayed by 120 seconds.

    Returns:
      0, always.
    """
    this_host = E.getCrtHostName()
    # Build a filtered copy rather than calling remove() on the value
    # handed out by getGlobalParam: that mutated the returned list in
    # place (which may be shared with the config -- not visible here) and
    # raised ValueError when the local host was not listed.
    others = [host for host in self.cfg.getGlobalParam("MACHINES")
              if host != this_host]

    self.writeAdminRunnerOpMsg(M.MSG_LOGSHUTDOWN)
    if others:
      # Halt all other machines now
      if rebooter.HaltMachines(self.cfg.globalParams.GetEntHome(), others):
        # just send an email
        notice = M.MSG_MACHINENEEDSHALT % " ".join(others)
        if not mail_already_sent(notice):
          SendMail.send(self.cfg, None, false, notice, "", true)
      # Halt this machine after a delay
      time.sleep(120)
    rebooter.HaltMachines(self.cfg.globalParams.GetEntHome(), [this_host])
    return 0
# Example #12
# 0
    def haltcluster(self):
        """Halt every other machine listed in MACHINES, then the local host.

        The shutdown is recorded via writeAdminRunnerOpMsg first. If
        halting the other machines reports failure, a MSG_MACHINENEEDSHALT
        mail naming them is sent (unless already sent). When there are
        other machines the local halt is delayed by 120 seconds.

        Returns:
          0, always.
        """
        this_host = E.getCrtHostName()
        # Build a filtered copy rather than calling remove() on the value
        # handed out by getGlobalParam: that mutated the returned list in
        # place (which may be shared with the config -- not visible here)
        # and raised ValueError when the local host was not listed.
        others = [host for host in self.cfg.getGlobalParam("MACHINES")
                  if host != this_host]

        self.writeAdminRunnerOpMsg(M.MSG_LOGSHUTDOWN)
        if others:
            # Halt all other machines now
            if rebooter.HaltMachines(self.cfg.globalParams.GetEntHome(),
                                     others):
                # just send an email
                notice = M.MSG_MACHINENEEDSHALT % " ".join(others)
                if not mail_already_sent(notice):
                    SendMail.send(self.cfg, None, false, notice, "", true)
            # Halt this machine after a delay
            time.sleep(120)
        rebooter.HaltMachines(self.cfg.globalParams.GetEntHome(),
                              [this_host])
        return 0
  def sendWarningMsg(self, license):
    """Send at most one license-related warning mail for the current day.

    Nothing is sent while the install state is "INSTALL". Otherwise one of
    three mails is chosen, in this order of precedence:
      * expired: sent once, recorded under the C.LONG_MIN_VALUE marker
      * in grace period: sent once per distinct days-left value
      * expiring: sent when days-left is listed in
        ENT_LICENSE_STUFF_TODO_ON_DAYS
    A mail is skipped when its marker is already present in
    ENT_LICENSE_STUFF_DONE_ON_DAYS; after sending, the marker is appended
    to that list and the parameter is written back.

    Args:
      license: object providing getTimeLeft(), getGracePeriod(),
        isExpired() and isInGracePeriod().

    Returns:
      None.
    """
    # We don't send warnings when we are in install mode
    if self.cfg.getInstallState() == "INSTALL":
      return

    pending_days = self.cfg.getGlobalParam('ENT_LICENSE_STUFF_TODO_ON_DAYS')
    handled_days = self.cfg.getGlobalParam('ENT_LICENSE_STUFF_DONE_ON_DAYS')

    remaining = license.getTimeLeft()
    # NOTE(review): with integer operands Python 2 '/' already rounds
    # toward negative infinity; confirm the extra -1 for negative values
    # is intended.
    days_left = remaining / C.DAY_MILLISECONDS + 1
    if remaining < 0:
      days_left = days_left - 1
    grace = license.getGracePeriod()

    # (marker, subject, body) of the mail to send, or None for no mail.
    outgoing = None
    if license.isExpired() and C.LONG_MIN_VALUE not in handled_days:
      # license is expired; the fixed marker makes this a one-time mail
      outgoing = (C.LONG_MIN_VALUE,
                  M.WAR_LICENSE_EMAIL_SUBJECT_EXPIRED,
                  M.WAR_LICENSE_EMAIL_EXPIRED)
    elif license.isInGracePeriod() and days_left not in handled_days:
      # license is in grace period; one mail per day
      grace_days_left = (remaining + grace) / C.DAY_MILLISECONDS + 1
      outgoing = (
          days_left,
          M.WAR_LICENSE_EMAIL_SUBJECT_IN_GRACE_PERIOD % (grace_days_left),
          M.WAR_LICENSE_EMAIL_IN_GRACE_PERIOD % (grace_days_left))
    elif days_left in pending_days and days_left not in handled_days:
      # time to send a warning (as dictated by the todo list)
      grace_days = (grace + C.DAY_MILLISECONDS - 1) / C.DAY_MILLISECONDS
      outgoing = (
          days_left,
          M.WAR_LICENSE_EMAIL_SUBJECT_EXPIRING % days_left,
          M.WAR_LICENSE_EMAIL_EXPIRING % (days_left, grace_days))

    if outgoing is None:
      return

    marker, subject, body = outgoing
    SendMail.send(self.cfg, None, false, subject, body, true)

    # a message was sent: record it into ENT_LICENSE_STUFF_DONE_ON_DAYS
    handled_days.append(marker)
    self.cfg.setGlobalParam('ENT_LICENSE_STUFF_DONE_ON_DAYS', handled_days)
# Example #14
# 0
    def reboot(self, machine):
        """Reboot `machine` when AUTO_REBOOT is enabled and mail the outcome.

        When AUTO_REBOOT is off, or rebooter.RebootMachine returns a true
        value, a MSG_MACHINENEEDSREBOOT mail is sent. Otherwise a
        MSG_MACHINEREBOOTED mail is sent and the operation is recorded
        through writeAdminRunnerOpMsg. Each mail is skipped when
        mail_already_sent() reports it went out before.

        Returns:
          0 when the reboot call reported success, 1 otherwise.
        """
        # `or` short-circuits, so RebootMachine is never invoked while
        # AUTO_REBOOT is disabled.
        if (not self.cfg.getGlobalParam("AUTO_REBOOT") or
                rebooter.RebootMachine(self.cfg.globalParams.GetEntHome(),
                                       machine)):
            pending_notice = M.MSG_MACHINENEEDSREBOOT % machine
            if not mail_already_sent(pending_notice):
                SendMail.send(self.cfg, None, false, pending_notice, "",
                              true)
            return 1

        done_notice = M.MSG_MACHINEREBOOTED % machine
        if not mail_already_sent(done_notice):
            SendMail.send(self.cfg, None, false, done_notice, "", true)

        self.writeAdminRunnerOpMsg(M.MSG_LOG_REBOOT_MACHINE % (machine))
        return 0
  def reboot(self, machine):
    """Reboot `machine` when AUTO_REBOOT is enabled and mail the outcome.

    When AUTO_REBOOT is off, or rebooter.RebootMachine returns a true
    value, a MSG_MACHINENEEDSREBOOT mail is sent. Otherwise a
    MSG_MACHINEREBOOTED mail is sent and the operation is recorded through
    writeAdminRunnerOpMsg. Each mail is skipped when mail_already_sent()
    reports it went out before.

    Returns:
      0 when the reboot call reported success, 1 otherwise.
    """
    # `or` short-circuits, so RebootMachine is never invoked while
    # AUTO_REBOOT is disabled.
    if (not self.cfg.getGlobalParam("AUTO_REBOOT") or
        rebooter.RebootMachine(self.cfg.globalParams.GetEntHome(),
                               machine)):
      pending_notice = M.MSG_MACHINENEEDSREBOOT % machine
      if not mail_already_sent(pending_notice):
        SendMail.send(self.cfg, None, false, pending_notice, "", true)
      return 1

    done_notice = M.MSG_MACHINEREBOOTED % machine
    if not mail_already_sent(done_notice):
      SendMail.send(self.cfg, None, false, done_notice, "", true)

    self.writeAdminRunnerOpMsg(M.MSG_LOG_REBOOT_MACHINE % (machine))
    return 0
    def createUser(self, creatorName, ip, newUserName, newUserPassword,
                   newUserEmail, newUserAccountType, newUserPermissions):
        """Create a new user and mail a confirmation and a welcome message.

        Args:
          creatorName: user name of whoever creates the account; when it
            is a false value no confirmation mail is sent.
          ip: address the request came from (only interpolated into the
            confirmation mail).
          newUserName: name of the account to create; must not exist yet
            and must pass entconfig.IsNameValid.
          newUserPassword: clear-text password; when empty (or None) a
            random one of PASSWORD_LENGTH characters is generated.
          newUserEmail: address of the new user; rejected when it contains
            a space.
          newUserAccountType: stored unchanged in the new UserData.
          newUserPermissions: stored unchanged in the new UserData.

        The users file is read and written while self.updatelock is held.
        Afterwards the password file is synced, a confirmation is mailed
        to the creator (if they have an e-mail address) and a welcome
        message containing the clear-text password is mailed to the new
        user.

        Returns:
          CREATE_OK on success, otherwise one of CREATE_UNKNOWN,
          CREATE_USEREXISTS, CREATE_INVALIDUSERNAME, CREATE_INVALIDEMAIL.
        """
        # Local import: os.urandom replaces the hand-rolled /dev/urandom
        # read, whose file handle leaked if read() raised.
        import os

        self.updatelock.acquire()

        try:
            # Pass the creator name when getting the user file
            (err, users) = self.get_checked_users(name=creatorName)
            if err != USER_OK:
                logging.error("Error %s while reading the users file. user create "
                              " failed" % err)
                return CREATE_UNKNOWN

            if newUserName in users:
                logging.error("User %s already exists. Cannot re-create it" %
                              (newUserName))
                return CREATE_USEREXISTS

            if not newUserPassword:
                newUserPassword = password.createRandomPasswd(PASSWORD_LENGTH)

            # validate the user name
            if not entconfig.IsNameValid(newUserName):
                logging.error("Invalid user name %s -- cannot create" %
                              (newUserName))
                return CREATE_INVALIDUSERNAME

            # $TODO$ -- add email validation
            if " " in newUserEmail:
                logging.error("Invalid email %s while creating user %s" %
                              (newUserEmail, newUserName))
                return CREATE_INVALIDEMAIL

            # Keep the clear text for the welcome mail; only the salted
            # hash is stored.
            decryptedPasswd = newUserPassword
            # NOTE(review): a 2-byte salt is very short; it is kept as-is
            # because the stored format is not visible from here.
            salt = os.urandom(2)
            newUserPassword = password.sha1_base64_hash(newUserPassword, salt)
            # For this short input b64encode yields the same text as the
            # deprecated encodestring(...)[:-1] (which only stripped the
            # trailing newline).
            newSalt = base64.b64encode(salt)

            users[newUserName] = UserData(newUserName, newUserPassword,
                                          newSalt, newUserEmail,
                                          newUserAccountType,
                                          newUserPermissions)
            self.save_passwd_file(users)

            # A failure here is logged but does not abort the creation.
            if not self.update_vmanage_password(newUserName, newUserPassword,
                                                newSalt):
                logging.error("Error updating vmanager password for user %s" %
                              newUserName)
        finally:
            self.updatelock.release()

        self.sync_password_file()

        if creatorName:
            creatorEmail = users[creatorName].email
        else:
            creatorEmail = None
        accountType = users[newUserName].AccountTypePrintName()

        # and send email, first, to the creator
        if creatorEmail:
            SendMail.send(
                self.cfg, creatorEmail, false,
                M.MSG_NEWUSERPASSWORDSUBJECT % newUserName,
                M.MSG_NEWUSERPASSWORD %
                (newUserName, accountType, newUserEmail, ip, creatorName,
                 creatorEmail), false)

        # next, to the created
        rootURI = "http://%s:8000" % self.cfg.getGlobalParam("EXTERNAL_WEB_IP")

        # NOTE(review): this mail carries the clear-text password.
        SendMail.send(
            self.cfg, newUserEmail, false, M.MSG_WELCOMENEWUSERSUBJECT,
            M.MSG_WELCOMENEWUSER % (accountType, creatorEmail, newUserName,
                                    decryptedPasswd, rootURI, creatorEmail),
            false)
        logging.info("User %s [email %s] created OK by %s" %
                     (newUserName, newUserEmail, creatorName))
        return CREATE_OK
# Example #17
# 0
    def add(self, machine, apc_outlet):
        """Add `machine` to the configuration and bring its services up.

        The steps run strictly in order; any failing step logs an error
        and returns 1 immediately (earlier steps are not undone):
          1. require install state ACTIVE and that the machine answers a
             remote 'echo 1'
          2. restart svs for the machine and check its version
          3. append the machine to MACHINES and remove it from the dead
             nodes
          4. restart the babysitter, wait for the machine's disk facts and
             register its good disks
          5. update APC_MAP, create data dirs, replicate the config and
             reconfigure the network
          6. start core services, add the GFS chunkserver, redo machine
             allocation and move servers onto the new machine
          7. restart the babysitter and kill the servers that were replaced
          8. mark the machine ACTIVE, start its crawl service, save the
             parameters and restart all crawl processes
          9. activate the crawl, logcontrol and serve init scripts
         10. mail a MSG_MACHINEADDED notice (unless already sent)

        Args:
          machine: host name of the machine to add.
          apc_outlet: value passed to apc_util.PortMap for the APC_MAP
            entry of this machine.

        Returns:
          0 on success, 1 on the first failing step.
        """
        # We can add a machine only when we are in active state
        if install_utilities.install_state(
                self.cfg.getGlobalParam('VERSION')) != "ACTIVE":
            logging.error("Can add a machine only when we are in active state")
            return 1

        # First test for accessibility of the machine.
        if E.execute([machine], 'echo 1', None, 1) != E.ERR_OK:
            logging.error("Could not ssh into the machine %s" % machine)
            return 1

        # start the svs on the remote machine
        restart_svs_cmd = "%s/local/google3/enterprise/legacy/util/svs_utilities.py %s %s" % (
            self.cfg.getGlobalParam('ENTERPRISE_HOME'),
            self.cfg.getGlobalParam('ENTERPRISE_HOME'), machine)
        if E.execute([E.getCrtHostName()],
                     SECURE_WRAPPER_COMMAND % (
                         self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                         "-p2",
                         restart_svs_cmd),
                     None, 0) != E.ERR_OK:
            logging.error("Could not start svs on machine %s" % machine)
            return 1

        # wait for some time for svs to come up
        time.sleep(5)
        # check to see if the svs is up and is the right version
        if not svs_utilities.PingAndCheckSvsVersion(
                self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
                self.cfg.getGlobalParam('ENTERPRISE_HOME'), machine):
            logging.error("Svs not running correctly on machine %s" % machine)
            return 1
        ver = self.cfg.getGlobalParam('VERSION')
        home = self.cfg.getGlobalParam('ENTERPRISE_HOME')
        testver = install_utilities.is_test(ver)

        # update MACHINES
        machines = self.cfg.getGlobalParam('MACHINES')
        if machine not in machines:
            machines.append(machine)
        self.cfg.setGlobalParam('MACHINES', machines)

        ret = core_utils.RemDeadNode(ver, testver, machine)
        if ret:
            logging.error('Cannot remove dead node from lockserver.')
            # we ignore this error for now

        # We just added a new machine into the config
        # this will lead to a change in concentrator config
        # so we need to re-run serve service which will
        # write the new config and restart the concentrator
        serve_cmd = (
            ". %s && cd %s/local/google3/enterprise/legacy/scripts && "
            "./serve_service.py %s" % (
                self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
                self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                self.cfg.getGlobalParam('ENTERPRISE_HOME')))
        E.exe("%s %s" % (serve_cmd, "babysit"))

        # Poll for the machine's disk facts, one minute apart.
        # NOTE(review): the loop only stops early when both facts are
        # non-empty, while the check below only rejects None -- so an
        # empty bad-disk fact appears to cost the full wait; confirm this
        # is intended.
        num_tries = 5
        for _ in range(num_tries):
            all_disks = self.cfg.mach_param_cache.GetFact(
                "mounted-drives", machine)
            bad_disks = self.cfg.mach_param_cache.GetFact(
                "var_log_badhds", machine)
            if bad_disks and all_disks:
                break
            time.sleep(60)
        if all_disks is None or bad_disks is None:
            logging.error("Could not get machine information about %s" %
                          machine)
            return 1

        bad_disks = bad_disks.split(' ')
        all_disks = all_disks.split(' ')
        # Keep the disks not reported bad, address partition 3 of each,
        # and change sda3 to hda3 etc.
        good_disks = [re.sub(r'^s', 'h', "%s3" % disk)
                      for disk in all_disks if disk not in bad_disks]

        # Preprocess disks before adding to remove duplicates
        # (order-preserving).
        unique_good_disks = []
        for disk in good_disks:
            if disk not in unique_good_disks:
                unique_good_disks.append(disk)

        # Add disks
        self.updatedisk(machine, unique_good_disks, true)

        # apc map update
        apc_map = self.cfg.globalParams.var_copy('APC_MAP')
        apc_map[machine] = apc_util.PortMap(apc_outlet)
        if not self.cfg.setGlobalParam('APC_MAP', apc_map):
            logging.error("ERROR setting apc map to %s" % repr(apc_map))
            return 1

        # create appropriate datadirs on that machine
        if not self.cfg.createDataDirs([machine], node_replacement=1):
            logging.error("ERROR could not create datadirs on machine %s" %
                          machine)
            return 1

        # Replicate the config
        self.cfg.replicateConfigOnMachine(machine)

        # Reconfigure net on the target machine
        if not reconfigurenet_util.doReconfigureNet(
                self.cfg.globalParams, [machine], i_am_master=0):
            logging.error('reconfigurenet failed for %s' % machine)
            return 1

        # Start core services on the new node
        if not install_utilities.start_core(ver, home, [machine], ignore=0):
            logging.error("ERROR could not start core services on %s" %
                          machine)
            return 1
        # Add the chunkserver back
        gfs_utils.AddGFSChunkservers(ver, testver, [machine])

        # first we need to do Machine allocation.
        # this will assign things that will satisfy the constraints
        if not self.cfg.DoMachineAllocation(serversets=['workqueue-slave']):
            logging.error("ERROR doing machine allocation")
            return 1

        # now try to reallocate some servers from existing machines to the
        # new machine
        replaced = self.cfg.AllocateServersToNewMachine(machine)
        if not replaced:
            logging.error("ERROR allocating services to the new machine")
            return 1

        # first we need to restart the babysitter
        E.exe("%s %s" % (serve_cmd, "babysit"))
        time.sleep(60)

        # Now we need to stop all the replaced services; a failed kill is
        # logged but does not abort.
        for server_string in replaced:
            server = serverlib.Server()
            server.InitFromName(server_string)
            replaced_type = server.servertype()
            kill_cmd = servertype.GetKillCmd(replaced_type, server.port())
            if E.execute([server.host()], kill_cmd, None, 1) != E.ERR_OK:
                logging.error("ERROR killing %s running on port %d on %s" %
                              (replaced_type, server.port(), server.host()))

        # we should make it active
        if not install_utilities.set_install_state(
                machine, self.cfg.getGlobalParam('ENTERPRISE_HOME'), "ACTIVE"):
            logging.error("ERROR changing state on machine %s. "
                          "Please make it active and activate and "
                          "start crawl service on it" % machine)
            return 1

        crawl_cmd = (
            ". %s && cd %s/local/google3/enterprise/legacy/scripts && "
            "./crawl_service.py %s" % (
                self.cfg.getGlobalParam('ENTERPRISE_BASHRC'),
                self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                self.cfg.getGlobalParam('ENTERPRISE_HOME')))
        if E.execute([machine], "%s %s" %
                     (crawl_cmd, "start"), None, 1) != E.ERR_OK:
            logging.error("Could not start crawl service on %s" % machine)
            return 1

        # save all the params
        self.cfg.saveParams()

        # for faster crawl recovery, lets restart all crawl processes
        self.restart_crawl_processes(serve_cmd)

        # activate the crawl, logcontrol and serve services on the remote
        # machine, in that order; the three stanzas were identical apart
        # from the service name.
        for service in ("crawl", "logcontrol", "serve"):
            activate_cmd = (
                "/etc/rc.d/init.d/%s_%s activate >&/dev/null</dev/null" %
                (service, self.cfg.getGlobalParam('VERSION')))
            if E.execute([machine],
                         SECURE_WRAPPER_COMMAND % (
                             self.cfg.getGlobalParam('ENTERPRISE_HOME'),
                             "-e",
                             activate_cmd),
                         None, 0) != E.ERR_OK:
                logging.error("Could not activate %s service on machine %s" %
                              (service, machine))
                logging.error("Please activate by hand")
                return 1

        logging.info("Machine %s successfully added into the system" % machine)

        added_notice = M.MSG_MACHINEADDED % machine
        if not mail_already_sent(added_notice):
            SendMail.send(self.cfg, None, false, added_notice, "", true)
        return 0
  def createUser(self, creatorName, ip,
                 newUserName,
                 newUserPassword,
                 newUserEmail,
                 newUserAccountType,
                 newUserPermissions):
    """
    Creates a new user given:
      creatorName - the username who creates the user; when empty no
                    confirmation mail is sent to a creator
      ip - from which ip the user is created (quoted in the creator's mail)
      newUserName, newUserEmail, newUserAccountType, newUserPermissions -
                    corresponding data for the new user
      newUserPassword - clear-text password for the new user; when empty
                    (or None) a random password of PASSWORD_LENGTH
                    characters is generated

    The users file is read, modified and saved while holding
    self.updatelock. The stored password is the result of
    password.sha1_base64_hash() over the clear text and a 2-byte random
    salt; the clear-text password is only used for the welcome mail.

    Upon creation we send a confirmation email to the creator and a
    welcome message to the new user (with password included)

    returns an error code: CREATE_OK on success, otherwise one of
    CREATE_UNKNOWN, CREATE_USEREXISTS, CREATE_INVALIDUSERNAME or
    CREATE_INVALIDEMAIL
    """

    self.updatelock.acquire()

    try:
      # Pass the creator name when getting the user file
      (err, users) = self.get_checked_users(name = creatorName)
      if err != USER_OK:
        logging.error("Error %s while reading the users file. user create "\
                      " failed" % err)
        return CREATE_UNKNOWN

      if newUserName in users:
        logging.error("User %s already exists. Cannot re-create it" % (
          newUserName))
        return CREATE_USEREXISTS

      # no password supplied -- pick a random one for the user
      if not newUserPassword:
        newUserPassword = password.createRandomPasswd(PASSWORD_LENGTH)

      # validate the user name
      if not entconfig.IsNameValid(newUserName):
        logging.error("Invalid user name %s -- cannot create" % (newUserName))
        return CREATE_INVALIDUSERNAME

      # $TODO$ -- add email validation
      if " " in newUserEmail:
        logging.error("Invalid email %s while creating user %s" % (
          newUserEmail, newUserName))
        return CREATE_INVALIDEMAIL

      # keep the clear text around: it goes into the welcome mail below
      decryptedPasswd = newUserPassword

      # always give the device back, even if the read fails
      urandom = open('/dev/urandom')
      try:
        salt = urandom.read(2)
      finally:
        urandom.close()
      newUserPassword = password.sha1_base64_hash(newUserPassword, salt)
      # encodestring() appends a newline -- drop it
      newSalt = base64.encodestring(salt)[:-1]

      users[newUserName] = UserData(newUserName,
                                    newUserPassword,
                                    newSalt,
                                    newUserEmail,
                                    newUserAccountType,
                                    newUserPermissions)
      self.save_passwd_file(users)

      # a vmanager failure is only logged; the user is still created
      if not self.update_vmanage_password(newUserName, newUserPassword, newSalt):
        logging.error("Error updating vmanager password for user %s" %
                      newUserName)
    finally:
      self.updatelock.release()

    self.sync_password_file()

    if creatorName:
      creatorEmail = users[creatorName].email
    else:
      creatorEmail = None
    accountType = users[newUserName].AccountTypePrintName()

    # and send email, first, to the creator
    if creatorEmail:
      SendMail.send(self.cfg, creatorEmail, false,
                    M.MSG_NEWUSERPASSWORDSUBJECT % newUserName,
                    M.MSG_NEWUSERPASSWORD % ( newUserName, accountType,
                                              newUserEmail, ip,
                                              creatorName, creatorEmail ),
                    false)

    # next, to the created
    rootURI = "http://%s:8000" % self.cfg.getGlobalParam("EXTERNAL_WEB_IP")

    SendMail.send(self.cfg, newUserEmail, false,
                  M.MSG_WELCOMENEWUSERSUBJECT,
                  M.MSG_WELCOMENEWUSER % ( accountType, creatorEmail,
                                           newUserName, decryptedPasswd,
                                           rootURI, creatorEmail ),
                  false)
    logging.info("User %s [email %s] created OK by %s" % (
      newUserName, newUserEmail, creatorName))
    return CREATE_OK
  def prereq_check(self, send_email, collections):
    """
    This checks the prerequisites for all collection, updates the epochs to
    serve from and (optionally) sends a mail.

      send_email - string holding an integer; a non-zero value enables the
                   failure mail and the "serving epoch not current" mail
      collections - comma separated collection names; None or empty means
                    every collection returned by ListCollections

    The checks themselves are done by Runner workers (at most NUM_THREADS
    of them) while holding the prerequisites lock file in TMPDIR.

    returns {} when there is nothing to check, otherwise the errors
    gathered from all the workers (looked up per collection below)
    """
    if collections is not None:
      collections = string.strip(collections)
    if not collections:
      collections = ent_collection.ListCollections(self.cfg.globalParams)
    else:
      # NOTE(review): this branch yields EntCollection objects, which are
      # wrapped in EntCollection again and used as `errors` keys further
      # down -- confirm this matches what ListCollections returns
      params = self.cfg.globalParams
      collections = [ent_collection.EntCollection(string.strip(name), params)
                     for name in string.split(collections, ",")]
    # No collections -- exit quickly
    if not collections:
      return {}

    send_email = string.atoi(send_email)
    epochs = self.cfg.getGlobalParam('ENTERPRISE_EPOCHS')
    gwssers = self.cfg.globalParams.GetServerHostPorts("web")
    jobs = []
    for c in collections:
      collection = ent_collection.EntCollection(c, self.cfg.globalParams)
      # Write the testwords in a copy file; close both files explicitly so
      # the copy is fully on disk before a worker gets the job
      filename = collection.get_var('TESTWORDS')
      filename_copy = "%s_" % filename
      testwords_file = open(filename, "r")
      try:
        testwords = testwords_file.read()
      finally:
        testwords_file.close()
      copy_file = open(filename_copy, "w")
      try:
        copy_file.write(testwords)
      finally:
        copy_file.close()
      num = collection.get_var("TESTWORDS_IN_FIRST")
      jobs.append((self.cfg, gwssers, c, filename_copy, epochs, num))


    # Lock a file so we test once at a time
    lock_file = "%s/prerequisites_lock" % self.cfg.getGlobalParam("TMPDIR")
    # NOTE(review): assumes acquire_lock always returns an object with
    # close() -- confirm what it returns when the lock cannot be taken
    flock = E.acquire_lock(lock_file, 12)

    try:
      # Run the tests -- never more workers than jobs
      num_threads = min(len(jobs), NUM_THREADS)

      # create the threads - workers
      threads = []
      for n in range(0, num_threads):
        threads.append(Runner(n, jobs))

      # start the threads
      for thread in threads[:-1]:
        thread.start()
      # I run the last one
      threads[-1].run()

      # wait to collect the errors at the end; everything is merged into
      # the dictionaries of the worker we ran ourselves
      errors = threads[-1].errors
      max_epochs = threads[-1].max_epochs
      for thread in threads[:-1]:
        thread.join()
        for k, v in thread.max_epochs.items():
          max_epochs[k] = v
        for k, v in thread.errors.items():
          errors[k] = v

      # prepare and send a nice :) message, at most once per
      # ENTERPRISE_INTER_EMAIL_TIME
      if errors and send_email:
        last_msg_time = self.cfg.getGlobalParam('LAST_PREREQUISITES_EMAIL_TIME')
        email_interval  = self.cfg.getGlobalParam('ENTERPRISE_INTER_EMAIL_TIME')
        now = int(time.time())
        if now - last_msg_time > email_interval:
          msg = [M.MSG_PREREQ_FAIL]
          msg.extend(["Collection %s generated a wrong answer for %s" %
                      (failed_coll, string.join(failed_words, ","))
                      for failed_coll, failed_words in errors.items()])
          SendMail.send(self.cfg, None, 1, M.MSG_PREREQ_FAIL_SUBJECT,
                        string.join(msg, "\n"), 1)
          self.cfg.globalParams.set_var('LAST_PREREQUISITES_EMAIL_TIME', now)

      self.cfg.globalParams.set_var('LAST_PREREQUISITES_CHECK',
                                    time.strftime("%Y/%m/%d %H:%M:%S"))

      # NOTE(review): epochs[-1] raises IndexError when ENTERPRISE_EPOCHS is
      # empty -- confirm that can not happen here
      epochs.sort()
      cur_epoch = epochs[-1]
      for c in collections:
        collection = ent_collection.EntCollection(c, self.cfg.globalParams)
        collection.set_var('LAST_PREREQUISITES_ERRORS', errors.get(c, []))
        # EPOCH_SERVING has two values in the form of "es[0] es[1]"
        # es[0]: the epoch the prereq_check ask us to serve, or
        #        -1 means no epoch answers OK,
        #        -2 means current index answers OK
        # es[1]: the epoch the user set from UI, if -2 means use
        #        most recent valid epoch
        # the serving logic is as following:
        # -- if user set a specific epoch (es[1] >= 0), serve es[1]
        # -- if user set most recent valid epoch (es[1] == -2), then
        #      serve the current index if no/all epochs answers ok
        #       (es[0] == -1 or es[0] == -2 )
        #      otherwise (es[0] >= 0) serve from the es[0]
        #
        serving_file = open(collection.get_var('EPOCHS_SERVING'), "r")
        try:
          es = string.split(string.strip(serving_file.read()), " ")
        finally:
          serving_file.close()

        # The epoch prereq_check asks us to serve
        # this from -- -2 means current index is OK,
        # -1 means no epoch answers OK (is returned by the checker)
        epoch = max_epochs.get(c, -2)
        if c not in errors: epoch = -2
        # initialize EPOCHS_SERVING
        if not es or len(es) == 1:
          es = [epoch, -2]
        else:
          es = map(string.atoi, es)
          # if this change cause automatic rollback, which means
          # - user choose the most recent valid epoch and
          # - the new epoch differs from previous epoch and
          # - the change is not from -1 -> -2 or -2 -> -1.
          # we log it in AdminRunner Operations log
          if  es[1] == -2 and  epoch != es[0] and ( es[0] + epoch != -3 ) :
            epochs_to_time = self.cfg.getGlobalParam('ENTERPRISE_EPOCHS_ENDTIME')
            epoch_time = epochs_to_time.get(epoch, M.MSG_EPOCH_CURRENT_TIME)
            self.writeAdminRunnerOpMsg(M.MSG_UI_LOG_INDEX_ROLLBACK % epoch_time)
          es[0] = epoch

        collection.set_file_var_content('EPOCHS_SERVING',
                                        string.join(map(str, es)), 0)

        # also check if the current serving epoch for the collection
        # is the most recent one, if not, send a warning email
        if send_email and ( ( es[1] == -2 and es[0] >= 0 ) or \
                            (es[1] >= 0 and es[1] != cur_epoch ) ) :
          last_msg_time = self.cfg.getGlobalParam(
            'LAST_SERVING_EPOCH_WARNING_EMAIL_TIME')
          email_interval  = self.cfg.getGlobalParam(
            'ENTERPRISE_INTER_EMAIL_TIME')
          now = int(time.time())
          if now - last_msg_time > email_interval:
            SendMail.send(self.cfg, None, 0,
                          M.MSG_SERVING_EPOCH_NOT_CURRENT % c, "", 0)
            self.cfg.globalParams.set_var(
              'LAST_SERVING_EPOCH_WARNING_EMAIL_TIME', now)

      self.cfg.saveParams()
    finally:
      flock.close()

    return errors
  def add(self, machine, apc_outlet):
    """
    This adds a machine to the configuration

      machine - name of the machine to add
      apc_outlet - passed to apc_util.PortMap() to build the APC_MAP entry
                   for the machine

    The steps, in order: check that we are ACTIVE and the machine is
    reachable, start svs on it, register it in MACHINES, work out its
    usable disks, update the APC map, create datadirs, replicate the
    config, reconfigure the net, start core services, reallocate servers
    to the machine, kill the servers that were moved, mark the machine
    ACTIVE, start the crawl service and finally activate the crawl,
    logcontrol and serve init scripts on it.

    returns 0 on success, 1 as soon as a mandatory step fails (the reason
    is logged)
    """
    # these are read over and over below -- fetch them once
    ver = self.cfg.getGlobalParam('VERSION')
    home = self.cfg.getGlobalParam('ENTERPRISE_HOME')
    bashrc = self.cfg.getGlobalParam('ENTERPRISE_BASHRC')

    # We can add a machine only when we are in active state
    if install_utilities.install_state(ver) != "ACTIVE":
      logging.error("Can add a machine only when we are in active state")
      return 1

    # First test for accessibility of the machine.
    if E.execute([machine], 'echo 1', None, 1) != E.ERR_OK:
      logging.error("Could not ssh into the machine %s" % machine)
      return 1

    # start the svs on the remote machine
    restart_svs_cmd = "%s/local/google3/enterprise/legacy/util/svs_utilities.py %s %s" % (
                          home, home, machine)
    if E.execute([E.getCrtHostName()],
                 SECURE_WRAPPER_COMMAND % (home, "-p2", restart_svs_cmd),
                 None, 0) != E.ERR_OK:
      logging.error("Could not start svs on machine %s" % machine)
      return 1

    # wait for some time for svs to come up
    time.sleep(5)
    # check to see if the svs is up and is the right version
    if not svs_utilities.PingAndCheckSvsVersion(bashrc, home, machine):
      logging.error("Svs not running correctly on machine %s" % machine)
      return 1
    testver = install_utilities.is_test(ver)

    # update MACHINES
    machines = self.cfg.getGlobalParam('MACHINES')
    if machine not in machines:
      machines.append(machine)
    self.cfg.setGlobalParam('MACHINES', machines)

    ret = core_utils.RemDeadNode(ver, testver, machine)
    if ret:
      logging.error('Cannot remove dead node from lockserver.')
      # we ignore this error for now

    # We just added a new machine into the config
    # this will lead to a change in concentrator config
    # so we need to re-run serve service which will
    # write the new config and restart the concentrator
    serve_cmd = ". %s && cd %s/local/google3/enterprise/legacy/scripts && " \
                "./serve_service.py %s" % (bashrc, home, home)
    E.exe("%s %s" % (serve_cmd, "babysit"))

    # poll the machine facts, one minute apart, until both are available
    # NOTE(review): the early exit needs both facts to be non-empty while
    # the check after the loop only rejects None, so an empty value polls
    # for all the tries and is then accepted -- confirm this is intended
    num_tries = 5
    for cur_try in range(num_tries):
      all_disks = self.cfg.mach_param_cache.GetFact("mounted-drives", machine)
      bad_disks = self.cfg.mach_param_cache.GetFact("var_log_badhds", machine)
      if bad_disks and all_disks:
        break
      time.sleep(60)
    if all_disks is None or bad_disks is None:
      logging.error("Could not get machine information about %s" % machine)
      return 1

    bad_disks = string.split(bad_disks, ' ')
    all_disks = string.split(all_disks, ' ')

    # Keep the drives that are not bad, address partition 3 on each of them
    # and change sda3 to hda3 etc. Duplicates are dropped, order is kept.
    unique_good_disks = []
    for drive in all_disks:
      if drive in bad_disks:
        continue
      disk = re.sub(r'^s', 'h', "%s3" % drive)
      if disk not in unique_good_disks:
        unique_good_disks.append(disk)

    # Add disks
    self.updatedisk(machine, unique_good_disks, true)

    # apc map update
    apc_map = self.cfg.globalParams.var_copy('APC_MAP')
    apc_map[machine] = apc_util.PortMap(apc_outlet)
    if not self.cfg.setGlobalParam('APC_MAP', apc_map):
      logging.error("ERROR setting apc map to %s" % repr(apc_map))
      return 1

    # create appropriate datadirs on that machine
    if not self.cfg.createDataDirs([machine], node_replacement = 1):
      logging.error("ERROR could not create datadirs on machine %s" % machine)
      return 1

    # Replicate the config
    self.cfg.replicateConfigOnMachine(machine)

    # Reconfigure net on the target machine
    if not reconfigurenet_util.doReconfigureNet(self.cfg.globalParams,
                                                [machine], i_am_master=0):
      logging.error('reconfigurenet failed for %s' % machine)
      return 1

    # Start core services on the new node
    if not install_utilities.start_core(ver, home, [machine], ignore=0):
      logging.error("ERROR could not start core services on %s" % machine)
      return 1
    # Add the chunkserver back
    gfs_utils.AddGFSChunkservers(ver, testver, [machine])

    # first we need to do Machine allocation.
    # this will assign things that will satisfy the constraints
    if not self.cfg.DoMachineAllocation(serversets=['workqueue-slave']):
      logging.error("ERROR doing machine allocation")
      return 1

    # now try to reallocate some servers from existing machines to the
    # new machine
    replaced = self.cfg.AllocateServersToNewMachine(machine)
    if not replaced:
      logging.error("ERROR allocating services to the new machine")
      return 1

    # first we need to restart the babysitter
    E.exe("%s %s" % (serve_cmd, "babysit"))
    time.sleep(60)

    # Now we need to stop all the replaced services; a failed kill is only
    # logged
    for server_string in replaced:
      server = serverlib.Server()
      server.InitFromName(server_string)
      replaced_type = server.servertype()
      kill_cmd = servertype.GetKillCmd(replaced_type, server.port())
      if E.execute([server.host()], kill_cmd, None, 1) != E.ERR_OK:
        logging.error("ERROR killing %s running on port %d on %s" % \
                             (replaced_type, server.port(), server.host()))


    # we should make it active
    if not install_utilities.set_install_state(machine, home, "ACTIVE"):
      logging.error("ERROR changing state on machine %s. "
                    "Please make it active and activate and "
                    "start crawl service on it" % machine)
      return 1

    crawl_cmd = ". %s && cd %s/local/google3/enterprise/legacy/scripts && " \
                "./crawl_service.py %s" % (bashrc, home, home)
    if E.execute([machine], "%s %s" % (crawl_cmd, "start"), None, 1) != E.ERR_OK:
      logging.error("Could not start crawl service on %s" % machine)
      return 1

    # save all the params
    self.cfg.saveParams()

    # for faster crawl recovery, lets restart all crawl processes
    self.restart_crawl_processes(serve_cmd)

    # activate the crawl, logcontrol and serve services on the remote
    # machine, in this order; stop at the first one that fails
    for service in ("crawl", "logcontrol", "serve"):
      activate_cmd = "/etc/rc.d/init.d/%s_%s activate >&/dev/null" \
                     "</dev/null" % (service, ver)
      if E.execute([machine],
                   SECURE_WRAPPER_COMMAND % (home, "-e", activate_cmd),
                   None, 0) != E.ERR_OK:
        logging.error("Could not activate %s service on machine %s" % (
          service, machine))
        logging.error("Please activate by hand")
        return 1

    logging.info("Machine %s successfully added into the system" % machine)

    if not mail_already_sent(M.MSG_MACHINEADDED % machine):
      SendMail.send(self.cfg, None, false,
                    M.MSG_MACHINEADDED % machine, "", true)
    return 0