def checkNginxStatus(logger):
  """
    Report whether Nginx is running with the Grok API configuration.

    Lists the processes matching `nginx` or `grok-api.conf` (via `ps aux`
    executed through `run`) and inspects which Grok Nginx configuration file
    name appears in that listing. Intended to be polled: callers should wrap
    this in retry logic with error handling.

    :param logger: Initialized logger

    :raises: infrastructure.utilities.exceptions.InstanceLaunchError
      If the process listing mentions `grok-error.conf`, i.e. Nginx came up
      using the error configuration.

    :returns: True if `grok-api.conf` appears in the process listing, False
      otherwise.
    :rtype: boolean
  """
  processListing = run(
    "ps aux | grep -e nginx -e grok-api.conf | grep -v grep").stdout

  # The error configuration is terminal: report it instead of letting the
  # caller keep polling.
  if "grok-error.conf" in processListing:
    raise InstanceLaunchError("Nginx launched in Error state: \r\n %s" %
                              processListing)

  # The loading/stopped configurations mean Grok is still coming up; that is
  # only worth a debug message.
  transitionalConfs = ("grok-loading.conf", "grok-stopped.conf")
  if any(confName in processListing for confName in transitionalConfs):
    logger.debug("Nginx has not yet finished loading: \r\n %s" %
                 processListing)

  return "grok-api.conf" in processListing
def checkGrokServicesStatus(logger):
  """
    Checks to see if all Grok Services are running. Returns True if all Services
    are running and False if any are not running yet.  This should be wrapped
    in retry logic with error handling.

    The output of `supervisorctl status` is scanned line by line. Failure
    states take priority over not-yet-ready states: every line is checked for
    a failure before any "not ready" result is reported, so a failed service
    is detected regardless of where it appears in the listing.

    :param logger: Initialized logger

    :raises: infrastructure.utilities.exceptions.InstanceLaunchError
      If any Grok service is reported as FATAL or EXITED.

    :returns: True if no service is in a failed or not-yet-running state;
      False if any service is STOPPED, STARTING, BACKOFF, STOPPING or UNKNOWN.
    :rtype: boolean
  """
  cmd = ("source /etc/grok/supervisord.vars && "
         "supervisorctl -c /opt/numenta/grok/conf/supervisord.conf status")
  grokServicesState = run(cmd)

  failedStates = frozenset(["FATAL", "EXITED"])
  # BACKOFF (restarting after exiting too quickly) and STOPPING are supervisor
  # states in which the process is not running either, so they must not be
  # reported as "all services running".
  notReadyStates = frozenset(["STOPPED", "STARTING", "BACKOFF", "STOPPING",
                              "UNKNOWN"])

  # One set of whitespace-delimited tokens per status line. splitlines() and
  # split() tolerate both "\r\n" and "\n" line endings and any column padding.
  tokensPerService = [set(line.split())
                      for line in grokServicesState.splitlines()]

  # Look for failures across *all* services first; a service that is merely
  # starting must not hide a failed one listed after it.
  if any(failedStates & tokens for tokens in tokensPerService):
    raise InstanceLaunchError("Some Grok services failed to start:\r\n%s" %
                              grokServicesState.stdout)

  if any(notReadyStates & tokens for tokens in tokensPerService):
    logger.debug("Some Grok services are not yet ready: \r\n %s" %
                 grokServicesState.stdout)
    return False

  return True
Exemple #3
0
def launchInstance(amiID, config, logger):
    """
    Launch an EC2 instance from the given AMI, wait until it is running, tag
    it, and return its public DNS name and instance ID.

    If the instance does not reach the "running" state, or tagging fails, the
    instance is terminated (via `terminateInstance`) before the error is
    propagated.

    :param amiID: The ID of the AMI to launch the instance from.

    :param config: A dict that is passed to `getEC2Connection` and
      `terminateInstance`. This function itself reads:
      - `REGION` = must be "us-west-2" or "us-east-1"
      - `KEY` = The SSH key name to use for initialization (e.g.: chef_west)
      - `INSTANCE_TYPE` = The size of EC2 instance to use (e.g.: m3.medium)

    :param logger: An initialized logger from the calling pipeline.

    :raises: infrastructure.utilities.exceptions.InstanceLaunchError
      If no security groups are defined for `config["REGION"]`, if the
      instance enters a state other than "pending" or "running" while booting,
      or if it fails to reach "running" in the allotted number of polls.

    :returns: A tuple containing the public DNS entry for the server and the
      EC2 Instance ID, in that order.
    """

    # Make sure we have the right security groups setup for the instance. We use
    # `basic_server` to allow SSH access from our office and specified secure IPs.
    # `grok_server` allows access to the server from ports 80 & 443 universally,
    # but can be limited if necessary.
    securityGroupsByRegion = {
        "us-west-2": ["basic_server", "grok_server"],
        "us-east-1": ["basic_server", "grok_server_east"],
    }
    region = config["REGION"]
    if region not in securityGroupsByRegion:
        # Fail before anything is launched instead of hitting an unbound
        # local variable further down.
        raise InstanceLaunchError(
            "No security groups are defined for region %s" % region)
    securityGroups = securityGroupsByRegion[region]

    conn = getEC2Connection(config)
    image = conn.get_image(amiID)

    logger.info("Launching instance from AMI: %s", amiID)
    reservation = image.run(key_name=config["KEY"],
                            security_groups=securityGroups,
                            instance_type=config["INSTANCE_TYPE"])

    instance = reservation.instances[0]
    instanceID = instance.id

    logger.debug("Waiting for instance %s to boot.", instanceID)
    for _ in xrange(MAX_RETRIES_FOR_INSTANCE_READY):
        logger.debug("Instance state: %s", instance.state)
        if instance.state == "running":
            break
        if instance.state != "pending":
            # Any other state will not turn into "running" by waiting; polling
            # further would only spin without sleeping or refreshing.
            terminateInstance(instanceID, config, logger)
            raise InstanceLaunchError(
                "Instance %s entered unexpected state %s while booting" %
                (instanceID, instance.state))
        sleep(SLEEP_DELAY)
        instance.update()
    else:
        # The last update() may have observed "running"; only give up if the
        # instance is still not running after the final poll.
        if instance.state != "running":
            terminateInstance(instanceID, config, logger)
            # Parenthesized: `%` and `*` share precedence, so without the
            # parentheses the formatted string would be repeated instead.
            raise InstanceLaunchError(
                "Instance took more than %d seconds to start" %
                (MAX_RETRIES_FOR_INSTANCE_READY * SLEEP_DELAY))

    publicDnsName = instance.public_dns_name

    # Building the tags can itself fail (missing JOB_NAME, os.getlogin()
    # raising), so it lives inside the cleanup guard together with the tagging
    # call; otherwise a failure here would leave the instance running.
    try:
        instanceTags = {}
        if os.environ.get("JENKINS_HOME"):
            instanceTags["Name"] = "%s-%s" % (os.environ["JOB_NAME"],
                                              jenkins.getBuildNumber())
        else:
            instanceTags["Name"] = ("running-locally-by-user:%s" %
                                    os.getlogin())

        instanceTags["Description"] = "Testing AMI %s" % (amiID)

        # Optional tags copied from the environment when present:
        # (environment variable, tag name).
        optionalTags = (("BUILD_URL", "JENKINS_LINK"),
                        ("GIT_BRANCH", "GIT_BRANCH"),
                        ("GIT_COMMIT", "GIT_COMMIT"))
        for envVar, tagName in optionalTags:
            if envVar in os.environ:
                instanceTags[tagName] = os.environ[envVar]

        conn.create_tags([instanceID], instanceTags)
    except:
        # Deliberately bare: whatever interrupted tagging (including
        # KeyboardInterrupt), terminate the instance and re-raise unchanged.
        terminateInstance(instanceID, config, logger)
        raise

    logger.info("Instance %s is running, public dns : %s", instanceID,
                publicDnsName)
    return publicDnsName, instanceID
Exemple #4
0
def runCommandBySSH(dnsName,
                    command,
                    logger,
                    maxRetries=120,
                    sleepDelay=1,
                    user=DEFAULT_USER,
                    silent=False,
                    sshKeyFile=None):
  """
  Run a command on an instance, retrying multiple times.

  The command is attempted up to `maxRetries` times. An attempt fails when
  fabric raises NetworkError or when the command's return code is non-zero.
  The result of the first successful attempt is returned; if every attempt
  fails, InstanceLaunchError is raised.

  :param dnsName: DNS name to run the command on. Required.

  :param command: command to run. Required.

  :param logger: An already initialized logger object. Required.

  :param maxRetries: Maximum number of attempts before giving up on running
                     the command. Also passed to fabric as
                     `connection_attempts`.

  :param sleepDelay: Time in seconds between retries. Also passed to fabric
                     as `timeout`.

  :param user: User to run the command as.

  :param silent: If True, suppress command output to console.

  :param sshKeyFile: SSH private key to use. Use the user's keys from their
                     ssh-agent keyring if unset.

  :raises: InvalidParametersError if command, dnsName or logger is missing.

  :raises: InstanceLaunchError if the command fails to run or has a failure
           during the run on every attempt.

  :returns: fabric run result of the command

  :rtype: fabric run result
  """
  if not command:
    raise InvalidParametersError("runCommandBySSH requires a command")
  if not dnsName:
    raise InvalidParametersError("runCommandBySSH requires a dnsName")
  if not logger:
    raise InvalidParametersError("runCommandBySSH requires a logger")

  kwargs = {"host_string": dnsName,
            "user": user,
            "timeout": sleepDelay,
            "connection_attempts": maxRetries,
            # Force fabric not to abort if the command fails or we won't be
            # able to retry
            "warn_only": True}
  # We only need to specify a key_filename to settings when sshKeyFile is
  # not None
  if sshKeyFile:
    kwargs["key_filename"] = sshKeyFile

  lastFailure = "no attempts were made"
  with settings(**kwargs):
    logger.debug("Running %s on %s as %s", command, dnsName, user)
    for tries in range(1, maxRetries + 1):
      try:
        if silent:
          with hide("output", "running"):
            result = run(command)
        else:
          result = run(command)
      except NetworkError:
        # If we can't connect to SSH, fabric raises NetworkError
        lastFailure = "network error"
      else:
        if result.return_code == 0:
          logger.debug("%s completed successfully", command)
          return result
        lastFailure = "return code %s" % result.return_code

      # Only wait when another attempt will follow; after the final attempt
      # fall through to the error below instead of sleeping.
      if tries < maxRetries:
        logger.debug("Try %s failed (%s), retrying in %s seconds", tries,
                     lastFailure, sleepDelay)
        sleep(sleepDelay)

  # Every attempt failed. Raising here (rather than from inside the loop on
  # an unreachable `tries > maxRetries` test) guarantees callers never get an
  # implicit None back.
  raise InstanceLaunchError("%s failed to run after %s tries (%s)" %
                            (command, maxRetries, lastFailure))