Ejemplo n.º 1
0
def threadable_process_node(node_list):
    """
    <Purpose>
      The parent function that calls child functions to do the little work.
      From this function we can see the order of events:
        1. upload tar
        2. check that we got a response (if not add to unreachable for later)
        3. run cleaning/setup scripts on remote machine
        4. run actual test scripts on remote machine
            (files are grabbed after all scripts execute, called from step4)

    <Arguments>
      node_list:
        a list containing a single tuple of (user, remotehost)

    <Exceptions>
      None.

    <Side Effects>
      Modifies running thread counter.

    <Returns>
      None.
    """

    # Bug fix: pre-bind node so the except clause below can always build its
    # error message, even when indexing node_list is what raised (previously
    # that path raised a NameError inside the handler).
    node = (None, '<unknown>')

    try:

        # node is a list containing one tuple
        node = node_list[0]

        # upload the .tar file.
        # attempt to upload the .tar file to the computers. this'll modify a
        # list of computers that we didn't connect to successfully, so we'll
        # remove them from the list of computers we want to run the rest of
        # the scripts on.
        threadable_remote_upload_tar(node_list)

        # only continue if node was marked reachable
        if deploy_threading.node_was_reachable(node):
            # clean the node
            threadable_remote_cleanup_all(node_list)
            # run the scripts remotely now
            threadable_remote_run_all(node_list)
            # cleanup the files, but only if it's not an instructional machine
            # (hosts on 128.*): those share NFS so files could still be in use
            # by the other machines. we record them via deploy_threading and
            # clean them up once all threads are totally done.
            if not node[1].startswith('128.'):
                threadable_cleanup_final(node_list)
            else:
                # defer cleanup: remember this instructional node for later
                deploy_threading.add_instructional_node(node)

    except Exception as e:
        deploy_logging.logerror("Error in thread assigned to "+node[1]+\
            " threadable_process_node ("+str(e)+")")
Ejemplo n.º 2
0
def threadable_process_node(node_list):
  """
  <Purpose>
    The parent function that calls child functions to do the little work. From
    this function we can see the order of events:
      1. upload tar
      2. check that we got a response (if not add to unreachable for later)
      3. run cleaning/setup scripts on remote machine
      4. run actual test scripts on remote machine
          (files are grabbed after all scripts execute, called from step4)

  <Arguments>
    node_list:
      a list containing a single tuple of (user, remotehost)

  <Exceptions>
    None.

  <Side Effects>
    Modifies running thread counter.

  <Returns>
    None.
  """

  # Bug fix: pre-bind node so the except clause below can always build its
  # error message, even when indexing node_list is what raised (previously
  # that path raised a NameError inside the handler).
  node = (None, '<unknown>')

  try:

    # node is a list containing one tuple
    node = node_list[0]

    # upload the .tar file.
    # attempt to upload the .tar file to the computers. this'll modify a list
    # of computers that we didn't connect to successfully, so we'll remove
    # them from the list of computers we want to run the rest of the scripts
    # on.
    threadable_remote_upload_tar(node_list)

    # only continue if node was marked reachable
    if deploy_threading.node_was_reachable(node):
      # clean the node
      threadable_remote_cleanup_all(node_list)
      # run the scripts remotely now
      threadable_remote_run_all(node_list)
      # cleanup the files, but only if it's not an instructional machine
      # (hosts on 128.*): those share NFS so files could still be in use by
      # the other machines. we record them via deploy_threading and clean
      # them up once all threads are totally done.
      if not node[1].startswith('128.'):
        threadable_cleanup_final(node_list)
      else:
        # defer cleanup: remember this instructional node for later
        deploy_threading.add_instructional_node(node)

  except Exception as e:
    deploy_logging.logerror("Error in thread assigned to "+node[1]+\
        " threadable_process_node ("+str(e)+")")
Ejemplo n.º 3
0
def threadable_remote_upload_tar(remote_machines):
    """
    <Purpose>
      Uploads the deploy.tar to each machine before running anything. Machines
      that timeout are added to the unreachable_hosts list in the dictionary.

    <Arguments>
      remote_machines:
        list of tuples with (user, ip) IPs that we have to cleanup.

    <Exceptions>
      None.

    <Side Effects>
      Temporarily locks thread_communications dict which is used by other
      threads trying to upload (if they run into an error).

    <Returns>
      None.
    """

    # Assume a single (user, ip) tuple if we weren't handed a list.
    # isinstance is the idiomatic type check and also accepts list subclasses.
    if not isinstance(remote_machines, list):
        remote_machines = [remote_machines]

    # for every machine in our list...
    for machine_tuple in remote_machines:

        # split up the (user, host) tuple
        username = machine_tuple[0]
        machine = machine_tuple[1]

        deploy_logging.log('Setup',
                           'Attempting tar file upload via scp on ' + machine)
        scp_errcode, scp_stdout, scp_stderr = upload_tar(
            username, str(machine))

        # only the formatted stderr is used in the messages below; the
        # formatted stdout is intentionally discarded.
        _, err = deploy_logging.format_stdout_and_err(scp_stdout, scp_stderr)

        # check the error codes (upload_tar hands the code back as a string)
        if str(scp_errcode) == '0':
            deploy_logging.log('Setup',
                               ' scp file upload complete on ' + machine)
        elif str(scp_errcode) == '1':
            # scp exit code 1 is a generic failure; treat as unreachable
            deploy_logging.logerror('Could not establish a connection with ' +
                                    machine + ' (' + err + ')')
            deploy_threading.add_unreachable_host((username, machine))
        else:
            deploy_logging.logerror('scp returned unknown error code ' +
                                    str(scp_errcode) + ' (' + err + ')')
            deploy_threading.add_unreachable_host((username, machine))
Ejemplo n.º 4
0
def threadable_remote_upload_tar(remote_machines):
  """
  <Purpose>
    Uploads the deploy.tar to each machine before running anything. Machines
      that timeout are added to the unreachable_hosts list in the dictionary.

  <Arguments>
    remote_machines:
      list of tuples with (user, ip) IPs that we have to cleanup.

  <Exceptions>
    None.

  <Side Effects>
    Temporarily locks thread_communications dict which is used by other
    threads trying to upload (if they run into an error).

  <Returns>
    None.
  """

  # Assume a single (user, ip) tuple if we weren't handed a list.
  # isinstance is the idiomatic type check and also accepts list subclasses.
  if not isinstance(remote_machines, list):
    remote_machines = [remote_machines]

  # for every machine in our list...
  for machine_tuple in remote_machines:

    # split up the (user, host) tuple
    username = machine_tuple[0]
    machine = machine_tuple[1]

    deploy_logging.log('Setup', 'Attempting tar file upload via scp on '+machine)
    scp_errcode, scp_stdout, scp_stderr = upload_tar(username, str(machine))

    # only the formatted stderr is used in the messages below; the formatted
    # stdout is intentionally discarded.
    _, err = deploy_logging.format_stdout_and_err(scp_stdout, scp_stderr)

    # check the error codes (upload_tar hands the code back as a string)
    if str(scp_errcode) == '0':
      deploy_logging.log('Setup', ' scp file upload complete on '+machine)
    elif str(scp_errcode) == '1':
      # scp exit code 1 is a generic failure; treat as unreachable
      deploy_logging.logerror('Could not establish a connection with '+machine+' ('+err+')')
      deploy_threading.add_unreachable_host((username, machine))
    else:
      deploy_logging.logerror('scp returned unknown error code '+str(scp_errcode)+' ('+err+')')
      deploy_threading.add_unreachable_host((username, machine))
Ejemplo n.º 5
0
def upload_tar(user, remote_host, tar_filename="deploy.tar"):
    """
    <Purpose>
      Pushes tar_filename to remote_host over scp (logging in as user) and
      logs whether the transfer worked. The actual copy is delegated to
      deploy_network.remote_upload_file.

    <Arguments>
      user:
        the user to log in as on the remote machine.
      remote_host:
        the remote machine's IP to which we'll be uploading files.
      tar_filename:
        Optional. Default is deploy.tar. The tar file to upload to the
        remote host.

    <Exceptions>
      None.

    <Side Effects>
      None.

    <Returns>
      A tuple: (returncode, stdout, stderr)
    """

    # hand the transfer off to the scp helper
    upload_result = deploy_network.remote_upload_file(
        tar_filename, user, remote_host)
    out_text, err_text, exit_code = upload_result

    # log success vs. failure based on the scp exit code
    if exit_code != 0:
        deploy_logging.logerror(remote_host + ': Trouble uploading deploy.tar')
    else:
        deploy_logging.log(remote_host, 'Successfully uploaded deploy.tar')

    # note: the exit code is handed back as a string
    return (str(exit_code), out_text, err_text)
Ejemplo n.º 6
0
def upload_tar(user, remote_host, tar_filename = "deploy.tar"):
  """
  <Purpose>
    Pushes tar_filename to remote_host over scp (logging in as user) and
    logs whether the transfer worked. The actual copy is delegated to
    deploy_network.remote_upload_file.

  <Arguments>
    user:
      the user to log in as on the remote machine.
    remote_host:
      the remote machine's IP to which we'll be uploading files.
    tar_filename:
      Optional. Default is deploy.tar. The tar file to upload to the
      remote host.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    A tuple: (returncode, stdout, stderr)
  """

  # hand the transfer off to the scp helper
  upload_result = deploy_network.remote_upload_file(tar_filename, user,
      remote_host)
  out_text, err_text, exit_code = upload_result

  # log success vs. failure based on the scp exit code
  if exit_code != 0:
    deploy_logging.logerror(remote_host+': Trouble uploading deploy.tar')
  else:
    deploy_logging.log(remote_host, 'Successfully uploaded deploy.tar')

  # note: the exit code is handed back as a string
  return (str(exit_code), out_text, err_text)
Ejemplo n.º 7
0
def init():
    """
    <Purpose>
      Initializes all the globals and things to the default values and
      starts the thread that deals with killing processes started that
      have timed out.

    <Arguments>
      None.

    <Exceptions>
      Critical exception thrown if thread monitor could not be started.

    <Side Effects>
      None.

    <Returns>
      Boolean.

      True on success.
      False on failure.
    """

    # initialize, keep track of how many threads are running
    thread_communications['threads_running'] = 0

    # set the kill flag to false and start the thread monitoring pids
    thread_communications['kill_flag'] = False

    # tells the module it has been initialized
    thread_communications['init'] = True

    try:
        thread.start_new_thread(pid_timeout, ())
    except Exception as e:
        deploy_logging.logerror("Trouble starting pid thread monitor")
        return False

    # Bug fix: the original fell off the end here and implicitly returned
    # None on success, despite the docstring promising True.
    return True
def init():
  """
  <Purpose>
    Initializes all the globals and things to the default values and
    starts the thread that deals with killing processes started that
    have timed out.

  <Arguments>
    None.

  <Exceptions>
    Critical exception thrown if thread monitor could not be started.

  <Side Effects>
    None.

  <Returns>
    Boolean.

    True on success.
    False on failure.
  """

  # initialize, keep track of how many threads are running
  thread_communications['threads_running'] = 0

  # set the kill flag to false and start the thread monitoring pids
  thread_communications['kill_flag'] = False

  # tells the module it has been initialized
  thread_communications['init'] = True

  try:
    thread.start_new_thread(pid_timeout, ())
  except Exception as e:
    deploy_logging.logerror("Trouble starting pid thread monitor")
    return False

  # Bug fix: the original fell off the end here and implicitly returned
  # None on success, despite the docstring promising True.
  return True
Ejemplo n.º 9
0
def deploy():
  """
  <Purpose>
    This function is the brains behind the deploy script. All the main calls
    originate from this function.

    -Gets list of remote hosts from a file
    -Calls function to execute cleanup/setup on remote hosts before
      we can run remote scripts and then that same function executes
      the remote script files

  <Arguments>
    None.

  <Exceptions>
    Exit if hostlist file was not found.

  <Side Effects>
    Mutates deploy_threading.thread_communications ('unreachable_host' and
    'running_process_ids' entries) and kills any leftover child scp/ssh
    processes on the way out.

  <Returns>
    None.
  """

  # Get list of (user, host) tuples parsed from the hosts file
  myhosts = get_remote_hosts_from_file()

  if not myhosts: # if we didn't find any hosts.. crap out!
    print "Didn't find any remote hosts file!"
    deploy_logging.logerror("Didn't find any remote hosts file!")
    # return if we don't have instructional machines to process
    if 'machine_list' not in deploy_threading.thread_communications.keys():
      return
  else:
    # check if we also have instructional machines, and if we do, then
    # make sure we're not being tricked - remove all instructional machines
    # from the myhosts list so they're only processed once, below
    if 'machine_list' in deploy_threading.thread_communications.keys():
      # we have instructional machines
      machine_list = deploy_threading.thread_communications['machine_list']
      myhosts = list(set(myhosts)-set(machine_list))
  
  # initialize thread_communications dictionary to a list which will have
  # our unreachable hosts
  deploy_threading.thread_communications['unreachable_host'] = []

  # this will keep track of the proc id's that are launched on different
  # threads. These are ssh/scp processes. We keep track of these because
  # we want to make sure that when we exit deploy.py, we kill all of these
  # processes - they should be killed by that time unless there was some kind 
  # of error.
  deploy_threading.thread_communications['running_process_ids'] = []
  
  # initial run over the regular hosts
  connect_and_do_work(myhosts)

  # now do the same for the instructional machines if we have any
  # (second argument of 3 is passed through to connect_and_do_work)
  if 'machine_list' in deploy_threading.thread_communications.keys():
    connect_and_do_work(deploy_threading.thread_communications['machine_list'], 3)
  

  # if we had unreachable hosts..    
  if deploy_threading.has_unreachable_hosts():
    # Currently, set NOT to retry hosts.  Since it's running regularly as a service,
    # there is no need as 99% of these hosts time out anyway, so it just takes
    # a lot longer than it should.
    # NOTE: range(0) is empty, so this retry loop is deliberately disabled;
    # raise the bound to re-enable retries.
    for i in range(0):      
      
      # increase timeout time by 25% each time (stored as a string)
      deploy_network.default_connection_timeout =\
          str(int(float(deploy_network.default_connection_timeout) * 1.25))
      
      # 1. use list of unreachable hosts list as our list to retry
      last_failed_hosts = deploy_threading.thread_communications['unreachable_host']

      # 2. reset the unreachable hosts list
      deploy_threading.thread_communications['unreachable_host'] = []
      deploy_logging.log("Notice", "Trying to connect to failed hosts (connection attempt #"+str(i+2)+")")
      connect_and_do_work(last_failed_hosts)
  
  
  print "Checking that all child threads/processes are dead..."
  for each_tuple in deploy_threading.thread_communications['running_process_ids']:
    try:
      # tuple is (pid, expiretime, remotehost, username)
      procid = int(each_tuple[0])
      # SIGKILL any scp/ssh child that is somehow still alive
      os.kill(procid, 9)
    except OSError, ose:
      # the process already exited - nothing to clean up
      pass
    except Exception, e:
      # NOTE(review): if int(each_tuple[0]) itself raised, procid is unbound
      # here and this print raises NameError - confirm pids are always numeric
      print "Something went wrong while trying to kill process "+\
          str(procid)+", "+str(e)
Ejemplo n.º 10
0

      # if first chars match what we want ('!user:'******'!user:'******'!user:'******'\n '):
          # and ignore comments (lines starting with #)
          if line.strip('\n ')[0] != '#':
            # if we get here, then we have an IP so we need to  check that 
            # user is not empty.. log err if it is and complain.
            if not current_username:
              deploy_logging.logerror('Critical Error: No username specified for remote host group!')
              file_of_ips.close()
              return False

            # add (username, remote_host) pair while casting remote_host to lowercase in case
            # it's a hostname for easy comparison if needed everywhere
            users_ip_tuple_list.append((current_username, line.rstrip('\n ').lower()))
            # set flag that we have at least one ip
            have_one_ip = True

    # return true only if we have at least ONE ip that we added to the list 
    # and not just a bunch of users
    if have_one_ip:
      # lets make the list a set, which is a cheap way of getting rid of
      # duplicates, then cast back to list.
      finalized_list = list(set(users_ip_tuple_list))
Ejemplo n.º 11
0
def deploy():
    """
  <Purpose>
    This function is the brains behind the deploy script. All the main calls
    originate from this function.

    -Gets list of remote hosts from a file
    -Calls function to execute cleanup/setup on remote hosts before
      we can run remote scripts and then that same function executes
      the remote script files

  <Arguments>
    None.

  <Exceptions>
    Exit if hostlist file was not found.

  <Side Effects>
    Mutates deploy_threading.thread_communications ('unreachable_host' and
    'running_process_ids' entries) and kills any leftover child scp/ssh
    processes on the way out.

  <Returns>
    None.
  """

    # Get list of (user, host) tuples parsed from the hosts file
    myhosts = get_remote_hosts_from_file()

    if not myhosts:  # if we didn't find any hosts.. crap out!
        print "Didn't find any remote hosts file!"
        deploy_logging.logerror("Didn't find any remote hosts file!")
        # return if we don't have instructional machines to process
        if 'machine_list' not in deploy_threading.thread_communications.keys():
            return
    else:
        # check if we also have instructional machines, and if we do, then
        # make sure we're not being tricked - remove all instructional machines
        # from the myhosts list so they're only processed once, below
        if 'machine_list' in deploy_threading.thread_communications.keys():
            # we have instructional machines
            machine_list = deploy_threading.thread_communications[
                'machine_list']
            myhosts = list(set(myhosts) - set(machine_list))

    # initialize thread_communications dictionary to a list which will have
    # our unreachable hosts
    deploy_threading.thread_communications['unreachable_host'] = []

    # this will keep track of the proc id's that are launched on different
    # threads. These are ssh/scp processes. We keep track of these because
    # we want to make sure that when we exit deploy.py, we kill all of these
    # processes - they should be killed by that time unless there was some kind
    # of error.
    deploy_threading.thread_communications['running_process_ids'] = []

    # initial run over the regular hosts
    connect_and_do_work(myhosts)

    # now do the same for the instructional machines if we have any
    # (second argument of 3 is passed through to connect_and_do_work)
    if 'machine_list' in deploy_threading.thread_communications.keys():
        connect_and_do_work(
            deploy_threading.thread_communications['machine_list'], 3)

    # if we had unreachable hosts..
    if deploy_threading.has_unreachable_hosts():
        # Currently, set NOT to retry hosts.  Since it's running regularly as a service,
        # there is no need as 99% of these hosts time out anyway, so it just takes
        # a lot longer than it should.
        # NOTE: range(0) is empty, so this retry loop is deliberately
        # disabled; raise the bound to re-enable retries.
        for i in range(0):

            # increase timeout time by 25% each time (stored as a string)
            deploy_network.default_connection_timeout =\
                str(int(float(deploy_network.default_connection_timeout) * 1.25))

            # 1. use list of unreachable hosts list as our list to retry
            last_failed_hosts = deploy_threading.thread_communications[
                'unreachable_host']

            # 2. reset the unreachable hosts list
            deploy_threading.thread_communications['unreachable_host'] = []
            deploy_logging.log(
                "Notice",
                "Trying to connect to failed hosts (connection attempt #" +
                str(i + 2) + ")")
            connect_and_do_work(last_failed_hosts)

    print "Checking that all child threads/processes are dead..."
    for each_tuple in deploy_threading.thread_communications[
            'running_process_ids']:
        try:
            # tuple is (pid, expiretime, remotehost, username)
            procid = int(each_tuple[0])
            # SIGKILL any scp/ssh child that is somehow still alive
            os.kill(procid, 9)
        except OSError, ose:
            # the process already exited - nothing to clean up
            pass
        except Exception, e:
            # NOTE(review): if int(each_tuple[0]) itself raised, procid is
            # unbound here and this print raises NameError - confirm pids are
            # always numeric
            print "Something went wrong while trying to kill process "+\
                str(procid)+", "+str(e)
Ejemplo n.º 12
0
def remote_download_file(remote_fn_path, local_fn_path, user, remote_host, retry_on_refusal = 3, connect_timeout = default_connection_timeout):
  """
  <Purpose>
    This uses scp to download a file from a remote computer.

  <Arguments>
    remote_fn_path:
      The path to the file to download (remote file)
    local_fn_path:
      Where do we put it on this computer?
    user:
      user to log in as
    remote_host:
      the ip/name of the machine we're connecting to.
    retry_on_refusal:
      Optional. Integer. Has number of times to retry the connection IF it was
      refused (built in to take care of not 'spamming' the remote server)
    connect_timeout:
      Optional. Integer. Time in seconds for ssh to timeout if no response was
      received.

  <Exceptions>
    Exception raised when the local destination directory does not exist.

  <Side Effects>
    None.

  <Returns>
    Tuple. (out, err, returncode)
    Details:
      out: stdout from scp
      err: err from ssh
      returncode: scp's exit code
  """
  # local_fn_path will have the path + name of file

  # split the local path into its directory part; the filename itself
  # (localfn) is not used further
  dir_to_local_file, junk, localfn = local_fn_path.rpartition('/')  

  # fail fast if the destination directory isn't there, before spawning scp
  if not os.path.isdir(dir_to_local_file):
    deploy_logging.logerror('Local destination directory does not exist.')
    raise Exception('Please check calling method.')

  # the SCP handle used.
  # NOTE(review): the command is built by string concatenation and run with
  # shell=True - user/remote_host/paths are interpolated unescaped, so this
  # is only safe for trusted, pre-validated inputs.
  scp_proc_handle = subprocess.Popen('scp -o BatchMode=yes -o '+\
      'ConnectTimeout='+str(connect_timeout)+' -o StrictHostKeyChecking=no '+\
      ' '+user+'@'+remote_host+':'+remote_fn_path+\
      ' '+local_fn_path, shell = True, stdout = subprocess.PIPE, 
      stderr = subprocess.PIPE)    
  
  # set the PID of the process so we can set a timeout later
  scp_proc_pid = scp_proc_handle.pid

  # start thread to monitor timeouts (on another thread); it will kill the
  # scp process if it outlives connect_timeout
  deploy_threading.monitor_timeout(scp_proc_pid, int(connect_timeout), remote_host, user)

  # execute and block until scp finishes (or is killed by the monitor)
  out, err = scp_proc_handle.communicate('')

  returncode = scp_proc_handle.returncode

  # retry if conn. was refused?
  if retry_on_refusal:
    # check if we got a connection refused. if we did, could be cuz we're spamming
    # the server, so sleep and then try again
    didwesleep = sleep_on_conn_refused(out, err, retry_on_refusal, remote_host)
    # we slept, so call function again and try to execute
    if didwesleep:
      # run again, but this time decrement retry counter.
      # NOTE(review): the recursive call passes the module-level
      # default_connection_timeout rather than this call's connect_timeout,
      # so any caller-supplied timeout is dropped on retry - confirm intended.
      out, err, returncode = remote_download_file(remote_fn_path, 
          local_fn_path, user, remote_host, retry_on_refusal - 1, 
          connect_timeout = default_connection_timeout)

  # normalize stdout/stderr into printable strings
  out, err = deploy_logging.format_stdout_and_err(out, err)

  return out, err, returncode
Ejemplo n.º 13
0
def remote_upload_file(local_fn_path, user, remote_host, retry_on_refusal = 3, connect_timeout = default_connection_timeout):
  """
  <Purpose>
    This uses scp to upload a file to a remote computer.

  <Arguments>
    local_fn_path:
      Which file do we chuck to the remote computer?
    user:
      user to log in as
    remote_host:
      the ip/name of the machine we're connecting to.
    retry_on_refusal:
      Optional. Integer. Has number of times to retry the connection IF it was
      refused (built in to take care of not 'spamming' the remote server)
    connect_timeout:
      Optional. Integer. Time in seconds for ssh to timeout if no response was
      received.

  <Exceptions>
    Exception raised when the local file does not exist.

  <Side Effects>
    None.

  <Returns>
    Tuple. (out, err, returncode)
    Details:
      out: stdout from scp
      err: err from ssh
      returncode: scp's exit code
  """

  # fail fast if the local file is missing, before spawning scp
  if not os.path.isfile(local_fn_path):
    deploy_logging.logerror('Problem with local file: it does not exist!')
    raise Exception('Please check calling method.')

  # NOTE(review): the command is built by string concatenation and run with
  # shell=True - user/remote_host/paths are interpolated unescaped, so this
  # is only safe for trusted, pre-validated inputs. The trailing ':' uploads
  # into the remote user's home directory.
  scp_proc_handle = subprocess.Popen('scp -o BatchMode=yes -o '+\
      'ConnectTimeout='+str(connect_timeout)+' -o StrictHostKeyChecking=no '+\
      ' '+local_fn_path+' '+user+"@"+remote_host+":", shell = True, 
      stdout = subprocess.PIPE, stderr = subprocess.PIPE)

  # pid of the scp child, handed to the timeout monitor
  scp_proc_pid = scp_proc_handle.pid

  # start thread to monitor timeouts (on another thread); it will kill the
  # scp process if it outlives connect_timeout
  deploy_threading.monitor_timeout(scp_proc_pid, int(connect_timeout), remote_host, user)

  # execute and block until done...
  out, err = scp_proc_handle.communicate('')

  returncode = scp_proc_handle.returncode

  # retry if conn. was refused?
  if retry_on_refusal:
    # check if we got a connection refused. if we did, could be cuz we're 
    # spamming the server, so sleep and then try again
    didwesleep = sleep_on_conn_refused(out, err, retry_on_refusal, remote_host)
    # we slept, so call function again and try to execute
    if didwesleep:
      # run again, but this time decrement retry counter.
      # NOTE(review): the recursive call passes the module-level
      # default_connection_timeout rather than this call's connect_timeout,
      # so any caller-supplied timeout is dropped on retry - confirm intended.
      out, err, returncode = remote_upload_file(local_fn_path, user, 
          remote_host, retry_on_refusal - 1, connect_timeout = default_connection_timeout)

  # normalize stdout/stderr into printable strings
  out, err = deploy_logging.format_stdout_and_err(out, err)

  return out, err, returncode
Ejemplo n.º 14
0
def remote_download_dir(remote_source_dir, local_dest_dir, user, remote_host, retry_on_refusal = 3, connect_timeout = default_connection_timeout):
  """
  <Purpose>
    This uses scp to download a directory from a remote computer.

  <Arguments>
    remote_source_dir:
      The path to the directory to download (remote directory)
    local_dest_dir:
      Where do we put it on this computer?
    user:
      user to log in as
    remote_host:
      the ip/name of the machine we're connecting to.
    retry_on_refusal:
      Optional. Integer. Has number of times to retry the connection IF it was
      refused (built in to take care of not 'spamming' the remote server)
    connect_timeout:
      Optional. Integer. Time in seconds for ssh to timeout if no response was
      received.

  <Exceptions>
    Exception raised when the parent of local_dest_dir does not exist.

  <Side Effects>
    None.

  <Returns>
    Tuple. (out, err, returncode)
    Details:
      out: stdout from scp
      err: err from ssh
      returncode: scp's exit code
  """
  # the dir one level 'up' from the our destination dir must exist, so lets 
  # grab it by doing some string math.. remove trailing / and then partition
  # on the last remaining / to get the parent path
  local_dest_dir_parent, junk, morejunk = local_dest_dir.strip('/').rpartition('/')  

  # if the parent of our local destination directory does not exist, complain
  # (both paths are logged to help diagnose which part is wrong)
  if not os.path.isdir(local_dest_dir_parent):
    deploy_logging.logerror(local_dest_dir)
    deploy_logging.logerror(local_dest_dir_parent)
    deploy_logging.logerror('Problem with local directory: it does not exist!')
    raise Exception('Please check calling method.')

  # get the scp handle (-r copies the directory recursively).
  # NOTE(review): the command is built by string concatenation and run with
  # shell=True - user/remote_host/paths are interpolated unescaped, so this
  # is only safe for trusted, pre-validated inputs.
  scp_proc_handle = subprocess.Popen('scp -r -o BatchMode=yes -o '+
      'ConnectTimeout='+str(connect_timeout)+' -o StrictHostKeyChecking=no '+\
      user+'@'+remote_host+':'+remote_source_dir+\
      ' '+local_dest_dir, shell = True, stdout = subprocess.PIPE, 
      stderr = subprocess.PIPE)  
    
  # the pid of the scp process just started, handed to the timeout monitor
  scp_proc_pid = scp_proc_handle.pid

  # start thread to monitor timeouts (on another thread); it will kill the
  # scp process if it outlives connect_timeout
  deploy_threading.monitor_timeout(scp_proc_pid, int(connect_timeout), remote_host, user)

  # execute string and block this thread until done...
  out, err = scp_proc_handle.communicate('')

  returncode = scp_proc_handle.returncode

  # retry if conn. was refused?
  if retry_on_refusal:
    # check if we got a connection refused. if we did, could be cuz we're 
    # spamming the server, so sleep and then try again
    didwesleep = sleep_on_conn_refused(out, err, retry_on_refusal, remote_host)
    # we slept, so call function again and try to execute
    if didwesleep:
      # run again, but this time decrement retry counter.
      # NOTE(review): the recursive call passes the module-level
      # default_connection_timeout rather than this call's connect_timeout,
      # so any caller-supplied timeout is dropped on retry - confirm intended.
      out, err, returncode = remote_download_dir(remote_source_dir, 
          local_dest_dir, user, remote_host, retry_on_refusal - 1, 
          connect_timeout = default_connection_timeout)

  # normalize stdout/stderr into printable strings
  out, err = deploy_logging.format_stdout_and_err(out, err)

  return out, err, returncode
Ejemplo n.º 15
0
def remote_get_log(user, remote_host):
  """
  <Purpose>
    Gets the remote logs (all tarred up) from remote_host and copies it to a 
    local directory via scp then untars it into deploy.logs/[remote_host]/.

  <Arguments>
    user:
      the user to log in as
    remote_host:
      the IP of the host to get the logs from

  <Exceptions>
    scp fails/times out. (All exceptions are caught and at most logged, so
    nothing propagates to the caller.)

  <Side Effects>
    Creates ./deploy.logs/[remote_host]/ and removes the downloaded .tgz
    after extraction.

  <Returns>
    No returns.
  """

  try:
    # set up dir that we'll move the remote .tar into
    if not os.path.isdir('./deploy.logs/'+remote_host):
      os.mkdir('./deploy.logs/'+remote_host)
    
    # download the tar file from remote host; the remote file is expected to
    # be named [remote_host].tgz
    out, err, returncode = remote_download_file(remote_host+'.tgz', 
        './deploy.logs/'+remote_host+'/'+remote_host+'.tgz', user, remote_host)

    deploy_logging.log('Downloading logs', 'Logs downloaded from '+remote_host)
    # now try to untar the files

    # build up a command list to execute
    command_list = []

    # tar is picky about where it'll unzip to (CWD), so we'll just cd there
    command_list.append('cd ./deploy.logs/'+remote_host+'/')

    # now untar. if deploy_main.verbosity >=1 then we'll be verbose
    if deploy_main.verbosity >=1:
      command_list.append('tar -xvvf '+remote_host+'.tgz')
    else:
      command_list.append('tar -xf '+remote_host+'.tgz')

    # now make the command string by joining the list elements with '; '.
    # NOTE(review): remote_host is interpolated into a shell command here -
    # safe only if remote_host is a validated IP/hostname.
    command_string = '; '.join(command_list)

    # execute string via the shell helper
    out, err, retvalue = deploy_helper.shellexec2(command_string)

    deploy_logging.log('Downloading logs', 'Logs from '+remote_host+' are ready')

    # we no longer need the tar file, just hogging up space
    os.remove('./deploy.logs/'+remote_host+'/'+remote_host+'.tgz')

  except Exception, e:
    if deploy_main.verbosity == 2:
      # Only log if we error and need to narrow this down. otherwise,
      # it gets really spammy.    
      deploy_logging.logerror(remote_host+": Some kind of err in remote_get_log. ("+\
          remote_host+") , error:"+str(e)+")")
Ejemplo n.º 16
0
    ValueError: occurs when the host to be removed is not in the array

  <Side Effects>
    None.

  <Returns>
    None.
  """
    try:
        thread_communications['hosts_left'].remove((user, remote_host))
    except ValueError, e:
        # host is already removed, keep going
        pass
    except Exception, e:
        print e
        deploy_logging.logerror("Error in remove_host_from_hosts_left: " +
                                str(e))
    else:
        # no error, decrease the running thread count
        threading_lock_and_sub()


def pid_timeout():
    """
  <Purpose>
    This function is intented to be called once and supposed to run on a 
    separate thread. Until the 'kill' flag is set, it will spin and see
    which pid's need to be killed.
    
    All process IDs are set via the set_pid_timeout method.

  <Arguments>
Ejemplo n.º 17
0
def pid_timeout():
    """
  <Purpose>
    Watchdog loop meant to be started once on a dedicated thread. Until the
    shared 'kill_flag' in thread_communications is set, it wakes every five
    seconds and force-kills any registered process whose deadline has passed.

    Process IDs (with their deadlines) are registered via set_pid_timeout.

  <Arguments>
    None.

  <Exceptions>
    OSError: the process no longer exists, ignore
    ValueError: when removing host from running hosts this means that the
      host has already been terminated.
    Any other exception is unexpected

  <Side Effects>
    Initializes thread_communications['running_process_ids'], kills
    processes via os.kill / shell 'kill -9', and calls subtract_host_left
    to update shared host/thread bookkeeping.

  <Returns>
    None.
  """
    # keeps spinning and sleeping, checking which PIDs need to be killed
    # shared registry of (pid, expiretime, hostname, username) tuples
    thread_communications['running_process_ids'] = []
    # while the kill flag is false. Kill flag is modified right before
    # exit
    while not thread_communications['kill_flag']:
        # sleep and wakeup every couple seconds.
        time.sleep(5)
        # this list will keep track of the pids that we've killed
        # NOTE(review): killed_pids is populated below but never consumed in
        # this visible body, and killed entries are never removed from
        # running_process_ids — presumably pruned elsewhere; TODO confirm.
        killed_pids = []

        # check the running_process_ids and see if any of them have expired
        # NOTE(review): this iterates a shared list that other threads
        # appear to append to — safe only if appends are the sole
        # concurrent mutation; verify against set_pid_timeout.
        for each_process in thread_communications['running_process_ids']:
            # each process is a tuple that consists of (pid, expiretime, hostname, username)
            process_to_kill = each_process[0]
            expire_time = each_process[1]
            remote_host = each_process[2]
            user = each_process[3]
            # if the current time is past the set expire time then we need to try and kill it
            if expire_time <= time.time():
                # try to kill process
                try:
                    # check if process is still running
                    # (liveness probe via the /proc filesystem — Linux-specific)
                    if os.path.exists('/proc/' + str(process_to_kill)):
                        # signal 9 is SIGKILL: not catchable by the target
                        os.kill(process_to_kill, 9)
                        killed_pids.append(each_process)
                        # sleep a second, and then check that the process was killed. if
                        # not, try a 2nd and third time
                        time.sleep(1)
                        if os.path.exists('/proc/' + str(process_to_kill)):
                            # try os.kill again, and if that doesn't work, use shellexec method
                            os.kill(process_to_kill, 9)
                            time.sleep(1)
                            if os.path.exists('/proc/' + str(process_to_kill)):
                                # last resort: shell out to kill(1)
                                deploy_helper.shellexec2('kill -9 ' +
                                                         str(process_to_kill))
                                time.sleep(1)
                        if remote_host:
                            deploy_logging.logerror("Forced kill of PID "+str(process_to_kill)+" due to timeout! The host"+\
                                    " on this thread is "+remote_host)
                        else:
                            deploy_logging.logerror("Forced kill of PID " +
                                                    str(process_to_kill) +
                                                    " due to timeout!")
                        # subtract from out running thread count and remove host
                        subtract_host_left([(user, remote_host)])
                    else:
                        # the process is dead, just remove host from hosts_left just in case, and
                        # remove from running pids as well, but dont sub the # of threads
                        killed_pids.append(each_process)
                        subtract_host_left([(user, remote_host)], False)

                except OSError, ose:
                    # this means no pid found and process has most likely
                    # already terminated (it raced with our existence check)
                    deploy_logging.logerror("Process" + str(process_to_kill) +
                                            "(" + remote_host +
                                            ") is already done.")
                    subtract_host_left([(user, remote_host)], False)
                    pass
                except Exception, e:
                    deploy_logging.logerror("Unexpected error in pid_timeout thread "+\
                      "while killing a child process: "+str(e))
Ejemplo n.º 18
0
    ValueError: occurs when the host to be removed is not in the array

  <Side Effects>
    None.

  <Returns>
    None.
  """
  try:
    thread_communications['hosts_left'].remove((user, remote_host))
  except ValueError, e:
    # host is already removed, keep going
    pass
  except Exception, e:
    print e
    deploy_logging.logerror("Error in remove_host_from_hosts_left: "+str(e))
  else:
    # no error, decrease the running thread count
    threading_lock_and_sub()

  
def pid_timeout():
  """
  <Purpose>
    This function is intented to be called once and supposed to run on a 
    separate thread. Until the 'kill' flag is set, it will spin and see
    which pid's need to be killed.
    
    All process IDs are set via the set_pid_timeout method.

  <Arguments>
Ejemplo n.º 19
0
def pid_timeout():
  """
  <Purpose>
    This function is intented to be called once and supposed to run on a 
    separate thread. Until the 'kill' flag is set, it will spin and see
    which pid's need to be killed.
    
    All process IDs are set via the set_pid_timeout method.

  <Arguments>
    None.

  <Exceptions>
    OSError: the process no longer exists, ignore
    ValueError: when removing host from running hosts this means that the
      host has already been terminated.
    Any other exception is unexpected

  <Side Effects>
    None.

  <Returns>
    None.
  """
  # keeps spinning and sleeping, checking which PIDs need to be killed
  thread_communications['running_process_ids'] = []
  # while the kill flag is false. Kill flag is modified right before
  # exit
  while not thread_communications['kill_flag']:
    # sleep and wakeup every couple seconds.
    time.sleep(5)
    # this list will keep track of the pids that we've killed
    killed_pids = []
    
    # check the running_process_ids and see if any of them have expired
    for each_process in thread_communications['running_process_ids']:
      # each process is a tuple that consists of (pid, expiretime, hostname, username)
      process_to_kill = each_process[0]
      expire_time = each_process[1]
      remote_host = each_process[2]
      user = each_process[3]
      # if the current time is past the set expire time then we need to try and kill it
      if expire_time <= time.time():
        # try to kill process
        try:
          # check if process is still running
          if os.path.exists('/proc/'+str(process_to_kill)):
            os.kill(process_to_kill, 9)
            killed_pids.append(each_process)
            # sleep a second, and then check that the process was killed. if 
            # not, try a 2nd and third time
            time.sleep(1)
            if os.path.exists('/proc/'+str(process_to_kill)):
              # try os.kill again, and if that doesn't work, use shellexec method
              os.kill(process_to_kill, 9)
              time.sleep(1)
              if os.path.exists('/proc/'+str(process_to_kill)):
                deploy_helper.shellexec2('kill -9 '+str(process_to_kill))
                time.sleep(1)
            if remote_host:
              deploy_logging.logerror("Forced kill of PID "+str(process_to_kill)+" due to timeout! The host"+\
                      " on this thread is "+remote_host)
            else:
              deploy_logging.logerror("Forced kill of PID "+str(process_to_kill)+" due to timeout!")
            # subtract from out running thread count and remove host
            subtract_host_left([(user, remote_host)])
          else:
            # the process is dead, just remove host from hosts_left just in case, and
            # remove from running pids as well, but dont sub the # of threads
            killed_pids.append(each_process)
            subtract_host_left([(user, remote_host)], False)
            
        except OSError, ose:
          # this means no pid found and process has most likely 
          # already terminated
          deploy_logging.logerror("Process"+str(process_to_kill)+"("+remote_host+") is already done.")
          subtract_host_left([(user, remote_host)], False)
          pass
        except Exception, e:
          deploy_logging.logerror("Unexpected error in pid_timeout thread "+\
            "while killing a child process: "+str(e))
Ejemplo n.º 20
0
            # if first chars match what we want ('!user:'******'!user:'******'!user:'******'\n '):
                    # and ignore comments (lines starting with #)
                    if line.strip('\n ')[0] != '#':
                        # if we get here, then we have an IP so we need to  check that
                        # user is not empty.. log err if it is and complain.
                        if not current_username:
                            deploy_logging.logerror(
                                'Critical Error: No username specified for remote host group!'
                            )
                            file_of_ips.close()
                            return False

                        # add (username, remote_host) pair while casting remote_host to lowercase in case
                        # it's a hostname for easy comparison if needed everywhere
                        users_ip_tuple_list.append(
                            (current_username, line.rstrip('\n ').lower()))
                        # set flag that we have at least one ip
                        have_one_ip = True

        # return true only if we have at least ONE ip that we added to the list
        # and not just a bunch of users
        if have_one_ip:
            # lets make the list a set, which is a cheap way of getting rid of