Example 1
 def bug_found(self):
     """
     Builds a crash report when TethysCluster encounters an unhandled
     exception. The report includes system info, Python version, dependency
     versions, and a full debug log and stack trace of the crash.
     """
     dashes = '-' * 10
     header = dashes + ' %s ' + dashes + '\n'
     crashfile = open(static.CRASH_FILE, 'w')
     argv = sys.argv[:]
     argv[0] = os.path.basename(argv[0])
     argv = ' '.join(argv)
     crashfile.write(header % "SYSTEM INFO")
     crashfile.write("TethysCluster: %s\n" % __version__)
     crashfile.write("Python: %s\n" % sys.version.replace('\n', ' '))
     crashfile.write("Platform: %s\n" % platform.platform())
     dependencies = ['boto', 'paramiko', 'Crypto']
     for dep in dependencies:
         self.__write_module_version(dep, crashfile)
     crashfile.write("\n" + header % "CRASH DETAILS")
     crashfile.write('Command: %s\n\n' % argv)
     for line in logger.get_session_log():
         crashfile.write(line)
     crashfile.close()
     print
     log.error("Oops! Looks like you've found a bug in TethysCluster")
     log.error("Crash report written to: %s" % static.CRASH_FILE)
     # log.error("Please remove any sensitive data from the crash report")
     # log.error("and submit it to [email protected]")
     sys.exit(1)
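
As a quick check of the banner formatting above: the header template wraps
each section title in ten dashes per side. This sketch simply re-evaluates
the same expressions from bug_found():

    # Re-evaluating the header template used by bug_found() above (Python 2):
    dashes = '-' * 10
    header = dashes + ' %s ' + dashes + '\n'
    print header % "SYSTEM INFO",   # ---------- SYSTEM INFO ----------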
Example 2
 def execute(self, args):
     if not args:
         cls = [
             c.cluster_tag for c in self.cm.get_clusters(load_plugins=False,
                                                         load_receipt=False)
         ]
         msg = "please specify a cluster"
         if cls:
             opts = ', '.join(cls)
             msg = " ".join([msg, '(options:', opts, ')'])
         self.parser.error(msg)
     for cluster_name in args:
         try:
             cl = self.cm.get_cluster(cluster_name)
         except exception.ClusterDoesNotExist:
             raise
         except Exception, e:
             log.debug("Failed to load cluster settings!", exc_info=True)
             log.error("Failed to load cluster settings!")
             if self.opts.force:
                 log.warn("Ignoring cluster settings due to --force option")
                 cl = self.cm.get_cluster(cluster_name,
                                          load_receipt=False,
                                          require_keys=False)
             else:
                 if not isinstance(e, exception.IncompatibleCluster):
                     log.error("Use -f to forcefully stop the cluster")
                 raise
         is_stoppable = cl.is_stoppable()
         if not is_stoppable:
             has_stoppable_nodes = cl.has_stoppable_nodes()
             if not self.opts.terminate_unstoppable and has_stoppable_nodes:
                 raise exception.BaseException(
                     "Cluster '%s' contains 'stoppable' and 'unstoppable' "
                     "nodes. Your options are:\n\n"
                     "1. Use the --terminate-unstoppable option to "
                     "stop all 'stoppable' nodes and terminate all "
                     "'unstoppable' nodes\n\n"
                     "2. Use the 'terminate' command to destroy the "
                     "cluster.\n\nPass --help for more info." %
                     cluster_name)
             if not has_stoppable_nodes:
                 raise exception.BaseException(
                     "Cluster '%s' does not contain any 'stoppable' nodes "
                     "and can only be terminated. Please use the "
                     "'terminate' command instead to destroy the cluster."
                     "\n\nPass --help for more info" % cluster_name)
         if not self.opts.confirm:
             resp = raw_input("Stop cluster %s (y/n)? " % cluster_name)
             if resp not in ['y', 'Y', 'yes']:
                 log.info("Aborting...")
                 continue
         cl.stop_cluster(self.opts.terminate_unstoppable,
                         force=self.opts.force)
         log.warn("All non-spot, EBS-backed nodes are now in a "
                  "'stopped' state")
         log.warn("You can restart this cluster by passing -x "
                  "to the 'start' command")
         log.warn("Use the 'terminate' command to *completely* "
                  "terminate this cluster")
Example 3
    def parse_subcommands(self, gparser=None):
        """
        Parse global arguments, find subcommand from list of subcommand
        objects, parse local subcommand arguments and return a tuple of
        global options, selected command object, command options, and
        command arguments.

        Call execute() on the command object to run it. The command object
        has members 'gopts' and 'opts' set for global and command options
        respectively; you don't need to pass them to execute(), but you can
        if you want to.
        """
        gparser = gparser or self.gparser
        # parse global options.
        gopts, args = gparser.parse_args()
        if not args:
            gparser.print_help()
            raise SystemExit("\nError: you must specify an action.")
        # set debug level if specified
        if gopts.DEBUG:
            console.setLevel(logger.DEBUG)
            config.DEBUG_CONFIG = True
        # load TethysClusterConfig into global options
        try:
            cfg = config.TethysClusterConfig(gopts.CONFIG)
            cfg.load()
        except exception.ConfigNotFound, e:
            log.error(e.msg)
            e.display_options()
            sys.exit(1)
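
The docstring's intended call pattern, mirroring how main() in Example 20
consumes the returned tuple (a sketch; 'cli' is assumed to be a
TethysClusterCLI instance):

    # Sketch of the documented call pattern.
    gopts, sc, opts, args = cli.parse_subcommands()
    sc.execute(args)   # 'gopts' and 'opts' are already set on the command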
Example 4
 def _eval_add_node(self):
     """
     This function inspects the current state of the SGE queue and decides
     whether or not to add nodes to the cluster. Returns the number of nodes
     to add.
     """
     num_nodes = len(self._cluster.nodes)
     if num_nodes >= self.max_nodes:
         log.info("Not adding nodes: already at or above maximum (%d)" %
                  self.max_nodes)
         return
     queued_jobs = self.stat.get_queued_jobs()
     if not queued_jobs and num_nodes >= self.min_nodes:
         log.info("Not adding nodes: at or above minimum nodes "
                  "and no queued jobs...")
         return
     total_slots = self.stat.count_total_slots()
     if not self.has_cluster_stabilized() and total_slots > 0:
         return
     running_jobs = self.stat.get_running_jobs()
     used_slots = sum([int(j['slots']) for j in running_jobs])
     qw_slots = sum([int(j['slots']) for j in queued_jobs])
     slots_per_host = self.stat.slots_per_host()
     avail_slots = total_slots - used_slots
     need_to_add = 0
     if num_nodes < self.min_nodes:
         log.info("Adding node: below minimum (%d)" % self.min_nodes)
         need_to_add = self.min_nodes - num_nodes
     elif total_slots == 0:
         # no slots, add one now
         need_to_add = 1
     elif qw_slots > avail_slots:
         log.info("Queued jobs need more slots (%d) than available (%d)" %
                  (qw_slots, avail_slots))
         oldest_job_dt = self.stat.oldest_queued_job_age()
         now = self.get_remote_time()
         age_delta = now - oldest_job_dt
         if age_delta.seconds > self.longest_allowed_queue_time:
             log.info("A job has been waiting for %d seconds "
                      "longer than max: %d" %
                      (age_delta.seconds, self.longest_allowed_queue_time))
             if slots_per_host != 0:
                 need_to_add = qw_slots / slots_per_host
             else:
                 need_to_add = 1
         else:
             log.info("No queued jobs older than %d seconds" %
                      self.longest_allowed_queue_time)
     max_add = self.max_nodes - len(self._cluster.running_nodes)
     need_to_add = min(self.add_nodes_per_iteration, need_to_add, max_add)
     if need_to_add > 0:
         log.warn("Adding %d nodes at %s" %
                  (need_to_add, str(utils.get_utc_now())))
         try:
             self._cluster.add_nodes(need_to_add)
             self.__last_cluster_mod_time = utils.get_utc_now()
             log.info("Done adding nodes at %s" %
                      str(self.__last_cluster_mod_time))
         except Exception:
             log.error("Failed to add new host", exc_info=True)
Example 5
 def _eval_remove_node(self):
     """
     This function uses the SGE stats to decide whether or not to
     remove a node from the cluster.
     """
     qlen = len(self.stat.get_queued_jobs())
     if qlen != 0:
         return
     if not self.has_cluster_stabilized():
         return
     num_nodes = len(self._cluster.nodes)
     if num_nodes <= self.min_nodes:
         log.info("Not removing nodes: already at or below minimum (%d)"
                  % self.min_nodes)
         return
     max_remove = num_nodes - self.min_nodes
     log.info("Looking for nodes to remove...")
     remove_nodes = self._find_nodes_for_removal(max_remove=max_remove)
     if not remove_nodes:
         log.info("No nodes can be removed at this time")
     for node in remove_nodes:
         if node.update() != "running":
             log.error("Node %s is already dead - not removing" %
                       node.alias)
             continue
         log.warn("Removing %s: %s (%s)" %
                  (node.alias, node.id, node.dns_name))
         try:
             self._cluster.remove_node(node)
             self.__last_cluster_mod_time = utils.get_utc_now()
         except Exception:
             log.error("Failed to remove node %s" % node.alias,
                       exc_info=True)
Example 6
 def _completer(self):
     try:
         rimages = self.ec2.registered_images
         completion_list = [i.id for i in rimages]
         return completion.ListCompleter(completion_list)
     except Exception, e:
         log.error('something went wrong fix me: %s' % e)
Example 7
    def execute(self, command, silent=True, only_printable=False,
                ignore_exit_status=False, log_output=True, detach=False,
                source_profile=True, raise_on_failure=True):
        """
        Execute a remote command and return stdout/stderr

        NOTE: this function blocks until the process finishes

        kwargs:
        silent - don't print the command's output to the console
        only_printable - filter the command's output to allow only printable
                         characters
        ignore_exit_status - don't warn about non-zero exit status
        log_output - log all remote output to the debug file
        detach - detach the remote process so that it continues to run even
                 after the SSH connection closes (does NOT return output or
                 check for non-zero exit status if detach=True)
        source_profile - if True prefix the command with "source /etc/profile"
        raise_on_failure - raise exception.SSHError if command fails
        Returns a list of the command's output lines
        """
        channel = self.transport.open_session()
        if detach:
            command = "nohup %s &" % command
            if source_profile:
                command = "source /etc/profile && %s" % command
            channel.exec_command(command)
            channel.close()
            self.__last_status = None
            return
        if source_profile:
            command = "source /etc/profile && %s" % command
        log.debug("executing remote command: %s" % command)
        channel.exec_command(command)
        output = self._get_output(channel, silent=silent,
                                  only_printable=only_printable)
        exit_status = channel.recv_exit_status()
        self.__last_status = exit_status
        out_str = '\n'.join(output)
        if exit_status != 0:
            msg = "remote command '%s' failed with status %d"
            msg %= (command, exit_status)
            if log_output:
                msg += ":\n%s" % out_str
            else:
                msg += " (no output log requested)"
            if not ignore_exit_status:
                if raise_on_failure:
                    raise exception.RemoteCommandFailed(
                        msg, command, exit_status, out_str)
                else:
                    log.error(msg)
            else:
                log.debug("(ignored) " + msg)
        else:
            if log_output:
                log.debug("output of '%s':\n%s" % (command, out_str))
            else:
                log.debug("output of '%s' has been hidden" % command)
        return output
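
A minimal usage sketch of the kwargs documented above; 'conn' stands in for
any object exposing this execute() method, and the commands are illustrative:

    # 'conn' is an assumed connection object exposing execute() as above.
    lines = conn.execute('uptime')                  # blocks; list of lines
    conn.execute('./long_job.sh', detach=True)      # returns None immediately
    conn.execute('test -d /opt/sge6',
                 ignore_exit_status=True)           # non-zero exit only logged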
Example 8
 def _completer(self):
     try:
         cm = self.cm
         clusters = cm.get_cluster_security_groups()
         completion_list = [cm.get_tag_from_sg(sg.name) for sg in clusters]
         return completion.ListCompleter(completion_list)
     except Exception, e:
         log.error('something went wrong fix me: %s' % e)
Example 9
 def _load_dsa_key(self, private_key, private_key_pass=None):
     private_key_file = os.path.expanduser(private_key)
     try:
         dsa_key = get_dsa_key(key_location=private_key_file,
                               passphrase=private_key_pass)
         log.info("Using private key %s (DSA)" % private_key)
         return dsa_key
     except (paramiko.SSHException, exception.SSHError):
         log.error('invalid dsa key or passphrase specified')
Example 10
 def _completer(self):
     try:
         instances = self.ec2.get_all_instances()
         completion_list = [i.id for i in instances]
         if self.show_dns_names:
             completion_list.extend([i.dns_name for i in instances])
         return completion.ListCompleter(completion_list)
     except Exception, e:
         log.error('something went wrong fix me: %s' % e)
Example 11
 def _completer(self):
     try:
         rimages = self.ec2.registered_images
         completion_list = [
             i.id for i in rimages if i.root_device_type == "instance-store"
         ]
         return completion.ListCompleter(completion_list)
     except Exception, e:
         log.error('something went wrong fix me: %s' % e)
Example 12
 def run(self, nodes, master, user, user_shell, volumes):
     if not master.ssh.isdir(self.SGE_FRESH):
         log.error("SGE is not installed on this AMI, skipping...")
         return
     log.info("Configuring SGE...")
     self._nodes = nodes
     self._master = master
     self._user = user
     self._user_shell = user_shell
     self._volumes = volumes
     self._setup_sge()
Example 13
 def graph(self, yaxis, title):
     if self.records is None:
         log.error("ERROR: File hasn't been read() yet.")
         return -1
     fig = plt.figure()
     ax = fig.add_subplot(111)
     ax.plot(self.records.dt, yaxis)
     ax.grid(True)
     fig.autofmt_xdate()
     filename = os.path.join(self.pngpath, title + '.png')
     plt.savefig(filename, dpi=100)
     log.debug("saved graph %s." % title)
     plt.close(fig)  # close the figure when it's done
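
A hypothetical call sequence for the method above: the guard implies a read()
method must populate self.records first, and the constructor arguments follow
Example 26. The record field name here is purely illustrative:

    # Hypothetical usage; 'queued_jobs' is an illustrative field name.
    viz = visualizer.SGEVisualizer(stats_file, plot_output_dir)
    viz.read()                                         # populate viz.records
    viz.graph(viz.records.queued_jobs, 'queued_jobs')  # -> queued_jobs.png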
Example 14
 def setup_tmuxcc(self,
                  client=None,
                  nodes=None,
                  user='******',
                  layout='tiled'):
     log.info("Creating TMUX Control Center for user '%s'" % user)
     client = client or self._master
     nodes = nodes or self._nodes
     envname = self._envname
     orig_user = client.ssh._username
     if orig_user != user:
         client.ssh.connect(username=user)
     chunks = [chunk for chunk in utils.chunk_list(nodes, items=8)]
     num_windows = len(chunks) + len(nodes)
     if len(nodes) == 0:
         log.error("Cluster has no nodes, exiting...")
         return
     self.create_session(client, envname, num_windows=num_windows)
     if len(nodes) == 1 and client == nodes[0]:
         return
     if not self._supports_layout(client, envname, layout, window=0):
         log.warn("failed to select layout '%s', defaulting to "
                  "'main-vertical'" % layout)
         layout = "main-vertical"
         status = self._select_layout(client, envname, layout, window=0)
         if status != 0:
             raise exception.PluginError("failed to set a layout")
     for i, chunk in enumerate(chunks):
         self._rename_window(client, envname, i, 'all%s' % i)
         for j, node in enumerate(chunk):
             if j != 0:
                 self._split_window(client, envname, i)
             self._select_layout(client, envname, window=i, layout=layout)
             if node.alias != client.alias:
                 self._send_keys(client,
                                 envname,
                                 cmd='ssh %s' % node.alias,
                                 window="%d.%d" % (i, j))
     for i, node in enumerate(nodes):
         window = i + len(chunks)
         self._rename_window(client, envname, window, node.alias)
         if node.alias != client.alias:
             self._send_keys(client,
                             envname,
                             cmd='ssh %s' % node.alias,
                             window=window)
     self._select_window(client, envname, window=0)
     self._select_pane(client, envname, window=0, pane=0)
     if orig_user != user:
         client.ssh.connect(username=orig_user)
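
The window count above follows from the chunking: one 'allN' window per group
of up to eight nodes, plus one window per node. Illustrative arithmetic,
assuming utils.chunk_list yields successive slices of at most 'items' elements:

    # Illustrative: 20 nodes -> 3 'all' windows (8+8+4 panes) plus 20
    # per-node windows = 23. The slicing stands in for utils.chunk_list.
    nodes = range(20)
    chunks = [nodes[i:i + 8] for i in range(0, len(nodes), 8)]
    num_windows = len(chunks) + len(nodes)    # 3 + 20 = 23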
Example 15
class TethysCluster(object):
    @classmethod
    def add_method(cls, method_name, subcmd):
        def method(self, *args, **kwargs):
            sargs = [arg for arg in args]
            subcmd.opts, subargs = subcmd.parser.parse_args(sargs)
            kwargs['confirm'] = True
            subcmd.opts.__dict__.update(kwargs)
            subcmd.execute(subargs)

        setattr(cls, method_name, method)

    def __init__(self):
        tethyscluster_cli = cli.TethysClusterCLI()
        gparser = tethyscluster_cli.gparser
        subcmds_map = tethyscluster_cli.subcmds_map

        gopts, args = gparser.parse_args()

        # set debug level if specified
        if gopts.DEBUG:
            console.setLevel(logger.DEBUG)
            config.DEBUG_CONFIG = True
        # load TethysClusterConfig into global options
        try:
            cfg = config.TethysClusterConfig(gopts.CONFIG)
            cfg.load()
        except exception.ConfigNotFound, e:
            log.error(e.msg)
            e.display_options()
            sys.exit(1)
        except exception.ConfigError, e:
            log.error(e.msg)
            sys.exit(1)
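
A sketch of how add_method() grafts a subcommand onto the facade; the
subcommand object and names here are illustrative, not from the source:

    # Illustrative wiring: 'start_cmd' stands for a subcommand object,
    # e.g. one taken from subcmds_map.
    TethysCluster.add_method('start', start_cmd)
    tc = TethysCluster()
    tc.start('mycluster')   # parses ['mycluster'], forces confirm=True, executes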
Example 16
 def terminate(self, cluster_name, force=False):
     if force:
         log.warn("Ignoring cluster settings due to --force option")
     try:
         cl = self.cm.get_cluster(cluster_name,
                                  load_receipt=not force,
                                  require_keys=not force)
         if force:
             self._terminate_manually(cl)
         else:
             self._terminate_cluster(cl)
     except exception.ClusterDoesNotExist:
         raise
     except Exception:
         log.error("Failed to terminate cluster!", exc_info=True)
         if not force:
             log.error("Use -f to forcefully terminate the cluster")
         raise
Example 17
 def _completer(self):
     try:
         cm = self.cm
         clusters = cm.get_cluster_security_groups()
         compl_list = [cm.get_tag_from_sg(sg.name) for sg in clusters]
         max_num_nodes = 0
         for scluster in clusters:
             num_instances = len(scluster.instances())
             if num_instances > max_num_nodes:
                 max_num_nodes = num_instances
         compl_list.extend(['master'])
         # use the largest cluster size found above, not the last loop value
         compl_list.extend([str(i) for i in range(0, max_num_nodes)])
         compl_list.extend(
             ["node%03d" % i for i in range(1, max_num_nodes)])
         return completion.ListCompleter(compl_list)
     except Exception, e:
         print e
         log.error('something went wrong fix me: %s' % e)
Example 18
    def __init__(self):
        tethyscluster_cli = cli.TethysClusterCLI()
        gparser = tethyscluster_cli.gparser
        subcmds_map = tethyscluster_cli.subcmds_map

        gopts, args = gparser.parse_args()

        # set debug level if specified
        if gopts.DEBUG:
            console.setLevel(logger.DEBUG)
            config.DEBUG_CONFIG = True
        # load TethysClusterConfig into global options
        try:
            cfg = config.TethysClusterConfig(gopts.CONFIG)
            cfg.load()
        except exception.ConfigNotFound, e:
            log.error(e.msg)
            e.display_options()
            sys.exit(1)
Example 19
 def __init__(self,
              enable_notebook=False,
              notebook_passwd=None,
              notebook_directory=None,
              packer=None,
              log_level='INFO'):
     super(IPCluster, self).__init__()
     if isinstance(enable_notebook, basestring):
         self.enable_notebook = enable_notebook.lower().strip() == 'true'
     else:
         self.enable_notebook = enable_notebook
     self.notebook_passwd = notebook_passwd or utils.generate_passwd(16)
     self.notebook_directory = notebook_directory
     self.log_level = log_level
     if packer not in (None, 'json', 'pickle', 'msgpack'):
         log.error("Unsupported packer: %s", packer)
         self.packer = None
     else:
         self.packer = packer
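
Because plugin settings arrive from the config file as strings, the
constructor coerces a string enable_notebook to a bool; a small sketch with
illustrative values (assumes the plugin class is importable):

    # Config values arrive as strings; 'True'/'true' coerce to a real bool.
    plug = IPCluster(enable_notebook='True', packer='msgpack')
    assert plug.enable_notebook is True
    bad = IPCluster(packer='yaml')   # unsupported: error logged, packer=None
    assert bad.packer is None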
Example 20
 def main(self):
     """
     TethysCluster main
     """
     # Handle Bash/ZSH completion if necessary
     self.handle_completion()
     # Show TethysCluster header
     self.print_header()
     # Parse subcommand options and args
     gopts, sc, opts, args = self.parse_subcommands()
     if args and args[0] == 'help':
         # make 'help' subcommand act like --help option
         sc.parser.print_help()
         sys.exit(0)
     # run the subcommand and handle exceptions
     try:
         sc.execute(args)
     except (EC2ResponseError, S3ResponseError, BotoServerError), e:
         log.error("%s: %s" % (e.error_code, e.error_message),
                   exc_info=True)
         sys.exit(1)
Example 21
 def _setup_ebs_volumes(self):
     """
     Mount EBS volumes, if specified in ~/.tethyscluster/config, to /home
     """
     # setup /etc/fstab on master to use block device if specified
     master = self._master
     devices = master.get_device_map()
     for vol in self._volumes:
         vol = self._volumes[vol]
         vol_id = vol.get("volume_id")
         mount_path = vol.get('mount_path')
         device = vol.get("device")
         volume_partition = vol.get('partition')
         if not (vol_id and device and mount_path):
             log.error("missing required settings for vol %s" % vol)
             continue
         if device not in devices and device.startswith('/dev/sd'):
             # check for "correct" device in unpatched kernels
             device = device.replace('/dev/sd', '/dev/xvd')
             if device not in devices:
                 log.warn("Cannot find device %s for volume %s" %
                          (device, vol_id))
                 log.warn("Not mounting %s on %s" % (vol_id, mount_path))
                 log.warn("This usually means there was a problem "
                          "attaching the EBS volume to the master node")
                 continue
         partitions = master.get_partition_map(device=device)
         if not volume_partition:
             if len(partitions) == 0:
                 volume_partition = device
             elif len(partitions) == 1:
                 volume_partition = partitions.popitem()[0]
             else:
                 log.error(
                     "volume has more than one partition, please specify "
                     "which partition to use (e.g. partition=0, "
                     "partition=1, etc.) in the volume's config")
                 continue
         elif volume_partition not in partitions:
             log.warn("Cannot find partition %s on volume %s" %
                      (volume_partition, vol_id))
             log.warn("Not mounting %s on %s" % (vol_id, mount_path))
             log.warn("This either means that the volume has not "
                      "been partitioned or that the partition "
                      "specified does not exist on the volume")
             continue
         log.info("Mounting EBS volume %s on %s..." % (vol_id, mount_path))
         mount_map = master.get_mount_map()
         if volume_partition in mount_map:
             path, fstype, options = mount_map.get(volume_partition)
             if path != mount_path:
                 log.error("Volume %s is mounted on %s, not on %s" %
                           (vol_id, path, mount_path))
             else:
                 log.info("Volume %s already mounted on %s...skipping" %
                          (vol_id, mount_path))
             continue
         master.mount_device(volume_partition, mount_path)
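
The /dev/sd-to-/dev/xvd fallback above accounts for kernels that expose
attached EBS devices under the newer Xen naming scheme; a quick check of the
rewrite:

    # Device name requested at attach time vs. what a renaming kernel shows:
    device = '/dev/sdz'
    device = device.replace('/dev/sd', '/dev/xvd')   # -> '/dev/xvdz'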
Example 22
class TethysClusterCLI(object):
    """
    TethysCluster Command Line Interface
    """
    def __init__(self):
        self._gparser = None
        self.subcmds_map = {}

    @property
    def gparser(self):
        if not self._gparser:
            self._gparser = self.create_global_parser()
        return self._gparser

    def print_header(self):
        print >> sys.stderr, __description__.replace('\n', '', 1)

    def parse_subcommands(self, gparser=None):
        """
        Parse global arguments, find subcommand from list of subcommand
        objects, parse local subcommand arguments and return a tuple of
        global options, selected command object, command options, and
        command arguments.

        Call execute() on the command object to run it. The command object
        has members 'gopts' and 'opts' set for global and command options
        respectively; you don't need to pass them to execute(), but you can
        if you want to.
        """
        gparser = gparser or self.gparser
        # parse global options.
        gopts, args = gparser.parse_args()
        if not args:
            gparser.print_help()
            raise SystemExit("\nError: you must specify an action.")
        # set debug level if specified
        if gopts.DEBUG:
            console.setLevel(logger.DEBUG)
            config.DEBUG_CONFIG = True
        # load TethysClusterConfig into global options
        try:
            cfg = config.TethysClusterConfig(gopts.CONFIG)
            cfg.load()
        except exception.ConfigNotFound, e:
            log.error(e.msg)
            e.display_options()
            sys.exit(1)
        except exception.ConfigError, e:
            log.error(e.msg)
            sys.exit(1)
Example 23
 def create_image(self, size=15):
     try:
         self.clean_private_data()
         if self.host.root_device_type == "ebs":
             return self._create_image_from_ebs(size)
         return self._create_image_from_instance_store(size)
     except:
         log.error("Error occurred while creating image")
         if self._snap:
             log.error("Removing generated snapshot '%s'" % self._snap)
             self._snap.delete()
         if self._vol:
             log.error("Removing generated volume '%s'" % (self._vol.id, ))
             self._vol.detach(force=True)
             self._vol.delete()
         raise
Example 24
 def execute(self, args):
     if not args:
         self.parser.error("no volumes specified. exiting...")
     for arg in args:
         volid = arg
         vol = self.ec2.get_volume(volid)
         if vol.status in ['attaching', 'in-use']:
             log.error("volume is currently in use. aborting...")
             return
         if vol.status == 'detaching':
             log.error("volume is currently detaching. "
                       "please wait a few moments and try again...")
             return
         if not self.opts.confirm:
             resp = raw_input("**PERMANENTLY** delete %s (y/n)? " % volid)
             if resp not in ['y', 'Y', 'yes']:
                 log.info("Aborting...")
                 return
         if vol.delete():
             log.info("Volume %s deleted successfully" % (vol.id, ))
         else:
             log.error("Error deleting volume %s" % (vol.id, ))
Example 25
 def _completer(self):
     try:
         completion_list = [v.id for v in self.ec2.get_volumes()]
         return completion.ListCompleter(completion_list)
     except Exception, e:
         log.error('something went wrong fix me: %s' % e)
Example 26
 def visualizer(self):
     if not self._visualizer:
         try:
             from tethyscluster.balancers.sge import visualizer
         except ImportError, e:
             log.error("Error importing visualizer:")
             log.error(str(e))
             log.error("check that matplotlib and numpy are installed and:")
             log.error("   $ python -c 'import matplotlib'")
             log.error("   $ python -c 'import numpy'")
             log.error("completes without error")
             raise exception.BaseException(
                 "Failed to load stats visualizer")
         self._visualizer = visualizer.SGEVisualizer(self.stats_file,
                                                     self.plot_output_dir)
Example 27
            if DEBUG_CONFIG:
                log.debug("config_args = %s" % config_args)
            if missing_args:
                raise exception.PluginError(
                    "Not enough settings provided for plugin %s (missing: %s)"
                    % (plugin_name, ', '.join(missing_args)))
            config_kwargs = {}
            for arg in kwargs:
                if arg in plugin:
                    config_kwargs[arg] = plugin.get(arg)
            if DEBUG_CONFIG:
                log.debug("config_kwargs = %s" % config_kwargs)
            try:
                plug_obj = klass(*config_args, **config_kwargs)
            except Exception as exc:
                log.error("Error occured:", exc_info=True)
                raise exception.PluginLoadError(
                    "Failed to load plugin %s with "
                    "the following error: %s - %s" %
                    (setup_class, exc.__class__.__name__, exc.message))
            if not hasattr(plug_obj, '__name__'):
                setattr(plug_obj, '__name__', plugin_name)
            self.plugins[name] = plug_obj

    def _load_cluster_plugins(self, store):
        cluster_section = store
        plugins = cluster_section.get('plugins')
        if not plugins or isinstance(plugins[0], clustersetup.ClusterSetup):
            return
        plugs = []
        for plugin in plugins:
Example 28
class CmdShell(CmdBase):
    """
    shell

    Load an interactive IPython shell configured for tethyscluster development

    The following objects are automatically available at the prompt:

        cfg - tethyscluster.config.TethysClusterConfig instance
        cm - tethyscluster.cluster.ClusterManager instance
        ec2 - tethyscluster.awsutils.EasyEC2 instance
        s3 - tethyscluster.awsutils.EasyS3 instance

    All TethysCluster modules are automatically imported in the IPython session
    along with all TethysCluster dependencies (e.g. boto, ssh, etc.)

    If the --ipcluster=CLUSTER (-p) is passed, the IPython session will be
    automatically be configured to connect to the remote CLUSTER using
    IPython's parallel interface (requires IPython 0.11+). In this mode you
    will have the following additional objects available at the prompt:

        ipcluster - tethyscluster.cluster.Cluster instance for the cluster
        ipclient - IPython.parallel.Client instance for the cluster
        ipview - IPython.parallel.client.view.DirectView for the cluster

    Here's an example of how to run a parallel map across all nodes in the
    cluster:

        [~]> ipclient.ids
        [0, 1, 2, 3]
        [~]> res = ipview.map_async(lambda x: x**30, range(8))
        [~]> print res.get()
        [0,
         1,
         1073741824,
         205891132094649L,
         1152921504606846976L,
         931322574615478515625L,
         221073919720733357899776L,
         22539340290692258087863249L]

    See IPython parallel docs for more details
    (http://ipython.org/ipython-doc/stable/parallel)
    """

    names = ['shell', 'sh']

    def _add_to_known_hosts(self, node):
        log.info("Configuring local known_hosts file")
        user_home = os.path.expanduser('~')
        khosts = os.path.join(user_home, '.ssh', 'known_hosts')
        if not os.path.isfile(khosts):
            log.warn("Unable to configure known_hosts: file does not exist")
            return
        contents = open(khosts).read()
        if node.dns_name not in contents:
            server_pkey = node.ssh.get_server_public_key()
            khostsf = open(khosts, 'a')
            if contents[-1] != '\n':
                khostsf.write('\n')
            name_entry = '%s,%s' % (node.dns_name, node.ip_address)
            khostsf.write(' '.join([
                name_entry,
                server_pkey.get_name(),
                base64.b64encode(str(server_pkey)), '\n'
            ]))
            khostsf.close()

    def addopts(self, parser):
        parser.add_option("-p",
                          "--ipcluster",
                          dest="ipcluster",
                          action="store",
                          type="string",
                          default=None,
                          metavar="CLUSTER",
                          help="configure a parallel "
                          "IPython session on CLUSTER")

    def execute(self, args):
        local_ns = dict(cfg=self.cfg,
                        ec2=self.ec2,
                        s3=self.s3,
                        cm=self.cm,
                        tethyscluster=tethyscluster,
                        log=log)
        if self.opts.ipcluster:
            log.info("Loading parallel IPython library")
            try:
                from IPython.parallel import Client
            except ImportError, e:
                self.parser.error(
                    "Error loading parallel IPython:"
                    "\n\n%s\n\n"
                    "NOTE: IPython 0.11+ must be installed to use -p" % e)
            tag = self.opts.ipcluster
            cl = self.cm.get_cluster(tag)
            region = cl.master_node.region.name
            ipcluster_dir = os.path.join(static.TETHYSCLUSTER_CFG_DIR,
                                         'ipcluster')
            local_json = os.path.join(ipcluster_dir,
                                      "%s-%s.json" % (tag, region))
            if not os.path.exists(local_json):
                user_home = cl.master_node.getpwnam(cl.cluster_user).pw_dir
                profile_dir = posixpath.join(user_home, '.ipython',
                                             'profile_default')
                json = posixpath.join(profile_dir, 'security',
                                      'ipcontroller-client.json')
                if cl.master_node.ssh.isfile(json):
                    log.info("Fetching connector file from cluster...")
                    if not os.path.exists(ipcluster_dir):
                        os.makedirs(ipcluster_dir)
                    cl.master_node.ssh.get(json, local_json)
                else:
                    self.parser.error(
                        "IPython json file %s does not exist locally or on "
                        "the cluster. Make sure the ipcluster plugin has "
                        "been executed and completed successfully." % json)
            key_location = cl.master_node.key_location
            self._add_to_known_hosts(cl.master_node)
            log.info("Loading parallel IPython client and view")
            rc = Client(local_json, sshkey=key_location)
            local_ns['Client'] = Client
            local_ns['ipcluster'] = cl
            local_ns['ipclient'] = rc
            local_ns['ipview'] = rc[:]
        modules = [(tethyscluster.__name__ + '.' + module, module)
                   for module in tethyscluster.__all__]
        modules += [('boto', 'boto'), ('paramiko', 'paramiko'),
                    ('workerpool', 'workerpool'), ('jinja2', 'jinja2'),
                    ('Crypto', 'Crypto'), ('iptools', 'iptools')]
        for fullname, modname in modules:
            log.info('Importing module %s' % modname)
            try:
                __import__(fullname)
                local_ns[modname] = sys.modules[fullname]
            except ImportError, e:
                log.error("Error loading module %s: %s" % (modname, e))
Example 29
            if tail == posixpath.curdir:
                return
        self.mkdir(path, mode)

    def mkdir(self, path, mode=0755, ignore_failure=False):
        """
        Make a new directory on the remote machine

        mode specifies unix permissions to apply to the new dir

        If ignore_failure is True, errors creating the directory are logged
        but not raised
        """
        try:
            return self.sftp.mkdir(path, mode)
        except IOError as e:
            log.error(str(e))
            if not ignore_failure:
                raise

    def get_remote_file_lines(self, remote_file, regex=None, matching=True):
        """
        Returns list of lines in a remote_file

        If regex is passed only lines that contain a pattern that matches
        regex will be returned

        If matching is set to False then only lines *not* containing a pattern
        that matches regex will be returned
        """
        f = self.remote_file(remote_file, 'r')
        flines = f.readlines()
Example 30
 # Parse subcommand options and args
 gopts, sc, opts, args = self.parse_subcommands()
 if args and args[0] == 'help':
     # make 'help' subcommand act like --help option
     sc.parser.print_help()
     sys.exit(0)
 # run the subcommand and handle exceptions
 try:
     sc.execute(args)
 except (EC2ResponseError, S3ResponseError, BotoServerError), e:
     log.error("%s: %s" % (e.error_code, e.error_message),
               exc_info=True)
     sys.exit(1)
 except socket.error, e:
     log.exception("Connection error:")
     log.error("Check your internet connection?")
     sys.exit(1)
 except exception.ThreadPoolException, e:
     log.error(e.format_excs())
     self.bug_found()
 except exception.ClusterDoesNotExist, e:
     cm = gopts.CONFIG.get_cluster_manager()
     cls = ''
     try:
         cls = cm.get_clusters(load_plugins=False, load_receipt=False)
     except:
         log.debug("Error fetching cluster list", exc_info=True)
     log.error(e.msg)
     if cls:
         taglist = ', '.join([c.cluster_tag for c in cls])
         active_clusters = "(active clusters: %s)" % taglist