def _start_cluster(self, master, n, profile_dir):
    """Start an IPython cluster with *n* engines on the master node.

    Removes any stale connection files, launches ``ipcluster`` as a
    daemon, waits (bounded) for the controller's JSON connector file to
    appear, then copies it into the local IPCLUSTER_CACHE directory.

    Returns the local path of the cached JSON connector file.

    Raises ValueError if the connector file does not appear within ~30s.
    """
    log.info("Starting IPython cluster with %i engines" % n)
    # cleanup existing connection files, to prevent their use
    master.ssh.execute("rm -f %s/security/*.json" % profile_dir)
    master.ssh.execute("ipcluster start --n=%i --delay=5 --daemonize" % n,
                       source_profile=True)
    # wait for JSON file to exist (json_file: avoid shadowing the json module)
    json_file = '%s/security/ipcontroller-client.json' % profile_dir
    log.info("Waiting for JSON connector file...",
             extra=dict(__nonewline__=True))
    s = spinner.Spinner()
    s.start()
    try:
        # bounded wait so a failed controller start cannot hang us forever
        for _ in range(30):
            if master.ssh.isfile(json_file):
                break
            time.sleep(1)
        else:
            raise ValueError(
                "Timeout while waiting for the cluster json file: " +
                json_file)
    finally:
        # always stop the spinner, even on timeout or ssh errors
        s.stop()
    # retrieve JSON connection info
    if not os.path.isdir(IPCLUSTER_CACHE):
        log.info("Creating IPCluster cache directory: %s" % IPCLUSTER_CACHE)
        os.makedirs(IPCLUSTER_CACHE)
    local_json = os.path.join(IPCLUSTER_CACHE,
                              '%s-%s.json' % (master.parent_cluster,
                                              master.region.name))
    log.info("Saving JSON connector file to '%s'" %
             os.path.abspath(local_json))
    master.ssh.get(json_file, local_json)
    return local_json
def _start_cluster(self, master, profile_dir):
    """Start the IPython controller and engines on the master node.

    Uses all but one of the master's processors for engines (minimum 1),
    waits (bounded) for the controller's JSON connector file, caches a
    local copy, and authorizes the firewall ports listed in the connector
    file so a local client can connect to the controller.

    Returns a ``(local_json_path, n_engines)`` tuple.

    Raises ValueError if the connector file does not appear within ~30s.
    """
    # leave one processor free for the controller itself
    n_engines = max(1, master.num_processors - 1)
    log.info("Starting the IPython controller and %i engines on master"
             % n_engines)
    # cleanup existing connection files, to prevent their use
    master.ssh.execute("rm -f %s/security/*.json" % profile_dir)
    master.ssh.execute("ipcluster start --n=%i --delay=5 --daemonize"
                       % n_engines)
    # wait for JSON file to exist
    json_filename = '%s/security/ipcontroller-client.json' % profile_dir
    log.info("Waiting for JSON connector file...",
             extra=dict(__nonewline__=True))
    s = spinner.Spinner()
    s.start()
    try:
        # bounded wait: ~30s, then give up with an explicit error
        for _ in range(30):
            if master.ssh.isfile(json_filename):
                break
            time.sleep(1)
        else:
            # fixed typo in original message ("cluser" -> "cluster")
            raise ValueError(
                "Timeout while waiting for the cluster json file: " +
                json_filename)
    finally:
        # always stop the spinner, even on timeout or ssh errors
        s.stop()
    # Retrieve JSON connection info to make it possible to connect a local
    # client to the cluster controller
    if not os.path.isdir(IPCLUSTER_CACHE):
        log.info("Creating IPCluster cache directory: %s" % IPCLUSTER_CACHE)
        os.makedirs(IPCLUSTER_CACHE)
    local_json = os.path.join(IPCLUSTER_CACHE,
                              '%s-%s.json' % (master.parent_cluster,
                                              master.region.name))
    master.ssh.get(json_filename, local_json)
    # Configure security group for remote access; use `with` so the file
    # handle is not leaked
    with open(local_json, 'rb') as json_fp:
        connection_params = json.load(json_fp)
    # For IPython version 0.14+ the list of channel ports is explicitly
    # provided in the connector file
    channel_authorized = False
    for channel in CHANNEL_NAMES:
        port = connection_params.get(channel)
        if port is not None:
            self._authorize_port(master, port, channel)
            channel_authorized = True
    # For versions prior to 0.14, the channel port numbers are not given in
    # the connector file: let's open everything in high port numbers
    if not channel_authorized:
        self._authorize_port(master, (1000, 65535), "IPython controller")
    return local_json, n_engines
def get_spinner(msg):
    """Log *msg* (without a trailing newline), start a spinner, return it.

    Useful for long running processes::

        s = get_spinner("Long running process running...")
        try:
            (do something)
        finally:
            s.stop()
    """
    progress = spinner.Spinner()
    log.info(msg, extra={'__nonewline__': True})
    progress.start()
    return progress
def deploy_img(img_path, vol_size, arch, region, src_ami, dev=None,
               kernel_id=None, ramdisk_id=None, platform=None,
               remove_old=False, **cluster_kwargs):
    """
    Deploy a filesystem image as a new AMI in a given region.

    This method creates a 1-node host cluster in the desired `region`,
    copies the filesystem image to the cluster, creates and attaches a new
    EBS volume with size `vol_size`, installs the image onto the new EBS
    volume, creates a snapshot of the resulting volume and registers a new
    AMI in the `region`.

    Returns the registered image, or None when an image with the target
    name already exists and `remove_old` is False.
    """
    cfg = config.StarClusterConfig().load()
    ec2 = cfg.get_easy_ec2()
    ec2.connect_to_region(region)
    src_img = ec2.get_image(src_ami)
    # fall back to the source AMI's kernel/ramdisk when not given
    kernel_id = kernel_id or src_img.kernel_id
    ramdisk_id = ramdisk_id or src_img.ramdisk_id
    itypemap = dict(i386='m1.small', x86_64='m1.large')
    dev = dev or dict(i386='/dev/sdj', x86_64='/dev/sdz')[arch]
    cm = cluster.ClusterManager(cfg, ec2)
    try:
        log.info("Checking for existing imghost cluster")
        cl = cm.get_cluster('imghost')
        log.info("Using existing imghost cluster")
    except exception.ClusterDoesNotExist:
        log.info("No imghost cluster found, creating...")
        default = cm.get_default_cluster_template()
        cl = cm.get_cluster_template(default, 'imghost')
        keys = ec2.get_keypairs()
        key = None
        for k in keys:
            # `in` instead of the Python-2-only dict.has_key
            if k.name in cfg.keys:
                key = cfg.keys.get(k.name)
                key['keyname'] = k.name
                break
        if key:
            cluster_kwargs.update(key)
        hostitype = itypemap[src_img.architecture]
        cluster_kwargs.update(
            dict(cluster_size=1, cluster_shell="bash", node_image_id=src_ami,
                 node_instance_type=hostitype))
        cl.update(cluster_kwargs)
        cl.start(create_only=True, validate=True)
    cl.wait_for_cluster()
    host = cl.master_node
    log.info("Copying %s to /mnt on master..." % img_path)
    host.ssh.put(img_path, '/mnt/')
    bname = os.path.basename(img_path)
    if bname.endswith('.tar.gz'):
        log.info("Extracting image(s)...")
        host.ssh.execute('cd /mnt && tar xvzf %s' % bname)
        bname = bname.replace('.tar.gz', '')
    if not host.ssh.isfile('/mnt/%s' % bname):
        raise exception.BaseException("/mnt/%s does not exist" % bname)
    log.info("Creating EBS volume")
    vol = ec2.create_volume(vol_size, host.placement)
    log.info("Attaching EBS volume %s to master as %s" % (vol.id, dev))
    vol.attach(host.id, dev)
    log.info("Waiting for drive to attach...")
    s = spinner.Spinner()
    s.start()
    # the /dev/sdX device shows up as /dev/xvdX on the instance
    realdev = '/dev/xvd%s' % dev[-1]
    try:
        while not host.ssh.path_exists(realdev):
            time.sleep(10)
    finally:
        # always stop the spinner, even if the ssh check fails
        s.stop()
    log.info("Installing image on volume %s ..." % vol.id)
    host.ssh.execute("cat /mnt/%s > %s" % (bname, realdev))
    log.info("Checking filesystem...")
    host.ssh.execute("e2fsck -pf %s" % realdev)
    log.info("Resizing filesystem to fit EBS volume...")
    host.ssh.execute("resize2fs %s" % realdev)
    vol.detach()
    while vol.update() != 'available':
        time.sleep(10)
    xarch = arch
    if xarch == 'i386':
        xarch = 'x86'
    snapdesc = 'StarCluster %s %s EBS AMI Snapshot' % (platform, xarch)
    snap = ec2.create_snapshot(vol, description=snapdesc,
                               wait_for_snapshot=True)
    vol.delete()
    bmap = ec2.create_root_block_device_map(snap.id,
                                            add_ephemeral_drives=True)
    # str methods instead of the deprecated string.lower/string.capitalize
    imgname = platform.replace(' ', '-').lower()
    imgname = 'starcluster-base-%s-%s' % (imgname, xarch)
    imgdesc = 'StarCluster Base %s %s (%s)' % (platform, xarch,
                                               region.capitalize())
    oldimg = ec2.get_images(filters=dict(name=imgname))
    if oldimg:
        oldimg = oldimg[0]
        oldsnap = ec2.get_snapshot(
            oldimg.block_device_mapping['/dev/sda1'].snapshot_id)
        if remove_old:
            log.info("Deregistering old AMI: %s" % oldimg.id)
            oldimg.deregister()
            log.info("Deleting old snapshot: %s" % oldsnap.id)
            oldsnap.delete()
        else:
            # AMI names must be unique: bail out and tell the user how to
            # finish the registration manually
            log.info("Existing image %s already has name '%s'" %
                     (oldimg.id, imgname))
            log.info("Please remove old image %s and snapshot %s" %
                     (oldimg.id, oldsnap.id))
            log.info("Then register new AMI with snapshot %s and name '%s'" %
                     (snap.id, imgname))
            return
    img = ec2.register_image(name=imgname, description=imgdesc,
                             architecture=arch, kernel_id=kernel_id,
                             ramdisk_id=ramdisk_id,
                             root_device_name='/dev/sda1',
                             block_device_map=bmap)
    return img