def _setup_package_repos(self):
    """
    Register the Mesosphere apt repository for the current Ubuntu release.
    """
    super(MesosBox, self)._setup_package_repos()
    # Trust the Mesosphere package signing key
    sudo('apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF')
    codename = self.release().codename
    # NOTE(review): fmt() appears to interpolate {codename} from the caller's
    # locals -- confirm before renaming the variable above.
    sudo(
        fmt('echo "deb http://repos.mesosphere.io/ubuntu {codename} main" '
            '> /etc/apt/sources.list.d/mesosphere.list'))
def __setup_mesos(self):
    """
    Configure Mesos data/log directories and register the Mesos services.
    """
    # Remove the stock upstart jobs shipped by the package; our own jobs are
    # registered below via __register_upstart_jobs
    sudo("rm /etc/init/mesos-{master,slave}.conf")
    # Logs go to the ephemeral volume, work directory to the persistent one
    self._lazy_mkdir(log_dir, 'mesos', persistent=False)
    self._lazy_mkdir('/var/lib', 'mesos', persistent=True)
    self.__prepare_credentials()
    self.__register_upstart_jobs(mesos_services)
    self._post_install_mesos()
def _setup_package_repos( self ):
    """
    Register the Mesosphere apt repository matching this box's distribution
    and release.
    """
    super( ToilJenkinsSlave, self )._setup_package_repos( )
    sudo( "apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF" )
    # Ask the box itself which distribution and release it runs
    distro_id = run( "lsb_release -is | tr '[:upper:]' '[:lower:]'" )
    release_name = run( "lsb_release -cs" )
    sources_cmd = ( 'echo "deb http://repos.mesosphere.io/{} {} main"'
                    '| sudo tee /etc/apt/sources.list.d/mesosphere.list' )
    run( sources_cmd.format( distro_id, release_name ) )
def __install_apache_package(self, path): """ Download the given file from an Apache download mirror. Some mirrors may be down or serve crap, so we may need to retry this a couple of times. """ # TODO: run Fabric tasks with a different manager, so we don't need to catch SystemExit components = path.split('/') package, tarball = components[0], components[-1] tries = iter(xrange(3)) while True: try: mirror_url = self.__apache_s3_mirror_url(path) if run("curl -Ofs '%s'" % mirror_url, warn_only=True).failed: mirror_url = self.__apache_official_mirror_url(path) run("curl -Ofs '%s'" % mirror_url) try: sudo(fmt('mkdir -p {install_dir}/{package}')) sudo( fmt('tar -C {install_dir}/{package} ' '--strip-components=1 -xzf {tarball}')) return finally: run(fmt('rm {tarball}')) except SystemExit: if next(tries, None) is None: raise else: log.warn( "Could not download or extract the package, retrying ..." )
def __register_systemd_jobs( self, service_map ):
    """
    Install a start script and a systemd unit for every service in the map.

    :param service_map: presumably maps node type to a list of service
           descriptors (each with init_name, command, description and user
           attributes) -- TODO confirm against callers.
    """
    for node_type, services in service_map.iteritems( ):
        for service in services:
            # Write the service's start command to an executable, root-owned script
            service_command_path = '/usr/sbin/%s-start.sh' % service.init_name
            put( local_path=StringIO( "#!/bin/sh\n" + service.command ),
                 remote_path=service_command_path,
                 use_sudo=True )
            sudo( "chown root:root '%s'" % service_command_path )
            sudo( "chmod +x '%s'" % service_command_path )
            # NOTE(review): heredoc() appears to interpolate {service...},
            # {service_command_path} and {user} from enclosing scopes
            self._register_init_script(
                service.init_name,
                heredoc( """
                    [Unit]
                    Description={service.description}
                    Before=docker.service
                    Wants=docker.service
                    Requires=mesosbox.service
                    After=mesosbox.service
                    [Service]
                    Type=simple
                    ExecStart={service_command_path}
                    User={service.user}
                    Group={service.user}
                    Environment="USER={user}"
                    LimitNOFILE=8000:8192
                    UMask=022
                    [Install]
                    WantedBy=multi-user.target""" ) )
def _install_apache_package( self, remote_path, install_dir ): """ Download the given package from an Apache download mirror and extract it to a child directory of the directory at the given path. :param str remote_path: the URL path of the package on the Apache download server and its mirrors. :param str install_dir: The path to a local directory in which to create the directory containing the extracted package. """ # TODO: run Fabric tasks with a different manager, so we don't need to catch SystemExit components = remote_path.split( '/' ) package, tarball = components[ 0 ], components[ -1 ] # Some mirrors may be down or serve crap, so we may need to retry this a couple of times. tries = iter( xrange( 3 ) ) while True: try: mirror_url = self.__apache_s3_mirror_url( remote_path ) if run( "curl -Ofs '%s'" % mirror_url, warn_only=True ).failed: mirror_url = self.__apache_official_mirror_url( remote_path ) run( "curl -Ofs '%s'" % mirror_url ) try: sudo( fmt( 'mkdir -p {install_dir}/{package}' ) ) sudo( fmt( 'tar -C {install_dir}/{package} ' '--strip-components=1 -xzf {tarball}' ) ) return finally: run( fmt( 'rm {tarball}' ) ) except SystemExit: if next( tries, None ) is None: raise else: log.warn( "Could not download or extract the package, retrying ..." )
def __setup_application_user(self):
    """
    Create the application user account with a home directory, a matching
    group and a bash login shell.
    """
    # {user} is interpolated by fmt(); single command string instead of
    # fragment concatenation
    sudo(fmt('useradd --home /home/{user} --create-home --user-group '
             '--shell /bin/bash {user}'))
def __install_mesos_egg( self ):
    """
    Download and install the Mesos Python bindings egg.

    FIXME: this is the ubuntu 14.04 version. Wont work with other versions.
    """
    # Name the egg once so download, install and cleanup stay in sync
    egg = 'mesos-0.22.0-py2.7-linux-x86_64.egg'
    run( "wget http://downloads.mesosphere.io/master/ubuntu/14.04/" + egg )
    # we need a newer version of protobuf than comes default on ubuntu
    sudo( "pip install --upgrade protobuf", pty=False )
    sudo( "easy_install " + egg )
    # Bug fix: remove the downloaded egg so it doesn't litter the remote home
    # directory (the parameterized variant of this method already does this)
    run( 'rm ' + egg )
def _lazy_mkdir(self, parent, name, persistent=False):
    """
    Create parent/name now and record it for bind-mounting at boot time.

    _lazy_mkdir( '/foo', 'dir', True ) creates /foo/dir immediately and ensures that
    /mnt/persistent/foo/dir is created and bind-mounted into /foo/dir when the box starts,
    while persistent=False uses /mnt/ephemeral/foo/dir instead. Note that at start-up
    time, /mnt/persistent may be reassigned to /mnt/ephemeral if no EBS volume is mounted
    at /mnt/persistent. With persistent=None, an instance tag named 'persist_foo_dir' is
    looked up when the box starts and its value ('True'/'False') selects between the two
    behaviors above.
    """
    assert self.lazy_dirs is not None
    assert '/' not in name
    assert parent.startswith('/')
    # Neither mount root may be nested inside the parent, or vice versa
    for mount_root in (persistent_dir, ephemeral_dir):
        assert mount_root.startswith('/')
        assert not (mount_root.startswith(parent) or parent.startswith(mount_root))
    logical_path = '/'.join((parent, name))
    sudo('mkdir -p "%s"' % logical_path)
    # Remember the directory for the boot-time bind-mount machinery
    self.lazy_dirs.add((parent, name, persistent))
    return logical_path
def __install_toil(self):
    """
    Install Toil via pip and set up its system-wide work directory.
    """
    # FIXME: consider using a virtualenv for Toil like we do for s3am
    # Older versions of pip don't support the 'extra' mechanism used by Toil's setup.py
    pip('install --upgrade pip', use_sudo=True)
    pip(concat('install', self._toil_pip_args()), use_sudo=True)
    # persistent=None: persistence is decided at boot time via an instance tag
    self._lazy_mkdir('/var/lib', 'toil', persistent=None)
    sudo('echo "TOIL_WORKDIR=/var/lib/toil" >> /etc/environment')
def __setup_mesos( self ):
    """
    Configure Mesos data/log directories and register the Mesos services.
    """
    # Remove the stock upstart jobs shipped by the package; our own jobs are
    # registered below via __register_upstart_jobs
    sudo( "rm /etc/init/mesos-{master,slave}.conf" )
    # Logs go to the ephemeral volume, work directory to the persistent one
    self._lazy_mkdir( log_dir, 'mesos', persistent=False )
    self._lazy_mkdir( '/var/lib', 'mesos', persistent=True )
    self.__prepare_credentials( )
    self.__register_upstart_jobs( mesos_services )
    self._post_install_mesos( )
def __install_toil( self ):
    """
    Install Toil via pip and set up its system-wide work directory.
    """
    # FIXME: consider using a virtualenv for Toil like we do for s3am
    # Older versions of pip don't support the 'extra' mechanism used by Toil's setup.py
    pip( 'install --upgrade pip', use_sudo=True )
    pip( concat( 'install', self._toil_pip_args( ) ), use_sudo=True )
    # persistent=None: persistence is decided at boot time via an instance tag
    self._lazy_mkdir( '/var/lib', 'toil', persistent=None )
    sudo( 'echo "TOIL_WORKDIR=/var/lib/toil" >> /etc/environment' )
def __install_apache_package( self, path ): """ Download the given file from an Apache download mirror. Some mirrors may be down or serve crap, so we may need to retry this a couple of times. """ # TODO: run Fabric tasks with a different manager, so we don't need to catch SystemExit components = path.split( '/' ) package, tarball = components[ 0 ], components[ -1 ] tries = iter( xrange( 3 ) ) while True: try: mirror_url = self.__apache_s3_mirror_url( path ) if run( "curl -Ofs '%s'" % mirror_url, warn_only=True ).failed: mirror_url = self.__apache_official_mirror_url( path ) run( "curl -Ofs '%s'" % mirror_url ) try: sudo( fmt( 'mkdir -p {install_dir}/{package}' ) ) sudo( fmt( 'tar -C {install_dir}/{package} ' '--strip-components=1 -xzf {tarball}' ) ) return finally: run( fmt( 'rm {tarball}' ) ) except SystemExit: if next( tries, None ) is None: raise else: log.warn( "Could not download or extract the package, retrying ..." )
def _setup_build_user(self):
    """
    Grant the jenkins build user password-less sudo for package management.
    """
    super(UbuntuGenericJenkinsSlave, self)._setup_build_user()
    # Jenkins runs without a tty, so requiretty must be lifted for it
    sudo("echo 'Defaults:jenkins !requiretty' >> /etc/sudoers")
    sudoers_line = "echo 'jenkins ALL=(ALL) NOPASSWD: /usr/bin/%s' >> /etc/sudoers"
    for tool in ('apt-get', 'dpkg', 'gdebi'):
        sudo(sudoers_line % tool)
def __install_mesos_egg( self ):
    """
    Download and install the Mesos Python bindings egg matching this release.
    """
    # NOTE(review): fmt() appears to interpolate {version} and {egg} from the
    # caller's locals -- confirm before renaming these variables
    egg = 'mesos-' + self._mesos_egg_version( ) + '-py2.7-linux-x86_64.egg'
    version = self.release( ).version
    run( fmt( 'wget http://downloads.mesosphere.io/master/ubuntu/{version}/{egg}' ) )
    # We need a newer version of protobuf than what comes default on Ubuntu
    pip( 'install --upgrade protobuf', use_sudo=True )
    sudo( 'easy_install -a ' + egg )
    # Clean up the downloaded egg
    run( 'rm ' + egg )
def _setup_docker( self ):
    """
    Patch Docker's systemd unit so it starts after the mesosbox job.
    """
    # The docker and dockerbox init jobs depend on /mnt/persistent which is set up by the
    # mesosbox job. Adding a dependency of the docker job on mesosbox should satisfy that
    # dependency.
    super( ToilBoxSupport, self )._setup_docker( )
    with remote_sudo_popen( 'patch -d /lib/systemd/system' ) as patch:
        patch.write( self._docker_patch_heredoc( ) )
    # Make systemd pick up the modified unit file
    sudo( "systemctl daemon-reload" )
def _install_mesos_egg( self ):
    """
    Download and install the Mesos Python bindings egg matching this release.
    """
    # NOTE(review): fmt() appears to interpolate {version} and {egg} from the
    # caller's locals -- confirm before renaming these variables
    egg = 'mesos-' + self._mesos_egg_version( ) + '-py2.7-linux-x86_64.egg'
    version = self.release( ).version
    run( fmt( 'wget http://downloads.mesosphere.io/master/ubuntu/{version}/{egg}' ) )
    # We need a newer version of protobuf than what comes default on Ubuntu
    pip( 'install --upgrade protobuf', use_sudo=True )
    sudo( 'easy_install -a ' + egg )
    # Clean up the downloaded egg
    run( 'rm ' + egg )
def _setup_build_user( self ):
    """
    Extend the build user with password-less mount/umount privileges.

    Toil tests use an isolated loopback filesystem for TMPDIR (and therefore Toil's work
    directory) so that the tracking of left-over files isn't skewed by other activity on
    the ephemeral file system, like build logs or the creation of .pyc files.
    """
    super( ToilJenkinsSlave, self )._setup_build_user( )
    rule = "echo 'jenkins ALL=(ALL) NOPASSWD: /bin/%s' >> /etc/sudoers"
    for command in ( 'mount', 'umount' ):
        sudo( rule % command )
def _setup_build_user(self): super(ToilJenkinsSlave, self)._setup_build_user() # Allow mount and umount such that Toil tests can use an isolated loopback filesystem for # TMPDIR (and therefore Toil's work directory), thereby preventing the tracking of # left-over files from being skewed by other activities on the ephemeral file system, # like build logs, creation of .pyc files, etc. for prog in ('mount', 'umount'): sudo("echo 'jenkins ALL=(ALL) NOPASSWD: /bin/%s' >> /etc/sudoers" % prog)
def _setup_package_repos( self ):
    """
    Register the official Docker apt repository for this Ubuntu release.
    """
    # Installing from the Docker repo requires apt's https transport
    assert run( 'test -e /usr/lib/apt/methods/https', warn_only=True ).succeeded, \
        "Need HTTPS support in apt-get in order to install from the Docker repository"
    super( DockerBox, self )._setup_package_repos( )
    # Trust the Docker package signing key
    sudo( ' '.join( [ 'apt-key', 'adv',
                      '--keyserver', 'hkp://p80.pool.sks-keyservers.net:80',
                      '--recv-keys', '58118E89F3A912897C070ADBF76221572C52609D' ] ) )
    codename = self.release( ).codename
    # NOTE(review): fmt() appears to interpolate {codename} from the caller's locals
    sudo( fmt( 'echo deb https://apt.dockerproject.org/repo ubuntu-{codename} main '
               '> /etc/apt/sources.list.d/docker.list' ) )
def qconf_dict( opt, d=None, file_name='qconf.tmp' ):
    """
    Read or write a GridEngine configuration object via qconf.

    :param str opt: the qconf option to pass, e.g. '-sconf' (show) or a
           modify option -- presumably determines read vs. write semantics
           together with ``d``; TODO confirm against callers.
    :param dict d: if given, the key/value pairs to upload and apply via
           qconf; if None, the current configuration is returned as a dict.
    :param str file_name: temporary remote file used to feed qconf.
    """
    if d:
        # qconf can't read from stdin for some reason, neither -, /dev/stdin or /dev/fd/0 works
        s = '\n'.join( ' '.join( i ) for i in d.iteritems( ) ) + '\n'
        put( remote_path=file_name, local_path=StringIO( s ) )
        sudo( ' '.join( [ 'qconf', opt, file_name ] ) )
        run( ' '.join( [ 'rm', file_name ] ) )
    else:
        # NOTE(review): 'ws' and 'nl' are presumably module-level regexes for
        # whitespace and newlines -- each non-comment line is split into a
        # key/value pair on the first whitespace run
        return dict( tuple( ws.split( l, 1 ) )
                     for l in nl.split( run( 'SGE_SINGLE_LINE=1 qconf ' + opt ) )
                     if l and not l.startswith( '#' ) )
def __install_tools( self ):
    """
    Installs the mesos-master-discovery init script and its companion mesos-tools. The
    latter is a Python package distribution that's included in cgcloud-mesos as a
    resource. This is in contrast to the cgcloud agent, which is a standalone
    distribution.
    """
    tools_dir = install_dir + '/tools'
    admin = self.admin_account( )
    sudo( fmt( 'mkdir -p {tools_dir}' ) )
    sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) )
    # Create a virtualenv without pip, then pin the pip version via easy_install
    sudo( fmt( 'virtualenv --no-pip {tools_dir}' ) )
    sudo( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )
    with settings( forward_agent=True ):
        with self._project_artifacts( 'mesos-tools' ) as artifacts:
            pip( use_sudo=True,
                 path=tools_dir + '/bin/pip',
                 args=concat( 'install', artifacts ) )
    sudo( fmt( 'chown -R root:root {tools_dir}' ) )
    # Serialize the MesosTools constructor call for embedding into the init script
    mesos_tools = "MesosTools(**%r)" % dict( user=user,
                                             shared_dir=self._shared_dir( ),
                                             ephemeral_dir=ephemeral_dir,
                                             persistent_dir=persistent_dir,
                                             lazy_dirs=self.lazy_dirs )
    self.lazy_dirs = None  # make sure it can't be used anymore once we are done with it
    # NOTE(review): heredoc() appears to interpolate {tools_dir} and
    # {mesos_tools} from the caller's locals
    self._register_init_script(
        "mesosbox",
        heredoc( """
            description "Mesos master discovery"
            console log
            start on (local-filesystems and net-device-up IFACE!=lo)
            stop on runlevel [!2345]
            pre-start script
            {tools_dir}/bin/python2.7 - <<END
            import logging
            logging.basicConfig( level=logging.INFO )
            from cgcloud.mesos_tools import MesosTools
            mesos_tools = {mesos_tools}
            mesos_tools.start()
            END
            end script
            post-stop script
            {tools_dir}/bin/python2.7 - <<END
            import logging
            logging.basicConfig( level=logging.INFO )
            from cgcloud.mesos_tools import MesosTools
            mesos_tools = {mesos_tools}
            mesos_tools.stop()
            END
            end script""" ) )
def setup_repo_host_keys(self, user=None):
    """
    Pre-seed the SSH host keys from bitbucket and github, such that ssh doesn't
    prompt during the initial checkouts.

    :param user: None runs the scan as the connecting user, 'root' runs it via
           plain sudo, any other value runs it as that user via a login sudo.
    """
    for known_host in ( 'bitbucket.org', 'github.com' ):
        scan_cmd = 'ssh-keyscan -t rsa %s >> ~/.ssh/known_hosts' % known_host
        if user == 'root':
            sudo( scan_cmd )
        elif user is not None:
            # -i gives a login shell so ~ resolves to the target user's home
            sudo( scan_cmd, user=user, sudo_args='-i' )
        else:
            run( scan_cmd )
def qconf_dict(opt, d=None, file_name="qconf.tmp"):
    """
    Read or write a GridEngine configuration object via qconf.

    :param str opt: the qconf option to pass, e.g. '-sconf' (show) or a
           modify option -- presumably determines read vs. write semantics
           together with ``d``; TODO confirm against callers.
    :param dict d: if given, the key/value pairs to upload and apply via
           qconf; if None, the current configuration is returned as a dict.
    :param str file_name: temporary remote file used to feed qconf.
    """
    if d:
        # qconf can't read from stdin for some reason, neither -, /dev/stdin or /dev/fd/0 works
        s = "\n".join(" ".join(i) for i in d.iteritems()) + "\n"
        put(remote_path=file_name, local_path=StringIO(s))
        sudo(" ".join(["qconf", opt, file_name]))
        run(" ".join(["rm", file_name]))
    else:
        # NOTE(review): 'ws' and 'nl' are presumably module-level regexes for
        # whitespace and newlines -- each non-comment line is split into a
        # key/value pair on the first whitespace run
        return dict(
            tuple(ws.split(l, 1))
            for l in nl.split(run("SGE_SINGLE_LINE=1 qconf " + opt))
            if l and not l.startswith("#")
        )
def __install_hadoop(self):
    """
    Download, extract and configure Hadoop, then register its services.
    """
    # Download and extract Hadoop
    path = fmt(
        'hadoop/common/hadoop-{hadoop_version}/hadoop-{hadoop_version}.tar.gz')
    self._install_apache_package(path, install_dir)
    # Add environment variables to hadoop_env.sh
    hadoop_env = dict(HADOOP_LOG_DIR=self._lazy_mkdir(log_dir, "hadoop"),
                      JAVA_HOME='/usr/lib/jvm/java-8-oracle')
    hadoop_env_sh_path = fmt(
        "{install_dir}/hadoop/etc/hadoop/hadoop-env.sh")
    with remote_open(hadoop_env_sh_path, use_sudo=True) as hadoop_env_sh:
        hadoop_env_sh.write('\n')
        for name, value in hadoop_env.iteritems():
            hadoop_env_sh.write(fmt('export {name}="{value}"\n'))
    # Configure HDFS
    hdfs_dir = var_dir + "/hdfs"
    put(use_sudo=True,
        remote_path=fmt('{install_dir}/hadoop/etc/hadoop/hdfs-site.xml'),
        local_path=StringIO(
            self.__to_hadoop_xml_config({
                'dfs.replication': str(hdfs_replication),
                'dfs.permissions': 'false',
                # name/data/checkpoint dirs survive restarts via persistent bind-mounts
                'dfs.name.dir':
                    self._lazy_mkdir(hdfs_dir, 'name', persistent=True),
                'dfs.data.dir':
                    self._lazy_mkdir(hdfs_dir, 'data', persistent=True),
                'fs.checkpoint.dir':
                    self._lazy_mkdir(hdfs_dir, 'checkpoint', persistent=True),
                'dfs.namenode.http-address': 'spark-master:50070',
                'dfs.namenode.secondary.http-address': 'spark-master:50090'
            })))
    # Configure Hadoop
    put(use_sudo=True,
        remote_path=fmt('{install_dir}/hadoop/etc/hadoop/core-site.xml'),
        local_path=StringIO(
            self.__to_hadoop_xml_config(
                {'fs.default.name': 'hdfs://spark-master:8020'})))
    # Make shell auto completion easier
    sudo(fmt('find {install_dir}/hadoop -name "*.cmd" | xargs rm'))
    # Install upstart jobs
    self.__register_upstart_jobs(hadoop_services)
def _setup_docker(self):
    """
    Grant Docker access to the configured users and relocate /var/lib/docker
    onto a separate volume via a boot-time bind mount.
    """
    for docker_user in set(self._docker_users()):
        sudo("usermod -aG docker " + docker_user)
    prefixes = self._docker_data_prefixes()
    if prefixes:
        prefixes = " ".join(map(quote, prefixes))
        self._run_init_script("docker", "stop")
        # Make sure Docker's aufs backend isn't mounted anymore
        sudo("umount /var/lib/docker/aufs", warn_only=True)
        # Backup initial state of data directory so we can initialize an empty ephemeral volume
        sudo("tar -czC /var/lib docker > /var/lib/docker.tar.gz")
        # Then delete it and recreate it as an empty directory to serve as the bind mount point
        sudo("rm -rf /var/lib/docker && mkdir /var/lib/docker")
        # NOTE(review): heredoc() appears to interpolate {prefixes} from the
        # caller's locals into the upstart job below
        self._register_init_script(
            "dockerbox",
            heredoc(
                """
                description "Placement of /var/lib/docker"
                console log
                start on starting docker
                stop on stopped docker
                pre-start script
                    echo
                    echo "This is the dockerbox pre-start script"
                    set -ex
                    if mountpoint -q /var/lib/docker; then
                        echo "The directory '/var/lib/docker' is already mounted, exiting."
                    else
                        for prefix in {prefixes}; do
                            # Prefix must refer to a separate volume, e.g. ephemeral or EBS
                            if mountpoint -q "$prefix"; then
                                # Make sure Docker's aufs backend isn't mounted anymore
                                umount /var/lib/docker/aufs || true
                                if test -d "$prefix/var/lib/docker"; then
                                    echo "The directory '$prefix/var/lib/docker' already exists, using it."
                                else
                                    mkdir -p "$prefix/var/lib"
                                    # If /var/lib/docker contains files ...
                                    if python -c 'import os, sys; sys.exit( 0 if os.listdir( sys.argv[1] ) else 1 )' /var/lib/docker; then
                                        # ... move it to prefix ...
                                        mv /var/lib/docker "$prefix/var/lib"
                                        # ... and recreate it as an empty mount point, ...
                                        mkdir -p /var/lib/docker
                                    else
                                        # ... otherwise untar the initial backup.
                                        tar -xzC "$prefix/var/lib" < /var/lib/docker.tar.gz
                                    fi
                                fi
                                # Now bind-mount into /var/lib/docker
                                mount --bind "$prefix/var/lib/docker" /var/lib/docker
                                break
                            else
                                echo "The prefix directory '$prefix' is not a mount point, skipping."
                            fi
                        done
                    fi
                end script"""
            ),
        )
        self._run_init_script("docker", "start")
def _setup_docker(self):
    """
    Grant Docker access to the configured users and relocate /var/lib/docker
    onto a separate volume via a boot-time bind mount.
    """
    for docker_user in set(self._docker_users()):
        sudo("usermod -aG docker " + docker_user)
    prefixes = self._docker_data_prefixes()
    if prefixes:
        prefixes = ' '.join(map(quote, prefixes))
        self._run_init_script('docker', 'stop')
        # Make sure Docker's aufs backend isn't mounted anymore
        sudo('umount /var/lib/docker/aufs', warn_only=True)
        # Backup initial state of data directory so we can initialize an empty ephemeral volume
        sudo('tar -czC /var/lib docker > /var/lib/docker.tar.gz')
        # Then delete it and recreate it as an empty directory to serve as the bind mount point
        sudo('rm -rf /var/lib/docker && mkdir /var/lib/docker')
        # NOTE(review): heredoc() appears to interpolate {prefixes} from the
        # caller's locals into the upstart job below
        self._register_init_script(
            'dockerbox',
            heredoc("""
                description "Placement of /var/lib/docker"
                console log
                start on starting docker
                stop on stopped docker
                pre-start script
                    echo
                    echo "This is the dockerbox pre-start script"
                    set -ex
                    if mountpoint -q /var/lib/docker; then
                        echo "The directory '/var/lib/docker' is already mounted, exiting."
                    else
                        for prefix in {prefixes}; do
                            # Prefix must refer to a separate volume, e.g. ephemeral or EBS
                            if mountpoint -q "$prefix"; then
                                # Make sure Docker's aufs backend isn't mounted anymore
                                umount /var/lib/docker/aufs || true
                                if test -d "$prefix/var/lib/docker"; then
                                    echo "The directory '$prefix/var/lib/docker' already exists, using it."
                                else
                                    mkdir -p "$prefix/var/lib"
                                    # If /var/lib/docker contains files ...
                                    if python -c 'import os, sys; sys.exit( 0 if os.listdir( sys.argv[1] ) else 1 )' /var/lib/docker; then
                                        # ... move it to prefix ...
                                        mv /var/lib/docker "$prefix/var/lib"
                                        # ... and recreate it as an empty mount point, ...
                                        mkdir -p /var/lib/docker
                                    else
                                        # ... otherwise untar the initial backup.
                                        tar -xzC "$prefix/var/lib" < /var/lib/docker.tar.gz
                                    fi
                                fi
                                # Now bind-mount into /var/lib/docker
                                mount --bind "$prefix/var/lib/docker" /var/lib/docker
                                break
                            else
                                echo "The prefix directory '$prefix' is not a mount point, skipping."
                            fi
                        done
                    fi
                end script"""))
        self._run_init_script('docker', 'start')
def __install_sparkbox_tools( self ):
    """
    Installs the spark-master-discovery init script and its companion spark-tools. The
    latter is a Python package distribution that's included in cgcloud-spark as a
    resource. This is in contrast to the cgcloud agent, which is a standalone
    distribution.
    """
    tools_dir = install_dir + '/tools'
    admin = self.admin_account( )
    sudo( fmt( 'mkdir -p {tools_dir} {persistent_dir} {ephemeral_dir}' ) )
    sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) )
    # Create a virtualenv without pip, then pin the pip version via easy_install
    run( fmt( 'virtualenv --no-pip {tools_dir}' ) )
    run( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )
    spark_tools_artifacts = ' '.join( self._project_artifacts( 'spark-tools' ) )
    with settings( forward_agent=True ):
        run( fmt( '{tools_dir}/bin/pip install {spark_tools_artifacts}' ) )
    sudo( fmt( 'chown -R root:root {tools_dir}' ) )
    # Serialize the SparkTools constructor call for embedding into the scripts below
    spark_tools = "SparkTools(**%r)" % dict( user=user,
                                             install_dir=install_dir,
                                             ephemeral_dir=ephemeral_dir,
                                             persistent_dir=persistent_dir,
                                             lazy_dirs=self.lazy_dirs )
    # Bug fix: the pre-start script's python heredoc was missing its END
    # terminator, so upstart would swallow 'end script' into the heredoc
    # (compare the post-stop script below and the mesosbox job).
    self._register_init_script(
        "sparkbox",
        heredoc( """
            description "Spark/HDFS master discovery"
            console log
            start on runlevel [2345]
            stop on runlevel [016]
            pre-start script
            {tools_dir}/bin/python2.7 - <<END
            import logging
            logging.basicConfig( level=logging.INFO )
            from cgcloud.spark_tools import SparkTools
            spark_tools = {spark_tools}
            spark_tools.start()
            END
            end script
            post-stop script
            {tools_dir}/bin/python2.7 - <<END
            import logging
            logging.basicConfig( level=logging.INFO )
            from cgcloud.spark_tools import SparkTools
            spark_tools = {spark_tools}
            spark_tools.stop()
            END
            end script""" ) )
    # Helper script for adding slaves to the running cluster
    script_path = "/usr/local/bin/sparkbox-manage-slaves"
    put( remote_path=script_path, use_sudo=True,
         local_path=StringIO( heredoc( """
            #!{tools_dir}/bin/python2.7
            import sys
            import logging
            logging.basicConfig( level=logging.INFO )
            from cgcloud.spark_tools import SparkTools
            spark_tools = {spark_tools}
            spark_tools.manage_slaves( slaves_to_add=sys.argv[1:] )""" ) ) )
    sudo( fmt( "chown root:root {script_path} && chmod 755 {script_path}" ) )
def __lazy_mkdir( self, parent, name, persistent=False ):
    """
    Create parent/name now and record it for bind-mounting at boot time.

    __lazy_mkdir( '/foo', 'dir', True ) creates /foo/dir immediately and ensures that
    /mnt/persistent/foo/dir is created and bind-mounted into /foo/dir when the box
    starts, while persistent=False uses /mnt/ephemeral/foo/dir instead. Note that at
    start-up time, /mnt/persistent may be reassigned to /mnt/ephemeral if no EBS volume
    is mounted at /mnt/persistent.
    """
    assert '/' not in name
    assert parent.startswith( '/' )
    # Neither mount root may be nested inside the parent, or vice versa
    for mount_root in ( persistent_dir, ephemeral_dir ):
        assert mount_root.startswith( '/' )
        assert not ( mount_root.startswith( parent ) or parent.startswith( mount_root ) )
    logical_path = '/'.join( ( parent, name ) )
    sudo( 'mkdir -p "%s"' % logical_path )
    # Remember the directory for the boot-time bind-mount machinery
    self.lazy_dirs.add( ( parent, name, persistent ) )
    return logical_path
def __setup_application_user( self ):
    """
    Create the mesosbox application user and grant it password-less sudo.
    """
    sudo( fmt( 'useradd '
               '--home /home/{user} '
               '--create-home '
               '--user-group '
               '--shell /bin/bash {user}' ) )
    # NOTE(review): the stanza below is duplicated verbatim -- looks like a
    # copy-paste error; harmless to sudoers but worth cleaning up. Left
    # untouched here since the string is written to the box at runtime.
    sudoer_file = heredoc( """
        # CGcloud - MesosBox
        # User rules for ubuntu
        mesosbox ALL=(ALL) NOPASSWD:ALL
        # User rules for ubuntu
        mesosbox ALL=(ALL) NOPASSWD:ALL""" )
    sudoer_file_path = '/etc/sudoers.d/89-mesosbox-user'
    # 0440 is the mode sudo requires for files under /etc/sudoers.d
    put( local_path=StringIO( sudoer_file ),
         remote_path=sudoer_file_path,
         use_sudo=True,
         mode=0440 )
    sudo( "chown root:root '%s'" % sudoer_file_path )
def _setup_build_user(self):
    """
    Give the jenkins build user the sudo rights needed for RPM builds.
    """
    super( CentosRpmbuildJenkinsSlave, self )._setup_build_user( )
    # Some RPM builds depend on the product of other RPM builds to be installed so we
    # need to be able to run rpm in between RPM builds
    for sudoers_line in ( 'Defaults:jenkins !requiretty',
                          'jenkins ALL=(ALL) NOPASSWD: /bin/rpm' ):
        sudo( "echo '%s' >> /etc/sudoers" % sudoers_line )
    # goes with the mock package
    sudo( "useradd -s /sbin/nologin mockbuild" )
def _setup_package_repos( self ):
    """
    Add the webupd8team PPA for Oracle Java and pre-accept its license.
    """
    super( SparkBox, self )._setup_package_repos( )
    sudo( 'add-apt-repository -y ppa:webupd8team/java' )
    # Pre-seed debconf so the Oracle JDK installer doesn't prompt interactively
    for answer in ( 'select true', 'seen true' ):
        sudo( 'echo debconf shared/accepted-oracle-license-v1-1 %s '
              '| sudo debconf-set-selections' % answer )
def __install_hadoop( self ):
    """
    Download, extract and configure Hadoop, then register its services.
    """
    # Download and extract Hadoop
    path = fmt( 'hadoop/common/hadoop-{hadoop_version}/hadoop-{hadoop_version}.tar.gz' )
    self.__install_apache_package( path )
    # Add environment variables to hadoop_env.sh
    hadoop_env = dict( HADOOP_LOG_DIR=self._lazy_mkdir( log_dir, "hadoop" ),
                       JAVA_HOME='/usr/lib/jvm/java-7-oracle' )
    hadoop_env_sh_path = fmt( "{install_dir}/hadoop/etc/hadoop/hadoop-env.sh" )
    with remote_open( hadoop_env_sh_path, use_sudo=True ) as hadoop_env_sh:
        hadoop_env_sh.write( '\n' )
        for name, value in hadoop_env.iteritems( ):
            hadoop_env_sh.write( fmt( 'export {name}="{value}"\n' ) )
    # Configure HDFS
    hdfs_dir = var_dir + "/hdfs"
    put( use_sudo=True,
         remote_path=fmt( '{install_dir}/hadoop/etc/hadoop/hdfs-site.xml' ),
         local_path=StringIO( self.__to_hadoop_xml_config( {
             'dfs.replication': str( hdfs_replication ),
             'dfs.permissions': 'false',
             # name/data/checkpoint dirs survive restarts via persistent bind-mounts
             'dfs.name.dir': self._lazy_mkdir( hdfs_dir, 'name', persistent=True ),
             'dfs.data.dir': self._lazy_mkdir( hdfs_dir, 'data', persistent=True ),
             'fs.checkpoint.dir': self._lazy_mkdir( hdfs_dir, 'checkpoint', persistent=True ),
             'dfs.namenode.http-address': 'spark-master:50070',
             'dfs.namenode.secondary.http-address': 'spark-master:50090' } ) ) )
    # Configure Hadoop
    put( use_sudo=True,
         remote_path=fmt( '{install_dir}/hadoop/etc/hadoop/core-site.xml' ),
         local_path=StringIO( self.__to_hadoop_xml_config( {
             'fs.default.name': 'hdfs://spark-master:8020' } ) ) )
    # Make shell auto completion easier
    sudo( fmt( 'find {install_dir}/hadoop -name "*.cmd" | xargs rm' ) )
    # Install upstart jobs
    self.__register_upstart_jobs( hadoop_services )
def _setup_package_repos(self):
    """
    Register the official Docker apt repository for this Ubuntu release.
    """
    # Installing from the Docker repo requires apt's https transport
    assert run(
        "test -e /usr/lib/apt/methods/https", warn_only=True
    ).succeeded, "Need HTTPS support in apt-get in order to install from the Docker repository"
    super(DockerBox, self)._setup_package_repos()
    # Trust the Docker package signing key
    sudo(
        " ".join(
            [
                "apt-key",
                "adv",
                "--keyserver",
                "hkp://p80.pool.sks-keyservers.net:80",
                "--recv-keys",
                "58118E89F3A912897C070ADBF76221572C52609D",
            ]
        )
    )
    codename = self.release().codename
    # NOTE(review): fmt() appears to interpolate {codename} from the caller's locals
    sudo(
        fmt(
            "echo deb https://apt.dockerproject.org/repo ubuntu-{codename} main "
            "> /etc/apt/sources.list.d/docker.list"
        )
    )
def _lazy_mkdir( self, parent, name, persistent=False ):
    """
    __lazy_mkdir( '/foo', 'dir', True ) creates /foo/dir now and ensures that
    /mnt/persistent/foo/dir is created and bind-mounted into /foo/dir when the box starts.
    Likewise, __lazy_mkdir( '/foo', 'dir', False) creates /foo/dir now and ensures that
    /mnt/ephemeral/foo/dir is created and bind-mounted into /foo/dir when the box starts.

    Note that at start-up time, /mnt/persistent may be reassigned to /mnt/ephemeral if no
    EBS volume is mounted at /mnt/persistent.

    _lazy_mkdir( '/foo', 'dir', None ) will look up an instance tag named 'persist_foo_dir'
    when the box starts and then behave like _lazy_mkdir( '/foo', 'dir', True ) if that
    tag's value is 'True', or _lazy_mkdir( '/foo', 'dir', False ) if that tag's value is
    False.
    """
    assert self.lazy_dirs is not None
    assert '/' not in name
    assert parent.startswith( '/' )
    # Neither mount location may be nested inside the parent, or vice versa
    for location in (persistent_dir, ephemeral_dir):
        assert location.startswith( '/' )
        assert not location.startswith( parent ) and not parent.startswith( location )
    logical_path = parent + '/' + name
    sudo( 'mkdir -p "%s"' % logical_path )
    # Remember the directory for the boot-time bind-mount machinery
    self.lazy_dirs.add( (parent, name, persistent) )
    return logical_path
def __install_mesosbox_tools( self ):
    """
    Installs the mesos-master-discovery init script and its companion mesos-tools. The
    latter is a Python package distribution that's included in cgcloud-mesos as a
    resource. This is in contrast to the cgcloud agent, which is a standalone
    distribution.
    """
    tools_dir = install_dir + '/tools'
    sudo( fmt( 'mkdir -p {tools_dir}' ) )
    # Create a virtualenv without pip, then pin the pip version via easy_install
    sudo( fmt( 'virtualenv --no-pip {tools_dir}' ) )
    sudo( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )
    mesos_tools_artifacts = ' '.join( self._project_artifacts( 'mesos-tools' ) )
    with settings( forward_agent=True ):
        sudo( fmt( '{tools_dir}/bin/pip install {mesos_tools_artifacts}' ) )
    # Serialize the MesosTools constructor call for embedding into the init script
    mesos_tools = "MesosTools(**%r)" % dict( user=user )
    # NOTE(review): heredoc() appears to interpolate {tools_dir} and
    # {mesos_tools} from the caller's locals
    self._register_init_script(
        "mesosbox",
        heredoc( """
            description "Mesos master discovery"
            console log
            start on runlevel [2345]
            stop on runlevel [016]
            pre-start script
            {tools_dir}/bin/python2.7 - <<END
            import logging
            logging.basicConfig( level=logging.INFO )
            from cgcloud.mesos_tools import MesosTools
            mesos_tools = {mesos_tools}
            mesos_tools.start()
            END
            end script
            post-stop script
            {tools_dir}/bin/python2.7 - <<END
            import logging
            logging.basicConfig( level=logging.INFO )
            from cgcloud.mesos_tools import MesosTools
            mesos_tools = {mesos_tools}
            mesos_tools.stop()
            END
            end script""" ) )
def __setup_agent(self):
    """
    Install the cgcloud agent into its own virtualenv, generate its init
    script on the box and start it.
    """
    # NOTE(review): fmt() appears to interpolate the locals defined below
    # ({install_dir}, {run_dir}, etc.) -- confirm before renaming any of them
    availability_zone = self.ctx.availability_zone
    namespace = self.ctx.namespace
    ec2_keypair_globs = ' '.join(
        shell.quote(_) for _ in self.ec2_keypair_globs)
    accounts = ' '.join([self.admin_account()] + self.other_accounts())
    admin_account = self.admin_account()
    run_dir = '/var/run/cgcloudagent'
    log_dir = '/var/log'
    install_dir = '/opt/cgcloudagent'
    # Lucid & CentOS 5 have an ancient pip
    pip('install --upgrade pip==1.5.2', use_sudo=True)
    pip('install --upgrade virtualenv', use_sudo=True)
    sudo(fmt('mkdir -p {install_dir}'))
    sudo(fmt('chown {admin_account}:{admin_account} {install_dir}'))
    # By default, virtualenv installs the latest version of pip. We want a specific
    # version, so we tell virtualenv not to install pip and then install that version of
    # pip using easy_install.
    run(fmt('virtualenv --no-pip {install_dir}'))
    run(fmt('{install_dir}/bin/easy_install pip==1.5.2'))
    with settings(forward_agent=True):
        venv_pip = install_dir + '/bin/pip'
        if self._enable_agent_metrics():
            pip(path=venv_pip, args='install psutil==3.4.1')
        with self._project_artifacts('agent') as artifacts:
            pip(
                path=venv_pip,
                args=concat(
                    'install',
                    '--allow-external',
                    'argparse',  # needed on CentOS 5 and 6
                    artifacts))
    sudo(fmt('mkdir {run_dir}'))
    # Ask the agent itself to emit its init script, compressed and base64
    # encoded so it survives the round trip through the remote shell
    script = self.__gunzip_base64_decode(
        run(
            fmt('{install_dir}/bin/cgcloudagent'
                ' --init-script'
                ' --zone {availability_zone}'
                ' --namespace {namespace}'
                ' --accounts {accounts}'
                ' --keypairs {ec2_keypair_globs}'
                ' --user root'
                ' --group root'
                ' --pid-file {run_dir}/cgcloudagent.pid'
                ' --log-spill {log_dir}/cgcloudagent.out'
                '| gzip -c | base64')))
    self._register_init_script('cgcloudagent', script)
    self._run_init_script('cgcloudagent')
def __setup_agent( self ):
    """
    Install the cgcloud agent into its own virtualenv, generate its init
    script on the box and start it.
    """
    # NOTE(review): fmt() appears to interpolate the locals defined below
    # ({install_dir}, {run_dir}, etc.) -- confirm before renaming any of them
    availability_zone = self.ctx.availability_zone
    namespace = self.ctx.namespace
    ec2_keypair_globs = ' '.join( shell.quote( _ ) for _ in self.ec2_keypair_globs )
    accounts = ' '.join( [ self.admin_account( ) ] + self.other_accounts( ) )
    admin_account = self.admin_account( )
    run_dir = '/var/run/cgcloudagent'
    log_dir = '/var/log'
    install_dir = '/opt/cgcloudagent'
    # Lucid & CentOS 5 have an ancient pip
    pip( 'install --upgrade pip==1.5.2', use_sudo=True )
    pip( 'install --upgrade virtualenv', use_sudo=True )
    sudo( fmt( 'mkdir -p {install_dir}' ) )
    sudo( fmt( 'chown {admin_account}:{admin_account} {install_dir}' ) )
    # By default, virtualenv installs the latest version of pip. We want a specific
    # version, so we tell virtualenv not to install pip and then install that version of
    # pip using easy_install.
    run( fmt( 'virtualenv --no-pip {install_dir}' ) )
    run( fmt( '{install_dir}/bin/easy_install pip==1.5.2' ) )
    with settings( forward_agent=True ):
        venv_pip = install_dir + '/bin/pip'
        if self._enable_agent_metrics( ):
            pip( path=venv_pip, args='install psutil==3.4.1' )
        with self._project_artifacts( 'agent' ) as artifacts:
            pip( path=venv_pip,
                 args=concat( 'install',
                              '--allow-external', 'argparse',  # needed on CentOS 5 and 6
                              artifacts ) )
    sudo( fmt( 'mkdir {run_dir}' ) )
    # Ask the agent itself to emit its init script, compressed and base64
    # encoded so it survives the round trip through the remote shell
    script = self.__gunzip_base64_decode( run( fmt(
        '{install_dir}/bin/cgcloudagent'
        ' --init-script'
        ' --zone {availability_zone}'
        ' --namespace {namespace}'
        ' --accounts {accounts}'
        ' --keypairs {ec2_keypair_globs}'
        ' --user root'
        ' --group root'
        ' --pid-file {run_dir}/cgcloudagent.pid'
        ' --log-spill {log_dir}/cgcloudagent.out'
        '| gzip -c | base64' ) ) )
    self._register_init_script( 'cgcloudagent', script )
    self._run_init_script( 'cgcloudagent' )
def __install_spark(self):
    """
    Download, extract and configure Spark, then register its services.
    """
    # Download and extract Spark
    path = fmt(
        'spark/spark-{spark_version}/spark-{spark_version}-bin-hadoop{spark_hadoop_version}.tgz'
    )
    self._install_apache_package(path, install_dir)
    spark_dir = var_dir + "/spark"
    # Add environment variables to spark_env.sh
    spark_env_sh_path = fmt("{install_dir}/spark/conf/spark-env.sh")
    sudo(fmt("cp {spark_env_sh_path}.template {spark_env_sh_path}"))
    spark_env = dict(
        SPARK_LOG_DIR=self._lazy_mkdir(log_dir, "spark"),
        SPARK_WORKER_DIR=self._lazy_mkdir(spark_dir, "work"),
        SPARK_LOCAL_DIRS=self._lazy_mkdir(spark_dir, "local"),
        JAVA_HOME='/usr/lib/jvm/java-8-oracle',
        SPARK_MASTER_IP='spark-master',
        HADOOP_CONF_DIR=fmt("{install_dir}/hadoop/etc/hadoop"))
    with remote_open(spark_env_sh_path, use_sudo=True) as spark_env_sh:
        spark_env_sh.write('\n')
        for name, value in spark_env.iteritems():
            spark_env_sh.write(fmt('export {name}="{value}"\n'))
    # Configure Spark properties
    spark_defaults = {
        'spark.eventLog.enabled': 'true',
        'spark.eventLog.dir': self._lazy_mkdir(spark_dir, "history"),
        'spark.master': 'spark://spark-master:7077'
    }
    spark_defaults_conf_path = fmt(
        "{install_dir}/spark/conf/spark-defaults.conf")
    sudo(
        fmt("cp {spark_defaults_conf_path}.template {spark_defaults_conf_path}"
            ))
    with remote_open(spark_defaults_conf_path,
                     use_sudo=True) as spark_defaults_conf:
        for name, value in spark_defaults.iteritems():
            spark_defaults_conf.write(fmt("{name}\t{value}\n"))
    # Make shell auto completion easier
    sudo(fmt('find {install_dir}/spark -name "*.cmd" | xargs rm'))
    # Install upstart jobs
    self.__register_upstart_jobs(spark_services)
def __install_spark( self ):
    """
    Download and unpack a Spark binary release from an Apache mirror, then
    configure spark-env.sh and spark-defaults.conf and register the Spark
    services with Upstart. Unlike the sibling variant, this one also exposes
    the instance's public hostname via SPARK_PUBLIC_DNS.
    """
    # Download and extract Spark
    path = fmt( 'spark/spark-{spark_version}/spark-{spark_version}-bin-hadoop{spark_hadoop_version}.tgz' )
    self._install_apache_package( path, install_dir )
    spark_dir = var_dir + "/spark"
    # Add environment variables to spark_env.sh
    spark_env_sh_path = fmt( "{install_dir}/spark/conf/spark-env.sh" )
    sudo( fmt( "cp {spark_env_sh_path}.template {spark_env_sh_path}" ) )
    spark_env = dict(
        # _lazy_mkdir registers the directory for creation at boot and returns its path
        SPARK_LOG_DIR=self._lazy_mkdir( log_dir, "spark" ),
        SPARK_WORKER_DIR=self._lazy_mkdir( spark_dir, "work" ),
        SPARK_LOCAL_DIRS=self._lazy_mkdir( spark_dir, "local" ),
        JAVA_HOME='/usr/lib/jvm/java-8-oracle',
        SPARK_MASTER_IP='spark-master',
        HADOOP_CONF_DIR=fmt( "{install_dir}/hadoop/etc/hadoop" ),
        # Resolved at service start time via the EC2 instance metadata endpoint
        SPARK_PUBLIC_DNS="$(curl -s http://169.254.169.254/latest/meta-data/public-hostname)" )
    with remote_open( spark_env_sh_path, use_sudo=True ) as spark_env_sh:
        spark_env_sh.write( '\n' )
        for name, value in spark_env.iteritems( ):
            spark_env_sh.write( fmt( 'export {name}="{value}"\n' ) )
    # Configure Spark properties
    spark_defaults = {
        'spark.eventLog.enabled': 'true',
        'spark.eventLog.dir': self._lazy_mkdir( spark_dir, "history" ),
        'spark.master': 'spark://spark-master:7077'
    }
    spark_defaults_conf_path = fmt( "{install_dir}/spark/conf/spark-defaults.conf" )
    sudo( fmt( "cp {spark_defaults_conf_path}.template {spark_defaults_conf_path}" ) )
    with remote_open( spark_defaults_conf_path, use_sudo=True ) as spark_defaults_conf:
        for name, value in spark_defaults.iteritems( ):
            spark_defaults_conf.write( fmt( "{name}\t{value}\n" ) )
    # Make shell auto completion easier (remove Windows-only *.cmd scripts)
    sudo( fmt( 'find {install_dir}/spark -name "*.cmd" | xargs rm' ) )
    # Install upstart jobs
    self.__register_upstart_jobs( spark_services )
def __configure_slurm(self):
    """
    Configure SLURM in a single-node configuration with text-file accounting.

    Starts munge (SLURM's authentication service), writes a slurm.conf with
    placeholder CPU/memory values, and registers a pre-start init script that
    patches in the real values at boot time.
    """
    # Create munge key and start; munge must run before any slurm daemon
    sudo('/usr/sbin/create-munge-key')
    sudo('/usr/sbin/service munge start')
    slurm_acct_file = '/var/log/slurm-llnl/slurm-acct.txt'
    # Default values placed into compute node config, will be replaced by pre script
    slurm_conf = heredoc("""
        ClusterName=jenkins-testing
        ControlMachine=localhost
        SlurmUser=slurm
        SlurmctldPort=6817
        SlurmdPort=6818
        StateSaveLocation=/tmp
        SlurmdSpoolDir=/tmp/slurmd
        SwitchType=switch/none
        MpiDefault=none
        SlurmctldPidFile=/var/run/slurmctld.pid
        SlurmdPidFile=/var/run/slurmd.pid
        ProctrackType=proctrack/pgid
        CacheGroups=0
        ReturnToService=0
        SlurmctldTimeout=300
        SlurmdTimeout=300
        InactiveLimit=0
        MinJobAge=300
        KillWait=30
        Waittime=0
        SchedulerType=sched/backfill
        SelectType=select/cons_res
        FastSchedule=1
        # LOGGING
        SlurmctldDebug=3
        SlurmdDebug=3
        JobCompType=jobcomp/none
        # ACCOUNTING
        AccountingStorageLoc={slurm_acct_file}
        AccountingStorageType=accounting_storage/filetxt
        AccountingStoreJobComment=YES
        JobAcctGatherFrequency=30
        JobAcctGatherType=jobacct_gather/linux
        # COMPUTE NODES
        NodeName=localhost CPUs=1 State=UNKNOWN RealMemory=256
        PartitionName=debug Nodes=localhost Default=YES MaxTime=INFINITE State=UP""")
    slurm_conf_tmp = '/tmp/slurm.conf'
    slurm_conf_file = '/etc/slurm-llnl/slurm.conf'
    # Put config file in: /etc/slurm-llnl/slurm.conf
    # (uploaded to /tmp first because put() runs as the unprivileged user)
    put(remote_path=slurm_conf_tmp, local_path=StringIO(slurm_conf))
    sudo('mkdir -p /etc/slurm-llnl')
    sudo('mv %s %s' % (slurm_conf_tmp, slurm_conf_file))
    sudo('chown root:root %s' % slurm_conf_file)
    # Touch the accounting job file and make sure it's owned by slurm user
    sudo('mkdir -p /var/log/slurm-llnl')
    sudo('touch %s' % slurm_acct_file)
    sudo('chown slurm:slurm %s' % slurm_acct_file)
    sudo('chmod 644 %s' % slurm_acct_file)
    # Register an init-script that sets the CPUs and RealMemory in slurm.conf
    # slurm.conf needs cpus and memory in order to handle jobs with these resource requests
    # NOTE: the doubled braces ({{ }}) keep fmt()/heredoc interpolation from eating
    # the awk and shell variable braces.
    self._register_init_script(
        'slurm-llnl-pre',
        heredoc("""
            description "Slurm pre-start configuration"
            console log
            start on filesystem
            pre-start script
            CPUS=$(/usr/bin/nproc)
            MEMORY=$(cat /proc/meminfo | grep MemTotal | awk '{{print $2, "/ 1024"}}' | bc)
            sed -i "s/CPUs=[0-9]\+/CPUs=${{CPUS}}/" {slurm_conf_file}
            sed -i "s/RealMemory=[0-9]\+/RealMemory=${{MEMORY}}/" {slurm_conf_file}
            end script"""))
    # Start slurm services
    self._run_init_script('slurm-llnl-pre')
    self._run_init_script('slurm-llnl')
    # Ensure partition is up: cycling the node Down then Resume clears any stale state
    sudo('scontrol update NodeName=localhost State=Down')
    sudo('scontrol update NodeName=localhost State=Resume')
def __install_tools( self ):
    """
    Installs the spark-master-discovery init script and its companion spark-tools. The
    latter is a Python package distribution that's included in cgcloud-spark as a
    resource. This is in contrast to the cgcloud agent, which is a standalone
    distribution.
    """
    tools_dir = install_dir + '/tools'
    admin = self.admin_account( )
    sudo( fmt( 'mkdir -p {tools_dir}' ) )
    # Temporarily owned by the admin so unprivileged run()/pip can populate the venv
    sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) )
    run( fmt( 'virtualenv --no-pip {tools_dir}' ) )
    run( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )
    with settings( forward_agent=True ):
        with self._project_artifacts( 'spark-tools' ) as artifacts:
            pip( use_sudo=True,
                 path=tools_dir + '/bin/pip',
                 args=concat( 'install', artifacts ) )
    sudo( fmt( 'chown -R root:root {tools_dir}' ) )
    # Literal Python source for the constructor call, pasted into the scripts below
    spark_tools = "SparkTools(**%r)" % dict( user=user,
                                             shared_dir=self._shared_dir( ),
                                             install_dir=install_dir,
                                             ephemeral_dir=ephemeral_dir,
                                             persistent_dir=persistent_dir,
                                             lazy_dirs=self.lazy_dirs )
    self.lazy_dirs = None  # make sure it can't be used anymore once we are done with it
    # Upstart job that runs SparkTools.start() on boot (with retries) and .stop() on halt
    self._register_init_script(
        "sparkbox",
        heredoc( """
            description "Spark/HDFS master discovery"
            console log
            start on (local-filesystems and net-device-up IFACE!=lo)
            stop on runlevel [!2345]
            pre-start script
            for i in 1 2 3; do if {tools_dir}/bin/python2.7 - <<END
            import logging
            logging.basicConfig( level=logging.INFO )
            from cgcloud.spark_tools import SparkTools
            spark_tools = {spark_tools}
            spark_tools.start()
            END
            then exit 0; fi; echo Retrying in 60s; sleep 60; done; exit 1
            end script
            post-stop script
            {tools_dir}/bin/python2.7 - <<END
            import logging
            logging.basicConfig( level=logging.INFO )
            from cgcloud.spark_tools import SparkTools
            spark_tools = {spark_tools}
            spark_tools.stop()
            END
            end script""" ) )
    # Helper invoked (via ssh) by slaves to register themselves with the master
    script_path = "/usr/local/bin/sparkbox-manage-slaves"
    put( remote_path=script_path, use_sudo=True, local_path=StringIO( heredoc( """
        #!{tools_dir}/bin/python2.7
        import sys
        import logging
        # Prefix each log line to make it more obvious that it's the master logging when the
        # slave calls this script via ssh.
        logging.basicConfig( level=logging.INFO,
                             format="manage_slaves: " + logging.BASIC_FORMAT )
        from cgcloud.spark_tools import SparkTools
        spark_tools = {spark_tools}
        spark_tools.manage_slaves( slaves_to_add=sys.argv[1:] )""" ) ) )
    sudo( fmt( "chown root:root {script_path} && chmod 755 {script_path}" ) )
def __disable_mesos_daemons(self):
    """Keep the packaged Mesos master/slave Upstart jobs from starting on boot."""
    # Writing 'manual' into a job's .override file tells Upstart to only
    # start that job when explicitly requested.
    for role in ('master', 'slave'):
        override_cmd = 'echo manual > /etc/init/mesos-%s.override' % role
        sudo(override_cmd)
def _setup_package_repos( self ):
    """
    Extend the base class's package repositories with the WebUpd8 Java PPA.
    """
    # Let the superclass register its repositories first ...
    super( SparkBox, self )._setup_package_repos( )
    # ... then layer on the PPA that provides the Oracle Java packages.
    sudo( "add-apt-repository -y ppa:webupd8team/java" )
def __setup_application_user( self ):
    """
    Create the application user account with its own group, a home directory
    and bash as the login shell.
    """
    # Assemble the useradd invocation from its options; fmt() interpolates {user}
    useradd_parts = [ 'useradd',
                      '--home /home/{user}',
                      '--create-home',
                      '--user-group',
                      '--shell /bin/bash {user}' ]
    sudo( fmt( ' '.join( useradd_parts ) ) )
def __prepare_credentials( self ):
    """
    Write the Mesos authentication credentials file and transfer its
    ownership to the mesosbox service account.
    """
    # Create the credentials file and transfer ownership to mesosbox
    setup_commands = ( 'mkdir -p /etc/mesos',
                       'echo toil liot > /etc/mesos/credentials',
                       'chown mesosbox:mesosbox /etc/mesos/credentials' )
    for command in setup_commands:
        sudo( command )
def __install_parasol( self ):
    """
    Install the prebuilt Parasol batch-system binaries into /usr/local/bin.

    Clones the public parasol-binaries repository over HTTPS — the SSH URL
    used previously requires a deploy key or agent forwarding and fails on a
    box without GitHub credentials — and removes the working copy afterwards
    so it doesn't linger in the admin user's home directory. This mirrors the
    sibling implementation of this method elsewhere in the file.
    """
    run( "git clone https://github.com/BD2KGenomics/parasol-binaries.git" )
    sudo( "cp parasol-binaries/* /usr/local/bin" )
    # Clean up the clone once the binaries are installed
    run( "rm -rf parasol-binaries" )
def __install_parasol(self):
    """
    Fetch the prebuilt Parasol batch-system binaries and install them into
    /usr/local/bin, then remove the working copy of the repository.
    """
    repo_url = "https://github.com/BD2KGenomics/parasol-binaries.git"
    run("git clone " + repo_url)
    sudo("cp parasol-binaries/* /usr/local/bin")
    # The clone is only needed as a staging area; drop it once installed
    run("rm -rf parasol-binaries")
def __disable_mesos_daemons( self ):
    """Prevent the packaged Mesos master/slave Upstart jobs from auto-starting."""
    # An .override file containing 'manual' makes Upstart treat the job as
    # start-on-demand only.
    for daemon in [ 'master', 'slave' ]:
        sudo( 'echo manual > /etc/init/mesos-{0}.override'.format( daemon ) )
def __install_tools(self):
    """
    Installs the spark-master-discovery init script and its companion spark-tools. The
    latter is a Python package distribution that's included in cgcloud-spark as a
    resource. This is in contrast to the cgcloud agent, which is a standalone
    distribution.
    """
    tools_dir = install_dir + '/tools'
    admin = self.admin_account()
    sudo(fmt('mkdir -p {tools_dir}'))
    # Temporarily owned by the admin so unprivileged run()/pip can populate the venv
    sudo(fmt('chown {admin}:{admin} {tools_dir}'))
    run(fmt('virtualenv --no-pip {tools_dir}'))
    run(fmt('{tools_dir}/bin/easy_install pip==1.5.2'))
    with settings(forward_agent=True):
        with self._project_artifacts('spark-tools') as artifacts:
            pip(use_sudo=True,
                path=tools_dir + '/bin/pip',
                args=concat('install', artifacts))
    sudo(fmt('chown -R root:root {tools_dir}'))
    # Literal Python source for the constructor call, pasted into the scripts below
    spark_tools = "SparkTools(**%r)" % dict(user=user,
                                            shared_dir=self._shared_dir(),
                                            install_dir=install_dir,
                                            ephemeral_dir=ephemeral_dir,
                                            persistent_dir=persistent_dir,
                                            lazy_dirs=self.lazy_dirs)
    self.lazy_dirs = None  # make sure it can't be used anymore once we are done with it
    # Upstart job that runs SparkTools.start() on boot (with retries) and .stop() on halt
    self._register_init_script(
        "sparkbox",
        heredoc("""
            description "Spark/HDFS master discovery"
            console log
            start on (local-filesystems and net-device-up IFACE!=lo)
            stop on runlevel [!2345]
            pre-start script
            for i in 1 2 3; do if {tools_dir}/bin/python2.7 - <<END
            import logging
            logging.basicConfig( level=logging.INFO )
            from cgcloud.spark_tools import SparkTools
            spark_tools = {spark_tools}
            spark_tools.start()
            END
            then exit 0; fi; echo Retrying in 60s; sleep 60; done; exit 1
            end script
            post-stop script
            {tools_dir}/bin/python2.7 - <<END
            import logging
            logging.basicConfig( level=logging.INFO )
            from cgcloud.spark_tools import SparkTools
            spark_tools = {spark_tools}
            spark_tools.stop()
            END
            end script"""))
    # Helper invoked (via ssh) by slaves to register themselves with the master
    script_path = "/usr/local/bin/sparkbox-manage-slaves"
    put(remote_path=script_path, use_sudo=True, local_path=StringIO(
        heredoc("""
            #!{tools_dir}/bin/python2.7
            import sys
            import logging
            # Prefix each log line to make it more obvious that it's the master logging when the
            # slave calls this script via ssh.
            logging.basicConfig( level=logging.INFO,
                                 format="manage_slaves: " + logging.BASIC_FORMAT )
            from cgcloud.spark_tools import SparkTools
            spark_tools = {spark_tools}
            spark_tools.manage_slaves( slaves_to_add=sys.argv[1:] )""")))
    sudo(fmt("chown root:root {script_path} && chmod 755 {script_path}"))
def __configure_gridengine( self ):
    """
    Configure the GridEngine daemons (master and exec) and create a default queue. Ensure
    that the queue is updated to reflect the number of cores actually available.

    Fix over the previous revision: the 'killall -9 -r sge_.*' cleanup now runs with
    warn_only=True. killall exits non-zero when no matching process exists, which made
    Fabric abort the whole provisioning run on a box where the daemons had already
    stopped cleanly; the sibling implementation of this method already guards the call.
    """
    ws = re.compile( r'\s+' )
    nl = re.compile( r'[\r\n]+' )

    def qconf( opt, **kwargs ):
        # Convenience wrapper: keyword arguments become the qconf attribute dict
        return qconf_dict( opt, kwargs )

    def qconf_dict( opt, d=None, file_name='qconf.tmp' ):
        # With a dict: upload it as a temp file and feed it to qconf.
        # Without: parse 'qconf <opt>' output into an attribute dict.
        if d:
            # qconf can't read from stdin for some reason, neither -, /dev/stdin or /dev/fd/0 works
            s = '\n'.join( ' '.join( i ) for i in d.iteritems( ) ) + '\n'
            put( remote_path=file_name, local_path=StringIO( s ) )
            sudo( ' '.join( [ 'qconf', opt, file_name ] ) )
            run( ' '.join( [ 'rm', file_name ] ) )
        else:
            return dict( tuple( ws.split( l, 1 ) )
                         for l in nl.split( run( 'SGE_SINGLE_LINE=1 qconf ' + opt ) )
                         if l and not l.startswith( '#' ) )

    # Add the user defined in fname to the Sun Grid Engine cluster.
    qconf( '-Auser',
           name=Jenkins.user, oticket='0', fshare='0', delete_time='0',
           default_project='NONE' )
    # Adds users to Sun Grid Engine user access lists (ACLs).
    sudo( 'qconf -au %s arusers' % Jenkins.user )
    # Add hosts hostname to the list of hosts allowed to submit Sun Grid Engine jobs and
    # control their behavior only.
    sudo( 'qconf -as localhost' )
    # Remove all currently defined execution hosts
    run( 'for i in `qconf -sel`; do sudo qconf -de $i ; done' )
    # Add an execution host
    qconf( '-Ae',
           hostname='localhost', load_scaling='NONE', complex_values='NONE',
           user_lists='arusers', xuser_lists='NONE', projects='NONE', xprojects='NONE',
           usage_scaling='NONE', report_variables='NONE' )
    # Add a parallel environment
    qconf( '-Ap',
           pe_name='smp', slots='999', user_lists='NONE', xuser_lists='NONE',
           start_proc_args='/bin/true', stop_proc_args='/bin/true',
           allocation_rule='$pe_slots', control_slaves='FALSE', job_is_first_task='TRUE',
           urgency_slots='min', accounting_summary='FALSE' )
    # Add a queue, the slots and processors will be adjusted dynamically, by an init script
    qconf( '-Aq',
           qname='all.q', processors='1', slots='1', hostlist='localhost', seq_no='0',
           load_thresholds='np_load_avg=1.75', suspend_thresholds='NONE', nsuspend='1',
           suspend_interval='00:05:00', priority='0', min_cpu_interval='00:05:00',
           qtype='BATCH INTERACTIVE', ckpt_list='NONE', pe_list='make smp', rerun='FALSE',
           tmpdir='/tmp', shell='/bin/bash', prolog='NONE', epilog='NONE',
           shell_start_mode='posix_compliant', starter_method='NONE',
           suspend_method='NONE', resume_method='NONE', terminate_method='NONE',
           notify='00:00:60', owner_list='NONE', user_lists='arusers',
           xuser_lists='NONE', subordinate_list='NONE', complex_values='NONE',
           projects='NONE', xprojects='NONE', calendar='NONE', initial_state='default',
           s_rt='INFINITY', h_rt='INFINITY', s_cpu='INFINITY', h_cpu='INFINITY',
           s_fsize='INFINITY', h_fsize='INFINITY', s_data='INFINITY', h_data='INFINITY',
           s_stack='INFINITY', h_stack='INFINITY', s_core='INFINITY', h_core='INFINITY',
           s_rss='INFINITY', h_rss='INFINITY', s_vmem='INFINITY', h_vmem='INFINITY' )
    # Enable on-demand scheduling. This will eliminate the long time that jobs spend waiting
    # in the qw state. There is no -Asconf so we have to fake it using -ssconf and -Msconf.
    sconf = qconf( '-ssconf' )
    sconf.update( dict( flush_submit_sec='1',
                        flush_finish_sec='1',
                        schedule_interval='0:0:1' ) )
    qconf_dict( '-Msconf', sconf )
    # Enable immediate flushing of the accounting file. The SGE batch system in Toil uses the
    # qacct program to determine the exit code of a finished job. The qacct program reads
    # the accounting file. By default, this file is written to every 15 seconds which means
    # that it may take up to 15 seconds before a finished job is seen by Toil. An
    # accounting_flush_time value of 00:00:00 causes the accounting file to be flushed
    # immediately, allowing qacct to report the status of finished jobs immediately. Again,
    # there is no -Aconf, so we fake it with -sconf and -Mconf. Also, the file name has to be
    # 'global'.
    conf = qconf( '-sconf' )
    params = dict( tuple( e.split( '=' ) ) for e in conf[ 'reporting_params' ].split( ' ' ) )
    params[ 'accounting_flush_time' ] = '00:00:00'
    conf[ 'reporting_params' ] = ' '.join( '='.join( e ) for e in params.iteritems( ) )
    qconf_dict( '-Mconf', conf, file_name='global' )
    # Register an init-script that ensures GridEngine uses localhost instead of hostname
    path = '/var/lib/gridengine/default/common/'
    self._register_init_script(
        'gridengine-pre',
        heredoc( """
            description "GridEngine pre-start configuration"
            console log
            start on filesystem
            pre-start script
            echo localhost > {path}/act_qmaster ; chown sgeadmin:sgeadmin {path}/act_qmaster
            echo localhost `hostname -f` > {path}/host_aliases
            end script""" ) )
    # Register an init-script that adjust the queue config to reflect the number of cores
    self._register_init_script(
        'gridengine-post',
        heredoc( """
            description "GridEngine post-start configuration"
            console log
            # I would rather depend on the gridengine daemons but don't know how as they are
            # started by SysV init scripts. Supposedly the 'rc' job is run last.
            start on started rc
            pre-start script
            cores=$(grep -c '^processor' /proc/cpuinfo)
            qconf -mattr queue processors $cores `qselect`
            qconf -mattr queue slots $cores `qselect`
            end script""" ) )
    # Run pre-start script
    for daemon in ('exec', 'master'):
        sudo( '/etc/init.d/gridengine-%s stop' % daemon )
    # warn_only: killall fails when no sge_* process is left — that's fine here
    sudo( "killall -9 -r 'sge_.*'", warn_only=True )  # the exec daemon likes to hang
    self._run_init_script( 'gridengine-pre' )
    for daemon in ('master', 'exec'):
        sudo( '/etc/init.d/gridengine-%s start' % daemon )
    # Run post-start script
    self._run_init_script( 'gridengine-post' )
    while 'execd is in unknown state' in run( 'qstat -f -q all.q -explain a',
                                              warn_only=True ):
        time.sleep( 1 )
def _setup_docker( self ):
    """
    Grant docker group membership to the configured users and, if data prefixes
    are configured, relocate /var/lib/docker onto the first prefix that is a
    real mount point (e.g. an ephemeral or EBS volume) via a pre-start script.
    """
    for docker_user in set( self._docker_users( ) ):
        sudo( "usermod -aG docker " + docker_user )
    prefixes = self._docker_data_prefixes( )
    if prefixes:
        # Shell-quote each prefix; the script iterates over this list
        prefixes = ' '.join( map( quote, prefixes ) )
        setup_docker_script = heredoc( """
            #!/bin/sh
            echo
            echo "This is the dockerbox pre-start script"
            set -ex
            if mountpoint -q /var/lib/docker; then
                echo "The directory '/var/lib/docker' is already mounted, exiting."
            else
                for prefix in {prefixes}; do
                    # Prefix must refer to a separate volume, e.g. ephemeral or EBS
                    if mountpoint -q "$prefix"; then
                        # Make sure Docker's aufs backend isn't mounted anymore
                        umount /var/lib/docker/aufs || true
                        if test -d "$prefix/var/lib/docker"; then
                            echo "The directory '$prefix/var/lib/docker' already exists, using it."
                        else
                            mkdir -p "$prefix/var/lib"
                            # If /var/lib/docker contains files ...
                            if python -c 'import os, sys; sys.exit( 0 if os.listdir( sys.argv[1] ) else 1 )' /var/lib/docker; then
                                # ... move it to prefix ...
                                mv /var/lib/docker "$prefix/var/lib"
                                # ... and recreate it as an empty mount point, ...
                                mkdir -p /var/lib/docker
                            else
                                # ... otherwise untar the initial backup.
                                tar -xzC "$prefix/var/lib" < /var/lib/docker.tar.gz
                            fi
                        fi
                        # Now bind-mount into /var/lib/docker
                        mount --bind "$prefix/var/lib/docker" /var/lib/docker
                        break
                    else
                        echo "The prefix directory '$prefix' is not a mount point, skipping."
                    fi
                done
            fi""" )
        dockerbox_path = '/usr/sbin/dockerbox-setup.sh'
        # NOTE(review): the unit is ordered After=docker.service even though the
        # script prepares /var/lib/docker — confirm the intended ordering relative
        # to the Docker daemon start.
        systemd_heredoc = heredoc( """
            [Unit]
            Description=Placement of /var/lib/docker
            Requires=docker.service
            After=docker.service

            [Service]
            Type=simple
            ExecStart={dockerbox_path}

            [Install]
            WantedBy=docker.service
            """ )
        self._run_init_script( 'docker', 'stop' )
        # Make sure Docker's aufs backend isn't mounted anymore
        sudo( 'umount /var/lib/docker/aufs', warn_only=True )
        # Backup initial state of data directory so we can initialize an empty ephemeral volume
        sudo( 'tar -czC /var/lib docker > /var/lib/docker.tar.gz' )
        # Then delete it and recreate it as an empty directory to serve as the bind mount point
        sudo( 'rm -rf /var/lib/docker && mkdir /var/lib/docker' )
        # Pick the init script based on system settings
        put( local_path=StringIO( setup_docker_script ),
             remote_path=dockerbox_path,
             use_sudo=True )
        sudo( "chown root:root '%s'" % dockerbox_path )
        sudo( "chmod +x '%s'" % dockerbox_path )
        self._register_init_script( 'dockerbox', systemd_heredoc )
        self._run_init_script( 'docker', 'start' )
def __configure_gridengine(self):
    """
    Configure the GridEngine daemons (master and exec) and create a default queue.
    Ensure that the queue is updated to reflect the number of cores actually
    available.
    """
    ws = re.compile(r'\s+')
    nl = re.compile(r'[\r\n]+')

    def qconf(opt, **kwargs):
        # Convenience wrapper: keyword arguments become the qconf attribute dict
        return qconf_dict(opt, kwargs)

    def qconf_dict(opt, d=None, file_name='qconf.tmp'):
        # With a dict: upload it as a temp file and feed it to qconf.
        # Without: parse 'qconf <opt>' output into an attribute dict.
        if d:
            # qconf can't read from stdin for some reason, neither -, /dev/stdin or /dev/fd/0 works
            s = '\n'.join(' '.join(i) for i in d.iteritems()) + '\n'
            put(remote_path=file_name, local_path=StringIO(s))
            sudo(' '.join(['qconf', opt, file_name]))
            run(' '.join(['rm', file_name]))
        else:
            return dict(
                tuple(ws.split(l, 1))
                for l in nl.split(run('SGE_SINGLE_LINE=1 qconf ' + opt))
                if l and not l.startswith('#'))

    # Add the user defined in fname to the Sun Grid Engine cluster.
    qconf('-Auser',
          name=Jenkins.user, oticket='0', fshare='0', delete_time='0',
          default_project='NONE')
    # Adds users to Sun Grid Engine user access lists (ACLs).
    sudo('qconf -au %s arusers' % Jenkins.user)
    # Add hosts hostname to the list of hosts allowed to submit Sun Grid Engine jobs and
    # control their behavior only.
    sudo('qconf -as localhost')
    # Remove all currently defined execution hosts
    run('for i in `qconf -sel`; do sudo qconf -de $i ; done')
    # Add an execution host
    qconf('-Ae',
          hostname='localhost', load_scaling='NONE', complex_values='NONE',
          user_lists='arusers', xuser_lists='NONE', projects='NONE', xprojects='NONE',
          usage_scaling='NONE', report_variables='NONE')
    # Add a parallel environment
    qconf('-Ap',
          pe_name='smp', slots='999', user_lists='NONE', xuser_lists='NONE',
          start_proc_args='/bin/true', stop_proc_args='/bin/true',
          allocation_rule='$pe_slots', control_slaves='FALSE',
          job_is_first_task='TRUE', urgency_slots='min', accounting_summary='FALSE')
    # Add a queue, the slots and processors will be adjusted dynamically, by an init script
    qconf('-Aq',
          qname='all.q', processors='1', slots='1', hostlist='localhost', seq_no='0',
          load_thresholds='np_load_avg=1.75', suspend_thresholds='NONE', nsuspend='1',
          suspend_interval='00:05:00', priority='0', min_cpu_interval='00:05:00',
          qtype='BATCH INTERACTIVE', ckpt_list='NONE', pe_list='make smp',
          rerun='FALSE', tmpdir='/tmp', shell='/bin/bash', prolog='NONE',
          epilog='NONE', shell_start_mode='posix_compliant', starter_method='NONE',
          suspend_method='NONE', resume_method='NONE', terminate_method='NONE',
          notify='00:00:60', owner_list='NONE', user_lists='arusers',
          xuser_lists='NONE', subordinate_list='NONE', complex_values='NONE',
          projects='NONE', xprojects='NONE', calendar='NONE', initial_state='default',
          s_rt='INFINITY', h_rt='INFINITY', s_cpu='INFINITY', h_cpu='INFINITY',
          s_fsize='INFINITY', h_fsize='INFINITY', s_data='INFINITY',
          h_data='INFINITY', s_stack='INFINITY', h_stack='INFINITY',
          s_core='INFINITY', h_core='INFINITY', s_rss='INFINITY', h_rss='INFINITY',
          s_vmem='INFINITY', h_vmem='INFINITY')
    # Enable on-demand scheduling. This will eliminate the long time that jobs spend waiting
    # in the qw state. There is no -Asconf so we have to fake it using -ssconf and -Msconf.
    sconf = qconf('-ssconf')
    sconf.update(
        dict(flush_submit_sec='1', flush_finish_sec='1', schedule_interval='0:0:1'))
    qconf_dict('-Msconf', sconf)
    # Enable immediate flushing of the accounting file. The SGE batch system in Toil uses the
    # qacct program to determine the exit code of a finished job. The qacct program reads
    # the accounting file. By default, this file is written to every 15 seconds which means
    # that it may take up to 15 seconds before a finished job is seen by Toil. An
    # accounting_flush_time value of 00:00:00 causes the accounting file to be flushed
    # immediately, allowing qacct to report the status of finished jobs immediately. Again,
    # there is no -Aconf, so we fake it with -sconf and -Mconf. Also, the file name has to be
    # 'global'.
    conf = qconf('-sconf')
    params = dict(
        tuple(e.split('=')) for e in conf['reporting_params'].split(' '))
    params['accounting_flush_time'] = '00:00:00'
    conf['reporting_params'] = ' '.join('='.join(e) for e in params.iteritems())
    qconf_dict('-Mconf', conf, file_name='global')
    # Register an init-script that ensures GridEngine uses localhost instead of hostname
    path = '/var/lib/gridengine/default/common/'
    self._register_init_script(
        'gridengine-pre',
        heredoc("""
            description "GridEngine pre-start configuration"
            console log
            start on filesystem
            pre-start script
            echo localhost > {path}/act_qmaster ; chown sgeadmin:sgeadmin {path}/act_qmaster
            echo localhost `hostname -f` > {path}/host_aliases
            end script"""))
    # Register an init-script that adjust the queue config to reflect the number of cores
    self._register_init_script(
        'gridengine-post',
        heredoc("""
            description "GridEngine post-start configuration"
            console log
            # I would rather depend on the gridengine daemons but don't know how as they are
            # started by SysV init scripts. Supposedly the 'rc' job is run last.
            start on started rc
            pre-start script
            cores=$(grep -c '^processor' /proc/cpuinfo)
            qconf -mattr queue processors $cores `qselect`
            qconf -mattr queue slots $cores `qselect`
            end script"""))
    # Run pre-start script
    for daemon in ('exec', 'master'):
        sudo('/etc/init.d/gridengine-%s stop' % daemon)
    # warn_only: killall exits non-zero when no sge_* process is left
    sudo("killall -9 -r 'sge_.*'", warn_only=True)  # the exec daemon likes to hang
    self._run_init_script('gridengine-pre')
    for daemon in ('master', 'exec'):
        sudo('/etc/init.d/gridengine-%s start' % daemon)
    # Run post-start script
    self._run_init_script('gridengine-post')
    while 'execd is in unknown state' in run(
            'qstat -f -q all.q -explain a', warn_only=True):
        time.sleep(1)
def __install_tools( self ):
    """
    Installs the mesos-master-discovery init script and its companion mesos-tools. The
    latter is a Python package distribution that's included in cgcloud-mesos as a
    resource. This is in contrast to the cgcloud agent, which is a standalone
    distribution.
    """
    tools_dir = install_dir + '/tools'
    admin = self.admin_account( )
    sudo( fmt( 'mkdir -p {tools_dir}' ) )
    # Temporarily owned by the admin so unprivileged run()/pip can populate the venv
    sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) )
    run( fmt( 'virtualenv --no-pip {tools_dir}' ) )
    run( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )
    with settings( forward_agent=True ):
        with self._project_artifacts( 'mesos-tools' ) as artifacts:
            pip( use_sudo=True,
                 path=tools_dir + '/bin/pip',
                 args=concat( 'install', artifacts ) )
    sudo( fmt( 'chown -R root:root {tools_dir}' ) )
    # Literal Python source for the constructor call, pasted into the scripts below
    mesos_tools = "MesosTools(**%r)" % dict( user=user,
                                             shared_dir=self._shared_dir( ),
                                             ephemeral_dir=ephemeral_dir,
                                             persistent_dir=persistent_dir,
                                             lazy_dirs=self.lazy_dirs )
    self.lazy_dirs = None  # make sure it can't be used anymore once we are done with it
    mesosbox_start_path = '/usr/sbin/mesosbox-start.sh'
    mesosbox_stop_path = '/usr/sbin/mesosbox-stop.sh'
    # NOTE(review): Type=simple with RemainAfterExit=true — Type=oneshot is the
    # conventional pairing for a script that runs to completion; confirm intent.
    systemd_heredoc = heredoc( """
        [Unit]
        Description=Mesos master discovery
        Requires=networking.service network-online.target
        After=networking.service network-online.target

        [Service]
        Type=simple
        ExecStart={mesosbox_start_path}
        RemainAfterExit=true
        ExecStop={mesosbox_stop_path}

        [Install]
        WantedBy=multi-user.target
        """ )
    # Runs MesosTools.start() with up to three retries, one minute apart
    mesosbox_setup_start_script = heredoc( """
        #!/bin/sh
        for i in 1 2 3; do if {tools_dir}/bin/python2.7 - <<END
        import logging
        logging.basicConfig( level=logging.INFO )
        from cgcloud.mesos_tools import MesosTools
        mesos_tools = {mesos_tools}
        mesos_tools.start()
        END
        then exit 0; fi; echo Retrying in 60s; sleep 60; done; exit 1""" )
    # NOTE(review): the '#!/{tools_dir}' shebang yields a doubled slash since
    # tools_dir is already absolute — harmless, but presumably unintended.
    mesosbox_setup_stop_script = heredoc( """
        #!/{tools_dir}/bin/python2.7
        import logging
        logging.basicConfig( level=logging.INFO )
        from cgcloud.mesos_tools import MesosTools
        mesos_tools = {mesos_tools}
        mesos_tools.stop()""" )
    put( local_path=StringIO( mesosbox_setup_start_script ),
         remote_path=mesosbox_start_path,
         use_sudo=True )
    sudo( "chown root:root '%s'" % mesosbox_start_path )
    sudo( "chmod +x '%s'" % mesosbox_start_path )
    put( local_path=StringIO( mesosbox_setup_stop_script ),
         remote_path=mesosbox_stop_path,
         use_sudo=True )
    sudo( "chown root:root '%s'" % mesosbox_stop_path )
    sudo( "chmod +x '%s'" % mesosbox_stop_path )
    self._register_init_script( "mesosbox", systemd_heredoc )
    # Enable mesosbox to start on boot
    sudo( "systemctl enable mesosbox" )
    # Explicitly start the mesosbox service to achieve creation of lazy directoriess right
    # now. This makes a generic mesosbox useful for adhoc tests that involve Mesos and Toil.
    self._run_init_script( 'mesosbox' )