Example #1
def spark_service(name, script_suffix=None):
    if script_suffix is None: script_suffix = name
    script = '{install_dir}/spark/sbin/{action}-{script_suffix}.sh'
    return Service(init_name='spark-' + name,
                   description=fmt("Spark {name} service"),
                   start_script=fmt(script, action='start'),
                   stop_script=fmt(script, action='stop'))
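The examples in this listing lean on cgcloud's fmt() helper, which behaves like str.format() but resolves {placeholders} from the caller's local and global variables in addition to any explicit keyword arguments (such as action above). The helper below is only a minimal sketch of that assumed behavior, not the actual cgcloud implementation, which for instance also accepts a skip_frames argument (see heredoc() in Example #34).

import inspect

def fmt(template, **kwargs):
    # Resolve placeholders from the caller's globals and locals; explicit
    # keyword arguments take precedence. This mirrors how the snippets in
    # this listing appear to use fmt().
    caller = inspect.currentframe().f_back
    values = dict(caller.f_globals)
    values.update(caller.f_locals)
    values.update(kwargs)
    return template.format(**values)

install_dir = '/opt/sparkbox'  # hypothetical value, for illustration only

def spark_start_script(name, script_suffix):
    script = '{install_dir}/spark/sbin/{action}-{script_suffix}.sh'
    return fmt(script, action='start')

print(spark_start_script('master', 'master'))
# -> /opt/sparkbox/spark/sbin/start-master.sh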
Example #2
    def _install_apache_package( self, remote_path, install_dir ):
        """
        Download the given package from an Apache download mirror and extract it to a child 
        directory of the directory at the given path. 

        :param str remote_path: the URL path of the package on the Apache download server and its 
               mirrors.
        
        :param str install_dir: The path to a local directory in which to create the directory 
               containing the extracted package. 
        """
        # TODO: run Fabric tasks with a different manager, so we don't need to catch SystemExit
        components = remote_path.split( '/' )
        package, tarball = components[ 0 ], components[ -1 ]
        # Some mirrors may be down or serve crap, so we may need to retry this a couple of times.
        tries = iter( xrange( 3 ) )
        while True:
            try:
                mirror_url = self.__apache_s3_mirror_url( remote_path )
                if run( "curl -Ofs '%s'" % mirror_url, warn_only=True ).failed:
                    mirror_url = self.__apache_official_mirror_url( remote_path )
                    run( "curl -Ofs '%s'" % mirror_url )
                try:
                    sudo( fmt( 'mkdir -p {install_dir}/{package}' ) )
                    sudo( fmt( 'tar -C {install_dir}/{package} '
                               '--strip-components=1 -xzf {tarball}' ) )
                    return
                finally:
                    run( fmt( 'rm {tarball}' ) )
            except SystemExit:
                if next( tries, None ) is None:
                    raise
                else:
                    log.warn( "Could not download or extract the package, retrying ..." )
Example #3
def hdfs_service( name ):
    script = '{install_dir}/hadoop/sbin/hadoop-daemon.sh {action} {name}'
    return Service(
        init_name='hdfs-' + name,
        description=fmt( "Hadoop DFS {name} service" ),
        start_script=fmt( script, action='start' ),
        stop_script=fmt( script, action='stop' ) )
Example #4
def mesos_service( name, user, *flags ):
    command = concat( '/usr/sbin/mesos-{name}', '--log_dir={log_dir}/mesos', flags )
    return Service(
        init_name='mesosbox-' + name,
        user=user,
        description=fmt( 'Mesos {name} service' ),
        command=fmt( ' '.join( command ) ) )
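Several of the service factories in this listing (this one and Example #39) splice a trailing *flags tuple into the command line through a concat() helper that is not shown. The sketch below assumes concat() simply flattens scalar arguments and iterables into one flat list; the real helper may be more general.

def concat(*args):
    # Flatten a mix of scalar arguments and iterables into a single list so a
    # trailing tuple of extra flags can be spliced into the command.
    result = []
    for arg in args:
        if isinstance(arg, (list, tuple)):
            result.extend(arg)
        else:
            result.append(arg)
    return result

assert concat('/usr/sbin/mesos-{name}', '--log_dir={log_dir}/mesos',
              ('--registry=in_memory',)) == [
    '/usr/sbin/mesos-{name}', '--log_dir={log_dir}/mesos', '--registry=in_memory']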
Example #5
    def __install_apache_package( self, path ):
        """
        Download the given file from an Apache download mirror.

        Some mirrors may be down or serve crap, so we may need to retry this a couple of times.
        """
        # TODO: run Fabric tasks with a different manager, so we don't need to catch SystemExit
        components = path.split( '/' )
        package, tarball = components[ 0 ], components[ -1 ]
        tries = iter( xrange( 3 ) )
        while True:
            try:
                mirror_url = self.__apache_s3_mirror_url( path )
                if run( "curl -Ofs '%s'" % mirror_url, warn_only=True ).failed:
                    mirror_url = self.__apache_official_mirror_url( path )
                    run( "curl -Ofs '%s'" % mirror_url )
                try:
                    sudo( fmt( 'mkdir -p {install_dir}/{package}' ) )
                    sudo( fmt( 'tar -C {install_dir}/{package} '
                               '--strip-components=1 -xzf {tarball}' ) )
                    return
                finally:
                    run( fmt( 'rm {tarball}' ) )
            except SystemExit:
                if next( tries, None ) is None:
                    raise
                else:
                    log.warn( "Could not download or extract the package, retrying ..." )
Example #6
def hdfs_service( name ):
    script = '{install_dir}/hadoop/sbin/hadoop-daemon.sh {action} {name}'
    return Service(
        init_name='hdfs-' + name,
        description=fmt( "Hadoop DFS {name} service" ),
        start_script=fmt( script, action='start' ),
        stop_script=fmt( script, action='stop' ) )
Example #7
    def __install_apache_package(self, path):
        """
        Download the given file from an Apache download mirror.

        Some mirrors may be down or serve crap, so we may need to retry this a couple of times.
        """
        # TODO: run Fabric tasks with a different manager, so we don't need to catch SystemExit
        components = path.split('/')
        package, tarball = components[0], components[-1]
        tries = iter(xrange(3))
        while True:
            try:
                mirror_url = self.__apache_s3_mirror_url(path)
                if run("curl -Ofs '%s'" % mirror_url, warn_only=True).failed:
                    mirror_url = self.__apache_official_mirror_url(path)
                    run("curl -Ofs '%s'" % mirror_url)
                try:
                    sudo(fmt('mkdir -p {install_dir}/{package}'))
                    sudo(
                        fmt('tar -C {install_dir}/{package} '
                            '--strip-components=1 -xzf {tarball}'))
                    return
                finally:
                    run(fmt('rm {tarball}'))
            except SystemExit:
                if next(tries, None) is None:
                    raise
                else:
                    log.warn(
                        "Could not download or extract the package, retrying ..."
                    )
Example #8
def spark_service( name, script_suffix=None ):
    if script_suffix is None: script_suffix = name
    script = '{install_dir}/spark/sbin/{action}-{script_suffix}.sh'
    return Service(
        init_name='spark-' + name,
        description=fmt( "Spark {name} service" ),
        start_script=fmt( script, action='start' ),
        stop_script=fmt( script, action='stop' ) )
Example #9
    def __install_tools(self):
        """
        Installs the mesos-master-discovery init script and its companion mesos-tools. The latter
        is a Python package distribution that's included in cgcloud-mesos as a resource. This is
        in contrast to the cgcloud agent, which is a standalone distribution.
        """
        tools_dir = install_dir + '/tools'
        admin = self.admin_account()
        sudo(fmt('mkdir -p {tools_dir}'))
        sudo(fmt('chown {admin}:{admin} {tools_dir}'))
        run(fmt('virtualenv --no-pip {tools_dir}'))
        run(fmt('{tools_dir}/bin/easy_install pip==1.5.2'))

        with settings(forward_agent=True):
            with self._project_artifacts('mesos-tools') as artifacts:
                pip(use_sudo=True,
                    path=tools_dir + '/bin/pip',
                    args=concat('install', artifacts))
        sudo(fmt('chown -R root:root {tools_dir}'))

        mesos_tools = "MesosTools(**%r)" % dict(user=user,
                                                shared_dir=self._shared_dir(),
                                                ephemeral_dir=ephemeral_dir,
                                                persistent_dir=persistent_dir,
                                                lazy_dirs=self.lazy_dirs)

        self.lazy_dirs = None  # make sure it can't be used anymore once we are done with it

        self._register_init_script(
            "mesosbox",
            heredoc("""
                description "Mesos master discovery"
                console log
                start on (local-filesystems and net-device-up IFACE!=lo)
                stop on runlevel [!2345]
                pre-start script
                for i in 1 2 3; do if {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.mesos_tools import MesosTools
                mesos_tools = {mesos_tools}
                mesos_tools.start()
                END
                then exit 0; fi; echo Retrying in 60s; sleep 60; done; exit 1
                end script
                post-stop script
                {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.mesos_tools import MesosTools
                mesos_tools = {mesos_tools}
                mesos_tools.stop()
                END
                end script"""))
        # Explicitly start the mesosbox service so that the lazy directories are created right
        # now. This makes a generic mesosbox useful for ad hoc tests that involve Mesos and Toil.
        self._run_init_script('mesosbox')
Example #10
    def __install_tools( self ):
        """
        Installs the mesos-master-discovery init script and its companion mesos-tools. The latter
        is a Python package distribution that's included in cgcloud-mesos as a resource. This is
        in contrast to the cgcloud agent, which is a standalone distribution.
        """
        tools_dir = install_dir + '/tools'
        admin = self.admin_account( )
        sudo( fmt( 'mkdir -p {tools_dir}' ) )
        sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) )
        run( fmt( 'virtualenv --no-pip {tools_dir}' ) )
        run( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )

        with settings( forward_agent=True ):
            with self._project_artifacts( 'mesos-tools' ) as artifacts:
                pip( use_sudo=True,
                     path=tools_dir + '/bin/pip',
                     args=concat( 'install', artifacts ) )
        sudo( fmt( 'chown -R root:root {tools_dir}' ) )

        mesos_tools = "MesosTools(**%r)" % dict( user=user,
                                                 shared_dir=self._shared_dir( ),
                                                 ephemeral_dir=ephemeral_dir,
                                                 persistent_dir=persistent_dir,
                                                 lazy_dirs=self.lazy_dirs )

        self.lazy_dirs = None  # make sure it can't be used anymore once we are done with it

        self._register_init_script(
            "mesosbox",
            heredoc( """
                description "Mesos master discovery"
                console log
                start on (local-filesystems and net-device-up IFACE!=lo)
                stop on runlevel [!2345]
                pre-start script
                for i in 1 2 3; do if {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.mesos_tools import MesosTools
                mesos_tools = {mesos_tools}
                mesos_tools.start()
                END
                then exit 0; fi; echo Retrying in 60s; sleep 60; done; exit 1
                end script
                post-stop script
                {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.mesos_tools import MesosTools
                mesos_tools = {mesos_tools}
                mesos_tools.stop()
                END
                end script""" ) )
        # Explicitly start the mesosbox service so that the lazy directories are created right
        # now. This makes a generic mesosbox useful for ad hoc tests that involve Mesos and Toil.
        self._run_init_script( 'mesosbox' )
Example #11
def mesos_service( name, script_suffix=None ):
    if script_suffix is None: script_suffix = name
    script = '/usr/sbin/mesos-{name}'
    flag = fmt("--log_dir=/var/log/mesosbox/mesos{name} ")
    if name == 'slave': flag += '--master=\'mesos-master\':5050 --no-switch_user'
    else: flag += '--registry=in_memory'
    return Service(
        init_name='mesosbox-' + name,
        description=fmt( "Mesos {name} service" ),
        start_script='',
        action=fmt(script+" "+flag),
        stop_script="")
Example #12
    def __install_yarn(self):
        # Download and extract Hadoop
        path = fmt(
            'hadoop/common/hadoop-{hadoop_version}/hadoop-{hadoop_version}.tar.gz'
        )
        self._install_apache_package(path, install_dir)

        # patch path
        with remote_open('/etc/environment', use_sudo=True) as f:
            yarn_path = fmt('{install_dir}/hadoop')
            self._patch_etc_environment(f,
                                        env_pairs=dict(HADOOP_HOME=yarn_path))
Example #13
 def _setup_package_repos(self):
     super(MesosBox, self)._setup_package_repos()
     sudo('apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF')
     codename = self.release().codename
     sudo(
         fmt('echo "deb http://repos.mesosphere.io/ubuntu {codename} main" '
             '> /etc/apt/sources.list.d/mesosphere.list'))
Example #14
 def __setup_path( self ):
     globally = True
     if globally:
         with remote_open( '/etc/environment', use_sudo=True ) as f:
             new_path = [ fmt( '{install_dir}/{package}/bin' )
                 for package in ('spark', 'hadoop') ]
             self._patch_etc_environment( f, dirs=new_path )
     else:
         for _user in (user, self.admin_account( )):
             with settings( user=_user ):
                 with remote_open( '~/.profile' ) as f:
                     f.write( '\n' )
                     for package in ('spark', 'hadoop'):
                         # We don't include sbin here because too many file names collide in
                         # Spark's and Hadoop's sbin
                         f.write( fmt( 'PATH="$PATH:{install_dir}/{package}/bin"\n' ) )
Example #15
 def __setup_application_user(self):
     sudo(
         fmt('useradd '
             '--home /home/{user} '
             '--create-home '
             '--user-group '
             '--shell /bin/bash {user}'))
Example #16
 def __setup_path( self ):
     globally = True
     if globally:
         with remote_open( '/etc/environment', use_sudo=True ) as f:
             new_path = [ fmt( '{install_dir}/{package}/bin' )
                 for package in ('spark', 'hadoop') ]
             self.__patch_etc_environment( f, new_path )
     else:
         for _user in (user, self.admin_account( )):
             with settings( user=_user ):
                 with remote_open( '~/.profile' ) as f:
                     f.write( '\n' )
                     for package in ('spark', 'hadoop'):
                         # We don't include sbin here because too many file names collide in
                         # Spark's and Hadoop's sbin
                         f.write( fmt( 'PATH="$PATH:{install_dir}/{package}/bin"\n' ) )
Example #17
 def _install_mesos_egg( self ):
     egg = 'mesos-' + self._mesos_egg_version( ) + '-py2.7-linux-x86_64.egg'
     version = self.release( ).version
     run( fmt( 'wget http://downloads.mesosphere.io/master/ubuntu/{version}/{egg}' ) )
     # We need a newer version of protobuf than what comes default on Ubuntu
     pip( 'install --upgrade protobuf', use_sudo=True )
     sudo( 'easy_install -a ' + egg )
     run( 'rm ' + egg )
Example #18
 def __install_mesos_egg( self ):
     egg = 'mesos-' + self._mesos_egg_version( ) + '-py2.7-linux-x86_64.egg'
     version = self.release( ).version
     run( fmt( 'wget http://downloads.mesosphere.io/master/ubuntu/{version}/{egg}' ) )
     # We need a newer version of protobuf than what comes default on Ubuntu
     pip( 'install --upgrade protobuf', use_sudo=True )
     sudo( 'easy_install -a ' + egg )
     run( 'rm ' + egg )
Example #19
 def __create_mesos_keypair( self ):
     self._provide_imported_keypair( ec2_keypair_name=self.__ec2_keypair_name( self.ctx ),
                                     private_key_path=fmt( "/home/{user}/.ssh/id_rsa" ),
                                     overwrite_ec2=True )
     # This trick allows us to roam freely within the cluster as the app user while still
     # being able to have keypairs in authorized_keys managed by cgcloudagent such that
     # external users can login as the app user, too. The trick depends on AuthorizedKeysFile
      # defaulting to or being set to .ssh/authorized_keys and .ssh/authorized_keys2 in sshd_config
     run( "cd .ssh && cat id_rsa.pub >> authorized_keys2" )
Example #20
 def __create_spark_keypair( self ):
     self._provide_imported_keypair( ec2_keypair_name=self.__ec2_keypair_name( self.ctx ),
                                     private_key_path=fmt( "/home/{user}/.ssh/id_rsa" ),
                                     overwrite_ec2=True )
     # This trick allows us to roam freely within the cluster as the app user while still
     # being able to have keypairs in authorized_keys managed by cgcloudagent such that
     # external users can login as the app user, too. The trick depends on AuthorizedKeysFile
      # defaulting to or being set to .ssh/authorized_keys and .ssh/authorized_keys2 in sshd_config
     run( "cd .ssh && cat id_rsa.pub >> authorized_keys2" )
Example #21
 def _setup_package_repos( self ):
     assert run( 'test -e /usr/lib/apt/methods/https', warn_only=True ).succeeded, \
         "Need HTTPS support in apt-get in order to install from the Docker repository"
     super( DockerBox, self )._setup_package_repos( )
     sudo( ' '.join( [ 'apt-key', 'adv',
                         '--keyserver', 'hkp://p80.pool.sks-keyservers.net:80',
                         '--recv-keys', '58118E89F3A912897C070ADBF76221572C52609D' ] ) )
     codename = self.release( ).codename
     sudo( fmt( 'echo deb https://apt.dockerproject.org/repo ubuntu-{codename} main '
                '> /etc/apt/sources.list.d/docker.list' ) )
Example #22
 def _setup_package_repos( self ):
     assert run( 'test -e /usr/lib/apt/methods/https', warn_only=True ).succeeded, \
         "Need HTTPS support in apt-get in order to install from the Docker repository"
     super( DockerBox, self )._setup_package_repos( )
     sudo( ' '.join( [ 'apt-key', 'adv',
                         '--keyserver', 'hkp://p80.pool.sks-keyservers.net:80',
                         '--recv-keys', '58118E89F3A912897C070ADBF76221572C52609D' ] ) )
     codename = self.release( ).codename
     sudo( fmt( 'echo deb https://apt.dockerproject.org/repo ubuntu-{codename} main '
                '> /etc/apt/sources.list.d/docker.list' ) )
Example #23
    def __install_mesosbox_tools( self ):
        """
        Installs the mesos-master-discovery init script and its companion mesos-tools. The latter
        is a Python package distribution that's included in cgcloud-mesos as a resource. This is
        in contrast to the cgcloud agent, which is a standalone distribution.
        """
        tools_dir = install_dir + '/tools'
        sudo( fmt( 'mkdir -p {tools_dir}') )
        sudo( fmt( 'virtualenv --no-pip {tools_dir}' ) )
        sudo( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )

        mesos_tools_artifacts = ' '.join( self._project_artifacts( 'mesos-tools' ) )
        with settings( forward_agent=True ):
            sudo( fmt( '{tools_dir}/bin/pip install {mesos_tools_artifacts}' ) )

        mesos_tools = "MesosTools(**%r)" % dict(user=user )
        self._register_init_script(
            "mesosbox",
            heredoc( """
                description "Mesos master discovery"
                console log
                start on runlevel [2345]
                stop on runlevel [016]
                pre-start script
                {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.mesos_tools import MesosTools
                mesos_tools = {mesos_tools}
                mesos_tools.start()
                END
                end script
                post-stop script
                {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.mesos_tools import MesosTools
                mesos_tools = {mesos_tools}
                mesos_tools.stop()
                END
                end script""" ) )
Example #24
    def __install_spark(self):
        # Download and extract Spark
        path = fmt(
            'spark/spark-{spark_version}/spark-{spark_version}-bin-hadoop{spark_hadoop_version}.tgz'
        )
        self._install_apache_package(path, install_dir)

        # Patch paths
        with remote_open('/etc/environment', use_sudo=True) as f:
            spark_home = fmt('{install_dir}/spark')
            # These two PYTHONPATH entries are also added by the 'pyspark' wrapper script.
            # We need to replicate them globally because we want to be able to just do
            # 'import pyspark' in Toil's Spark service code and associated tests.
            python_path = [
                fmt('{spark_home}/python'),
                run(fmt('ls {spark_home}/python/lib/py4j-*-src.zip')).strip()
            ]
            self._patch_etc_environment(f,
                                        env_pairs=dict(SPARK_HOME=spark_home),
                                        dirs=python_path,
                                        dirs_var='PYTHONPATH')
Example #25
 def __install_sparkbox_tools( self ):
     """
     Installs the spark-master-discovery init script and its companion spark-tools. The latter
     is a Python package distribution that's included in cgcloud-spark as a resource. This is
     in contrast to the cgcloud agent, which is a standalone distribution.
     """
     tools_dir = install_dir + '/tools'
     admin = self.admin_account( )
     sudo( fmt( 'mkdir -p {tools_dir} {persistent_dir} {ephemeral_dir}' ) )
     sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) )
     run( fmt( 'virtualenv --no-pip {tools_dir}' ) )
     run( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )
     spark_tools_artifacts = ' '.join( self._project_artifacts( 'spark-tools' ) )
     with settings( forward_agent=True ):
         run( fmt( '{tools_dir}/bin/pip install {spark_tools_artifacts}' ) )
     sudo( fmt( 'chown -R root:root {tools_dir}' ) )
     spark_tools = "SparkTools(**%r)" % dict( user=user,
                                              install_dir=install_dir,
                                              ephemeral_dir=ephemeral_dir,
                                              persistent_dir=persistent_dir,
                                              lazy_dirs=self.lazy_dirs )
     self._register_init_script(
         "sparkbox",
         heredoc( """
             description "Spark/HDFS master discovery"
             console log
             start on runlevel [2345]
             stop on runlevel [016]
             pre-start script
             {tools_dir}/bin/python2.7 - <<END
             import logging
             logging.basicConfig( level=logging.INFO )
             from cgcloud.spark_tools import SparkTools
             spark_tools = {spark_tools}
              spark_tools.start()
              END
             end script
             post-stop script
             {tools_dir}/bin/python2.7 - <<END
             import logging
             logging.basicConfig( level=logging.INFO )
             from cgcloud.spark_tools import SparkTools
             spark_tools = {spark_tools}
             spark_tools.stop()
             END
             end script""" ) )
     script_path = "/usr/local/bin/sparkbox-manage-slaves"
     put( remote_path=script_path, use_sudo=True, local_path=StringIO( heredoc( """
         #!{tools_dir}/bin/python2.7
         import sys
         import logging
         logging.basicConfig( level=logging.INFO )
         from cgcloud.spark_tools import SparkTools
         spark_tools = {spark_tools}
         spark_tools.manage_slaves( slaves_to_add=sys.argv[1:] )""" ) ) )
     sudo( fmt( "chown root:root {script_path} && chmod 755 {script_path}" ) )
Example #26
    def __install_spark(self):
        # Download and extract Spark
        path = fmt(
            'spark/spark-{spark_version}/spark-{spark_version}-bin-hadoop{spark_hadoop_version}.tgz'
        )
        self._install_apache_package(path, install_dir)

        spark_dir = var_dir + "/spark"

        # Add environment variables to spark_env.sh
        spark_env_sh_path = fmt("{install_dir}/spark/conf/spark-env.sh")
        sudo(fmt("cp {spark_env_sh_path}.template {spark_env_sh_path}"))
        spark_env = dict(
            SPARK_LOG_DIR=self._lazy_mkdir(log_dir, "spark"),
            SPARK_WORKER_DIR=self._lazy_mkdir(spark_dir, "work"),
            SPARK_LOCAL_DIRS=self._lazy_mkdir(spark_dir, "local"),
            JAVA_HOME='/usr/lib/jvm/java-8-oracle',
            SPARK_MASTER_IP='spark-master',
            HADOOP_CONF_DIR=fmt("{install_dir}/hadoop/etc/hadoop"))
        with remote_open(spark_env_sh_path, use_sudo=True) as spark_env_sh:
            spark_env_sh.write('\n')
            for name, value in spark_env.iteritems():
                spark_env_sh.write(fmt('export {name}="{value}"\n'))

        # Configure Spark properties
        spark_defaults = {
            'spark.eventLog.enabled': 'true',
            'spark.eventLog.dir': self._lazy_mkdir(spark_dir, "history"),
            'spark.master': 'spark://spark-master:7077'
        }
        spark_defaults_conf_path = fmt(
            "{install_dir}/spark/conf/spark-defaults.conf")
        sudo(
            fmt("cp {spark_defaults_conf_path}.template {spark_defaults_conf_path}"
                ))
        with remote_open(spark_defaults_conf_path,
                         use_sudo=True) as spark_defaults_conf:
            for name, value in spark_defaults.iteritems():
                spark_defaults_conf.write(fmt("{name}\t{value}\n"))

        # Make shell auto completion easier
        sudo(fmt('find {install_dir}/spark -name "*.cmd" | xargs rm'))

        # Install upstart jobs
        self.__register_upstart_jobs(spark_services)
Example #27
    def __install_spark( self ):
        # Download and extract Spark
        path = fmt( 'spark/spark-{spark_version}/spark-{spark_version}-bin-hadoop{spark_hadoop_version}.tgz' )
        self._install_apache_package( path, install_dir )

        spark_dir = var_dir + "/spark"

        # Add environment variables to spark_env.sh
        spark_env_sh_path = fmt( "{install_dir}/spark/conf/spark-env.sh" )
        sudo( fmt( "cp {spark_env_sh_path}.template {spark_env_sh_path}" ) )
        spark_env = dict(
            SPARK_LOG_DIR=self._lazy_mkdir( log_dir, "spark" ),
            SPARK_WORKER_DIR=self._lazy_mkdir( spark_dir, "work" ),
            SPARK_LOCAL_DIRS=self._lazy_mkdir( spark_dir, "local" ),
            JAVA_HOME='/usr/lib/jvm/java-8-oracle',
            SPARK_MASTER_IP='spark-master',
            HADOOP_CONF_DIR=fmt( "{install_dir}/hadoop/etc/hadoop" ),
            SPARK_PUBLIC_DNS="$(curl -s http://169.254.169.254/latest/meta-data/public-hostname)" )
        with remote_open( spark_env_sh_path, use_sudo=True ) as spark_env_sh:
            spark_env_sh.write( '\n' )
            for name, value in spark_env.iteritems( ):
                spark_env_sh.write( fmt( 'export {name}="{value}"\n' ) )

        # Configure Spark properties
        spark_defaults = {
            'spark.eventLog.enabled': 'true',
            'spark.eventLog.dir': self._lazy_mkdir( spark_dir, "history" ),
            'spark.master': 'spark://spark-master:7077'
        }
        spark_defaults_conf_path = fmt( "{install_dir}/spark/conf/spark-defaults.conf" )
        sudo( fmt( "cp {spark_defaults_conf_path}.template {spark_defaults_conf_path}" ) )
        with remote_open( spark_defaults_conf_path, use_sudo=True ) as spark_defaults_conf:
            for name, value in spark_defaults.iteritems( ):
                spark_defaults_conf.write( fmt( "{name}\t{value}\n" ) )

        # Make shell auto completion easier
        sudo( fmt( 'find {install_dir}/spark -name "*.cmd" | xargs rm' ) )

        # Install upstart jobs
        self.__register_upstart_jobs( spark_services )
Example #28
    def __install_hadoop(self):
        # Download and extract Hadoop
        path = fmt(
            'hadoop/common/hadoop-{hadoop_version}/hadoop-{hadoop_version}.tar.gz'
        )
        self._install_apache_package(path, install_dir)

        # Add environment variables to hadoop_env.sh
        hadoop_env = dict(HADOOP_LOG_DIR=self._lazy_mkdir(log_dir, "hadoop"),
                          JAVA_HOME='/usr/lib/jvm/java-8-oracle')
        hadoop_env_sh_path = fmt(
            "{install_dir}/hadoop/etc/hadoop/hadoop-env.sh")
        with remote_open(hadoop_env_sh_path, use_sudo=True) as hadoop_env_sh:
            hadoop_env_sh.write('\n')
            for name, value in hadoop_env.iteritems():
                hadoop_env_sh.write(fmt('export {name}="{value}"\n'))

        # Configure HDFS
        hdfs_dir = var_dir + "/hdfs"
        put(use_sudo=True,
            remote_path=fmt('{install_dir}/hadoop/etc/hadoop/hdfs-site.xml'),
            local_path=StringIO(
                self.__to_hadoop_xml_config({
                    'dfs.replication':
                    str(hdfs_replication),
                    'dfs.permissions':
                    'false',
                    'dfs.name.dir':
                    self._lazy_mkdir(hdfs_dir, 'name', persistent=True),
                    'dfs.data.dir':
                    self._lazy_mkdir(hdfs_dir, 'data', persistent=True),
                    'fs.checkpoint.dir':
                    self._lazy_mkdir(hdfs_dir, 'checkpoint', persistent=True),
                    'dfs.namenode.http-address':
                    'spark-master:50070',
                    'dfs.namenode.secondary.http-address':
                    'spark-master:50090'
                })))

        # Configure Hadoop
        put(use_sudo=True,
            remote_path=fmt('{install_dir}/hadoop/etc/hadoop/core-site.xml'),
            local_path=StringIO(
                self.__to_hadoop_xml_config(
                    {'fs.default.name': 'hdfs://spark-master:8020'})))

        # Make shell auto completion easier
        sudo(fmt('find {install_dir}/hadoop -name "*.cmd" | xargs rm'))

        # Install upstart jobs
        self.__register_upstart_jobs(hadoop_services)
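The __to_hadoop_xml_config() helper called here (and in Example #33) is not part of this listing. The sketch below shows what such a helper presumably produces, namely Hadoop's standard *-site.xml configuration document with one <property> element per key/value pair; the actual method may render it differently.

from xml.sax.saxutils import escape

def to_hadoop_xml_config(properties):
    # Render a dict of Hadoop configuration properties as a *-site.xml document.
    lines = ['<?xml version="1.0"?>', '<configuration>']
    for name in sorted(properties):
        lines.append('    <property>')
        lines.append('        <name>%s</name>' % escape(name))
        lines.append('        <value>%s</value>' % escape(properties[name]))
        lines.append('    </property>')
    lines.append('</configuration>')
    return '\n'.join(lines) + '\n'

print(to_hadoop_xml_config({'fs.default.name': 'hdfs://spark-master:8020'}))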
Example #29
    def __setup_agent(self):
        availability_zone = self.ctx.availability_zone
        namespace = self.ctx.namespace
        ec2_keypair_globs = ' '.join(
            shell.quote(_) for _ in self.ec2_keypair_globs)
        accounts = ' '.join([self.admin_account()] + self.other_accounts())
        admin_account = self.admin_account()
        run_dir = '/var/run/cgcloudagent'
        log_dir = '/var/log'
        install_dir = '/opt/cgcloudagent'

        # Lucid & CentOS 5 have an ancient pip
        pip('install --upgrade pip==1.5.2', use_sudo=True)
        pip('install --upgrade virtualenv', use_sudo=True)
        sudo(fmt('mkdir -p {install_dir}'))
        sudo(fmt('chown {admin_account}:{admin_account} {install_dir}'))
        # By default, virtualenv installs the latest version of pip. We want a specific
        # version, so we tell virtualenv not to install pip and then install that version of
        # pip using easy_install.
        run(fmt('virtualenv --no-pip {install_dir}'))
        run(fmt('{install_dir}/bin/easy_install pip==1.5.2'))

        with settings(forward_agent=True):
            venv_pip = install_dir + '/bin/pip'
            if self._enable_agent_metrics():
                pip(path=venv_pip, args='install psutil==3.4.1')
            with self._project_artifacts('agent') as artifacts:
                pip(
                    path=venv_pip,
                    args=concat(
                        'install',
                        '--allow-external',
                        'argparse',  # needed on CentOS 5 and 6
                        artifacts))

        sudo(fmt('mkdir {run_dir}'))
        script = self.__gunzip_base64_decode(
            run(
                fmt('{install_dir}/bin/cgcloudagent'
                    ' --init-script'
                    ' --zone {availability_zone}'
                    ' --namespace {namespace}'
                    ' --accounts {accounts}'
                    ' --keypairs {ec2_keypair_globs}'
                    ' --user root'
                    ' --group root'
                    ' --pid-file {run_dir}/cgcloudagent.pid'
                    ' --log-spill {log_dir}/cgcloudagent.out'
                    '| gzip -c | base64')))
        self._register_init_script('cgcloudagent', script)
        self._run_init_script('cgcloudagent')
Example #30
    def __setup_application_user( self ):
        sudo( fmt( 'useradd '
                   '--home /home/{user} '
                   '--create-home '
                   '--user-group '
                   '--shell /bin/bash {user}' ) )

        sudoer_file = heredoc( """
            # CGcloud - MesosBox

            # User rules for ubuntu
            mesosbox ALL=(ALL) NOPASSWD:ALL

            # User rules for ubuntu
            mesosbox ALL=(ALL) NOPASSWD:ALL
            """ )

        sudoer_file_path = '/etc/sudoers.d/89-mesosbox-user'
        put( local_path=StringIO( sudoer_file ), remote_path=sudoer_file_path, use_sudo=True, mode=0440 )
        sudo( "chown root:root '%s'" % sudoer_file_path )
Example #31
    def __setup_agent( self ):
        availability_zone = self.ctx.availability_zone
        namespace = self.ctx.namespace
        ec2_keypair_globs = ' '.join( shell.quote( _ ) for _ in self.ec2_keypair_globs )
        accounts = ' '.join( [ self.admin_account( ) ] + self.other_accounts( ) )
        admin_account = self.admin_account( )
        run_dir = '/var/run/cgcloudagent'
        log_dir = '/var/log'
        install_dir = '/opt/cgcloudagent'

        # Lucid & CentOS 5 have an ancient pip
        pip( 'install --upgrade pip==1.5.2', use_sudo=True )
        pip( 'install --upgrade virtualenv', use_sudo=True )
        sudo( fmt( 'mkdir -p {install_dir}' ) )
        sudo( fmt( 'chown {admin_account}:{admin_account} {install_dir}' ) )
        # By default, virtualenv installs the latest version of pip. We want a specific
        # version, so we tell virtualenv not to install pip and then install that version of
        # pip using easy_install.
        run( fmt( 'virtualenv --no-pip {install_dir}' ) )
        run( fmt( '{install_dir}/bin/easy_install pip==1.5.2' ) )

        with settings( forward_agent=True ):
            venv_pip = install_dir + '/bin/pip'
            if self._enable_agent_metrics( ):
                pip( path=venv_pip, args='install psutil==3.4.1' )
            with self._project_artifacts( 'agent' ) as artifacts:
                pip( path=venv_pip,
                     args=concat( 'install',
                                  '--allow-external', 'argparse',  # needed on CentOS 5 and 6
                                  artifacts ) )

        sudo( fmt( 'mkdir {run_dir}' ) )
        script = self.__gunzip_base64_decode( run( fmt(
            '{install_dir}/bin/cgcloudagent'
            ' --init-script'
            ' --zone {availability_zone}'
            ' --namespace {namespace}'
            ' --accounts {accounts}'
            ' --keypairs {ec2_keypair_globs}'
            ' --user root'
            ' --group root'
            ' --pid-file {run_dir}/cgcloudagent.pid'
            ' --log-spill {log_dir}/cgcloudagent.out'
            '| gzip -c | base64' ) ) )
        self._register_init_script( 'cgcloudagent', script )
        self._run_init_script( 'cgcloudagent' )
Example #32
 def _setup_package_repos(self):
     assert run(
         "test -e /usr/lib/apt/methods/https", warn_only=True
     ).succeeded, "Need HTTPS support in apt-get in order to install from the Docker repository"
     super(DockerBox, self)._setup_package_repos()
     sudo(
         " ".join(
             [
                 "apt-key",
                 "adv",
                 "--keyserver",
                 "hkp://p80.pool.sks-keyservers.net:80",
                 "--recv-keys",
                 "58118E89F3A912897C070ADBF76221572C52609D",
             ]
         )
     )
     codename = self.release().codename
     sudo(
         fmt(
             "echo deb https://apt.dockerproject.org/repo ubuntu-{codename} main "
             "> /etc/apt/sources.list.d/docker.list"
         )
     )
Example #33
    def __install_hadoop( self ):
        # Download and extract Hadoop
        path = fmt( 'hadoop/common/hadoop-{hadoop_version}/hadoop-{hadoop_version}.tar.gz' )
        self.__install_apache_package( path )

        # Add environment variables to hadoop_env.sh
        hadoop_env = dict(
            HADOOP_LOG_DIR=self._lazy_mkdir( log_dir, "hadoop" ),
            JAVA_HOME='/usr/lib/jvm/java-7-oracle' )
        hadoop_env_sh_path = fmt( "{install_dir}/hadoop/etc/hadoop/hadoop-env.sh" )
        with remote_open( hadoop_env_sh_path, use_sudo=True ) as hadoop_env_sh:
            hadoop_env_sh.write( '\n' )
            for name, value in hadoop_env.iteritems( ):
                hadoop_env_sh.write( fmt( 'export {name}="{value}"\n' ) )

        # Configure HDFS
        hdfs_dir = var_dir + "/hdfs"
        put( use_sudo=True,
             remote_path=fmt( '{install_dir}/hadoop/etc/hadoop/hdfs-site.xml' ),
             local_path=StringIO( self.__to_hadoop_xml_config( {
                 'dfs.replication': str( hdfs_replication ),
                 'dfs.permissions': 'false',
                 'dfs.name.dir': self._lazy_mkdir( hdfs_dir, 'name', persistent=True ),
                 'dfs.data.dir': self._lazy_mkdir( hdfs_dir, 'data', persistent=True ),
                 'fs.checkpoint.dir': self._lazy_mkdir( hdfs_dir, 'checkpoint', persistent=True ),
                 'dfs.namenode.http-address': 'spark-master:50070',
                 'dfs.namenode.secondary.http-address': 'spark-master:50090' } ) ) )

        # Configure Hadoop
        put( use_sudo=True,
             remote_path=fmt( '{install_dir}/hadoop/etc/hadoop/core-site.xml' ),
             local_path=StringIO( self.__to_hadoop_xml_config( {
                 'fs.default.name': 'hdfs://spark-master:8020' } ) ) )

        # Make shell auto completion easier
        sudo( fmt( 'find {install_dir}/hadoop -name "*.cmd" | xargs rm' ) )

        # Install upstart jobs
        self.__register_upstart_jobs( hadoop_services )
Example #34
def heredoc( s ):
    if s[ 0 ] == '\n': s = s[ 1: ]
    if s[ -1 ] != '\n': s += '\n'
    return fmt( dedent( s ), skip_frames=1 )
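heredoc() strips a leading newline, guarantees a trailing one, dedents the block, and then runs it through fmt() so {placeholders} resolve against the caller's variables (the skip_frames=1 argument presumably makes fmt() look past heredoc's own frame). The standalone snippet below mimics that behavior with the standard library for illustration; the interpolation step is simplified to an explicit str.format() call.

from textwrap import dedent

tools_dir = '/opt/sparkbox/tools'  # hypothetical value, for illustration only

s = """
    pre-start script
    {tools_dir}/bin/python2.7 - <<END
    END
    end script"""
# Equivalent of heredoc(s) under the assumptions stated above:
if s[0] == '\n':
    s = s[1:]
if s[-1] != '\n':
    s += '\n'
print(dedent(s).format(tools_dir=tools_dir))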
Example #35
 def __format_hdfs( self ):
     run( fmt( '{install_dir}/hadoop/bin/hadoop namenode -format -nonInteractive' ) )
Example #36
    def __install_tools( self ):
        """
        Installs the spark-master-discovery init script and its companion spark-tools. The latter
        is a Python package distribution that's included in cgcloud-spark as a resource. This is
        in contrast to the cgcloud agent, which is a standalone distribution.
        """
        tools_dir = install_dir + '/tools'
        admin = self.admin_account( )
        sudo( fmt( 'mkdir -p {tools_dir}' ) )
        sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) )
        run( fmt( 'virtualenv --no-pip {tools_dir}' ) )
        run( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )

        with settings( forward_agent=True ):
            with self._project_artifacts( 'spark-tools' ) as artifacts:
                pip( use_sudo=True,
                     path=tools_dir + '/bin/pip',
                     args=concat( 'install', artifacts ) )
        sudo( fmt( 'chown -R root:root {tools_dir}' ) )

        spark_tools = "SparkTools(**%r)" % dict( user=user,
                                                 shared_dir=self._shared_dir( ),
                                                 install_dir=install_dir,
                                                 ephemeral_dir=ephemeral_dir,
                                                 persistent_dir=persistent_dir,
                                                 lazy_dirs=self.lazy_dirs )

        self.lazy_dirs = None  # make sure it can't be used anymore once we are done with it

        self._register_init_script(
            "sparkbox",
            heredoc( """
                description "Spark/HDFS master discovery"
                console log
                start on (local-filesystems and net-device-up IFACE!=lo)
                stop on runlevel [!2345]
                pre-start script
                for i in 1 2 3; do if {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.spark_tools import SparkTools
                spark_tools = {spark_tools}
                spark_tools.start()
                END
                then exit 0; fi; echo Retrying in 60s; sleep 60; done; exit 1
                end script
                post-stop script
                {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.spark_tools import SparkTools
                spark_tools = {spark_tools}
                spark_tools.stop()
                END
                end script""" ) )

        script_path = "/usr/local/bin/sparkbox-manage-slaves"
        put( remote_path=script_path, use_sudo=True, local_path=StringIO( heredoc( """
            #!{tools_dir}/bin/python2.7
            import sys
            import logging
            # Prefix each log line to make it more obvious that it's the master logging when the
            # slave calls this script via ssh.
            logging.basicConfig( level=logging.INFO,
                                 format="manage_slaves: " + logging.BASIC_FORMAT )
            from cgcloud.spark_tools import SparkTools
            spark_tools = {spark_tools}
            spark_tools.manage_slaves( slaves_to_add=sys.argv[1:] )""" ) ) )
        sudo( fmt( "chown root:root {script_path} && chmod 755 {script_path}" ) )
Example #37
 def _setup_package_repos( self ):
     super( MesosBox, self )._setup_package_repos( )
     sudo( 'apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF' )
     codename = self.release( ).codename
     sudo( fmt( 'echo "deb http://repos.mesosphere.io/ubuntu {codename} main" '
                '> /etc/apt/sources.list.d/mesosphere.list' ) )
Example #38
 def __setup_application_user( self ):
     sudo( fmt( 'useradd '
                '--home /home/{user} '
                '--create-home '
                '--user-group '
                '--shell /bin/bash {user}' ) )
Example #39
def mesos_service( name, *flags ):
    command = concat( '/usr/sbin/mesos-{name}', '--log_dir={log_dir}/mesos', flags )
    return Service(
        init_name='mesosbox-' + name,
        description=fmt( 'Mesos {name} service' ),
        command=fmt( ' '.join( command ) ) )
Example #40
    def __install_tools( self ):
        """
        Installs the mesos-master-discovery init script and its companion mesos-tools. The latter
        is a Python package distribution that's included in cgcloud-mesos as a resource. This is
        in contrast to the cgcloud agent, which is a standalone distribution.
        """
        tools_dir = install_dir + '/tools'
        admin = self.admin_account( )
        sudo( fmt( 'mkdir -p {tools_dir}' ) )
        sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) )
        run( fmt( 'virtualenv --no-pip {tools_dir}' ) )
        run( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) )

        with settings( forward_agent=True ):
            with self._project_artifacts( 'mesos-tools' ) as artifacts:
                pip( use_sudo=True,
                     path=tools_dir + '/bin/pip',
                     args=concat( 'install', artifacts ) )
        sudo( fmt( 'chown -R root:root {tools_dir}' ) )

        mesos_tools = "MesosTools(**%r)" % dict( user=user,
                                                 shared_dir=self._shared_dir( ),
                                                 ephemeral_dir=ephemeral_dir,
                                                 persistent_dir=persistent_dir,
                                                 lazy_dirs=self.lazy_dirs )

        self.lazy_dirs = None  # make sure it can't be used anymore once we are done with it

        mesosbox_start_path = '/usr/sbin/mesosbox-start.sh'
        mesosbox_stop_path = '/usr/sbin/mesosbox-stop.sh'
        systemd_heredoc = heredoc( """
            [Unit]
            Description=Mesos master discovery
            Requires=networking.service network-online.target
            After=networking.service network-online.target

            [Service]
            Type=simple
            ExecStart={mesosbox_start_path}
            RemainAfterExit=true
            ExecStop={mesosbox_stop_path}

            [Install]
            WantedBy=multi-user.target
            """ )

        mesosbox_setup_start_script = heredoc( """
                #!/bin/sh
                for i in 1 2 3; do if {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.mesos_tools import MesosTools
                mesos_tools = {mesos_tools}
                mesos_tools.start()
                END
                then exit 0; fi; echo Retrying in 60s; sleep 60; done; exit 1""" )

        mesosbox_setup_stop_script = heredoc( """
                #!/{tools_dir}/bin/python2.7
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.mesos_tools import MesosTools
                mesos_tools = {mesos_tools}
                mesos_tools.stop()""" )

        put( local_path=StringIO( mesosbox_setup_start_script ), remote_path=mesosbox_start_path, use_sudo=True )
        sudo( "chown root:root '%s'" % mesosbox_start_path )
        sudo( "chmod +x '%s'" % mesosbox_start_path )

        put( local_path=StringIO( mesosbox_setup_stop_script ), remote_path=mesosbox_stop_path, use_sudo=True )
        sudo( "chown root:root '%s'" % mesosbox_stop_path )
        sudo( "chmod +x '%s'" % mesosbox_stop_path )

        self._register_init_script(
            "mesosbox",
            systemd_heredoc )

        # Enable mesosbox to start on boot
        sudo( "systemctl enable mesosbox" )

        # Explicitly start the mesosbox service so that the lazy directories are created right
        # now. This makes a generic mesosbox useful for ad hoc tests that involve Mesos and Toil.
        self._run_init_script( 'mesosbox' )
Example #41
    def __install_tools(self):
        """
        Installs the spark-master-discovery init script and its companion spark-tools. The latter
        is a Python package distribution that's included in cgcloud-spark as a resource. This is
        in contrast to the cgcloud agent, which is a standalone distribution.
        """
        tools_dir = install_dir + '/tools'
        admin = self.admin_account()
        sudo(fmt('mkdir -p {tools_dir}'))
        sudo(fmt('chown {admin}:{admin} {tools_dir}'))
        run(fmt('virtualenv --no-pip {tools_dir}'))
        run(fmt('{tools_dir}/bin/easy_install pip==1.5.2'))

        with settings(forward_agent=True):
            with self._project_artifacts('spark-tools') as artifacts:
                pip(use_sudo=True,
                    path=tools_dir + '/bin/pip',
                    args=concat('install', artifacts))
        sudo(fmt('chown -R root:root {tools_dir}'))

        spark_tools = "SparkTools(**%r)" % dict(user=user,
                                                shared_dir=self._shared_dir(),
                                                install_dir=install_dir,
                                                ephemeral_dir=ephemeral_dir,
                                                persistent_dir=persistent_dir,
                                                lazy_dirs=self.lazy_dirs)

        self.lazy_dirs = None  # make sure it can't be used anymore once we are done with it

        self._register_init_script(
            "sparkbox",
            heredoc("""
                description "Spark/HDFS master discovery"
                console log
                start on (local-filesystems and net-device-up IFACE!=lo)
                stop on runlevel [!2345]
                pre-start script
                for i in 1 2 3; do if {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.spark_tools import SparkTools
                spark_tools = {spark_tools}
                spark_tools.start()
                END
                then exit 0; fi; echo Retrying in 60s; sleep 60; done; exit 1
                end script
                post-stop script
                {tools_dir}/bin/python2.7 - <<END
                import logging
                logging.basicConfig( level=logging.INFO )
                from cgcloud.spark_tools import SparkTools
                spark_tools = {spark_tools}
                spark_tools.stop()
                END
                end script"""))

        script_path = "/usr/local/bin/sparkbox-manage-slaves"
        put(remote_path=script_path,
            use_sudo=True,
            local_path=StringIO(
                heredoc("""
            #!{tools_dir}/bin/python2.7
            import sys
            import logging
            # Prefix each log line to make it more obvious that it's the master logging when the
            # slave calls this script via ssh.
            logging.basicConfig( level=logging.INFO,
                                 format="manage_slaves: " + logging.BASIC_FORMAT )
            from cgcloud.spark_tools import SparkTools
            spark_tools = {spark_tools}
            spark_tools.manage_slaves( slaves_to_add=sys.argv[1:] )""")))
        sudo(fmt("chown root:root {script_path} && chmod 755 {script_path}"))