Example 1
def get_xml_params(f, param_names):

    if not param_names:
        return {}

    local_param_names = param_names[:]

    params = {}
    for name in local_param_names:
        params[name] = None

    with open(f) as inf:
        line = inf.readline()
        while line != "":
            for name in local_param_names:
                if "<name>" + name + "</name>" in line:
                    if "<value>" in line:
                        match = re.match('.*<value>([^<]*)</value>.*', line)
                        params[name] = match.group(1)
                    else:
                        line = inf.readline()
                        if line != "":
                            match = re.match('.*<value>([^<]*)</value>.*', line)
                            params[name] = match.group(1)
                        else:
                            logger.error("Configuration file " + f +
                                         " is not correctly formatted")

            # Advance to the next line once all parameter names have been checked
            line = inf.readline()

    return params
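A minimal usage sketch for get_xml_params; the file path and parameter names below are hypothetical, and re plus a configured logger are assumed to be imported as in the original module:

# Hypothetical call: read two Hadoop-style properties from an XML configuration file.
conf_file = "/tmp/core-site.xml"
wanted = ["fs.defaultFS", "hadoop.tmp.dir"]
values = get_xml_params(conf_file, wanted)
for name, value in values.items():
    print("%s = %s" % (name, value))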
Example 2
 def _exec_on_node(self, command, machine, log):
     logger.info(log)
     rem = ex.action.Remote(command, machine, connection_params={'user':'******'}).run()
     if rem.ok :
         logger.info("Success")
     else:
         logger.error("Failure")
Example 3
    def load(self):
        """Load the configuration file"""

        # Load the configuration file
        try:
            with open(self.config_path) as config_file:
                config = yaml.load(config_file)
        except:
            logger.error("Error reading configuration file %s" %
                         self.config_path)
            t, value, tb = sys.exc_info()
            print("%s %s" % (str(t), str(value)))
            sys.exit(23)

        # Load g5k networks
        with open(NETWORK_FILE) as network_file:
            self.networks = yaml.load(network_file)


        self.config = {}
        self.config.update(DEFAULT_CONFIG)
        self.config.update(config)

        logger.info("Configuration file loaded : %s" % self.config_path)
        logger.info(pf(self.config))

        return self.config
Example 4
    def bootstrap(self, tar_file):

        # 0. Check that required packages are present
        required_packages = "openjdk-7-jre openjdk-7-jdk"
        check_packages = TaktukRemote("dpkg -s " + required_packages,
                                      self.hosts)
        for p in check_packages.processes:
            p.nolog_exit_code = p.nolog_error = True
        check_packages.run()
        if not check_packages.ok:
            logger.info("Packages not installed, trying to install")
            install_packages = TaktukRemote(
                "export DEBIAN_MASTER=noninteractive ; " +
                "apt-get update && apt-get install -y --force-yes " +
                required_packages, self.hosts).run()
            if not install_packages.ok:
                logger.error("Unable to install the packages")

        get_java_home = SshProcess(
            'echo $(readlink -f /usr/bin/javac | '
            'sed "s:/bin/javac::")', self.master)
        get_java_home.run()
        self.java_home = get_java_home.stdout.strip()

        logger.info("All required packages are present")

        # 1. Copy Hive tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = TaktukRemote(
            "rm -rf " + self.base_dir + " " + self.conf_dir + " " +
            self.warehouse_dir + " " + self.logs_dir, self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote(
            "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
            self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf]).run()

        # 2. Move installation to base dir
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" + os.path.basename(tar_file).replace(".tar.gz", "") +
            " " + self.base_dir, self.hosts)
        mkdirs = TaktukRemote(
            "mkdir -p " + self.conf_dir + " && mkdir -p " + self.warehouse_dir,
            self.hosts)
        chmods = TaktukRemote(
            "chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir +
            " && chmod g+w " + self.warehouse_dir, self.hosts)
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()

        # 3. Specify environment variables
        command = "cat >> " + self.conf_dir + "/hive-env.sh << EOF\n"
        command += "JAVA_HOME=" + self.java_home + "\n"
        command += "HIVE_HOME=" + self.base_dir + "\n"
        command += "HIVE_CONF_DIR=" + self.conf_dir + "\n"
        command += "HADOOP_HOME=" + self.hc.base_dir + "\n"
        command += "EOF\n"
        command += "chmod +x " + self.conf_dir + "/hive-env.sh"
        action = Remote(command, self.hosts)
        action.run()
Example 5
    def get_host(self):
        """Returns the hosts from an existing reservation (if any), or from
		a new reservation"""

        # Look if there is a running job
        self.site = get_cluster_site(self.config['cluster'])
        jobs = EX5.get_current_oar_jobs([self.site])

        self.job_id = None
        for t in jobs:
            if EX5.get_oar_job_info(
                    t[0], self.site)['name'] == self.options.job_name:
                self.job_id = t[0]
                break

        if self.job_id:
            logger.info('Using job %s' % style.emph(self.job_id))
        else:
            logger.info('Making a new reservation')
            self._make_reservation(self.site)

        if not self.job_id:
            logger.error("Could not get a reservation for the job")
            exit(6)

        EX5.wait_oar_job_start(self.job_id, self.site)

        pp(EX5.get_oar_job_nodes(self.job_id, self.site))
        return EX5.get_oar_job_nodes(self.job_id, self.site)[0]
Example 6
    def deploy(self):
        # we put the nodes in the first vlan we have
        vlan = self._get_primary_vlan()
        # Deploy all the nodes
        logger.info("Deploying %s on %d nodes %s" % (self.config['env_name'],
            len(self.nodes),
            '(forced)' if self.force_deploy else ''))

        deployed, undeployed = EX5.deploy(
        EX5.Deployment(
            self.nodes,
            env_name=self.config['env_name'],
            vlan = vlan[1]
        ), check_deployed_command=not self.force_deploy)

        # Check the deployment
        if len(undeployed) > 0:
            logger.error("%d nodes where not deployed correctly:" % len(undeployed))
            for n in undeployed:
                logger.error(style.emph(n))

        # Updating nodes names with vlans
        self.nodes = sorted(translate_to_vlan(self.nodes, vlan[1]),
                            key = lambda n: n.address)
        logger.info(self.nodes)
        self.deployed_nodes = sorted(translate_to_vlan(
                                        map(lambda n: EX.Host(n), deployed), vlan[1]),
                                key = lambda n: n.address)
        logger.info(self.deployed_nodes)
        check_nodes(
                nodes = self.deployed_nodes,
                resources = self.config['resources'],
                mode = self.config['role_distribution'])

        return deployed, undeployed
Example 7
    def download_file_sdk(self, service, drive_file, pathFile):
        """Download a file's content.

        Args:
            service: Drive API service instance.
            drive_file: Drive File instance.
            pathFile: Local path where the downloaded content is written.

        Returns:
            File if successful, None otherwise.
        """
        if drive_file:
            download_url = drive_file.get('downloadUrl')
        else:
            download_url = self.retrieve_file_metadata(service,
                                                       pathFile.split('/')[-1])
        if download_url:
            try:
                resp, content = service._http.request(download_url)
                if resp.status == 200:
                    out = open(pathFile, 'wb')
                    out.write(content)
                    out.close()
                    return out
            except errors.HttpError as e:
                error = simplejson.loads(e.content)
                logger.error('Error in Download ' + error.get('code') +
                             error.get('message'))
        else:
            # The file doesn't have any content stored on Drive.
            return None
Example 8
    def upload_file_sdk(self, service, filePath, fileName, fileType):
        """Upload a file's content.

        Args:
            service: Drive API service instance.
            filePath : Path to the file you want to upload.
            fileName : Name of the new file in the drive.
            fileType : Type of the file (text, ...)

        Returns:
            File uploaded.
        """

        media_body = apiclient.http.MediaFileUpload(filePath,
                                                    mimetype=fileType,
                                                    resumable=True)
        body = {
            'title': fileName,
            'description': 'Temporary file',
        }
        new_file = None
        try:
            new_file = service.files().insert(body=body,
                                              media_body=media_body).execute()
        except errors.HttpError as e:
            error = simplejson.loads(e.content)
            logger.error('Error in Upload ' + error.get('code') +
                         error.get('message'))
        return new_file
Example 9
    def __define_ds_parameters(self, config):
        ds_parameters_names = config.options("ds_parameters")
        self.ds_parameters = {}
        ds_class_parameters = {}
        ds_classes = []
        for pn in ds_parameters_names:
            pv = config.get("ds_parameters", pn).split(",")
            if pn.startswith("ds.class."):
                ds_class_parameters[pn[len("ds.class."):]] = \
                    [v.strip() for v in pv]
            elif pn == "ds.class":
                ds_classes = [v.strip() for v in pv]
            else:
                self.ds_parameters[pn] = [v.strip() for v in pv]

        # Create ds configurations
        self.ds_config = []
        for (idx, ds_class) in enumerate(ds_classes):
            this_ds_params = {}
            for pn, pv in ds_class_parameters.iteritems():
                if len(pv) == len(ds_classes):
                    if pv[idx]:
                        this_ds_params[pn] = pv[idx]
                elif len(pv) == 1:
                    this_ds_params[pn] = pv[0]
                else:
                    logger.error("Number of ds_class does not much number of " +
                                 pn)
                    raise ParameterException("Number of ds_class does not much "
                                             "number of " + pn)

            self.ds_config.append((ds_class, this_ds_params))

        self.ds_parameters["ds.config"] = range(0, len(self.ds_config))
Example 10
    def __init__(self, jar_path, params=None, lib_paths=None):
        """Creates a new Hadoop MapReduce jar job with the given parameters.

        Args:
          jar_path (str):
            The local path of the jar containing the job.
          params (list of str, optional):
            The list of parameters of the job.
          lib_paths (list of str, optional):
            The list of local paths to the libraries used by the job.
        """

        if not params:
            params = []
        if not lib_paths:
            lib_paths = []

        # Check if the jar file exists
        if not os.path.exists(jar_path):
            logger.error("Jar file " + jar_path + " does not exist")
            raise HadoopJobException("Jar file " + jar_path +
                                     " does not exist")

        # Check if the libraries exist
        for lp in lib_paths:
            if not os.path.exists(lp):
                logger.warn("Lib file " + lp + " does not exist")
                return  # TODO - exception

        self.jar_path = jar_path
        self.params = params
        self.lib_paths = lib_paths
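A hypothetical instantiation sketch for the constructor above; the enclosing class name is not shown in the snippet, so HadoopJarJob and the paths below are assumptions:

# Assumed class name and file paths, for illustration only.
job = HadoopJarJob("/tmp/wordcount.jar",
                   params=["/user/input", "/user/output"],
                   lib_paths=["/tmp/lib/commons-cli.jar"])
# A missing jar raises HadoopJobException; a missing library only logs a warning.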
Example 11
    def bootstrap(self, tar_file):

        # 0. Check that required packages are present
        required_packages = "openjdk-7-jre openjdk-7-jdk"
        check_packages = TaktukRemote("dpkg -s " + required_packages,
                                      self.hosts)
        for p in check_packages.processes:
            p.nolog_exit_code = p.nolog_error = True
        check_packages.run()
        if not check_packages.ok:
            logger.info("Packages not installed, trying to install")
            install_packages = TaktukRemote(
                "export DEBIAN_MASTER=noninteractive ; " +
                "apt-get update && apt-get install -y --force-yes " +
                required_packages, self.hosts).run()
            if not install_packages.ok:
                logger.error("Unable to install the packages")

        get_java_home = SshProcess('echo $(readlink -f /usr/bin/javac | '
                                   'sed "s:/bin/javac::")', self.master)
        get_java_home.run()
        self.java_home = get_java_home.stdout.strip()

        logger.info("All required packages are present")

        # 1. Copy Spark tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = TaktukRemote("rm -rf " + self.base_dir +
                               " " + self.conf_dir,
                               self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote(
            "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
            self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf]).run()

        # 2. Move installation to base dir
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" + os.path.basename(tar_file).replace(".tgz", "") + " " +
            self.base_dir,
            self.hosts)
        mkdirs = TaktukRemote("mkdir -p " + self.conf_dir, self.hosts)
        chmods = TaktukRemote("chmod g+w " + self.base_dir +
                              " && chmod g+w " + self.conf_dir,
                              self.hosts)
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()

        # 3. Specify environment variables
        command = "cat >> " + self.conf_dir + "/spark-env.sh << EOF\n"
        command += "JAVA_HOME=" + self.java_home + "\n"
        command += "SPARK_LOG_DIR=" + self.logs_dir + "\n"
        if self.hc:
            command += "HADOOP_CONF_DIR=" + self.hc.conf_dir + "\n"
        if self.mode == YARN_MODE:
            command += "YARN_CONF_DIR=" + self.hc.conf_dir + "\n"
        command += "EOF\n"
        command += "chmod +x " + self.conf_dir + "/spark-env.sh"
        action = Remote(command, self.hosts)
        action.run()
Example 12
    def __init__(self, params):
        """Create a static dataset with the given params.
        
        Args:
          params (dict):
            A dictionary with the parameters. This dataset needs the following
            parameters:
            - local_path: The path to the directory where the dataset is stored
                          locally.
            - pre_load_function: A function to be applied after transfers and
                                 before loading to dfs (usually decompression).
        """

        super(StaticDataset, self).__init__(params)

        local_path = params["local_path"]
        if not os.path.exists(local_path):
            logger.error("The dataset local dir does not exist")

        if "pre_load_function" in params:
            pre_load_function_name = params["pre_load_function"]
            self.pre_load_function = import_function(pre_load_function_name)
        else:
            self.pre_load_function = None

        self.local_path = local_path
Example 13
 def make_reservation(self):
     """Perform a reservation of the required number of nodes"""
     logger.info('Performing reservation')
     starttime = int(time.time() + timedelta_to_seconds(datetime.timedelta(minutes=1)))
     endtime = int(starttime + timedelta_to_seconds(datetime.timedelta(days=3,
                                                              minutes=1)))
     startdate, n_nodes = self._get_nodes(starttime, endtime)
     while not n_nodes:
          logger.info('Not enough nodes found between %s and %s, ' + \
                     'increasing time window',
                     format_date(starttime), format_date(endtime))
         starttime = endtime
         endtime = int(starttime + timedelta_to_seconds(datetime.timedelta(days=3,
                                                             minutes=1)))
         startdate, n_nodes = self._get_nodes(starttime, endtime)
         if starttime > int(time.time() + timedelta_to_seconds(
                                         datetime.timedelta(weeks=6))):
             logger.error('There are not enough nodes on %s for your ' + \
                          'experiments, abort ...', self.cluster)
             exit()
     jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                 name=self.__class__.__name__)
     sub = jobs_specs[0][0]
     sub.walltime = self.options.walltime
     sub.additional_options = '-t deploy'
     sub.reservation_date = startdate
     (self.oar_job_id, self.frontend) = oarsub(jobs_specs)[0]
     logger.info('Startdate: %s, n_nodes: %s', format_date(startdate),
                 str(n_nodes))
Example 14
 def make_reservation(self):
     """Perform a reservation of the required number of nodes"""
     logger.info('Performing reservation')
     starttime = int(time.time() +
                     timedelta_to_seconds(datetime.timedelta(minutes=1)))
     endtime = int(
         starttime +
         timedelta_to_seconds(datetime.timedelta(days=3, minutes=1)))
     startdate, n_nodes = self._get_nodes(starttime, endtime)
     while not n_nodes:
          logger.info('Not enough nodes found between %s and %s, ' + \
                     'increasing time window',
                     format_date(starttime), format_date(endtime))
         starttime = endtime
         endtime = int(
             starttime +
             timedelta_to_seconds(datetime.timedelta(days=3, minutes=1)))
         startdate, n_nodes = self._get_nodes(starttime, endtime)
         if starttime > int(time.time() +
                            timedelta_to_seconds(datetime.timedelta(
                                weeks=6))):
             logger.error('There are not enough nodes on %s for your ' + \
                          'experiments, abort ...', self.cluster)
             exit()
     jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                 name=self.__class__.__name__)
     sub = jobs_specs[0][0]
     sub.walltime = self.options.walltime
     sub.additional_options = '-t deploy'
     sub.reservation_date = startdate
     (self.oar_job_id, self.frontend) = oarsub(jobs_specs)[0]
     logger.info('Startdate: %s, n_nodes: %s', format_date(startdate),
                 str(n_nodes))
Example 15
    def __init__(self, jar_path, params=None, lib_paths=None):
        """Creates a new Hadoop MapReduce jar job with the given parameters.

        Args:
          jar_path (str):
            The local path of the jar containing the job.
          params (list of str, optional):
            The list of parameters of the job.
          lib_paths (list of str, optional):
            The list of local paths to the libraries used by the job.
        """

        if not params:
            params = []
        if not lib_paths:
            lib_paths = []

        # Check if the jar file exists
        if not os.path.exists(jar_path):
            logger.error("Jar file " + jar_path + " does not exist")
            raise HadoopJobException("Jar file " + jar_path + " does not exist")

        # Check if the libraries exist
        for lp in lib_paths:
            if not os.path.exists(lp):
                logger.warn("Lib file " + lp + " does not exist")
                return  # TODO - exception

        self.jar_path = jar_path
        self.params = params
        self.lib_paths = lib_paths
Example 16
    def retrieve_file_metadata(self, service, fname):
        """Retrieve a list of File resources.

        Args:
        service: Drive API service instance.
        Returns:
        List of File resources.
        """
        result = []
        page_token = None
        while True:
            try:
                param = {'maxResults': 1, 'q': "title = '" + fname + "'"}
                if page_token:
                    param['pageToken'] = page_token

                files = service.files().list(**param).execute()
                result.extend(files['items'])
                page_token = files.get('nextPageToken')
                if not page_token:
                    break
            except errors.HttpError as error:
                logger.error('An error occurred: %s' % error)
                break
        print result
        return result[0]['downloadUrl']
Example 17
    def __init__(self, params):
        """Create a static dataset with the given params.
        
        Args:
          params (dict):
            A dictionary with the parameters. This dataset needs the following
            parameters:
            - local_path: The path to the directory where the dataset is stored
                          locally.
            - pre_load_function: A function to be applied after transfers and
                                 before loading to dfs (usually decompression).
        """

        super(StaticDataset, self).__init__(params)

        local_path = params["local_path"]
        if not os.path.exists(local_path):
            logger.error("The dataset local dir does not exist")

        if "pre_load_function" in params:
            pre_load_function_name = params["pre_load_function"]
            self.pre_load_function = import_function(pre_load_function_name)
        else:
            self.pre_load_function = None

        self.local_path = local_path
Example 18
 def prepare_global_vlan(self):
     vlans = g5k.get_oar_job_kavlan(*self.globalvlan_job)
     if len(vlans) > 0:
         self.global_vlan = vlans[0]
         logger.debug("Global VLAN ID: {}".format(self.global_vlan))
     else:
         logger.error("Could not reserve global VLAN")
         sys.exit(1)
Example 19
    def _check_initialization(self):
        """ Check whether the cluster is initialized and raise and exception if
        not.
        """

        if not self.initialized:
            logger.error("The cluster should be initialized")
            raise ClusterNotInitializedException("The cluster should be initialized")
Example 20
 def delete_file(self, client, fname):
     try:
         client.file_delete(fname)
     except ErrorResponse as e:
         logger.error('Error in Delete ' + str(e.status) + ' ' + e.reason +
                      ' : ' + e.error_msg)
         pass
     return True
Example 21
 def _check_version_compliance(self):
     if self.get_major_version() != 2:
         logger.error("Version of HadoopCluster is not compliant with the "
                     "distribution provided in the bootstrap option. Use "
                     "the appropiate parameter for --version when creating "
                     "the cluster or use another distribution.")
         return False
     else:
         return True
Example 22
 def _check_version_compliance(self):
     if self.get_major_version() >= 2:
         logger.error("Version of HadoopCluster is not compliant with the "
                      "distribution provided in the bootstrap option. Use "
                      "the appropriate parameter for --version when "
                      "creating the cluster or use another distribution.")
         return False
     else:
         return True
Example 23
def replace_in_xml_file(f, name, value, create_if_absent=False):
    """Assign the given value to variable name in xml file f.

    Args:
      f (str):
        The path of the file.
      name (str):
        The name of the variable.
      value (str):
        The new value to be assigned.
      create_if_absent (bool, optional):
        If True, the variable will be created at the end of the file in case
        it was not already present.

    Returns (bool):
      True if the assignment has been made, False otherwise.
    """

    changed = False

    (_, temp_file) = tempfile.mkstemp("", "xmlf-", "/tmp")

    inf = open(f)
    outf = open(temp_file, "w")
    line = inf.readline()
    while line != "":
        if "<name>" + name + "</name>" in line:
            if "<value>" in line:
                outf.write(__replace_line(line, value))
                changed = True
            else:
                outf.write(line)
                line = inf.readline()
                if line != "":
                    outf.write(__replace_line(line, value))
                    changed = True
                else:
                    logger.error("Configuration file " + f +
                                 " is not correctly formatted")
        else:
            if ("</configuration>" in line and
                    create_if_absent and not changed):
                outf.write("  <property><name>" + name + "</name>" +
                           "<value>" + str(value) + "</value></property>\n")
                outf.write(line)
                changed = True
            else:
                outf.write(line)
        line = inf.readline()
    inf.close()
    outf.close()

    if changed:
        shutil.copyfile(temp_file, f)
    os.remove(temp_file)

    return changed
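A short usage sketch for replace_in_xml_file; the file path and property below are hypothetical, and the module-level helpers used by the function (__replace_line, tempfile, shutil, os, logger) are assumed to be available as above:

# Hypothetical call: set (or create) a property in a Hadoop-style XML file.
changed = replace_in_xml_file("/tmp/hdfs-site.xml", "dfs.replication", "2",
                              create_if_absent=True)
if not changed:
    logger.error("Could not set dfs.replication in /tmp/hdfs-site.xml")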
Example 24
def replace_in_xml_file(f, name, value, create_if_absent=False):
    """Assign the given value to variable name in xml file f.

    Args:
      f (str):
        The path of the file.
      name (str):
        The name of the variable.
      value (str):
        The new value to be assigned.
      create_if_absent (bool, optional):
        If True, the variable will be created at the end of the file in case
        it was not already present.

    Returns (bool):
      True if the assignment has been made, False otherwise.
    """

    changed = False

    (_, temp_file) = tempfile.mkstemp("", "xmlf-", "/tmp")

    inf = open(f)
    outf = open(temp_file, "w")
    line = inf.readline()
    while line != "":
        if "<name>" + name + "</name>" in line:
            if "<value>" in line:
                outf.write(__replace_line(line, value))
                changed = True
            else:
                outf.write(line)
                line = inf.readline()
                if line != "":
                    outf.write(__replace_line(line, value))
                    changed = True
                else:
                    logger.error("Configuration file " + f +
                                 " is not correctly formatted")
        else:
            if ("</configuration>" in line and create_if_absent
                    and not changed):
                outf.write("  <property><name>" + name + "</name>" +
                           "<value>" + str(value) + "</value></property>\n")
                outf.write(line)
                changed = True
            else:
                outf.write(line)
        line = inf.readline()
    inf.close()
    outf.close()

    if changed:
        shutil.copyfile(temp_file, f)
    os.remove(temp_file)

    return changed
Example 25
 def _check_version_compliance(self):
     version = self.get_version()
     if not version.startswith("Hadoop 2."):
         logger.error(
             "Version of HadoopCluster is not compliant with the "
             "distribution provided in the bootstrap option. Use "
             "the appropiate parameter for --version when creating "
             "the cluster or use another distribution.")
         return False
     else:
         return True
Example 26
    def workflow(self, comb):
        """
            Compute one application launch 
            using a given parameter group
        """
        comb_ok = False
        try:
            # Generate configuration file needed by MPI processes
            logger.info("Generating assembly file...")
            py = comb['cores'] / comb['px']
            prepare = Process('cd %s && python %s %d %d %d %d %d %s app.lad' % 
                (self.workingPath, self.genLadScript, comb['datasize'], comb['datasize'], 
                    comb['datasize'], comb['px'], py, comb['transposition']))
            prepare.shell = True
            prepare.run()

            # Generate the MPI host file
            mfile = self.generate_machine_file()

            # Start L2C
            lad = "./app.lad"
            logger.info("Computing...")
            res = Process("export OAR_JOB_KEY_FILE=~/.oar_key ; cd %s && l2c_loader -M,-machinefile,%s --mpi -c %d %s" % (self.workingPath, mfile, comb['cores'], lad))
            res.shell = True
            res.stdout_handlers.append(os.path.join(self.result_dir, slugify(comb) + '.out'))
            res.stdout_handlers.append(sys.stdout)
            res.stderr_handlers.append(os.path.join(self.result_dir, slugify(comb) + '.err'))
            res.stderr_handlers.append(sys.stderr)
            res.run()
            if not res.ok:
                logger.error('Bad L2C termination')
                raise Exception('Bad L2C termination')
            if len(res.stderr) > 0: # WARNING: when L2C cannot find the LAD file or something strange like this
                logger.warning('Not empty error output')

            # Clean configuration files
            logger.info("Removing assembly files...")
            res = Process('cd %s && rm -f app.lad*' % self.workingPath)
            res.shell = True
            res.run()
                
            comb_ok = True
        except Exception:
            pass
        finally:
            if comb_ok:
                self.sweeper.done(comb)
                logger.info(style.host(slugify(comb)) + ' has been done')
            else:
                self.sweeper.cancel(comb)
                logger.warning(style.host(slugify(comb)) + ' has been canceled')
        
            logger.info(style.step('%s Remaining'),
                        len(self.sweeper.get_remaining()))
Example 27
 def _check_version_compliance(self):
     version = self.get_version()
     if not (version.startswith("Hadoop 0.") or
             version.startswith("Hadoop 1.")):
         logger.error("Version of HadoopCluster is not compliant with the "
                     "distribution provided in the bootstrap option. Use "
                     "the appropiate parameter for --version when creating "
                     "the cluster or use another distribution.")
         return False
     else:
         return True
Example 28
 def finish_deploy_server(self, deploy_process):
     deployed = deploy_process.deployed_hosts
     if len(deployed) == 0:
         logger.error("Could not deploy server")
         sys.exit(1)
     if self.multi_site():
         logger.debug("Deployed, transforming {} into {}".format(
             self.server.address,
             g5k.get_kavlan_host_name(self.server.address,
                                      self.global_vlan)))
         self.server.address = g5k.get_kavlan_host_name(
             self.server.address, self.global_vlan)
Example 29
    def _check_initialization(self):
        """ Check whether the cluster is initialized and raise and exception if
        not.
        
        Raises:
          HadoopNotInitializedException:
            If self.initialized = False
        """

        if not self.initialized:
            logger.error("The cluster should be initialized")
            raise HadoopNotInitializedException(
                "The cluster should be initialized")
Example 30
    def delete_file_sdk(self, service, file_id):
        """Permanently delete a file, skipping the trash.

        Args:
            service: Drive API service instance.
            file_id: ID of the file to delete.
        """
        try:
            service.files().delete(fileId=file_id).execute()
        except errors.HttpError as e:
            error = simplejson.loads(e.content)
            logger.error('Error in delete ' + error.get('code') +
                         error.get('message'))
Example 31
    def _check_initialization(self):
        """ Check whether the cluster is initialized and raise and exception if
        not.
        
        Raises:
          HadoopNotInitializedException:
            If self.initialized = False
        """

        if not self.initialized:
            logger.error("The cluster should be initialized")
            raise HadoopNotInitializedException(
                "The cluster should be initialized")
Example 32
 def finish_deploy_vmhosts(self, deploy_process):
     deployed = deploy_process.deployed_hosts
     if len(deployed) != len(self.vm_hosts):
         logger.error(
             "Could not deploy all VM hosts, only {}/{} deployed".format(
                 len(deployed), len(self.vm_hosts)))
         sys.exit(1)
     if self.multi_site():
         logger.debug(
             "Deployed, transforming VM hosts name to be able to reach them in the new VLAN"
         )
         for host in self.vm_hosts:
             host.address = g5k.get_kavlan_host_name(
                 host.address, self.global_vlan)
Example 33
    def bootstrap(self, tar_file):
        """Install Cassandra in all cluster nodes from the specified tar.gz file.

        Args:
          tar_file (str):
            The file containing Cassandra binaries.
        """

        # 0. Check that required packages are present
        required_packages = "openjdk-7-jre openjdk-7-jdk"
        check_packages = TaktukRemote("dpkg -s " + required_packages, self.hosts)
        for p in check_packages.processes:
            p.nolog_exit_code = p.nolog_error = True
        check_packages.run()
        if not check_packages.ok:
            logger.info("Packages not installed, trying to install")
            install_packages = TaktukRemote(
                "export DEBIAN_MASTER=noninteractive ; "
                + "apt-get update && apt-get install -y --force-yes "
                + required_packages,
                self.hosts,
            ).run()
            if not install_packages.ok:
                logger.error("Unable to install the packages")

        get_java_home = SshProcess("echo $(readlink -f /usr/bin/javac | " 'sed "s:/bin/javac::")', self.master)
        get_java_home.run()
        self.java_home = get_java_home.stdout.strip()

        logger.info("All required packages are present")

        # 1. Copy Cassandra tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = TaktukRemote("rm -rf " + self.base_dir + " " + self.conf_dir + " " + self.logs_dir, self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote("tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp", self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf]).run()

        # 2. Move installation to base dir and create other dirs
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" + os.path.basename(tar_file).replace(".tar.gz", "") + " " + self.base_dir, self.hosts
        )
        mkdirs = TaktukRemote("mkdir -p " + self.conf_dir + " && mkdir -p " + self.logs_dir, self.hosts)
        chmods = TaktukRemote(
            "chmod g+w " + self.base_dir + " && chmod g+w " + self.conf_dir + " && chmod g+w " + self.logs_dir,
            self.hosts,
        )
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()
Example 34
 def upload_file_sdk(self, client, filePath, fileName):
     """Upload a file's content.
         Args:
              client: Dropbox client instance.
             filePath : Path to the file you want to upload.
             fileName : Name of the new file in the drive.
         """
     f = open(filePath, 'rb')
     try:
         client.put_file(fileName, f)
     except ErrorResponse as e:
         logger.error('Error in Upload ' + str(e.status) + ' ' + e.reason +
                      ' : ' + e.error_msg)
         pass
     return True
Example 35
    def workflow(self, comb, host, comb_dir):
        """ """
        comb_ok = False
        thread_name = style.Thread(host.split('.')[0]) + ': '
        logger.info(thread_name + 'Starting combination ' + slugify(comb))

        try:
            logger.info(thread_name + 'Generate conf file')
            param_str = self.create_string(comb)

            Remote(
                "python /home/Work/sgcbntier/paasage_demo/xml_gen_execo.py --cb "
                + param_str, [host]).run()

            logger.info(thread_name + 'Run code')
            Remote(
                "cd /home/Work/sgcbntier/paasage_demo/ ; python run_all_execo.py --cb %s"
                % param_str, [host]).run()

            logger.info(thread_name + 'Get results')

            traceFile = "ntier_" + param_str
            get_results = Get([host], [
                "/home/Work/sgcbntier/paasage_demo/csv/REQTASK_" + traceFile +
                ".csv"
            ],
                              local_location=comb_dir).run()

            for p in get_results.processes:
                if not p.ok:
                    logger.error(
                        host +
                        ': Unable to retrieve the files for combination %s',
                        slugify(comb))
                    exit()

            comb_ok = True
        finally:
            if comb_ok:
                self.sweeper.done(comb)
                logger.info(thread_name + ': ' + slugify(comb) + \
                             ' has been done')
            else:
                self.sweeper.cancel(comb)
                logger.warning(thread_name + ': ' + slugify(comb) + \
                            ' has been canceled')
        logger.info(style.step('%s Remaining'),
                    len(self.sweeper.get_remaining()))
Example 36
    def make_reservation(self):
        """Perform a reservation of the required number of nodes."""

        logger.info('Performing reservation')
        now = int(time.time() +
                  timedelta_to_seconds(datetime.timedelta(minutes=1)))
        starttime = now
        endtime = int(
            starttime +
            timedelta_to_seconds(datetime.timedelta(days=3, minutes=1)))
        startdate, n_nodes = self._get_nodes(starttime, endtime)

        search_time = 3 * 24 * 60 * 60  # 3 days
        walltime_seconds = get_seconds(self.options.walltime)

        iteration = 0
        while not n_nodes:
            iteration += 1
            logger.info(
                'Not enough nodes found between %s and %s, ' +
                'increasing time window', format_date(starttime),
                format_date(endtime))
            starttime = max(now,
                            now + iteration * search_time - walltime_seconds)
            endtime = int(now + (iteration + 1) * search_time)

            startdate, n_nodes = self._get_nodes(starttime, endtime)
            if starttime > int(time.time() +
                               timedelta_to_seconds(datetime.timedelta(
                                   weeks=6))):
                logger.error(
                    'There are not enough nodes on %s for your ' +
                    'experiments, abort ...', self.cluster)
                exit()

        jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                    name=self.__class__.__name__)
        sub = jobs_specs[0][0]
        sub.walltime = self.options.walltime
        if self.use_kadeploy:
            sub.additional_options = '-t deploy'
        else:
            sub.additional_options = '-t allow_classic_ssh'
        sub.reservation_date = startdate
        (self.oar_job_id, self.frontend) = oarsub(jobs_specs)[0]
        logger.info('Startdate: %s, n_nodes: %s, job_id: %s',
                    format_date(startdate), str(n_nodes), str(self.oar_job_id))
Example 37
 def download_file_sdk(self, client, fileName, filePath):
     """Download a file's content.
     Args:
         client: Dropbox client instance.
         fileName: Name of the file you want to download.
         filePath: Name of the new local file.
     """
     try:
         f, _ = client.get_file_and_metadata(fileName)
         out = open(filePath, 'wb')
         out.write(f.read())
         out.close()
     except ErrorResponse as e:
         logger.error('Error in Download ' + str(e.status) + ' ' +
                      e.reason + ' : ' + e.error_msg)
         pass
     return True
Example 38
    def make_reservation(self):
        """Perform a reservation of the required number of nodes."""

        logger.info('Performing reservation')
        now = int(time.time() +
                  timedelta_to_seconds(datetime.timedelta(minutes=1)))
        starttime = now
        endtime = int(starttime +
                      timedelta_to_seconds(datetime.timedelta(days=3,
                                                              minutes=1)))
        startdate, n_nodes = self._get_nodes(starttime, endtime)

        search_time = 3 * 24 * 60 * 60  # 3 days
        walltime_seconds = get_seconds(self.options.walltime)

        iteration = 0
        while not n_nodes:
            iteration += 1
            logger.info('Not enough nodes found between %s and %s, ' +
                        'increasing time window',
                        format_date(starttime), format_date(endtime))
            starttime = max(now, now +
                            iteration * search_time - walltime_seconds)
            endtime = int(now + (iteration + 1) * search_time)

            startdate, n_nodes = self._get_nodes(starttime, endtime)
            if starttime > int(time.time() + timedelta_to_seconds(
                    datetime.timedelta(weeks=6))):
                logger.error('There are not enough nodes on %s for your ' +
                             'experiments, abort ...', self.cluster)
                exit()

        jobs_specs = get_jobs_specs({self.cluster: n_nodes},
                                    name=self.__class__.__name__)
        sub = jobs_specs[0][0]
        sub.walltime = self.options.walltime
        if self.use_kadeploy:
            sub.additional_options = '-t deploy'
        else:
            sub.additional_options = '-t allow_classic_ssh'
        sub.reservation_date = startdate
        (self.oar_job_id, self.frontend) = oarsub(jobs_specs)[0]
        logger.info('Startdate: %s, n_nodes: %s, job_id: %s',
                    format_date(startdate),
                    str(n_nodes), str(self.oar_job_id))
Example 39
    def __define_test_parameters(self, config):
        if config.has_section("test_parameters"):
            test_parameters_names = config.options("test_parameters")
            if "test.stats_path" in test_parameters_names:
                self.stats_manager.stats_path = \
                    config.get("test_parameters", "test.stats_path")
                if not os.path.exists(self.stats_manager.stats_path):
                    os.makedirs(self.stats_manager.stats_path)

            if "test.summary_file" in test_parameters_names:
                self.stats_manager.summary_file_name = \
                    config.get("test_parameters", "test.summary_file")

            if "test.ds_summary_file" in test_parameters_names:
                self.stats_manager.ds_summary_file_name = \
                    config.get("test_parameters", "test.ds_summary_file")

            if "test.num_repetitions" in test_parameters_names:
                self.comb_manager.num_repetitions = \
                    int(config.get("test_parameters", "test.num_repetitions"))

            if "test.jar_file" in test_parameters_names:
                self.jar_file = config.get("test_parameters", "test.jar_file")

            if "test.remote_dir" in test_parameters_names:
                self.remote_dir = config.get("test_parameters",
                                             "test.remote_dir")

            if "test.use_kadeploy" in test_parameters_names:
                self.use_kadeploy = config.getboolean("test_parameters",
                                                      "test.use_kadeploy")

            if self.use_kadeploy:
                if "test.kadeploy.env_file" in test_parameters_names:
                    self.kadeploy_env_file = \
                        config.get("test_parameters", "test.kadeploy.env_file")
                elif "test.kadeploy.env_name" in test_parameters_names:
                    self.kadeploy_env_name = \
                        config.get("test_parameters", "test.kadeploy.env_name")
                else:
                    logger.error("Either test.kadeploy.env_file or "
                                 "test.kadeploy.env_name should be specified")
                    raise ParameterException("Either test.kadeploy.env_file or "
                                             "test.kadeploy.env_name should be "
                                             "specified")
Example 40
 def prepare_bench(self):
     """bench configuration and compilation, copy binaries to frontends
     
     return True if preparation is ok
     """
     logger.info("preparation: configure and compile benchmark")
     # the involved sites. We will do the compilation on the first of these.
     sites = list(set(map(get_cluster_site, self.parameters['cluster'])))
     # generate the bench compilation configuration
     bench_list = '\n'.join([ 'lu\t%s\t%s' % (size, n_core)
                              for n_core in self.parameters['n_core']
                              for size in self.parameters['size'] ])
     # Reserving a node because compiling on the frontend is forbidden
     # and because we need mpif77
     jobs = oarsub([(OarSubmission(resources = "nodes=1",
                                   job_type = 'allow_classic_ssh',
                                   walltime ='0:10:00'), sites[0])])
     if jobs[0][0]:
         try:
             logger.info("copying bench archive to %s" % (sites[0],))
             copy_bench = Put([sites[0]], ['NPB3.3-MPI.tar.bz2']).run()
             logger.info("extracting bench archive on %s" % (sites[0],))
             extract_bench = Remote('tar -xjf NPB3.3-MPI.tar.bz2', [sites[0]]).run()
             logger.info("waiting job start %s" % (jobs[0],))
             wait_oar_job_start(*jobs[0], prediction_callback = pred_cb)
             logger.info("getting nodes of %s" % (jobs[0],))
             nodes = get_oar_job_nodes(*jobs[0])
             logger.info("configure bench compilation")
             conf_bench = Remote('echo "%s" > ~/NPB3.3-MPI/config/suite.def' % bench_list, nodes).run()
             logger.info("compil bench")
             compilation = Remote('cd NPB3.3-MPI && make clean && make suite', nodes).run()
             logger.info("compil finished")
         except:
             logger.error("unable to compile bench")
             return False
         finally:
             oardel(jobs)
     # Copying binaries to all other frontends
     frontends = sites[1:]
     rsync = Remote('rsync -avuP ~/NPB3.3-MPI/ {{frontends}}:NPB3.3-MPI', 
                    [get_host_site(nodes[0])] * len(frontends)) 
     rsync.run()
     return compilation.ok and rsync.ok
Example 41
    def _make_reservation(self, site):
        """Make a new reservation"""

        elements = {self.config['cluster']: 1}
        logger.info('Finding slot for the experiment '
                    '\nrally %s:1',
                    style.host(self.config['cluster']).rjust(5))

        planning = funk.get_planning(elements)
        slots = funk.compute_slots(planning,
                                   walltime=self.config['walltime'].encode(
                                       'ascii', 'ignore'),
                                   excluded_elements=EXCLUDED_ELEMENTS)

        startdate, enddate, resources = funk.find_free_slot(slots, elements)
        resources = funk.distribute_hosts(resources, elements,
                                          EXCLUDED_ELEMENTS)

        if startdate is None:
            logger.error("Sorry, could not find the resources requested.")
            exit(4)

        jobs_specs = funk.get_jobs_specs(resources,
                                         name=self.options.job_name,
                                         excluded_elements=EXCLUDED_ELEMENTS)

        print jobs_specs

        sub, site = jobs_specs[0]
        sub.additional_options = "-t deploy"
        sub.reservation_date = startdate
        sub.walltime = self.config['walltime'].encode('ascii', 'ignore')
        sub.name = self.options.job_name

        if 'testing' in EX5.get_cluster_attributes(
                self.config['cluster'])['queues']:
            sub.queue = 'testing'

        jobs = EX5.oarsub([(sub, site)])
        self.job_id = jobs[0][0]
        logger.info('Job %s will start at %s', style.emph(self.job_id),
                    style.log_header(EX.time_utils.format_date(startdate)))
Example 42
    def start_shell(self, language="IPYTHON", node=None, exec_params=None):
        """Open a Spark shell.

        Args:
          language (str, optional):
            The language to be used in the shell.
          node (Host, optional):
            The host where the shell is to be started. If not provided,
            self.master is chosen.
          exec_params (list of str, optional):
            The list of parameters used in job execution (e.g., driver-memory).
        """

        if not node:
            node = self.master

        # Configure execution options
        if exec_params is None:
            exec_params = []

        if self.mode == YARN_MODE:
            exec_params.append("--master yarn-client")

        params_str = " " + " ".join(exec_params)

        # Execute shell
        if language.upper() == "IPYTHON":
            call("ssh -t " + node.address + " " + "IPYTHON=1 " + self.bin_dir +
                 "/pyspark" + params_str,
                 shell=True)
        elif language.upper() == "PYTHON":
            call("ssh -t " + node.address + " " + self.bin_dir + "/pyspark" +
                 params_str,
                 shell=True)
        elif language.upper() == "SCALA":
            call("ssh -t " + node.address + " " + self.bin_dir +
                 "/spark-shell" + params_str,
                 shell=True)
        else:
            logger.error("Unknown language " + language)
            return
Example 43
    def start_shell(self, language="IPYTHON", node=None, exec_params=None):
        """Open a Spark shell.

        Args:
          language (str, optional):
            The language to be used in the shell.
          node (Host, optional):
            The host where the shell is to be started. If not provided,
            self.master is chosen.
          exec_params (list of str, optional):
            The list of parameters used in job execution (e.g., driver-memory).
        """

        if not node:
            node = self.master

        # Configure execution options
        if exec_params is None:
            exec_params = []

        if self.mode == YARN_MODE:
            exec_params.append("--master yarn-client")

        params_str = " " + " ".join(exec_params)

        # Execute shell
        if language.upper() == "IPYTHON":
            call("ssh -t " + node.address + " " +
                 "IPYTHON=1 " + self.bin_dir + "/pyspark" + params_str,
                 shell=True)
        elif language.upper() == "PYTHON":
            call("ssh -t " + node.address + " " +
                 self.bin_dir + "/pyspark" + params_str,
                 shell=True)
        elif language.upper() == "SCALA":
            call("ssh -t " + node.address + " " +
                 self.bin_dir + "/spark-shell" + params_str,
                 shell=True)
        else:
            logger.error("Unknown language " + language)
            return
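A hypothetical call sketch for start_shell; sc stands for an already-initialized cluster object exposing the method above, and the driver-memory value is made up:

# Open a Scala shell on the master node with an extra execution parameter.
sc.start_shell(language="SCALA", exec_params=["--driver-memory 2g"])
# In YARN mode the method also appends "--master yarn-client" on its own.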
Example 44
    def __init__(self,
                 job_path,
                 exec_params=None,
                 app_params=None,
                 lib_paths=None):
        """Create a new Spark job with the given parameters.

        Args:
          job_path (str):
            The local path of the file containing the job binaries.
          exec_params (list of str, optional):
            The list of parameters used in job execution (e.g., driver-memory).
          app_params (list of str, optional):
            The list of parameters of the application.
          lib_paths (list of str, optional):
            The list of local paths to the libraries used by the job.
        """

        if exec_params is None:
            exec_params = []
        if app_params is None:
            app_params = []
        if lib_paths is None:
            lib_paths = []

        # Check if the jar file exists
        if not os.path.exists(job_path):
            logger.error("Job binaries file " + job_path + " does not exist")
            raise SparkJobException("Job binaries file " + job_path +
                                    " does not exist")

        # Check if the libraries exist
        for lp in lib_paths:
            if not os.path.exists(lp):
                logger.warn("Lib file " + lp + " does not exist")
                return  # TODO - exception

        self.job_path = job_path
        self.exec_params = exec_params
        self.app_params = app_params
        self.lib_paths = lib_paths
Example 45
    def add_dependency(self, m1, m2):
        """Include a new macro dependency: m1 -> m2. This means that to obtain
        the value of m2 we use the value of m1.
        
        Args:
          m1 (string):
            The name of the param used.
          m2 (string):
            The name of the param being specified.

        Raises:
          MacroException:
            If the order of sections (test -> ds -> xp) is not respected.
        """

        # Check if dependency is correct
        if m1 in self.ds_params:
            if m2 in self.test_macros:
                logger.error("Not allowed dependency: ds -> test")
                raise MacroException("Not allowed dependency: ds -> test")
        elif m1 in self.xp_params:
            if m2 in self.test_macros:
                logger.error("Not allowed dependency: xp -> test")
                raise MacroException("Not allowed dependency: xp -> test")
            elif m2 in self.ds_params:
                logger.error("Not allowed dependency: xp -> ds")
                raise MacroException("Not allowed dependency: xp -> ds")

        # Add dependency
        self.dep_graph.add_edge(m1, m2)
Example 46
    def add_dependency(self, m1, m2):
        """Include a new macro dependency: m1 -> m2. This means that to obtain
        the value of m2 we use the value of m1.
        
        Args:
          m1 (string):
            The name of the param used.
          m2 (string):
            The name of the param being specified.

        Raises:
          MacroException:
            If the order of sections (test -> ds -> xp) is not respected.
        """

        # Check if dependency is correct
        if m1 in self.ds_params:
            if m2 in self.test_macros:
                logger.error("Not allowed dependency: ds -> test")
                raise MacroException("Not allowed dependency: ds -> test")
        elif m1 in self.xp_params:
            if m2 in self.test_macros:
                logger.error("Not allowed dependency: xp -> test")
                raise MacroException("Not allowed dependency: xp -> test")
            elif m2 in self.ds_params:
                logger.error("Not allowed dependency: xp -> ds")
                raise MacroException("Not allowed dependency: xp -> ds")

        # Add dependency
        self.dep_graph.add_edge(m1, m2)
Example 47
    def _make_reservation(self):
        """Make a new reservation."""

        # Extract the list of criteria (i.e., `oarsub -l
        # *criteria*`) in order to compute a specification for the
        # reservation.
        criteria = {}
        # Actual criteria are:
        # - number of nodes per cluster, grouped by site
        for cluster, roles in self.config["resources"].items():
            site = get_cluster_site(cluster)
            nb_nodes = reduce(operator.add, map(int, roles.values()))
            criterion = "{cluster='%s'}/nodes=%s" % (cluster, nb_nodes)
            criteria.setdefault(site, []).append(criterion)

        for site, vlan in self.config["vlans"].items():
            criteria.setdefault(site, []).append(vlan)

        # Compute the specification for the reservation
        jobs_specs = [(OarSubmission(resources = '+'.join(c),
                                     name = self.config["name"]), s)
                      for s, c in criteria.items()]
        logger.info("Criteria for the reservation: %s" % pf(jobs_specs))

        # Make the reservation
        gridjob, _ = EX5.oargridsub(
            jobs_specs,
            reservation_date=self.config['reservation'],
            walltime=self.config['walltime'].encode('ascii', 'ignore'),
            job_type='deploy'
        )

        # TODO - move this upper to not have a side effect here
        if gridjob is not None:
            self.gridjob = gridjob
            logger.info("Using new oargrid job %s" % style.emph(self.gridjob))
        else:
            logger.error("No oar job was created.")
            sys.exit(26)
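
The reservation specification above boils down to reshaping the per-cluster resources dictionary into one '+'-joined oarsub resource string per site. A self-contained sketch of that transformation, with a hard-coded mapping standing in for get_cluster_site and made-up sample data:

import operator
from functools import reduce

# Made-up input mimicking self.config["resources"]: cluster -> {role: count}
resources = {"parapluie": {"control": 1, "compute": 3},
             "paranoia": {"compute": 2}}
cluster_site = {"parapluie": "rennes", "paranoia": "rennes"}  # stand-in for get_cluster_site

criteria = {}
for cluster, roles in resources.items():
    site = cluster_site[cluster]
    nb_nodes = reduce(operator.add, map(int, roles.values()))
    criteria.setdefault(site, []).append(
        "{cluster='%s'}/nodes=%s" % (cluster, nb_nodes))

for site, c in criteria.items():
    print("%s -> %s" % (site, '+'.join(c)))
# e.g.: rennes -> {cluster='parapluie'}/nodes=4+{cluster='paranoia'}/nodes=2
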
Esempio n. 48
0
    def deploy_nodes(self, min_deployed_hosts=1, max_tries=3):
        """Deploy nodes in the cluster. If the number of deployed nodes is less
        that the specified min, try again.
        
        Args:
          min_deployed_hosts (int, optional):
            Minimum number of nodes to be deployed.
          max_tries (int, optional):
            Maximum number of tries to reach the minimum number of nodes.
        """

        logger.info("Deploying " + str(len(self.hosts)) + " nodes")

        def correct_deployment(deployed, undeployed):
            return len(deployed) >= min_deployed_hosts

        if self.kadeploy_env_file:
            deployment = Deployment(self.hosts,
                                    env_file=self.kadeploy_env_file)
        elif self.kadeploy_env_name:
            deployment = Deployment(self.hosts,
                                    env_name=self.kadeploy_env_name)
        else:
            logger.error("Neither env_file nor env_name are specified")
            raise ParameterException(
                "Neither env_file nor env_name are specified")

        (deployed, undeployed) = deploy(deployment,
                                        num_tries=max_tries,
                                        check_enough_func=correct_deployment,
                                        out=True)

        logger.info("%i deployed, %i undeployed" %
                    (len(deployed), len(undeployed)))

        if not correct_deployment(deployed, undeployed):
            logger.error("It was not possible to deploy min number of hosts")

        return (deployed, undeployed)
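
The role of check_enough_func and max_tries above can be illustrated without Grid'5000; here is a minimal retry-until-enough sketch with a stubbed deployment step (nothing below is the execo API, all names are made up):

import random

def fake_deploy(hosts):
    # Stub standing in for kadeploy: each host "deploys" with 80% probability.
    deployed = set(h for h in hosts if random.random() < 0.8)
    return deployed, set(hosts) - deployed

def deploy_with_retries(hosts, min_deployed_hosts=1, max_tries=3):
    deployed, undeployed = set(), set(hosts)
    for _ in range(max_tries):
        newly, undeployed = fake_deploy(sorted(undeployed))
        deployed |= newly
        if len(deployed) >= min_deployed_hosts:  # plays the check_enough_func role
            break
    return deployed, undeployed

deployed, undeployed = deploy_with_retries(["node-%d" % i for i in range(8)],
                                           min_deployed_hosts=6)
print("%i deployed, %i undeployed" % (len(deployed), len(undeployed)))
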
Esempio n. 49
0
    def deploy_nodes(self, min_deployed_hosts=1, max_tries=3):
        """Deploy nodes in the cluster. If the number of deployed nodes is less
        that the specified min, try again.

        Args:
          min_deployed_hosts (int, optional): minimum number of nodes to be
            deployed (default: 1).
          max_tries (int, optional): maximum number of tries to reach the
            minimum number of nodes (default: 3).
        """

        logger.info("Deploying " + str(len(self.hosts)) + " nodes")

        def correct_deployment(deployed, undeployed):
            return len(deployed) >= min_deployed_hosts

        if self.kadeploy_env_file:
            deployment = Deployment(self.hosts, env_file=self.kadeploy_env_file)
        elif self.kadeploy_env_name:
            deployment = Deployment(self.hosts, env_name=self.kadeploy_env_name)
        else:
            logger.error("Neither env_file nor env_name are specified")
            raise ParameterException("Neither env_file nor env_name are "
                                     "specified")

        (deployed, undeployed) = deploy(
            deployment,
            num_tries=max_tries,
            check_enough_func=correct_deployment,
            out=True
        )

        logger.info("%i deployed, %i undeployed" % (len(deployed),
                                                    len(undeployed)))

        if not correct_deployment(deployed, undeployed):
            logger.error("It was not possible to deploy min number of hosts")

        return (deployed, undeployed)
Esempio n. 50
0
    def __init__(self, job_path, exec_params=None, app_params=None,
                 lib_paths=None):
        """Create a new Spark job with the given parameters.

        Args:
          job_path (str):
            The local path of the file containing the job binaries.
          exec_params (list of str, optional):
            The list of parameters used in job execution (e.g., driver-memory).
          app_params (list of str, optional):
            The list of parameters of the application.
          lib_paths (list of str, optional):
            The list of local paths to the libraries used by the job.
        """

        if exec_params is None:
            exec_params = []
        if app_params is None:
            app_params = []
        if lib_paths is None:
            lib_paths = []

        # Check if the jar file exists
        if not os.path.exists(job_path):
            logger.error("Job binaries file " + job_path + " does not exist")
            raise SparkJobException("Job binaries file " + job_path +
                                    " does not exist")

        # Check if the libraries exist
        for lp in lib_paths:
            if not os.path.exists(lp):
                logger.warn("Lib file " + lp + " does not exist")
                return  # TODO - exception

        self.job_path = job_path
        self.exec_params = exec_params
        self.app_params = app_params
        self.lib_paths = lib_paths
Esempio n. 51
0
    def run(self):
        """Execute a test suite. The execution workflow is as follows:

        1. Parse command-line arguments.

        2. Define the parameters of the tests from the specified configuration
        file. Generate all the combinations to test from the given parameters.

        3. Consume the combinations.

          3.1. Set up the cluster if it has not been done yet (first time or
          after a reservation ends).

          3.2. Load the dataset into the Hadoop cluster.

          3.3. Perform the experiments corresponding to the combinations linked
          to the loaded dataset.

        4. Clean all resources.
        """

        # Get parameters
        self.cluster = self.args[0]
        self.n_nodes = int(self.args[1])
        self.config_file = self.args[2]
        self.site = get_cluster_site(self.cluster)

        if not os.path.exists(self.config_file):
            logger.error("Params file " + self.config_file + " does not exist")
            sys.exit(1)

        # Set oar job id
        if self.options.oar_job_id:
            self.oar_job_id = self.options.oar_job_id
        else:
            self.oar_job_id = None

        # Main
        try:
            # Creation of the main iterator used for the first control loop.
            self.define_parameters()

            job_is_dead = False
            # While there are combinations to treat
            while len(self.sweeper.get_remaining()) > 0:

                # SETUP
                # If no job, we make a reservation and prepare the hosts for the
                # experiments
                if job_is_dead or self.oar_job_id is None:
                    self.make_reservation()
                    success = self.setup()
                    if not success:
                        break
                else:
                    self.hosts = get_oar_job_nodes(self.oar_job_id,
                                                   self.frontend)
                if not self.hc:
                    self.hc = HadoopCluster(self.hosts)
                # SETUP FINISHED

                # Getting the next combination (which requires a ds deployment)
                comb = self.sweeper.get_next()
                self.raw_comb = comb.copy()
                self.comb = comb
                self.prepare_dataset(comb)
                self.xp_wrapper(comb)

                # subloop over the combinations that use the same dataset
                while True:
                    newcomb = self.sweeper.get_next(
                        lambda r: filter(self._uses_same_ds, r))
                    if newcomb:
                        self.raw_comb = newcomb.copy()
                        try:
                            self.xp_wrapper(newcomb)
                        except Exception:
                            break
                    else:
                        break

                if get_oar_job_info(self.oar_job_id,
                                    self.frontend)['state'] == 'Error':
                    job_is_dead = True

        finally:
            if self.oar_job_id is not None:
                if not self.options.keep_alive:
                    logger.info('Deleting job')
                    oardel([(self.oar_job_id, self.frontend)])
                else:
                    logger.info('Keeping job alive for debugging')

            # Clean cluster
            if self.hc:
                if self.hc.initialized:
                    self.hc.clean()

            # Close summary files
            if self.summary_file:
                self.summary_file.close()
            if self.ds_summary_file:
                self.ds_summary_file.close()
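
The nested loops in step 3 pull every remaining combination that shares the already-loaded dataset before a new dataset is deployed. The same consumption pattern can be sketched with a plain list standing in for the ParamSweeper (all names and data below are made up):

remaining = [{"ds": "ds1", "xp": 1}, {"ds": "ds2", "xp": 1},
             {"ds": "ds1", "xp": 2}, {"ds": "ds2", "xp": 2}]

def get_next(pred=None):
    # Return (and consume) the first remaining combination accepted by pred.
    for comb in list(remaining):
        if pred is None or pred(comb):
            remaining.remove(comb)
            return comb
    return None

while remaining:
    comb = get_next()  # this one triggers a dataset deployment
    print("deploy dataset %s, run xp %s" % (comb["ds"], comb["xp"]))
    while True:
        newcomb = get_next(lambda c: c["ds"] == comb["ds"])
        if newcomb is None:
            break
        print("reuse dataset %s, run xp %s" % (newcomb["ds"], newcomb["xp"]))
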
Esempio n. 52
0
def init_os():
    # Authenticate to keystone
    # http://docs.openstack.org/developer/keystoneauth/using-sessions.html
    # http://docs.openstack.org/developer/python-glanceclient/apiv2.html
    keystone_addr = STATE['config']['vip']
    auth = v3.Password(auth_url='http://%s:5000/v3' % keystone_addr,
                       username='******',
                       password='******',
                       project_name='admin',
                       user_domain_id='default',
                       project_domain_id='default')
    sess = session.Session(auth=auth)

    # Install `member` role
    keystone = kclient.Client(session=sess)
    role_name = 'member'
    if role_name not in map(attrgetter('name'), keystone.roles.list()):
        logger.info("Creating role %s" % role_name)
        keystone.roles.create(role_name)

    # Install cirros with glance client if absent
    glance = gclient.Client('2', session=sess)
    cirros_name = 'cirros.uec'
    if cirros_name not in map(itemgetter('name'), glance.images.list()):
        # Download cirros
        image_url  = 'http://download.cirros-cloud.net/0.3.4/'
        image_name = 'cirros-0.3.4-x86_64-disk.img'
        logger.info("Downloading %s at %s..." % (cirros_name, image_url))
        cirros_img = requests.get(image_url + image_name)

        # Install cirros
        cirros = glance.images.create(name=cirros_name,
                                      container_format='bare',
                                      disk_format='qcow2',
                                      visibility='public')
        glance.images.upload(cirros.id, cirros_img.content)
        logger.info("%s has been created on OpenStack" %  cirros_name)

    # Install default flavors
    nova = nclient.Client('2', session=sess)
    default_flavors = [
            # name, ram, disk, vcpus
            ('m1.tiny', 512, 1, 1),
            ('m1.small', 2048, 20, 1),
            ('m1.medium', 4096, 40, 2),
            ('m1.large', 8192, 80, 4),
            ('m1.xlarge', 16384, 160, 8)
    ]
    current_flavors = map(attrgetter('name'), nova.flavors.list())
    for flavor in default_flavors:
        if flavor[0] not in current_flavors:
            nova.flavors.create(name=flavor[0],
                                ram=flavor[1],
                                disk=flavor[2],
                                vcpus=flavor[3])
            logger.info("%s has been created on OpenStack" % flavor[0])

    # Install default network
    neutron = ntnclient.Client('2', session=sess)
    network_name = 'public1'
    network_id = ''
    networks = neutron.list_networks()['networks']
    if network_name not in map(itemgetter('name'), networks):
        network = {'name': network_name,
                   'provider:network_type': 'flat',
                   'provider:physical_network': 'physnet1',
                   'router:external': True
        }
        res = neutron.create_network({'network': network})
        network_id  = res['network']['id']
        logger.info("%s network has been created on OpenStack" % network_name)

    if not network_id:
        logger.error("no network_id for %s network" % network_name)
        sys.exit(32)

    # Install default subnet
    subnet_name = '1-subnet'
    subnets = neutron.list_subnets()['subnets']
    if subnet_name not in map(itemgetter('name'), subnets):
        subnet = {'name': subnet_name,
                  'network_id': network_id,
                  'cidr': '10.0.2.0/24',
                  'ip_version': 4}
        neutron.create_subnet({'subnet': subnet})
        logger.info("%s has been created on OpenStack" % subnet_name)
Esempio n. 53
0
    def run(self):
        """Inherited method, put here the code for running the engine."""

        # Get parameters
        self.cluster = self.args[0]
        self.n_nodes = int(self.args[1])
        self.config_file = self.args[2]
        self.site = get_cluster_site(self.cluster)

        if not os.path.exists(self.config_file):
            logger.error("Params file " + self.config_file + " does not exist")
            sys.exit(1)

        # Set oar job id
        if self.options.oar_job_id:
            self.oar_job_id = self.options.oar_job_id
        else:
            self.oar_job_id = None

        # Main
        try:
            # Creation of the main iterator used for the first control loop.
            self.define_parameters()

            job_is_dead = False
            # While there are combinations to treat
            while len(self.sweeper.get_remaining()) > 0:

                ## SETUP
                # If no job, we make a reservation and prepare the hosts for the
                # experiments
                if job_is_dead or self.oar_job_id is None:
                    self.make_reservation()
                    success = self.setup()
                    if not success:
                        break
                else:
                    self.hosts = get_oar_job_nodes(self.oar_job_id,
                                                   self.frontend)
                ## SETUP FINISHED

                logger.info("Setup finished in hosts " + str(self.hosts))

                test_threads = []
                for h in self.hosts:
                    t = TestThread(h, self.comb_manager, self.stats_manager)
                    test_threads.append(t)
                    t.name = "th_" + str(h.address).split(".")[0]
                    t.start()

                for t in test_threads:
                    t.join()

                if get_oar_job_info(self.oar_job_id,
                                    self.frontend)['state'] == 'Error':
                    job_is_dead = True

        finally:
            if self.oar_job_id is not None:
                if not self.options.keep_alive:
                    logger.info('Deleting job')
                    oardel([(self.oar_job_id, self.frontend)])
                else:
                    logger.info('Keeping job alive for debugging')

            # Close stats
            self.stats_manager.close()
Esempio n. 54
0
    def bootstrap(self, tar_file):
        """Install Hadoop in all cluster nodes from the specified tar.gz file.
        
        Args:
          tar_file (str):
            The file containing Hadoop binaries.
        """

        # 0. Check that required packages are present
        required_packages = "openjdk-7-jre openjdk-7-jdk"
        check_packages = TaktukRemote("dpkg -s " + required_packages,
                                      self.hosts)
        for p in check_packages.processes:
            p.nolog_exit_code = p.nolog_error = True
        check_packages.run()
        if not check_packages.ok:
            logger.info("Packages not installed, trying to install")
            install_packages = TaktukRemote(
                "export DEBIAN_MASTER=noninteractive ; " +
                "apt-get update && apt-get install -y --force-yes " +
                required_packages, self.hosts).run()
            if not install_packages.ok:
                logger.error("Unable to install the packages")

        get_java_home = SshProcess('echo $(readlink -f /usr/bin/javac | '
                                   'sed "s:/bin/javac::")', self.master)
        get_java_home.run()
        self.java_home = get_java_home.stdout.strip()

        logger.info("All required packages are present")

        # 1. Copy hadoop tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = Remote("rm -rf " + self.base_dir +
                         " " + self.conf_dir +
                         " " + self.logs_dir +
                         " " + self.hadoop_temp_dir,
                         self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote(
            "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
            self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf]).run()

        # 2. Move installation to base dir and create other dirs
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" +
            os.path.basename(tar_file).replace(".tar.gz", "") + " " +
            self.base_dir,
            self.hosts)
        mkdirs = TaktukRemote("mkdir -p " + self.conf_dir +
                              " && mkdir -p " + self.logs_dir +
                              " && mkdir -p " + self.hadoop_temp_dir,
                              self.hosts)
        chmods = TaktukRemote("chmod g+w " + self.base_dir +
                              " && chmod g+w " + self.conf_dir +
                              " && chmod g+w " + self.logs_dir +
                              " && chmod g+w " + self.hadoop_temp_dir,
                              self.hosts)
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()

        # 4. Specify environment variables
        command = "cat >> " + self.conf_dir + "/hadoop-env.sh << EOF\n"
        command += "export JAVA_HOME=" + self.java_home + "\n"
        command += "export HADOOP_LOG_DIR=" + self.logs_dir + "\n"
        command += "HADOOP_HOME_WARN_SUPPRESS=\"TRUE\"\n"
        command += "EOF"
        action = Remote(command, self.hosts)
        action.run()

        # 5. Check version
        return self._check_version_compliance()
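
Steps 1 and 2 derive both the temporary and the final installation paths from the tar file name alone; a quick, self-contained illustration with placeholder paths (the file names below are made up):

import os

tar_file = "/home/user/hadoop-1.2.1.tar.gz"   # placeholder
base_dir = "/opt/hadoop"                      # placeholder

tmp_tar = "/tmp/" + os.path.basename(tar_file)
unpacked = "/tmp/" + os.path.basename(tar_file).replace(".tar.gz", "")

print(tmp_tar)    # /tmp/hadoop-1.2.1.tar.gz  (upload target of TaktukPut)
print(unpacked)   # /tmp/hadoop-1.2.1  (created by tar xf, then moved to base_dir)
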
Esempio n. 55
0
    def bootstrap(self, tar_file):
        """Install Hadoop in all cluster nodes from the specified tar.gz file.
        
        Args:
          tar_file (str):
            The file containing Hadoop binaries.
        """

        # 0. Check requirements
        java_major_version = 7
        if not check_java_version(java_major_version, self.hosts):
            msg = "Java 1.%d+ required" % java_major_version
            logger.error(msg)
            raise HadoopException(msg)

        self.java_home = get_java_home(self.master)

        # 1. Copy hadoop tar file and uncompress
        logger.info("Copy " + tar_file + " to hosts and uncompress")
        rm_dirs = TaktukRemote("rm -rf " + self.base_dir +
                               " " + self.conf_dir +
                               " " + self.logs_dir +
                               " " + self.hadoop_temp_dir,
                               self.hosts)
        put_tar = TaktukPut(self.hosts, [tar_file], "/tmp")
        tar_xf = TaktukRemote(
            "tar xf /tmp/" + os.path.basename(tar_file) + " -C /tmp",
            self.hosts)
        rm_tar = TaktukRemote(
            "rm /tmp/" + os.path.basename(tar_file),
            self.hosts)
        SequentialActions([rm_dirs, put_tar, tar_xf, rm_tar]).run()

        # 2. Move installation to base dir and create other dirs
        logger.info("Create installation directories")
        mv_base_dir = TaktukRemote(
            "mv /tmp/" +
            os.path.basename(tar_file).replace(".tar.gz", "") + " " +
            self.base_dir,
            self.hosts)
        mkdirs = TaktukRemote("mkdir -p " + self.conf_dir +
                              " && mkdir -p " + self.logs_dir +
                              " && mkdir -p " + self.hadoop_temp_dir,
                              self.hosts)
        chmods = TaktukRemote("chmod g+w " + self.base_dir +
                              " && chmod g+w " + self.conf_dir +
                              " && chmod g+w " + self.logs_dir +
                              " && chmod g+w " + self.hadoop_temp_dir,
                              self.hosts)
        SequentialActions([mv_base_dir, mkdirs, chmods]).run()

        # 4. Specify environment variables
        command = "cat >> " + self.conf_dir + "/hadoop-env.sh << EOF\n"
        command += "export JAVA_HOME=" + self.java_home + "\n"
        command += "export HADOOP_LOG_DIR=" + self.logs_dir + "\n"
        command += "HADOOP_HOME_WARN_SUPPRESS=\"TRUE\"\n"
        command += "EOF"
        action = Remote(command, self.hosts)
        action.run()

        # 5. Check version (cannot do it before)
        if not self._check_version_compliance():
            return False

        # 6. Generate initial configuration
        self._initialize_conf()

        return True
Esempio n. 56
0
    def __define_test_parameters(self, config):
        if config.has_section("test_parameters"):
            test_parameters_names = config.options("test_parameters")
            if "test.stats_path" in test_parameters_names:
                self.stats_path = config.get("test_parameters",
                                             "test.stats_path")
                if not os.path.exists(self.stats_path):
                    os.makedirs(self.stats_path)

            if "test.remove_output" in test_parameters_names:
                self.remove_output = config.getboolean(
                    "test_parameters", "test.remove_output")

            if "test.output_path" in test_parameters_names:
                self.output_path = \
                    config.get("test_parameters", "test.output_path")
                if not os.path.exists(self.output_path):
                    os.makedirs(self.output_path)

            if "test.summary_file" in test_parameters_names:
                self.summary_file_name = \
                    config.get("test_parameters", "test.summary_file")

            if "test.ds_summary_file" in test_parameters_names:
                self.ds_summary_file_name = \
                    config.get("test_parameters", "test.ds_summary_file")

            if "test.num_repetitions" in test_parameters_names:
                self.num_repetitions = \
                    int(config.get("test_parameters", "test.num_repetitions"))

            if "test.hadoop.properties" in test_parameters_names:
                self.hadoop_props = \
                    config.get("test_parameters", "test.hadoop.properties")
                if not os.path.exists(self.hadoop_props):
                    logger.error("Hadoop properties file " + self.hadoop_props +
                                 " does not exist")
                    raise ParameterException("Hadoop properties file " +
                                             self.hadoop_props +
                                             " does not exist")

            if "test.use_kadeploy" in test_parameters_names:
                self.use_kadeploy = config.getboolean("test_parameters",
                                                      "test.use_kadeploy")

            if self.use_kadeploy:
                if "test.kadeploy.env_file" in test_parameters_names:
                    self.kadeploy_env_file = \
                        config.get("test_parameters", "test.kadeploy.env_file")
                elif "test.kadeploy.env_name" in test_parameters_names:
                    self.kadeploy_env_name = \
                        config.get("test_parameters", "test.kadeploy.env_name")
                else:
                    logger.error("Either test.kadeploy.env_file or "
                                 "test.kadeploy.env_name should be specified")
                    raise ParameterException("Either test.kadeploy.env_file or "
                                             "test.kadeploy.env_name should be "
                                             "specified")
            else:
                if "test.hadoop.tar_file" in test_parameters_names:
                    self.hadoop_tar_file = \
                        config.get("test_parameters", "test.hadoop.tar_file")
                else:
                    logger.error("test.hadoop.tar_file should be specified")
                    raise ParameterException("test.hadoop.tar_file should be "
                                             "specified")