Example #1
    def _copy_base_conf(self):
        """Copy base configuration files to tmp dir."""

        self.temp_conf_dir = tempfile.mkdtemp("", "hadoop-", "/tmp")
        if os.path.exists(self.local_base_conf_dir):
            base_conf_files = [
                os.path.join(self.local_base_conf_dir, f)
                for f in os.listdir(self.local_base_conf_dir)
            ]
            for f in base_conf_files:
                shutil.copy(f, self.temp_conf_dir)
        else:
            logger.warning(
                "Local conf dir does not exist. Using default configuration")
            base_conf_files = []

        mandatory_files = [
            CORE_CONF_FILE, HDFS_CONF_FILE, MR_CONF_FILE, YARN_CONF_FILE
        ]

        # Work on a copy so that mandatory_files is not mutated by the loop.
        missing_conf_files = list(mandatory_files)
        for f in base_conf_files:
            f_base_name = os.path.basename(f)
            if f_base_name in missing_conf_files:
                missing_conf_files.remove(f_base_name)

        logger.info("Copying missing conf files from master: " +
                    str(missing_conf_files))

        remote_missing_files = [
            os.path.join(self.conf_dir, f) for f in missing_conf_files
        ]

        action = Get([self.master], remote_missing_files, self.temp_conf_dir)
        action.run()
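
For reference, the pattern behind these snippets is execo's Get action, which pulls remote files into a local directory over SSH. A minimal standalone sketch (the host name and file paths are hypothetical):

    from execo import Get, Host

    # Fetch remote files from one host into a local directory.
    master = Host("master.example.com")                    # hypothetical host
    action = Get([master], ["/etc/hadoop/core-site.xml"],  # hypothetical path
                 "/tmp/conf")
    action.run()
    if not action.ok:
        print("transfer failed")
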
Example #2
    def copy_history(self, dest, job_ids=None):
        """Copy history logs from dfs.

        Args:
          dest (str):
            The path of the local dir where the logs will be copied.
          job_ids (list of str, optional):
            A list with the ids of the jobs for which the history should be
            copied. If nothing is passed, the history of all jobs is copied.
        """

        if not os.path.exists(dest):
            logger.warning("Destination directory " + dest +
                           " does not exist. It will be created")
            os.makedirs(dest)

        # Dirs used
        user_login = getpass.getuser()
        hist_dfs_dir = "/tmp/hadoop-yarn/staging/history/done_intermediate/" + \
                       user_login
        hist_tmp_dir = "/tmp/hadoop_hist"

        # Remove the tmp dir on the master if it exists
        proc = SshProcess("rm -rf " + hist_tmp_dir, self.master)
        proc.run()

        # Fetch the history files from HDFS into the tmp dir on the master
        if job_ids:
            proc = SshProcess("mkdir " + hist_tmp_dir, self.master)
            proc.run()
            for jid in job_ids:
                self.execute("fs -get " + hist_dfs_dir + "/" + jid + "* " +
                             hist_tmp_dir,
                             verbose=False)
        else:
            self.execute("fs -get " + hist_dfs_dir + " " + hist_tmp_dir,
                         verbose=False)

        # Copy files from master
        action = Get([self.master], [hist_tmp_dir], dest)
        action.run()
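
The snippet drives per-host shell commands through execo's SshProcess, which runs a single command on a single host and exposes its output. A minimal sketch (host and command are hypothetical):

    from execo import SshProcess, Host

    # Run one command over SSH and inspect the result.
    proc = SshProcess("ls /tmp", Host("master.example.com"))  # hypothetical
    proc.run()
    if proc.ok:
        print(proc.stdout)
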
Example #3
    def _initialize_conf(self):
        """Merge locally-specified configuration files with default files
        from the distribution."""

        if os.path.exists(self.local_base_conf_dir):
            base_conf_files = [
                os.path.join(self.local_base_conf_dir, f)
                for f in os.listdir(self.local_base_conf_dir)
            ]
            for f in base_conf_files:
                shutil.copy(f, self.init_conf_dir)
        else:
            logger.warning(
                "Local conf dir does not exist. Using default configuration")
            base_conf_files = []

        # Work on a copy so that self.conf_mandatory_files is not mutated.
        missing_conf_files = list(self.conf_mandatory_files)
        for f in base_conf_files:
            f_base_name = os.path.basename(f)
            if f_base_name in missing_conf_files:
                missing_conf_files.remove(f_base_name)

        logger.info("Copying missing conf files from master: " +
                    str(missing_conf_files))

        remote_missing_files = [
            os.path.join(self.conf_dir, f) for f in missing_conf_files
        ]

        action = Get([self.hosts[0]], remote_missing_files, self.init_conf_dir)
        action.run()
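
A note on the list(...) copy above: assigning an existing list to a new name only creates an alias, so pruning the "missing" list would otherwise silently shrink self.conf_mandatory_files as well. A minimal illustration:

    mandatory = ["core-site.xml", "hdfs-site.xml"]
    alias = mandatory              # same list object
    copy = list(mandatory)         # independent copy
    copy.remove("core-site.xml")   # leaves mandatory untouched
    alias.remove("hdfs-site.xml")  # mutates mandatory through the alias
    print(mandatory)               # ['core-site.xml']
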
Example #4
    def workflow(self, comb, host, comb_dir):
        """ """
        comb_ok = False
        thread_name = style.Thread(host.split('.')[0]) + ': '
        logger.info(thread_name + 'Starting combination ' + slugify(comb))

        try:
            logger.info(thread_name + 'Generate conf file')
            param_str = self.create_string(comb)

            Remote(
                "python /home/Work/sgcbntier/paasage_demo/xml_gen_execo.py --cb "
                + param_str, [host]).run()

            logger.info(thread_name + 'Run code')
            Remote(
                "cd /home/Work/sgcbntier/paasage_demo/ ; python run_all_execo.py --cb %s"
                % param_str, [host]).run()

            logger.info(thread_name + 'Get results')

            traceFile = "ntier_" + param_str
            get_results = Get([host], [
                "/home/Work/sgcbntier/paasage_demo/csv/REQTASK_" + traceFile +
                ".csv"
            ],
                              local_location=comb_dir).run()

            for p in get_results.processes:
                if not p.ok:
                    logger.error(
                        host +
                        ': Unable to retrieve the files for combination %s',
                        slugify(comb))
                    sys.exit(1)  # comb_ok stays False, so the finally block cancels the combination

            comb_ok = True
        finally:
            if comb_ok:
                self.sweeper.done(comb)
                logger.info(thread_name + slugify(comb) + ' has been done')
            else:
                self.sweeper.cancel(comb)
                logger.warning(thread_name + slugify(comb) + ' has been canceled')
        logger.info(style.step('%s Remaining'),
                    len(self.sweeper.get_remaining()))
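
This workflow relies on execo_engine's ParamSweeper to track which parameter combinations are done, canceled, or still pending across runs. A minimal sketch of that pattern (the parameter space and experiment function are hypothetical):

    from execo_engine import ParamSweeper, sweep

    # Progress is persisted under the given directory, so an interrupted
    # sweep can be resumed; canceled combinations are handed out again.
    parameters = {"n_core": [4, 8], "Benchmark": ["cg", "ft"]}  # hypothetical
    sweeper = ParamSweeper("sweeper_state", sweeps=sweep(parameters))
    while len(sweeper.get_remaining()) > 0:
        comb = sweeper.get_next()
        try:
            run_experiment(comb)   # hypothetical experiment function
            sweeper.done(comb)     # mark as finished
        except Exception:
            sweeper.cancel(comb)   # put back for a later retry
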
Example #5
    def run_xp(self):
        """Iterate over the parameter combinations and execute the benchmark."""

        master = self.cluster[0]
        while len(self.sweeper.get_remaining()) > 0:
            # Take the next combination from the sweeper
            comb = self.sweeper.get_next()

            logger.info('Processing new combination %s' % (comb, ))

            try:
                # Collect metrics with the Linux sar tool over a wall-clock interval
                def takeMetric(path, startTime, endTime,
                               metric=('cpu', 'mem', 'disk', 'swap', 'network')):
                    opt = ''
                    cmd_template_sar = (
                        "sar -f /var/log/sysstat/sa* -{opt} -s {startTime} -e {endTime}"
                    )
                    for met in metric:
                        if met == 'cpu':
                            opt = 'u'
                        elif met == 'mem':
                            opt = 'r'
                        elif met == 'disk':
                            opt = 'dp'
                        elif met == 'swap':
                            opt = 'S'
                        elif met == 'network':
                            opt = 'n DEV'

                        cmd = cmd_template_sar.format(opt=opt,
                                                      startTime=startTime,
                                                      endTime=endTime)
                        for host in self.cluster:
                            hE = SshProcess(cmd,
                                            host,
                                            connection_params={'user': '******'})
                            hE.run()
                            stdMetric = host + '-' + met + '.txt'
                            with open(os.path.join(path, stdMetric),
                                      "w") as sout:
                                sout.write(hE.stdout)

                # Set the CPU frequency and governor according to the current combination
                cmd_template_Freq_Policy = "cpufreq-set -r -g {policy}"
                cmd_template_Freq = "cpufreq-set -r -f {freq}"

                if comb['Freq'] == 'OnDemand':
                    cmd_freq_policy = cmd_template_Freq_Policy.format(
                        policy='ondemand')
                    Remote(cmd_freq_policy, [master],
                           connection_params={'user': '******'}).run()
                elif comb['Freq'] == 'conservative':
                    cmd_freq_policy = cmd_template_Freq_Policy.format(
                        policy='conservative')
                    Remote(cmd_freq_policy, [master],
                           connection_params={'user': '******'}).run()
                else:
                    cmd_freq_policy = cmd_template_Freq_Policy.format(
                        policy='userspace')
                    Remote(cmd_freq_policy, [master],
                           connection_params={'user': '******'}).run()
                    cmd_freq = cmd_template_Freq.format(freq=comb['Freq'])
                    Remote(cmd_freq, [master],
                           connection_params={'user': '******'}).run()

                # build command
                src = 'source /opt/intel-performance-snapshoot/apsvars.sh'
                cmd_mpirun_template = (
                    "mpirun {opt} -f /root/cluster.txt -np {pr1} aps -r '/tmp/log/' /tmp/NPB/npb-mpi/bin/{typeNPB}.{NPBclass}.{pr2}"
                )
                cmd_mpirun = cmd_mpirun_template.format(
                    opt='',
                    pr1=comb['n_core'],
                    typeNPB=comb['Benchmark'],
                    NPBclass=comb['NPBclass'],
                    pr2=comb['n_core'])
                cmd = "{}; /tmp/NPB/bin/runMPI.sh '{}' '{}'".format(
                    src, cmd_mpirun, slugify(comb))

                curPath = os.path.join(self.result_dir, slugify(comb))

                # run Mpi through execo remote SshProcess
                def runMpi(cmd):
                    act = SshProcess(cmd,
                                     master,
                                     connection_params={'user': '******'},
                                     shell=True)
                    act.run()

                    if not os.path.exists(curPath):
                        os.makedirs(curPath)

                    with open(os.path.join(curPath, "stdout.txt"),
                              "a+") as sout, open(
                                  os.path.join(curPath, "stderr.txt"),
                                  "w") as serr:
                        sout.write(act.stdout)
                        serr.write(act.stderr)
                    return act.ok

                # start clock and exec command in the master node
                time.sleep(5)
                startUnix = int(time.time())
                start24Hour = datetime.datetime.fromtimestamp(
                    startUnix).strftime('%H:%M:%S')

                task1 = runMpi(cmd)

                endUnix = int(time.time())
                end24Hour = datetime.datetime.fromtimestamp(endUnix).strftime(
                    '%H:%M:%S')
                time.sleep(5)

                with open(os.path.join(curPath, "executionTime.txt"),
                          "w") as sout:
                    sout.write(
                        'ExecTime:{}\nStartDate:{}\nEndDate:{}\n'.format(
                            str(endUnix - startUnix), start24Hour, end24Hour))

                takeMetric(curPath, start24Hour, end24Hour,
                           ['cpu', 'mem', 'disk', 'swap', 'network'])

                # Collect power measurements with kwapi, the Grid'5000 monitoring tool
                for hostname in self.cluster:
                    powerOut = '{}_power'.format(hostname)
                    collect_metric(startUnix, endUnix, 'power', curPath,
                                   self.site, powerOut, hostname)

                st = '/tmp/out/' + slugify(comb)
                intelAppPerf = st + '.html'

                # Get the data produced by Intel Performance Snapshot
                # (Application / Storage Performance Snapshot)
                # https://software.intel.com/en-us/performance-snapshot
                Get([master], [intelAppPerf],
                    curPath,
                    connection_params={'user': '******'}).run()
                if task1:
                    logger.info("comb ok: %s" % (comb, ))
                    self.sweeper.done(comb)
                    continue

            except OSError as err:
                print("OS error: {0}".format(err))
            except ValueError:
                print("Could not convert data to an integer.")
            except:
                print("Unexpected error:", sys.exc_info()[0])
                raise

            logger.info("comb NOT ok: %s" % (comb, ))
            self.sweeper.cancel(comb)
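
The if/elif chain in takeMetric maps metric names to sar option flags; the same mapping can be expressed more compactly as a dict. A sketch of that alternative, using the same flags and command template as above:

    # Metric name -> sar flag, identical to the chain in takeMetric.
    SAR_OPTS = {'cpu': 'u', 'mem': 'r', 'disk': 'dp',
                'swap': 'S', 'network': 'n DEV'}

    def sar_command(metric, start_time, end_time):
        return "sar -f /var/log/sysstat/sa* -{} -s {} -e {}".format(
            SAR_OPTS[metric], start_time, end_time)

    print(sar_command('cpu', '10:00:00', '10:05:00'))
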