Example #1
    def setup(self):
        """Setup the cluster of hosts. Depending on the engine parameters it
        will bootstrap hadoop directly or deploy a given environment.
        """

        self.hosts = get_oar_job_nodes(self.oar_job_id, self.frontend)

        if self.use_kadeploy:
            (deployed, undeployed) = self.deploy_nodes()
            return (len(deployed) != 0)
        else:
            if not self.hc:
                self.hc = HadoopCluster(self.hosts)
            self.hc.bootstrap(self.hadoop_tar_file)
            return True
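The deploy_nodes() helper called above is not part of this excerpt. A minimal sketch of what such a helper typically does with execo_g5k's kadeploy API, where the environment name is only an assumption (it is the one used in the oarsub/deploy example further down):

from execo_g5k.kadeploy import Deployment, deploy

def deploy_nodes(hosts, env_name="jessie-x64-base"):
    # Deploy the given Kadeploy environment on every host and return the
    # sets of hosts on which the deployment succeeded and failed.
    deployed, undeployed = deploy(Deployment(hosts, env_name=env_name))
    return deployed, undeployed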
Example #2
    def setup(self):
        """Setup the cluster of hosts. Optionally deploy env and then copy the
        executable jar to all the nodes.
        """

        self.hosts = get_oar_job_nodes(self.oar_job_id, self.frontend)

        if self.use_kadeploy:
            (deployed, undeployed) = self.deploy_nodes()
            return (len(deployed) != 0)

        copy_code = TaktukPut(self.hosts, [self.jar_file], self.remote_dir)
        copy_code.run()

        return True
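The same copy step can be sketched outside of an engine class; the job id, site and file paths below are placeholders, and TaktukPut's third positional argument is the remote directory, as in the method above:

from execo import TaktukPut
from execo.log import logger
from execo_g5k.oar import get_oar_job_nodes

hosts = get_oar_job_nodes(1696863, 'grenoble')          # placeholder job id and site
copy_code = TaktukPut(hosts, ['/tmp/experiment.jar'], '/tmp')
copy_code.run()
if not copy_code.ok:
    logger.error('Copy failed on some of the hosts')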
Example #3
    def run(self):
        """Execute a test suite. The execution workflow is as follows:

        1. Parse command-line arguments.

        2. Define the parameters of the tests from the specified configuration
        file. Generate all the combinations to test from the given parameters.

        3. Consume the combinations.

          3.1. Set up the cluster if it has not been done yet (first time or
          after a reservation ends).

          3.2. Load the dataset into the Hadoop cluster.

          3.3. Perform the experiments corresponding to the combinations linked
          to the loaded dataset.

        4. Clean all resources.
        """

        # Get parameters
        self.cluster = self.args[0]
        self.n_nodes = int(self.args[1])
        self.config_file = self.args[2]
        self.site = get_cluster_site(self.cluster)

        if not os.path.exists(self.config_file):
            logger.error("Params file " + self.config_file + " does not exist")
            sys.exit(1)

        # Set oar job id
        if self.options.oar_job_id:
            self.oar_job_id = self.options.oar_job_id
        else:
            self.oar_job_id = None

        # Main
        try:
            # Creation of the main iterator used for the first control loop.
            self.define_parameters()

            job_is_dead = False
            # While there are combinations to treat
            while len(self.sweeper.get_remaining()) > 0:

                # SETUP
                # If no job, we make a reservation and prepare the hosts for the
                # experiments
                if job_is_dead or self.oar_job_id is None:
                    self.make_reservation()
                    success = self.setup()
                    if not success:
                        break
                else:
                    self.hosts = get_oar_job_nodes(self.oar_job_id,
                                                   self.frontend)
                if not self.hc:
                    self.hc = HadoopCluster(self.hosts)
                # SETUP FINISHED

                # Getting the next combination (which requires a ds deployment)
                comb = self.sweeper.get_next()
                self.raw_comb = comb.copy()
                self.comb = comb
                self.prepare_dataset(comb)
                self.xp_wrapper(comb)

                # subloop over the combinations that use the same dataset
                while True:
                    newcomb = self.sweeper.get_next(
                        lambda r: filter(self._uses_same_ds, r))
                    if newcomb:
                        self.raw_comb = newcomb.copy()
                        try:
                            self.xp_wrapper(newcomb)
                        except:
                            break
                    else:
                        break

                if get_oar_job_info(self.oar_job_id,
                                    self.frontend)['state'] == 'Error':
                    job_is_dead = True

        finally:
            if self.oar_job_id is not None:
                if not self.options.keep_alive:
                    logger.info('Deleting job')
                    oardel([(self.oar_job_id, self.frontend)])
                else:
                    logger.info('Keeping job alive for debugging')

            # Clean cluster
            if self.hc:
                if self.hc.initialized:
                    self.hc.clean()

            # Close summary files
            if self.summary_file:
                self.summary_file.close()
            if self.ds_summary_file:
                self.ds_summary_file.close()
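define_parameters() and the sweeper it creates are not shown in this excerpt; the get_next()/get_remaining() calls above suggest an execo_engine ParamSweeper. A rough sketch of that pattern, with illustrative parameter names and persistence directory:

from execo_engine import ParamSweeper, sweep

parameters = {'ds_size': [10, 100], 'n_reducers': [1, 4]}   # illustrative values only
sweeper = ParamSweeper('sweeps_dir',                        # directory used to persist progress
                       sweeps=sweep(parameters))
comb = sweeper.get_next()
while comb:
    # run the experiment for this combination, then record the outcome so the
    # sweep can be resumed after a crash or a new reservation
    sweeper.done(comb)
    comb = sweeper.get_next()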
Example #4
    def run(self):
        """Inherited method, put here the code for running the engine."""

        # Get parameters
        self.cluster = self.args[0]
        self.n_nodes = int(self.args[1])
        self.config_file = self.args[2]
        self.site = get_cluster_site(self.cluster)

        if not os.path.exists(self.config_file):
            logger.error("Params file " + self.config_file + " does not exist")
            sys.exit(1)

        # Set oar job id
        if self.options.oar_job_id:
            self.oar_job_id = self.options.oar_job_id
        else:
            self.oar_job_id = None

        # Main
        try:
            # Creation of the main iterator used for the first control loop.
            self.define_parameters()

            job_is_dead = False
            # While there are combinations to treat
            while len(self.sweeper.get_remaining()) > 0:

                ## SETUP
                # If no job, we make a reservation and prepare the hosts for the
                # experiments
                if job_is_dead or self.oar_job_id is None:
                    self.make_reservation()
                    success = self.setup()
                    if not success:
                        break
                else:
                    self.hosts = get_oar_job_nodes(self.oar_job_id,
                                                   self.frontend)
                ## SETUP FINISHED

                logger.info("Setup finished in hosts " + str(self.hosts))

                test_threads = []
                for h in self.hosts:
                    t = TestThread(h, self.comb_manager, self.stats_manager)
                    test_threads.append(t)
                    t.name = "th_" + str(h.address).split(".")[0]
                    t.start()

                for t in test_threads:
                    t.join()

                if get_oar_job_info(self.oar_job_id,
                                    self.frontend)['state'] == 'Error':
                    job_is_dead = True

        finally:
            if self.oar_job_id is not None:
                if not self.options.keep_alive:
                    logger.info('Deleting job')
                    oardel([(self.oar_job_id, self.frontend)])
                else:
                    logger.info('Keeping job alive for debugging')

            # Close stats
            self.stats_manager.close()
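TestThread, comb_manager and stats_manager are not defined in this excerpt. A hypothetical sketch of such a per-host worker, assuming the two managers expose get_next()/done() and add() methods:

import threading

class TestThread(threading.Thread):
    """Hypothetical worker running test combinations on a single host."""

    def __init__(self, host, comb_manager, stats_manager):
        super(TestThread, self).__init__()
        self.host = host
        self.comb_manager = comb_manager
        self.stats_manager = stats_manager

    def run(self):
        comb = self.comb_manager.get_next()
        while comb:
            # execute the test for this combination on self.host (details omitted),
            # then record the result and mark the combination as done
            self.stats_manager.add(self.host, comb)
            self.comb_manager.done(comb)
            comb = self.comb_manager.get_next()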
Example #5
    blacklisted = ['talc', 'mbi']
    slots = compute_slots(planning, walltime, excluded_elements=blacklisted)
    wanted = {'grid5000': 1}
    start_date, end_date, resources = find_first_slot(slots, wanted)

    for c in filter(lambda x: x in get_g5k_clusters(), resources.keys()):
        if resources[c] > 1:
            wanted = {c: 1}
            break
    jobs_specs = get_jobs_specs(wanted, name=job_name)
    for sub, frontend in jobs_specs:
        sub.walltime = walltime
        sub.job_type = "deploy"
    job = oarsub(jobs_specs)[0]

nodes = get_oar_job_nodes(job[0], job[1])
logger.info('Deploying host %s', nodes[0].address)
deployed, undeployed = deploy(Deployment(nodes, env_name="jessie-x64-base"))

execware_host = list(deployed)[0]
logger.info('Installing required packages %s', style.emph(packages))
install_packages = SshProcess(
    'apt-get update && apt-get install -y ' + packages, execware_host).run()
logger.info('Copying files to host')
put_files = Put(execware_host, [source_code], remote_location="/tmp").run()

xml_file = """
<settings>
     <proxies>
      <proxy>
         <id>g5k-proxy</id>
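Before reading the nodes of a fresh submission, it is usually worth checking that oarsub succeeded and waiting for the reservation to actually start. A sketch, assuming jobs_specs was built as above:

from execo_g5k.oar import oarsub, wait_oar_job_start, get_oar_job_nodes

job_id, frontend = oarsub(jobs_specs)[0]     # job_id is None if the submission failed
if job_id is None:
    raise RuntimeError('OAR submission failed on ' + str(frontend))
wait_oar_job_start(job_id, frontend)         # block until the reservation starts
nodes = get_oar_job_nodes(job_id, frontend)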
Example #6
#!/usr/bin/env python
from execo_g5k.topology import g5k_graph, treemap
from execo.log import logger, style
from execo_g5k.oar import get_oar_job_nodes
from execo_g5k.utils import hosts_list
from networkx.algorithms.shortest_paths.generic import shortest_path
from execo_g5k.api_utils import get_host_shortname
from random import uniform

jobs = [(1696863, 'grenoble'), (1502558, 'lille'), (74715, 'luxembourg')]

logger.info('Retrieving hosts used for jobs %s',
            ', '.join([style.host(site) + ':' + style.emph(job_id)
                       for job_id, site in jobs]))
hosts = [get_host_shortname(h) for job_id, site in jobs
         for h in get_oar_job_nodes(job_id, site)]
logger.info(hosts_list(hosts))

logger.info('Creating topological graph')
g = g5k_graph(elements=hosts)

i, j = int(uniform(1, len(hosts))), int(uniform(1, len(hosts)))
path = shortest_path(g, hosts[i], hosts[j])
logger.info('Communication between %s and %s go through '
            'the following links: \n%s',
            style.host(hosts[i]),
            style.host(hosts[j]),
            ' -> '.join(path))

logger.info('Active links between nodes %s and %s are: \n%s',
            style.host(path[0]),
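Once the shortest path is known, the per-link data stored in the networkx graph can be inspected directly. A sketch (the available edge attributes depend on the execo_g5k.topology version):

for u, v in zip(path[:-1], path[1:]):
    # g is a networkx graph, so each link's attributes are available as edge data
    logger.info('%s -- %s : %s', style.host(u), style.host(v), g.get_edge_data(u, v))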