예제 #1
0
파일: utils.py 프로젝트: daespinel/enoslib
def grid_reload_from_id(gridjob):
    """Rebuild the nodes, vlans and subnets attached to an oargrid job.

    Args:
        gridjob: identifier of the oargrid job; it is converted with ``int()``.

    Returns:
        A ``(nodes, vlans, subnets)`` tuple. ``vlans`` is a list of
        ``{"site": ..., "vlan_id": ...}`` dicts. ``subnets`` is a list of
        dicts holding ``"site"``, ``"ipmac"``, every key of the subnet info
        returned by execo, and ``"network"`` (a copy of ``"ip_prefix"``).
    """
    logger.info("Reloading the resources from oargrid job %s", gridjob)
    gridjob = int(gridjob)
    nodes = ex5.get_oargrid_job_nodes(gridjob)

    vlans = []
    subnets = []
    for job_id, site in ex5.get_oargrid_job_oar_jobs(gridjob):
        kavlan_ids = ex5.get_oar_job_kavlan(job_id, site)
        vlans.extend([{"site": site, "vlan_id": kavlan_id}
                      for kavlan_id in kavlan_ids])

        # NOTE(msimonin): only one subnet is currently returned even if
        # several are reserved. execo will need the same patch as the one
        # applied for vlans.
        ipmac, info = ex5.get_oar_job_subnets(job_id, site)
        if ipmac:
            entry = {"site": site, "ipmac": ipmac}
            entry.update(info)
            # "network" is mandatory when the resources get concretized.
            entry["network"] = info["ip_prefix"]
            subnets.append(entry)
        else:
            logger.debug("No subnet information found for this job")
    return nodes, vlans, subnets
예제 #2
0
파일: utils.py 프로젝트: badock/enoslib
def concretize_resources(resources, gridjob, reservation_type):
    """Bind the abstract resource description to a concrete reservation.

    The nodes of the job are passed to ``concretize_nodes`` and the vlans
    found on every (job, site) pair are passed to ``concretize_networks``.

    Args:
        resources: resource description; its ``"machines"`` and
            ``"networks"`` entries are read when ``reservation_type`` is
            ``"oar"``.
        gridjob: id of the oar job (``reservation_type == "oar"``) or of
            the oargrid job (any other value).
        reservation_type: ``"oar"`` for a single-site oar job; any other
            value is handled as an oargrid job.

    Raises:
        ValueError: for an oar reservation, when the description does not
            point to exactly one site.
    """
    is_oar = reservation_type == "oar"

    if is_oar:
        nodes = ex5.get_oar_job_nodes(gridjob)
    else:
        nodes = ex5.get_oargrid_job_nodes(gridjob)

    concretize_nodes(resources, nodes)

    if is_oar:
        # An oar job lives on a single site: infer it from the clusters of
        # the machines and from the sites declared on the networks.
        site_candidates = [
            ex5.get_cluster_site(machine.get("cluster"))
            for machine in resources.get("machines", [])
        ]
        site_candidates.extend(
            network.get("site", "unknown")
            for network in resources.get("networks", [])
        )
        if len(set(site_candidates)) != 1:
            # Raising a bare string is not a valid exception: it would end
            # up as an unrelated TypeError and lose this message.
            raise ValueError(
                "Could not detect the g5k site of the oarjob %s" % gridjob)
        job_sites = [(gridjob, site_candidates[0])]
    else:
        job_sites = ex5.get_oargrid_job_oar_jobs(gridjob)

    vlans = []
    for (job_id, site) in job_sites:
        vlan_ids = ex5.get_oar_job_kavlan(job_id, site)
        vlans.extend([{
            "site": site,
            "vlan_id": vlan_id
        } for vlan_id in vlan_ids])

    concretize_networks(resources, vlans)
예제 #3
0
파일: g5k.py 프로젝트: ivotron/enos
    def _get_jobs_and_vlans(self, conf):
        """Get the hosts from an existing job (if any) or from a new job.

        This will perform a reservation if necessary.

        Args:
            conf: configuration mapping; ``conf['provider']`` (keys
                ``'name'`` and ``'role_distribution'``) and
                ``conf['resources']`` are read here.

        Returns:
            A ``(jobs, vlans, nodes)`` tuple: ``jobs`` is a list of
            ``(site, job_id)``, ``vlans`` a list of ``(site, vlan_id)`` for
            the jobs where a vlan was found, and ``nodes`` the nodes of the
            job sorted by address.
        """

        provider_conf = conf['provider']
        # Look if there is a running job or make a new reservation
        gridjob, _ = EX5.planning.get_job_by_name(provider_conf['name'])

        if gridjob is None:
            gridjob = self._make_reservation(conf)
        else:
            logging.info("Using running oargrid job %s", gridjob)

        # Wait for the job to start
        EX5.wait_oargrid_job_start(gridjob)

        nodes = sorted(EX5.get_oargrid_job_nodes(gridjob),
                       key=lambda n: n.address)

        # Checking the number of nodes given
        # the distribution policy
        self._check_nodes(nodes=nodes,
                          resources=conf['resources'],
                          mode=provider_conf['role_distribution'])

        # vlans information
        jobs = []
        vlans = []
        for (job_id, site) in EX5.get_oargrid_job_oar_jobs(gridjob):
            jobs.append((site, job_id))
            # Query the vlan once and reuse the answer instead of issuing
            # the same request a second time.
            vlan_id = EX5.get_oar_job_kavlan(job_id, site)
            if vlan_id is not None:
                vlans.append((site, vlan_id))
        return (jobs, vlans, nodes)
예제 #4
0
def grid_reload_from_id(gridjob):
    """Rebuild the nodes, vlans and subnets attached to an oargrid job.

    Args:
        gridjob: identifier of the oargrid job; it is converted with ``int()``.

    Returns:
        A ``(nodes, vlans, subnets)`` tuple, where the vlans and subnets are
        the values accumulated by ``get_network_info_from_job_id`` over every
        (job, site) pair of the oargrid job.
    """
    logger.info("Reloading the resources from oargrid job %s", gridjob)
    gridjob = int(gridjob)
    nodes = ex5.get_oargrid_job_nodes(gridjob)

    # Thread the accumulators through the helper, one oar job at a time.
    network_info = ([], [])
    for job_id, site in ex5.get_oargrid_job_oar_jobs(gridjob):
        network_info = get_network_info_from_job_id(
            job_id, site, network_info[0], network_info[1])
    vlans, subnets = network_info
    return nodes, vlans, subnets
예제 #5
0
파일: g5k.py 프로젝트: asimonet/enos
    def _get_job(self):
        """Get the hosts from an existing job (if any) or from a new job.

        This will perform a reservation if necessary.

        Sets ``self.gridjob``, ``self.nodes`` (sorted by address),
        ``self.start_date``, ``self.user``, ``self.jobs`` (list of
        ``(site, job_id)``) and ``self.vlans`` (list of ``(site, vlan_id)``
        for the jobs where a vlan was found).
        """

        # Look if there is a running job or make a new reservation
        self.gridjob, _ = EX5.planning.get_job_by_name(self.config['name'])

        if self.gridjob is None:
            # NOTE(review): presumably sets self.gridjob, since its return
            # value is not used here -- confirm against _make_reservation.
            self._make_reservation()
        else:
            logging.info("Using running oargrid job %s", self.gridjob)

        # Wait for the job to start
        EX5.wait_oargrid_job_start(self.gridjob)

        self.nodes = sorted(EX5.get_oargrid_job_nodes(self.gridjob),
                            key=lambda n: n.address)

        # XXX check already done into `_deploy`.
        self._check_nodes(nodes=self.nodes,
                          resources=self.config['resources'],
                          mode=self.config['role_distribution'])

        # Fetch the job information once; both attributes fall back to None
        # when the key is absent.
        # XXX(Ad_rien_) Start_date is never used, deadcode? - August
        # 11th 2016
        job_info = EX5.get_oargrid_job_info(self.gridjob)
        self.start_date = job_info.get('start_date')
        self.user = job_info.get('user')

        # vlans information
        self.jobs = []
        self.vlans = []
        for (job_id, site) in EX5.get_oargrid_job_oar_jobs(self.gridjob):
            self.jobs.append((site, job_id))
            # Query the vlan once and reuse the answer instead of issuing
            # the same request a second time.
            vlan_id = EX5.get_oar_job_kavlan(job_id, site)
            if vlan_id is not None:
                self.vlans.append((site, vlan_id))
예제 #6
0
파일: utils.py 프로젝트: msimonin/deploy5k
def concretize_resources(resources, gridjob):
    """Bind the abstract resource description to a concrete oargrid job.

    The nodes of the job are handed to ``concretize_nodes``; the vlans
    found on every (job, site) pair are handed to ``concretize_networks``
    as a list of ``{"site": ..., "vlan_id": ...}`` dicts.

    Args:
        resources: resource description to concretize.
        gridjob: id of the oargrid job.
    """
    concretize_nodes(resources, ex5.get_oargrid_job_nodes(gridjob))

    vlans = []
    for job_id, site in ex5.get_oargrid_job_oar_jobs(gridjob):
        kavlan_ids = ex5.get_oar_job_kavlan(job_id, site)
        vlans += [dict(site=site, vlan_id=kavlan_id)
                  for kavlan_id in kavlan_ids]

    concretize_networks(resources, vlans)
예제 #7
0
    def get_job(self):
        """Get the hosts from an existing job (if any) or from a new job.

        This will perform a reservation if necessary.

        Sets ``self.gridjob``, ``self.nodes`` (sorted by address),
        ``self.start_date``, ``self.user``, ``self.jobs`` (list of
        ``(site, job_id)``) and ``self.vlans`` (list of ``(site, vlan_id)``
        for the jobs where a vlan was found).

        Returns:
            The oargrid job id (``self.gridjob``).
        """

        # Look if there is a running job or make a new reservation
        self.gridjob, _ = EX5.planning.get_job_by_name(self.config['name'])

        if self.gridjob is None:
            # NOTE(review): presumably sets self.gridjob, since its return
            # value is not used here -- confirm against _make_reservation.
            self._make_reservation()
        else:
            logger.info("Using running oargrid job %s",
                        style.emph(self.gridjob))

        # Wait for the job to start
        EX5.wait_oargrid_job_start(self.gridjob)

        # NOTE(review): sorted() never returns None, so this loop runs at
        # most once and is not an actual retry -- confirm whether a real
        # retry on the node lookup is wanted.
        attempts = 0
        self.nodes = None
        while self.nodes is None and attempts < MAX_ATTEMPTS:
            self.nodes = sorted(EX5.get_oargrid_job_nodes(self.gridjob),
                                key=lambda n: n.address)
            attempts += 1

        check_nodes(
            nodes=self.nodes,
            resources=self.config['resources'],
            mode=self.config['role_distribution'])

        # Fetch the job information once; both attributes fall back to None
        # when the key is absent.
        # TODO - Start_date is never used, deadcode ? Ad_rien_ - August 11th 2016
        job_info = EX5.get_oargrid_job_info(self.gridjob)
        self.start_date = job_info.get('start_date')
        self.user = job_info.get('user')

        # vlans information
        self.jobs = []
        self.vlans = []
        for (job_id, site) in EX5.get_oargrid_job_oar_jobs(self.gridjob):
            self.jobs.append((site, job_id))
            # Query the vlan once and reuse the answer instead of issuing
            # the same request a second time.
            vlan_id = EX5.get_oar_job_kavlan(job_id, site)
            if vlan_id is not None:
                self.vlans.append((site, vlan_id))

        return self.gridjob
예제 #8
0
파일: utils.py 프로젝트: badock/vm5k
def get_oargrid_job_vm5k_resources(oargrid_job_id):
    """Retrieve the hosts list and (ip, mac) list by sites from an
    oargrid_job_id and return the resources dict needed by vm5k_deployment,
    with kavlan-global if used in the oargrid job.

    Blocks until the oargrid job has started.

    Args:
        oargrid_job_id: id of the oargrid job; converted with ``int()``.

    Returns:
        The dict built by ``get_oar_job_vm5k_resources``, keyed by site.
        When a site entry has a ``'kavlan'`` value >= 10, an extra
        ``'global'`` entry holding ``'kavlan'``, ``'ip_mac'`` and ``'site'``
        is added for the first such site.
    """
    oargrid_job_id = int(oargrid_job_id)
    logger.info('Waiting job start')
    wait_oargrid_job_start(oargrid_job_id)
    resources = get_oar_job_vm5k_resources([(oar_job_id, site)
                                            for oar_job_id, site in
                                            get_oargrid_job_oar_jobs(oargrid_job_id)])
    kavlan_global = None
    # items() works on Python 2 and 3, whereas iteritems() is Python 2 only.
    for site, res in resources.items():
        kavlan = res['kavlan']
        # A missing vlan must be skipped explicitly: comparing None with an
        # int raises TypeError on Python 3.
        if kavlan is not None and kavlan >= 10:
            kavlan_global = {'kavlan': kavlan,
                             'ip_mac': res['ip_mac'],
                             'site': site}
            break
    if kavlan_global:
        resources['global'] = kavlan_global

    return resources
예제 #9
0
파일: planning.py 프로젝트: msimonin/execo
def get_job_by_name(job_name, sites=None):
    """Look for a current job whose name is ``job_name``.

    The current oargrid jobs are searched first, then the current oar jobs
    of ``sites``.

    Args:
        job_name: name to look for.
        sites: sites to search for plain oar jobs; when falsy, the result of
            ``get_g5k_sites()`` is used.

    Returns:
        ``(oargrid_job_id, None)`` when an oar job of an oargrid job matches;
        the matching entry of ``get_current_oar_jobs`` (indexed here as
        ``[0]`` job id, ``[1]`` site) when a plain oar job matches;
        ``(None, None)`` otherwise.
    """
    logger.detail('Looking for a job named %s', style.emph(job_name))
    if not sites:
        sites = get_g5k_sites()

    # First pass: oar jobs that belong to a current oargrid job.
    for grid_job in get_current_oargrid_jobs():
        for oar_job in get_oargrid_job_oar_jobs(grid_job):
            found_name = get_oar_job_info(oar_job[0], oar_job[1])['name']
            if found_name == job_name:
                logger.info('Oargridjob %s found !', style.emph(grid_job))
                return grid_job, None

    # Second pass: plain oar jobs on the requested sites.
    for oar_job in get_current_oar_jobs(sites):
        found_name = get_oar_job_info(oar_job[0], oar_job[1])['name']
        if found_name == job_name:
            logger.info('Job %s found on site %s !', style.emph(oar_job[0]),
                        style.host(oar_job[1]))
            return oar_job

    return None, None
예제 #10
0
파일: utils.py 프로젝트: jonglezb/vm5k
def get_oargrid_job_vm5k_resources(oargrid_job_id):
    """Retrieve the hosts list and (ip, mac) list by sites from an
    oargrid_job_id and return the resources dict needed by vm5k_deployment,
    with kavlan-global if used in the oargrid job.

    Blocks until the oargrid job has started.

    Args:
        oargrid_job_id: id of the oargrid job; converted with ``int()``.

    Returns:
        The dict built by ``get_oar_job_vm5k_resources``, keyed by site.
        When a site entry has a ``'kavlan'`` value >= 10, an extra
        ``'global'`` entry holding ``'kavlan'``, ``'ip_mac'`` and ``'site'``
        is added for the first such site.
    """
    oargrid_job_id = int(oargrid_job_id)
    logger.info('Waiting job start')
    wait_oargrid_job_start(oargrid_job_id)
    resources = get_oar_job_vm5k_resources([(oar_job_id, site)
                                            for oar_job_id, site in
                                            get_oargrid_job_oar_jobs(oargrid_job_id)])
    kavlan_global = None
    # items() works on Python 2 and 3, whereas iteritems() is Python 2 only.
    for site, res in resources.items():
        kavlan = res['kavlan']
        # A missing vlan must be skipped explicitly: comparing None with an
        # int raises TypeError on Python 3.
        if kavlan is not None and kavlan >= 10:
            kavlan_global = {'kavlan': kavlan,
                             'ip_mac': res['ip_mac'],
                             'site': site}
            break
    if kavlan_global:
        resources['global'] = kavlan_global

    return resources
예제 #11
0
def main():
    """Deploy and prepare the nodes of the given jobs for an experiment.

    The jobs come either from ``--grid_job_id`` (expanded into its oar jobs)
    or from the ``site:job_id`` entries of ``args.job_ids``. The nodes are
    deployed, customized (ssh keys, packages, kernel limits), the experiment
    files are pushed to the site frontends, and three files are written in
    the current directory:

    * ``hosts.info``: one address per node, last node excluded;
    * ``peers.info``: one ``address:port:port`` line per hardware thread
      (``smt_size``) of those nodes;
    * ``service_node.info``: the address of the last node.
    """
    copy_outputs('config.log', 'config.log')
    args = parser.parse_args()
    whoami = os.getlogin()
    logger.info('whoami: %s', whoami)

    if args.grid_job_id is None:
        jobids = args.job_ids
    else:
        grid_job_id = int(args.grid_job_id[0])
        jobids = ["%s:%d" % (site, job_id)
                  for job_id, site in get_oargrid_job_oar_jobs(grid_job_id)]

    logger.info('Using jobs %s', style.emph(' '.join(jobids)))

    # Split every "site:job_id" entry once and derive the views from it.
    parsed_jobids = [j.strip().split(':') for j in jobids]
    sites = [parts[0] for parts in parsed_jobids]
    frontends = [str('frontend.' + s) for s in sites]
    jobids_list = [(int(parts[1]), str(parts[0])) for parts in parsed_jobids]

    logger.info("Get list of associated nodes")
    nodes = [node
             for job in jobids_list
             for node in get_oar_job_nodes(*job)]
    logger.info('%s', nodes)

    logger.info("Deploying %i nodes", len(nodes))
    deployed, undeployed = deploy(Deployment(nodes, env_name="jessie-x64-nfs"))
    logger.info("%i deployed, %i undeployed", len(deployed), len(undeployed))

    # Configure Host OSes
    logger.info('Finalize node customization')
    # use root to connect on the host
    # NOTE(review): the user value below looks redacted -- confirm the
    # intended account name.
    default_connection_params['user'] = '******'

    # Copy local .ssh to remote nodes:
    logger.info('Copy ssh entries into root of each node')
    Put(nodes, ['/home/'+whoami+'/.ssh/id_rsa','/home/'+whoami+'/.ssh/id_rsa.pub'],'.ssh/.').run()

    # Install missing packages
    logger.info('| - Install Packages')
    # NOTE(review): `DEBIAN_MASTER` and `Dpkgtions::=` look like mangled
    # forms of `DEBIAN_FRONTEND` and `Dpkg::Options::=` -- the command is
    # left untouched here, confirm before relying on it.
    TaktukRemote('export DEBIAN_MASTER=noninteractive ; export https_proxy="https://proxy:3128"; apt-get -o Acquire::Check-Valid-Until=false update && apt-get install -y --force-yes python-pip lynx openjdk-8-jdk uuid-runtime cpufrequtils kanif -o Acquire::Check-Valid-Until=false -o Dpkgtions::="--force-confdef" -o Dpkgtions::="--force-confold" ; pip install tabview', nodes).run()
    # Fix ulimit and related stuffs
    logger.info('| - set limit related stuffs')
    cmd = 'ulimit -c unlimited; sysctl -w vm.max_map_count=331072 ; echo 120000 > /proc/sys/kernel/threads-max ; echo 600000 > /proc/sys/vm/max_map_count ; echo 200000 > /proc/sys/kernel/pid_max'
    TaktukRemote(cmd, nodes).run()

    # Copy the DHT-EXP hierarchy to the remote site
    logger.info('Copy sloth and injector files on each NFS server involved in the experiment')
    TaktukRemote('mkdir -p ~/SLOTH-EXP-TMP/', frontends, connection_params={'user': str(whoami)}).run()
    TaktukPut(frontends, ['./SLOTH_HOME' ],'./SLOTH-EXP-TMP/.', connection_params={'user': str(whoami)}).run()
    TaktukPut(frontends, ['./INJECTOR_HOME' ], './SLOTH-EXP-TMP/.', connection_params={'user': str(whoami)}).run()

    # Prepare the address file for the sloth peers (keep in mind that the
    # last node is dedicated to the injector).
    logger.info('Prepare the peers list')

    peer_count = 0
    # Context managers close both files even if a lookup fails midway.
    with open('./hosts.info', 'w') as hosts_file, \
            open('./peers.info', 'w') as peers_file:
        # NOTE(review): this logs the repr of an enumerate object, which
        # carries no useful information -- kept as-is, consider removing.
        logger.info('enumerate %s', enumerate(nodes[:-1]))

        for node in nodes[:-1]:
            hosts_file.write("%s\n" % (node.address))
            # One peer entry per hardware thread, each with its own pair
            # of ports.
            smt_size = get_host_attributes(node)['architecture']['smt_size']
            for _ in range(smt_size):
                peers_file.write("%s:%d:%d\n" % (node.address,
                                                 3000 + peer_count,
                                                 8000 + peer_count))
                peer_count += 1

    with open('./service_node.info', 'w') as service_file:
        service_file.write("%s" % nodes[-1].address)

    logger.info("Nodes are now ready, you should launch ./runExperiment.sh ... from the lyon frontend")
    logger.info("The list of sloth peers is in ./peers.info")
    logger.info("The service node is in ./service_node.info")
    logger.info("The injector will run on %s", nodes[-1].address)
    logger.info("The usual(max)  command should be : ./runExperiment.sh in_vivo %d %s/peers.info %s",
                peer_count, os.getcwd(), nodes[-1].address)