def delete(cls, cluster, job, group=None):
    """
    Terminate a single job (by id or name) or every active job of a
    group on the given cluster.

    :param group: if given, kill all active jobs of this group
    :param cluster: the cluster like comet
    :param job: the job id or name
    :return: success message, or the caught exception on error
    """
    try:
        if group is not None:
            # get the job ids from the db
            cm = CloudmeshDatabase()
            arguments = {'cluster': cluster,
                         'group': group}
            db_jobs = cm.find('batchjob', **arguments)
            list1 = []
            for i in db_jobs:
                list1.append(db_jobs[i]['job_id'])

            # read active jobs
            active_jobs = json.loads(cls.queue(cluster))
            list2 = []
            for i in active_jobs:
                list2.append(active_jobs[i]['jobid'])

            # find intersection of db jobs and active jobs
            res = set(list1).intersection(set(list2))

            # BUG FIX: set.intersection() never returns None, so the old
            # `if res is not None` was always true; test for a non-empty
            # set so an empty intersection is simply a no-op.
            if res:
                for j in res:
                    cmd = 'scancel {}'.format(str(j))
                    Shell.ssh(cluster, cmd)
                    print("Deleted {}".format(j))

            return "All jobs for group {} killed successfully".format(group)
        else:
            args = 'scancel '
            if job.isdigit():
                args += job  # kill by job id
            else:
                args += "-n {}".format(job)  # kill by job name
            Shell.ssh(cluster, args)
            return "Job {} killed successfully".format(job)
    except Exception as ex:
        # typo fix: message previously read "in exceptio"
        print("in exception")
        print(ex)
        return ex
def qsub(self, name, host, script, template=None, kind="dict"):
    """
    Executes the qsub command on a given host.

    NOTE this method may not yet be fully implemented

    :param name: name of the script
    :param host: host on which the script is to be run
    :param script: The name of the script
    :param template: The script is wrapped into a template
    :param kind: The return is passed as dict, yaml, xml
    :return: the job status of the submitted job (see jobstatus)
    """
    self.jobid_incr()
    jobscript = self.create_script(name, script, template)

    # write the generated job script locally
    self._write_to_file(jobscript, name)

    # destination directory for scripts on the remote host
    remote_path = self.data.get("cloudmesh", "pbs", host, "scripts")
    print(remote_path)
    xmkdir(host, remote_path)

    manager_host = self.manager(host)
    # copy the script to the manager host and call qsub there
    # NOTE(review): the scp result r is never inspected — TODO confirm
    # whether failures should be handled here
    r = Shell.scp(name, manager_host + ":" + remote_path)
    jobid = Shell.ssh(manager_host,
                      "qsub {0}/{1}".format(remote_path, name)).rstrip()
    return self.jobstatus(host, jobid, kind=kind)
def execute(self, name, command):
    """Run *command* on the named host and return its last
    whitespace-separated output token (empty string if no output)."""
    if name in ["localhost"]:
        output = Shell.sh("-c", command)
    else:
        output = Shell.ssh(name, command)
    # keep only the final token of the command output
    return '\n'.join(output.split()[-1:])
def delete(cls, cluster, job):
    """
    Kill a job, identified by its id or name, on the given cluster.

    :param cluster: the cluster like comet
    :param job: the job id or name
    :return: success message, or the caught exception on error
    """
    try:
        # numeric -> scancel <id>; otherwise scancel -n <name>
        if job.isdigit():
            command = 'scancel ' + job
        else:
            command = 'scancel ' + "-n {}".format(job)
        Shell.ssh(cluster, command)
        return "Job {} killed successfully".format(job)
    except Exception as ex:
        return ex
def rm(cls, cluster, id=None, format=None):
    """
    Remove the directory experiment/<id> on the given cluster.

    :param cluster: the cluster on which to remove the experiment
    :param id: the experiment id; if None nothing is removed
    :param format: unused
    :return: the ssh command output, or None if nothing was done or
             the removal failed
    """
    data = {
        "CLUSTER": cluster,
        "ID": id,
    }
    result = None
    if id is not None:
        try:
            result = Shell.ssh(cluster,
                               "rm -rf experiment/{ID}".format(**data))
        # FIX: modern `except ... as` syntax (was py2-only
        # `except Exception, e`), consistent with the rest of the file;
        # removal stays best-effort
        except Exception:
            pass
    # FIX: result was computed but never returned (the complete rm
    # elsewhere in this file returns it)
    return result
def list(cls, cluster, id=None, format=None):
    """
    List the contents of experiment/<id> on the given cluster.

    :param cluster: the cluster to query
    :param id: the experiment id; if None nothing is listed
    :param format: unused
    :return: list of directory entries, or None if nothing was done or
             the listing failed
    """
    data = {
        "CLUSTER": cluster,
        "ID": id,
    }
    result = None
    if id is not None:
        try:
            result = Shell.ssh(cluster,
                               "ls experiment/{ID}".format(**data))
            result = result.split("\n")
        # FIX: modern `except ... as` syntax (was py2-only
        # `except Exception, e`), consistent with the rest of the file
        except Exception:
            result = None
    # FIX: result was computed but never returned
    return result
def info(cls, cluster, format='json', all=False):
    """
    Query sinfo on the cluster and return the partition information.

    :param cluster: the cluster like comet
    :param format: 'json' or any output format dict_printer accepts
    :param all: if True request every sinfo field, otherwise a fixed
                partition/avail/timelimit/nodes/state/nodelist subset
    :return: the formatted sinfo records
    """
    if all:
        result = Shell.ssh(cluster, 'sinfo --format=\"%all\"')
    else:
        result = Shell.ssh(
            cluster,
            'sinfo --format=\"%P|%a|%l|%D|%t|%N\"')

    # drop any leading noise until the header row shows up
    lines = result.splitlines()
    for index, line in enumerate(lines):
        if 'PARTITION|AVAIL|' in line:
            result = "\n".join(lines[index:])
            break

    table = TableParser(strip=False)
    records = table.to_dict(result)

    # tag every record with its cluster and a refresh timestamp
    for key in records.keys():
        records[key]['cluster'] = cluster
        records[key]['updated'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    if format == 'json':
        return json.dumps(records, indent=4, separators=(',', ': '))
    return dict_printer(records,
                        order=['cluster', 'partition', 'avail', 'timelimit',
                               'nodes', 'state', 'nodelist', 'updated'],
                        output=format)
def nodes(self, host, refresh=True):
    """
    returns the information from the command pbsnodes in a dict.

    :param host: the name of the host as specified in the
                 .ssh/config file
    :param refresh: if False, returns the cached value;
                    if True, issues a new command and refreshes the cache
    :return: information of the pbsnodes command in a dict
    """
    manager_host = self.manager(host)
    # only query the remote host when forced or when no cache exists yet
    if self.pbs_nodes_data is None or refresh:
        try:
            result = Shell.ssh(manager_host, "pbsnodes", "-a")
        except:
            raise RuntimeError(
                "can not execute pbs nodes on host {0}".format(
                    manager_host))
        pbsinfo = {}
        # pbsnodes -a separates the node stanzas with blank lines
        nodes = result.split("\n\n")
        for node in nodes:
            pbs_data = node.split("\n")
            pbs_data = [e.strip() for e in pbs_data]
            # the first line of each stanza is the node name
            name = pbs_data[0]
            if name != "":
                pbsinfo[name] = {u'name': name}
                for element in pbs_data[1:]:
                    try:
                        (attribute, value) = element.split(" = ")
                        if attribute == 'status':
                            # status is a comma separated a=v list;
                            # expand it into a nested dict
                            status_elements = value.split(",")
                            pbsinfo[name][attribute] = {}
                            for e in status_elements:
                                (a, v) = e.split("=")
                                pbsinfo[name][attribute][a] = v
                        elif attribute == 'jobs':
                            pbsinfo[name][attribute] = value.split(',')
                        elif attribute == 'note' and (
                                value.strip().startswith("{") or
                                value.strip().startswith("[")):
                            # note may contain a python literal (dict/list)
                            pbsinfo[name][attribute] = literal_eval(value)
                        else:
                            pbsinfo[name][attribute] = value
                    except:
                        # lines that are not "attr = value" are skipped
                        pass
        self.pbs_nodes_data = pbsinfo
    return self.pbs_nodes_data
def nodes(self, host, refresh=True):
    """
    returns the information from the command pbsnodes in a dict.

    :param host: the name of the host as specified in the
                 .ssh/config file
    :param refresh: if False, returns the cached value;
                    if True, issues a new command and refreshes the cache
    :return: information of the pbsnodes command in a dict
    """
    manager_host = self.manager(host)
    # only query the remote host when forced or when no cache exists yet
    if self.pbs_nodes_data is None or refresh:
        try:
            result = Shell.ssh(manager_host, "pbsnodes", "-a")
        except:
            raise RuntimeError(
                "can not execute pbs nodes on host {0}".format(manager_host))
        pbsinfo = {}
        # pbsnodes -a separates the node stanzas with blank lines
        nodes = result.split("\n\n")
        for node in nodes:
            pbs_data = node.split("\n")
            pbs_data = [e.strip() for e in pbs_data]
            # the first line of each stanza is the node name
            name = pbs_data[0]
            if name != "":
                pbsinfo[name] = {u'name': name}
                for element in pbs_data[1:]:
                    try:
                        (attribute, value) = element.split(" = ")
                        if attribute == 'status':
                            # status is a comma separated a=v list;
                            # expand it into a nested dict
                            status_elements = value.split(",")
                            pbsinfo[name][attribute] = {}
                            for e in status_elements:
                                (a, v) = e.split("=")
                                pbsinfo[name][attribute][a] = v
                        elif attribute == 'jobs':
                            pbsinfo[name][attribute] = value.split(',')
                        elif attribute == 'note' and (
                                value.strip().startswith("{") or
                                value.strip().startswith("[")):
                            # note may contain a python literal (dict/list)
                            pbsinfo[name][attribute] = literal_eval(value)
                        else:
                            pbsinfo[name][attribute] = value
                    except:
                        # lines that are not "attr = value" are skipped
                        pass
        self.pbs_nodes_data = pbsinfo
    return self.pbs_nodes_data
def queue(cls, cluster, format='json', job=None):
    """
    Return the slurm queue (squeue) of the cluster, optionally
    restricted to one job.

    :param cluster: the cluster like comet
    :param format: 'json' or any output format dict_printer accepts
    :param job: a job id (digits) or a job name to filter on;
                None returns all jobs
    :return: the formatted queue, or the caught exception on error
    """
    try:
        args = 'squeue '
        if job is not None:
            if job.isdigit():
                args += ' -j {} '.format(job)  # search by job id
            else:
                args += ' -n {} '.format(job)  # search by job name
        f = '--format=%all'
        args += f
        result = Shell.ssh(cluster, args)
        # TODO: process till header is found...(Need a better way)
        # drop everything before the squeue header row
        l = result.splitlines()
        for i, res in enumerate(l):
            if 'ACCOUNT|GRES|' in res:
                result = "\n".join(l[i:])
                break
        parser = TableParser(strip=True)
        d = parser.to_dict(result)
        # add cluster and updated to each entry
        for key in d.keys():
            d[key]['cluster'] = cluster
            d[key]['updated'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        if format == 'json':
            return json.dumps(d, indent=4, separators=(',', ': '))
        else:
            return (dict_printer(d,
                                 order=['cluster', 'jobid', 'partition',
                                        'name', 'user', 'st', 'time',
                                        'nodes', 'nodelist', 'updated'],
                                 output=format))
    except Exception as ex:
        # NOTE: the exception object itself is returned, mirroring the
        # other class methods in this file
        return ex
def output(cls, cluster, id=None, format=None):
    """
    Read the *.out files of one or more experiments on the cluster.

    :param cluster: the cluster to read from
    :param id: a single experiment id; if None a set of ids is derived
               (see review note below)
    :param format: unused
    :return: list of captured outputs, "" where a read failed
    """
    data = {
        "CLUSTER": cluster,
        "ID": id,
    }
    result = None
    if id is None:
        # NOTE(review): list(cluster) iterates the *characters* of the
        # cluster name, not the experiment ids; this looks like it was
        # meant to enumerate existing experiments (e.g. via cls.list)
        # — TODO confirm intent
        ids = list(cluster)
    else:
        ids = [id]
    result = []
    for i in ids:
        try:
            result.append(Shell.ssh(cluster,
                                    "cat experiment/{}/*.out".format(i)))
        except:
            # best effort: keep result positions aligned with ids
            result.append("")
    # if result == []:
    #     result = None
    return result
def jobstatus(self, host, jobid, kind='dict'):
    """
    The status of a specific job

    :param host: The host on which the job is running
    :param jobid: The jobid as specified by the queing system
    :param kind: The output can be returned as dict, xml, and yaml
    :return: the job status in the requested representation
    """
    manager_host = self.manager(host)
    # qstat -x emits the job status as XML
    qstat_xml_data = Shell.ssh(manager_host, "qstat", "-x", jobid).rstrip()
    if kind == 'xml':
        r = qstat_xml_data
    else:
        r = self.qstat_xml_to_dict(qstat_xml_data)
        # augment the record with cloudmesh-specific fields
        r[unicode(jobid)][u"cm_jobid"] = self.jobid
        r[unicode(jobid)]["cm_Variable_list"] = self.variable_list(r)
        if kind == 'yaml':
            r = yaml.dump(r, default_flow_style=False)
    return r
def qstat(self, host, user=True, format='dict'):
    """
    executes the qstat command on a particular host and returns the
    information as dict.

    :param host: The host as specified in ~/.ssh/config
    :param user: If True, only returns information for the user
                 If False, all jobs for all users are returned
    :param format: 'dict' or 'xml'
    :return: the selected job information
    """
    data = None
    username = self.username(host)
    manager_host = self.manager(host)
    # qstat -x returns the queue state as XML
    xml_data = Shell.ssh(manager_host, "qstat", "-x").rstrip()
    if format == 'dict':
        data = OpenPBS.qstat_xml_to_dict(xml_data)
        selected_data = {}
        # filter jobs by owner unless all users were requested
        for jobid in data:
            (owner, cm_host) = data[jobid]['Job_Owner'].split('@')
            if not user:
                selected_data[unicode(jobid)] = data[unicode(jobid)]
            elif owner == username:
                selected_data[unicode(jobid)] = data[unicode(jobid)]
        data = selected_data
        # augment each record with cloudmesh-specific fields
        for jobid in data:
            data[unicode(jobid)][u"cm_jobid"] = jobid
            if "Variable_list" in data[unicode(jobid)]:
                data[unicode(
                    jobid)][u"cm_Variable_list"] = self.variable_list(
                    data, jobid)
    elif format == "xml":
        if user is not None:
            print("WARNING: "
                  "restrictiong xml data for a user not supported.")
        data = xml_data
    # NOTE(review): when format == "xml", data is a string here and
    # dict(data) will raise — TODO confirm intended behavior
    return dict(data)
def qstat(self, host, user=True, format='dict'):
    """
    executes the qstat command on a particular host and returns the
    information as dict.

    :param host: The host as specified in ~/.ssh/config
    :param user: If True, only returns information for the user
                 If False, all jobs for all users are returned
    :param format: 'dict' or 'xml'
    :return: the selected job information
    """
    data = None
    username = self.username(host)
    manager_host = self.manager(host)
    # qstat -x returns the queue state as XML
    xml_data = Shell.ssh(manager_host, "qstat", "-x").rstrip()
    if format == 'dict':
        data = OpenPBS.qstat_xml_to_dict(xml_data)
        selected_data = {}
        # filter jobs by owner unless all users were requested
        for jobid in data:
            (owner, cm_host) = data[jobid]['Job_Owner'].split('@')
            if not user:
                selected_data[unicode(jobid)] = data[unicode(jobid)]
            elif owner == username:
                selected_data[unicode(jobid)] = data[unicode(jobid)]
        data = selected_data
        # augment each record with cloudmesh-specific fields
        for jobid in data:
            data[unicode(jobid)][u"cm_jobid"] = jobid
            if "Variable_list" in data[unicode(jobid)]:
                data[unicode(jobid)][u"cm_Variable_list"] = self.variable_list(data, jobid)
    elif format == "xml":
        if user is not None:
            print("WARNING: "
                  "restrictiong xml data for a user not supported.")
        data = xml_data
    # NOTE(review): when format == "xml", data is a string here and
    # dict(data) will raise — TODO confirm intended behavior
    return dict(data)
def pbsnodes_data(manager_host):
    """Return the raw `pbsnodes -l -n` listing from the manager host,
    without its trailing character (the final newline)."""
    raw = Shell.ssh(manager_host, "pbsnodes", "-l", "-n")
    return str(raw)[:-1]
def pbsnodes_data(manager_host):
    """Fetch the node listing via `pbsnodes -l -n` on the manager host."""
    output = str(Shell.ssh(manager_host, "pbsnodes", "-l", "-n"))
    # drop the trailing newline emitted by pbsnodes
    return output[:-1]
def do_network(self, args, arguments):
    # NOTE: the docstring below is a docopt usage string parsed at
    # runtime; its wording must stay in sync with the argument lookups
    # in the body.
    """
    ::

        Usage:
            network get fixed [ip] [--cloud=CLOUD] FIXED_IP
            network get floating [ip] [--cloud=CLOUD] FLOATING_IP_ID
            network reserve fixed [ip] [--cloud=CLOUD] FIXED_IP
            network unreserve fixed [ip] [--cloud=CLOUD] FIXED_IP
            network associate floating [ip] [--cloud=CLOUD] [--group=GROUP]
                [--instance=INS_ID_OR_NAME] [FLOATING_IP]
            network disassociate floating [ip] [--cloud=CLOUD] [--group=GROUP]
                [--instance=INS_ID_OR_NAME] [FLOATING_IP]
            network create floating [ip] [--cloud=CLOUD] [--pool=FLOATING_IP_POOL]
            network delete floating [ip] [--cloud=CLOUD] FLOATING_IP...
            network list floating pool [--cloud=CLOUD]
            network list floating [ip] [--cloud=CLOUD] [--instance=INS_ID_OR_NAME] [IP_OR_ID]
            network create cluster --group=demo_group
            network -h | --help

        Options:
            -h                          help message
            --cloud=CLOUD               Name of the IaaS cloud e.g. india_openstack_grizzly.
            --group=GROUP               Name of the group in Cloudmesh
            --pool=FLOATING_IP_POOL     Name of Floating IP Pool
            --instance=INS_ID_OR_NAME   ID or Name of the vm instance

        Arguments:
            IP_OR_ID        IP Address or ID of IP Address
            FIXED_IP        Fixed IP Address, e.g. 10.1.5.2
            FLOATING_IP     Floating IP Address, e.g. 192.1.66.8
            FLOATING_IP_ID  ID associated with Floating IP,
                            e.g. 185c5195-e824-4e7b-8581-703abec4bc01

        Examples:
            $ network get fixed ip --cloud=india 10.1.2.5
            $ network get fixed --cloud=india 10.1.2.5
            $ network get floating ip --cloud=india 185c5195-e824-4e7b-8581-703abec4bc01
            $ network get floating --cloud=india 185c5195-e824-4e7b-8581-703abec4bc01
            $ network reserve fixed ip --cloud=india 10.1.2.5
            $ network reserve fixed --cloud=india 10.1.2.5
            $ network unreserve fixed ip --cloud=india 10.1.2.5
            $ network unreserve fixed --cloud=india 10.1.2.5
            $ network associate floating ip --cloud=india --instance=albert-001 192.1.66.8
            $ network associate floating --cloud=india --instance=albert-001
            $ network associate floating --cloud=india --group=albert_group
            $ network disassociate floating ip --cloud=india --instance=albert-001 192.1.66.8
            $ network disassociate floating --cloud=india --instance=albert-001 192.1.66.8
            $ network create floating ip --cloud=india --pool=albert-f01
            $ network create floating --cloud=india --pool=albert-f01
            $ network delete floating ip --cloud=india 192.1.66.8 192.1.66.9
            $ network delete floating --cloud=india 192.1.66.8 192.1.66.9
            $ network list floating ip --cloud=india
            $ network list floating --cloud=india
            $ network list floating --cloud=india 192.1.66.8
            $ network list floating --cloud=india --instance=323c5195-7yy34-4e7b-8581-703abec4b
            $ network list floating pool --cloud=india
            $ network create cluster --group=demo_group
    """
    # pprint(arguments)

    # Get the cloud parameter OR read default
    cloudname = arguments["--cloud"] or Default.get_cloud()
    if cloudname is None:
        Console.error("Default cloud has not been set!"
                      "Please use the following to set it:\n"
                      "cm default cloud=CLOUDNAME\n"
                      "or provide it via the --cloud=CLOUDNAME argument.")
        return ""

    # Fixed IP info
    if arguments["get"] \
            and arguments["fixed"]:
        fixed_ip = arguments["FIXED_IP"]
        result = Network.get_fixed_ip(cloudname,
                                      fixed_ip_addr=fixed_ip)
        Console.msg(result)

    # Floating IP info
    elif arguments["get"] \
            and arguments["floating"]:
        floating_ip_id = arguments["FLOATING_IP_ID"]
        result = Network.get_floating_ip(cloudname,
                                         floating_ip_or_id=floating_ip_id)
        Console.msg(result)

    # Reserve a fixed ip
    elif arguments["reserve"] \
            and arguments["fixed"]:
        fixed_ip = arguments["FIXED_IP"]
        result = Network.reserve_fixed_ip(cloudname=cloudname,
                                          fixed_ip_addr=fixed_ip)
        if result is not None:
            Console.ok("Reserve fixed ip address [{}] complete.".format(fixed_ip))

    # Un-Reserve a fixed ip
    elif arguments["unreserve"] \
            and arguments["fixed"]:
        fixed_ip = arguments["FIXED_IP"]
        result = Network.unreserve_fixed_ip(cloudname=cloudname,
                                            fixed_ip_addr=fixed_ip)
        if result is not None:
            Console.ok("Un-Reserve fixed ip address [{}] complete.".format(fixed_ip))

    # Associate floating IP
    elif arguments["associate"] \
            and arguments["floating"]:
        # Get all command-line arguments
        group_name = arguments["--group"]
        instance_id = arguments["--instance"]
        floating_ip = arguments["FLOATING_IP"]
        print(floating_ip)

        # group supplied
        if group_name is not None:
            """
            Group name has been provided.
            Assign floating IPs to all vms in the group and return
            """
            # Get the group information
            group = Group.get_info(name=group_name,
                                   cloud=cloudname,
                                   output="json")
            if group is not None:
                # Convert from str to json
                group = json.loads(group)

                # For each vm in the group
                # Create and assign a floating IP
                for item in group:
                    instance_id = group[item]["value"]
                    # Get the instance dict
                    instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                              instance_id=instance_id)
                    # Instance not found
                    if instance_dict is None:
                        Console.error("Instance [{}] not found in the cloudmesh database!"
                                      .format(instance_id))
                        return ""

                    # Get the instance name
                    instance_name = instance_dict["name"]
                    floating_ip = Network.create_assign_floating_ip(cloudname=cloudname,
                                                                    instance_name=instance_name)
                    if floating_ip is not None:
                        Console.ok("Created and assigned Floating IP [{}] to instance [{}]."
                                   .format(floating_ip, instance_name))
                        # Refresh VM in db
                        self.refresh_vm(cloudname)
            else:
                Console.error("No group [{}] in the Cloudmesh database."
                              .format(group_name))
                return ""

        # floating-ip not supplied, instance-id supplied
        elif len(floating_ip) == 0 and instance_id is not None:
            """
            Floating IP has not been provided, instance-id provided.
            Generate one from the pool, and assign to vm and return
            """
            instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                      instance_id=instance_id)
            # Instance not found
            if instance_dict is None:
                Console.error("Instance [{}] not found in the cloudmesh database!"
                              .format(instance_id))
                return ""

            instance_name = instance_dict["name"]
            floating_ip = Network.create_assign_floating_ip(cloudname=cloudname,
                                                            instance_name=instance_name)
            if floating_ip is not None:
                Console.ok("Created and assigned Floating IP [{}] to instance [{}]."
                           .format(floating_ip, instance_name))

        # instance-id & floating-ip supplied
        elif instance_id is not None:
            """
            Floating IP & Instance ID have been provided
            Associate the IP to the instance and return
            """
            instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                      instance_id=instance_id)
            floating_ip = floating_ip[0]
            # Instance not found
            if instance_dict is None:
                Console.error("Instance [{}] not found in the cloudmesh database!"
                              .format(instance_id))
                return ""

            instance_name = instance_dict["name"]
            result = Network.associate_floating_ip(cloudname=cloudname,
                                                   instance_name=instance_name,
                                                   floating_ip=floating_ip)
            if result is not None:
                Console.ok("Associated Floating IP [{}] to instance [{}]."
                           .format(floating_ip, instance_name))

        # Invalid parameters
        else:
            Console.error("Please provide at least one of [--group] OR [--instance] parameters.\n"
                          "You can also provide [FLOATING_IP] AND [--instance] parameters.\n"
                          "See 'cm network --help' for more info.")
            return ""

        # Refresh VM in db
        self.refresh_vm(cloudname)

    elif arguments["disassociate"] \
            and arguments["floating"]:
        # Get all command-line arguments
        group_name = arguments["--group"]
        instance_id = arguments["--instance"]
        floating_ip = arguments["FLOATING_IP"]

        # group supplied
        if group_name is not None:
            """
            Group name has been provided.
            Remove floating IPs of all vms in the group and return
            """
            # Get the group information
            group = Group.get_info(name=group_name,
                                   cloud=cloudname,
                                   output="json")
            if group is not None:
                # Convert from str to json
                group = json.loads(group)

                # For each vm in the group
                # Create and assign a floating IP
                for item in group:
                    instance_id = group[item]["value"]
                    # Get the instance dict
                    instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                              instance_id=instance_id)
                    # Instance not found
                    if instance_dict is None:
                        Console.error("Instance [{}] not found in the cloudmesh database!"
                                      .format(instance_id))
                        return ""

                    # Get the instance name
                    instance_name = instance_dict["name"]
                    floating_ip = instance_dict["floating_ip"]

                    # Floating ip argument invalid
                    if floating_ip is None:
                        Console.error("Instance[{}] does not have a floating_ip."
                                      .format(instance_name))
                        return ""

                    result = Network.disassociate_floating_ip(cloudname=cloudname,
                                                              instance_name=instance_name,
                                                              floating_ip=floating_ip)
                    if result is not None:
                        Console.ok("Disassociated Floating IP [{}] from instance [{}]."
                                   .format(floating_ip, instance_name))
            else:
                Console.error("No group [{}] in the Cloudmesh database."
                              .format(group_name))
                return ""

        # floating-ip not supplied, instance-id supplied
        elif len(floating_ip) == 0 and instance_id is not None:
            """
            Floating IP has not been provided, instance-id provided.
            Remove floating ip allocated to vm and return
            """
            instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                      instance_id=instance_id)
            # Instance not found
            if instance_dict is None:
                Console.error("Instance [{}] not found in the cloudmesh database!"
                              .format(instance_id))
                return ""

            instance_name = instance_dict["name"]
            floating_ip = instance_dict["floating_ip"]

            # Floating ip argument invalid
            if floating_ip is None:
                Console.error("Instance[{}] does not have a floating_ip."
                              .format(instance_name))
                return ""

            result = Network.disassociate_floating_ip(cloudname=cloudname,
                                                      instance_name=instance_name,
                                                      floating_ip=floating_ip)
            if result is not None:
                Console.ok("Disassociated Floating IP [{}] from instance [{}]."
                           .format(floating_ip, instance_name))

        # instance-id & floating-ip supplied
        elif instance_id is not None:
            """
            Floating IP & Instance ID have been provided
            Remove the IP from the instance and return
            """
            instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                      instance_id=instance_id)
            floating_ip = floating_ip[0]
            # Instance not found
            if instance_dict is None:
                Console.error("Instance [{}] not found in the cloudmesh database!"
                              .format(instance_id))
                return ""

            instance_name = instance_dict["name"]
            _floating_ip = instance_dict["floating_ip"]

            # Floating ip argument invalid
            if _floating_ip != floating_ip:
                Console.error("Invalid floating_ip [{}] for instance [{}]."
                              .format(floating_ip, instance_name))
                return ""

            result = Network.disassociate_floating_ip(cloudname=cloudname,
                                                      instance_name=instance_name,
                                                      floating_ip=floating_ip)
            if result is not None:
                Console.ok("Disassociated Floating IP [{}] from instance [{}]."
                           .format(floating_ip, instance_name))

        # Invalid parameters
        else:
            Console.error("Please provide at least one of [--group] OR [--instance] parameters.\n"
                          "You can also provide [FLOATING_IP] AND [--instance] parameters.\n"
                          "See 'cm network --help' for more info.")
            return ""

        # Refresh VM in db
        self.refresh_vm(cloudname)

    # Create new floating ip under floating pool
    elif arguments["create"] \
            and arguments["floating"]:
        floating_pool = arguments["--pool"]
        result = Network.create_floating_ip(cloudname=cloudname,
                                            floating_pool=floating_pool)
        if result is not None:
            Console.ok("Created new floating IP [{}]".format(result))
        else:
            Console.error("Failed to create floating IP! Please check arguments.")

    # Delete a floating ip address
    elif arguments["delete"] \
            and arguments["floating"]:
        floating_ips = arguments["FLOATING_IP"]
        for floating_ip in floating_ips:
            result = Network.delete_floating_ip(cloudname=cloudname,
                                                floating_ip_or_id=floating_ip)
            if result is not None:
                Console.ok(result)
            else:
                Console.error("Failed to delete floating IP address!")

    # Floating IP Pool List
    elif arguments["list"] \
            and arguments["floating"] \
            and arguments["pool"]:
        result = Network.list_floating_ip_pool(cloudname)
        Console.msg(result)

    # Floating IP list [or info]
    elif arguments["list"] \
            and arguments["floating"]:
        ip_or_id = arguments["IP_OR_ID"]
        instance_id = arguments["--instance"]

        # Refresh VM in db
        self.refresh_vm(cloudname)

        # If instance id is supplied
        if instance_id is not None:
            instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                      instance_id=instance_id)
            # Instance not found
            if instance_dict is None:
                Console.error("Instance [{}] not found in the cloudmesh database!"
                              .format(instance_id))
                return ""

            # Read the floating_ip from the dict
            ip_or_id = instance_dict["floating_ip"]
            if ip_or_id is None:
                Console.error("Instance with ID [{}] does not have a floating IP address!"
                              .format(instance_id))
                return ""

        # If the floating ip or associated ID is supplied
        if ip_or_id is not None:
            result = Network.get_floating_ip(cloudname,
                                             floating_ip_or_id=ip_or_id)
            if result is not None:
                Console.msg(result)
            else:
                Console.error("Floating IP not found! Please check your arguments.")
                return ""
        # Retrieve the full list
        else:
            result = Network.list_floating_ip(cloudname)
            Console.msg(result)

    # Create a virtual cluster
    elif arguments["cluster"] and \
            arguments["create"]:
        group_name = arguments["--group"] or \
            Default.get("group", cloud=cloudname)

        # Get the group information
        group = Group.get_info(name=group_name,
                               cloud=cloudname,
                               output="json")
        if group is not None:
            # Convert from str to json
            group = json.loads(group)

            # var contains pub key of all vms
            public_keys = ""
            login_users = []
            login_ips = []

            # For each vm in the group
            # Create and assign a floating IP
            for item in group:
                instance_id = group[item]["value"]
                # Get the instance dict
                instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                          instance_id=instance_id)
                # Instance not found
                if instance_dict is None:
                    Console.error("Instance [{}] not found in the cloudmesh database!"
                                  .format(instance_id))
                    return ""

                # Get the instance name
                instance_name = instance_dict["name"]
                floating_ip = instance_dict["floating_ip"]

                # If vm does not have floating ip, then create
                if floating_ip is None:
                    floating_ip = Network.create_assign_floating_ip(cloudname=cloudname,
                                                                    instance_name=instance_name)
                    if floating_ip is not None:
                        Console.ok("Created and assigned Floating IP [{}] to instance [{}]."
                                   .format(floating_ip, instance_name))
                        # Refresh VM in db
                        self.refresh_vm(cloudname)

                # Get the login user for this machine
                user = raw_input("Enter the login user for VM {} : ".format(instance_name))
                passphrase = getpass.getpass("Enter the passphrase key on VM {} : ".format(instance_name))

                # create list for second iteration
                login_users.append(user)
                login_ips.append(floating_ip)

                login_args = [
                    user + "@" + floating_ip,
                ]
                keygen_args = [
                    "ssh-keygen -t rsa -f ~/.ssh/id_rsa -N " + passphrase
                ]
                cat_pubkey_args = [
                    "cat ~/.ssh/id_rsa.pub"
                ]

                # generate a keypair on the vm, then collect its pub key
                generate_keypair = login_args + keygen_args
                result = Shell.ssh(*generate_keypair)
                # print("***** Keygen *****")
                # print(result)

                cat_public_key = login_args + cat_pubkey_args
                result = Shell.ssh(*cat_public_key)
                public_keys += "\n" + result
                # print("***** id_rsa.pub *****")
                # print(result)

            # print("***** public keys *****")
            # print(public_keys)

            # second pass: distribute all collected pub keys to every vm
            for user, ip in zip(login_users, login_ips):
                arguments = [
                    user + "@" + ip,
                    "echo '" + public_keys + "' >> ~/.ssh/authorized_keys"
                ]
                # copy the public key contents to auth_keys
                result = Shell.ssh(*arguments)

            Console.ok("Virtual cluster creation successfull.")
        else:
            Console.error("No group [{}] in the Cloudmesh database."
                          .format(group_name))
            return ""

    return ""
def qstat(host):
    """Run `qstat` on *host* and return its output with trailing whitespace removed."""
    output = Shell.ssh(host, "qstat")
    return output.rstrip()
def run(cls, cluster, cmd, **kwargs):
    """
    Submit *cmd* as a slurm batch job on the cluster.

    Builds an sbatch script from the cluster config and kwargs
    (-t, -N, -p, -o, -D overrides), copies it to the remote
    experiment directory, submits it, and records everything in data.

    :param cluster: the cluster like comet
    :param cmd: the command to run inside the batch script
    :param kwargs: optional sbatch option overrides keyed by flag
    :return: the data dict describing the submission (id, script, output, ...)
    """
    print ("CCCC>", cluster)
    # determine the script name..
    #
    # TODO: script count is variable in data base, we test if fil exists and if it
    # does increase counter till we find one that does not, that will be new counter.
    # new couner will than be placed in db.
    #
    # define get_script_name(dirctory, prefix, counter)
    # there maybe s a similar thing already in the old cloudmesh
    #
    # if not kwargs['-name']:
    #
    #    old_count = Shell.ssh(cluster,
    #                      "ls {}*.sh | wc -l | sed 's/$/ count/'".
    #                      format(username))
    #    c = [f for f in old_count.splitlines() if 'count' in f]
    #    script_count = c[0].split()[0]
    # else:
    #    script_count = kwargs['-name']

    config = cls.read_config(cluster)
    cls.incr()
    # submission parameters; the counter makes each script name unique
    data = {
        "cluster": cluster,
        "count": cls.counter(),
        "username": config["credentials"]["username"],
        "remote_experiment_dir": config["default"]["experiment_dir"],
        "queue": config["default"]["queue"],
        "id": None,
        "nodes": 1,
        "tasks_per_node": 1,
    }
    data["script_base_name"] = "{username}-{count}".format(**data)
    data["script_name"] = "{username}-{count}.sh".format(**data)
    data["script_output"] = "{username}-{count}.out".format(**data)
    # double .format: the configured experiment_dir may itself contain
    # {placeholders} — TODO confirm
    data["remote_experiment_dir"] = \
        "{remote_experiment_dir}/{count}".format(**data).format(**data)

    # overwrite defaults with any sbatch flags passed via kwargs
    # NOTE: py2-only tuple-unpacking lambda and iteritems
    option_mapping = {'-t': '{tasks_per_node}'.format(**data),
                      '-N': '{nodes}'.format(**data),
                      '-p': '{queue}'.format(**data),
                      '-o': '{script_output}'.format(**data),
                      '-D': '{remote_experiment_dir}'.format(**data)}
    map(lambda (k, v): option_mapping.__setitem__(k, kwargs.get(k) or v),
        option_mapping.iteritems())

    config = cls.read_config(cluster)
    project = None
    # add an account flag only when a real project is configured
    try:
        project = config["credentials"]["project"]
        if project.lower() not in ["tbd", "none"]:
            option_mapping["-A"] = project
    except:
        pass

    for key in option_mapping:
        data[key] = option_mapping[key]

    # create the options for the script
    options = ""
    for key, value in option_mapping.iteritems():
        options += '#SBATCH {} {}\n'.format(key, value)

    data["command"] = cmd
    data["options"] = options
    # the batch script echoes the slurm environment before running cmd
    script = textwrap.dedent(
        """
        #! /bin/sh
        {options}
        echo '#CLOUDMESH: BATCH ENVIRONMENT'
        echo 'BASIL_RESERVATION_ID:' $BASIL_RESERVATION_ID
        echo 'SLURM_CPU_BIND:' $SLURM_CPU_BIND
        echo 'SLURM_JOB_ID:' $SLURM_JOB_ID
        echo 'SLURM_JOB_CPUS_PER_NODE:' $SLURM_JOB_CPUS_PER_NODE
        echo 'SLURM_JOB_DEPENDENCY:' $SLURM_JOB_DEPENDENCY
        echo 'SLURM_JOB_NAME:' $SLURM_JOB_NAME
        echo 'SLURM_JOB_NODELIST:' $SLURM_JOB_NODELIST
        echo 'SLURM_JOB_NUM_NODES:' $SLURM_JOB_NUM_NODES
        echo 'SLURM_MEM_BIND:' $SLURM_MEM_BIND
        echo 'SLURM_TASKS_PER_NODE:' $SLURM_TASKS_PER_NODE
        echo 'MPIRUN_NOALLOCATE:' $MPIRUN_NOALLOCATE
        echo 'MPIRUN_NOFREE:' $MPIRUN_NOFREE
        echo 'SLURM_NTASKS_PER_CORE:' $SLURM_NTASKS_PER_CORE
        echo 'SLURM_NTASKS_PER_NODE:' $SLURM_NTASKS_PER_NODE
        echo 'SLURM_NTASKS_PER_SOCKET:' $SLURM_NTASKS_PER_SOCKET
        echo 'SLURM_RESTART_COUNT:' $SLURM_RESTART_COUNT
        echo 'SLURM_SUBMIT_DIR:' $SLURM_SUBMIT_DIR
        echo 'MPIRUN_PARTITION:' $MPIRUN_PARTITION

        srun -l echo '#CLOUDMESH: Starting'
        srun -l {command}
        srun -l echo '#CLOUDMESH: Test ok'
        """
    ).format(**data).replace("\r\n", "\n").strip()

    cls.create_remote_dir(cluster, data["remote_experiment_dir"])

    _from = Config.path_expand('~/.cloudmesh/{script_name}'.format(**data))
    _to = '{cluster}:{remote_experiment_dir}'.format(**data)
    data["from"] = _from
    data["to"] = _to
    data["script"] = script

    # write the script to local
    print(_from)
    print(_to)
    with open(_from, 'w') as local_file:
        local_file.write(script)

    # copy to remote host
    Shell.scp(_from, _to)

    # delete local file
    # Shell.execute('rm', _from)

    # import sys; sys.exit()

    # run the sbatch command
    cmd = 'sbatch {remote_experiment_dir}/{script_name}'.format(**data)
    data["cmd"] = cmd
    print ("CMD>", cmd)

    result = Shell.ssh(cluster, cmd)
    data["output"] = result

    # find the job id in the sbatch confirmation line
    for line in result.split("\n"):
        print ("LLL>", line)
        if "Submitted batch job" in line:
            data["id"] = int(line.replace("Submitted batch job ", "").strip())
            break

    #
    # HACK, should not depend on Model.py
    #
    # from cloudmesh_client.db.model import BATCHJOB
    # name = ""
    # BATCHJOB(name,
    #          cluster=data["cluster"],
    #          id=data["id"],
    #          script=data["script"])

    # has user and username which seems wrong
    # here what we have in data and want to store the - options are obviously wrong
    # and need to be full names
    # noinspection PyPep8,PyPep8
    """
    {'-D': '/N/u/gvonlasz/experiment/3',
     '-N': '1',
     '-o': 'gvonlasz-3.out',
     '-p': 'delta',
     '-t': '1',
     'cluster': 'india',
     'cmd': 'sbatch /N/u/gvonlasz/experiment/3/gvonlasz-3.sh',
     'command': 'uname',
     'count': 3,
     'from': '/Users/big/.cloudmesh/gvonlasz-3.sh',
     'id': 1346,
     'options': '#SBATCH -t 1\n#SBATCH -o gvonlasz-3.out\n#SBATCH -N 1\n#SBATCH -p delta\n#SBATCH -D /N/u/gvonlasz/experiment/3\n',
     'output': 'Submitted batch job 1346',
     'queue': 'delta',
     'remote_experiment_dir': '/N/u/gvonlasz/experiment/3',
     'script': "#! /bin/sh\n#SBATCH -t 1\n#SBATCH -o gvonlasz-3.out\n#SBATCH -N 1\n#SBATCH -p delta\n#SBATCH -D /N/u/gvonlasz/experiment/3\n\nsrun -l echo '#CLOUDMESH: Starting'\nsrun -l uname\nsrun -l echo '#CLOUDMESH: Test ok'",
     'script_base_name': 'gvonlasz-3',
     'script_name': 'gvonlasz-3.sh',
     'script_output': 'gvonlasz-3.out',
     'to': 'india:/N/u/gvonlasz/experiment/3',
     'username': '******'}
    """
    """
    we also want to store what part of the .out file,

    BASIL_RESERVATION_ID:
    SLURM_CPU_BIND:
    SLURM_JOB_ID: 1351
    SLURM_JOB_CPUS_PER_NODE: 12
    SLURM_JOB_DEPENDENCY:
    SLURM_JOB_NAME: gvonlasz-8.sh
    SLURM_JOB_NODELIST: d001
    SLURM_JOB_NUM_NODES: 1
    SLURM_MEM_BIND:
    SLURM_TASKS_PER_NODE: 12
    MPIRUN_NOALLOCATE:
    MPIRUN_NOFREE:
    SLURM_NTASKS_PER_CORE:
    SLURM_NTASKS_PER_NODE:
    SLURM_NTASKS_PER_SOCKET:
    SLURM_RESTART_COUNT:
    SLURM_SUBMIT_DIR: /N/u/gvonlasz
    MPIRUN_PARTITION:

    so maybe we want to use some of the names here as they reflect the env vars
    """

    #
    # add data to database
    #
    cls.add_db(**data)

    return data
def test(cls, cluster, time):
    """Submit a trivial one-task srun probe to check that the cluster
    accepts jobs within the given time limit; returns the ssh output."""
    probe = "srun -n1 -t {} echo '#CLOUDMESH: Test ok'".format(time)
    return Shell.ssh(cluster, probe)
def create_remote_dir(cls, cluster, dir):
    """Create *dir* (including parents) on the remote cluster via ssh."""
    command = "mkdir -p {dir}".format(dir=dir)
    Shell.ssh(cluster, command)
class Experiment(object): @classmethod def rm(cls, cluster, id=None, format=None): data = { "CLUSTER": cluster, "ID": id, } result = None if id is not None: try: result = Shell.ssh(cluster, "rm -rf experiment/{ID}".format(**data)) except Exception, e: pass else: try: result = Shell.ssh(cluster, "rm -rf experiment/*").split("\n") except Exception, e: pass return result @classmethod def list(cls, cluster, id=None, format=None): data = { "CLUSTER": cluster, "ID": id, } result = None if id is not None: try: result = Shell.ssh(cluster, "ls experiment/{ID}".format(**data))