Ejemplo n.º 1
0
    def delete(cls, cluster, job, group=None):
        """
        Terminate a single job, or every active job of a group, on a cluster.

        When ``group`` is given, the job ids stored in the database for that
        cluster/group are intersected with the ids currently reported by
        ``cls.queue`` and every match is cancelled with ``scancel``; ``job``
        is not used in that case. Otherwise ``job`` is cancelled by id when
        it consists only of digits, or by name (``scancel -n``) if not.

        :param cluster: the cluster like comet
        :param job: the job id or name
        :param group: optional group name; selects all jobs of that group
        :return: a success message, or the caught exception object if
                 anything failed (the exception is returned, not raised)
        """
        try:
            if group is not None:
                # job ids recorded in the db for this cluster/group
                cm = CloudmeshDatabase()
                db_jobs = cm.find('batchjob', cluster=cluster, group=group)
                known_ids = {db_jobs[key]['job_id'] for key in db_jobs}

                # job ids that are currently in the queue
                active_jobs = json.loads(cls.queue(cluster))
                active_ids = {active_jobs[key]['jobid'] for key in active_jobs}

                # only cancel jobs that are both known and still active;
                # an empty intersection simply cancels nothing
                for job_id in known_ids & active_ids:
                    Shell.ssh(cluster, 'scancel {}'.format(str(job_id)))
                    print("Deleted {}".format(job_id))

                return "All jobs for group {} killed successfully".format(group)

            # NOTE(review): job is interpolated unquoted into the remote
            # command line; confirm callers never pass untrusted names.
            args = 'scancel '
            if job.isdigit():
                args += job
            else:
                args += "-n {}".format(job)

            Shell.ssh(cluster, args)
            return "Job {} killed successfully".format(job)
        except Exception as ex:
            # callers receive the exception object as the return value
            print(ex)
            return ex
Ejemplo n.º 2
0
    def qsub(self, name, host, script, template=None, kind="dict"):
        """
        Submit a script with qsub on the manager of the given host.

        The job script is created from ``script`` (optionally wrapped in
        ``template``), written to a local file called ``name``, copied into
        the configured remote scripts directory and submitted there.
        NOTE this method may not yet be fully implemented

        :param name: name of the script
        :param host: host on which the script is to be run
        :param script: The name of the script
        :param template: The script is wrapped into a template
        :param kind: The return is passed as dict, yaml, xml
        :return: the result of ``self.jobstatus`` for the submitted job
        """
        self.jobid_incr()

        # render the job script and store it locally under ``name``
        self._write_to_file(self.create_script(name, script, template), name)

        # make sure the remote scripts directory exists
        scripts_dir = self.data.get("cloudmesh", "pbs", host, "scripts")
        print(scripts_dir)
        xmkdir(host, scripts_dir)

        manager = self.manager(host)

        # copy the script to the manager, then submit it from there
        Shell.scp(name, ":".join([manager, scripts_dir]))
        submit_command = "qsub {0}/{1}".format(scripts_dir, name)
        jobid = Shell.ssh(manager, submit_command).rstrip()

        return self.jobstatus(host, jobid, kind=kind)
Ejemplo n.º 3
0
    def qsub(self, name, host, script, template=None, kind="dict"):
        """
        Submit a script with qsub on the manager of the given host.

        The job script is created from ``script`` (optionally wrapped in
        ``template``), written to a local file called ``name``, copied into
        the configured remote scripts directory and submitted there.
        NOTE this method may not yet be fully implemented

        :param name: name of the script
        :param host: host on which the script is to be run
        :param script: The name of the script
        :param template: The script is wrapped into a template
        :param kind: The return is passed as dict, yaml, xml
        :return: the result of ``self.jobstatus`` for the submitted job
        """
        self.jobid_incr()

        # render the job script and store it locally under ``name``
        rendered = self.create_script(name, script, template)
        self._write_to_file(rendered, name)

        # make sure the remote scripts directory exists
        target_dir = self.data.get("cloudmesh", "pbs", host, "scripts")
        print(target_dir)
        xmkdir(host, target_dir)

        manager = self.manager(host)

        # copy the script to the manager, then submit it from there
        Shell.scp(name, ":".join([manager, target_dir]))
        jobid = Shell.ssh(
            manager,
            "qsub {0}/{1}".format(target_dir, name)).rstrip()

        return self.jobstatus(host, jobid, kind=kind)
Ejemplo n.º 4
0
 def execute(self, name, command):
     """
     Run ``command`` on the named host and return the last token of its output.

     ``localhost`` is served through ``Shell.sh("-c", ...)``; every other
     name goes through ``Shell.ssh``.

     :param name: host name, or "localhost" for local execution
     :param command: the command to run
     :return: the final whitespace-separated token of the output, or an
              empty string when the output contains no tokens
     """
     if name == "localhost":
         output = Shell.sh("-c", command)
     else:
         output = Shell.ssh(name, command)
     # split() breaks on any whitespace; the [-1:] slice keeps only the
     # final token and yields an empty list for blank output
     last_token = output.split()[-1:]
     return '\n'.join(last_token)
Ejemplo n.º 5
0
 def execute(self, name, command):
     """
     Run ``command`` on the named host and return the last token of its output.

     ``localhost`` is served through ``Shell.sh("-c", ...)``; every other
     name goes through ``Shell.ssh``.

     :param name: host name, or "localhost" for local execution
     :param command: the command to run
     :return: the final whitespace-separated token of the output, or an
              empty string when the output contains no tokens
     """
     raw = (Shell.sh("-c", command) if name == "localhost"
            else Shell.ssh(name, command))
     # only the trailing token survives; blank output gives ''
     tokens = raw.split()
     return '\n'.join(tokens[-1:])
Ejemplo n.º 6
0
    def delete(cls, cluster, job):
        """
        Cancel one job on a cluster with ``scancel``.

        A ``job`` made up only of digits is treated as a job id; anything
        else is passed as a job name via ``scancel -n``.

        :param cluster: the cluster like comet
        :param job: the job id or name
        :return: a success message, or the caught exception object if
                 anything failed (the exception is returned, not raised)
        """
        try:
            # ids are passed as-is, names need the -n switch
            target = job if job.isdigit() else "-n {}".format(job)
            Shell.ssh(cluster, 'scancel ' + target)
            return "Job {} killed successfully".format(job)
        except Exception as ex:
            return ex
Ejemplo n.º 7
0
 def rm(cls, cluster, id=None, format=None):
     """
     Remove the remote directory ``experiment/<id>`` on a cluster.

     Nothing is executed when ``id`` is None. A failing remote command is
     deliberately ignored (best effort).

     :param cluster: host name handed to ``Shell.ssh``
     :param id: name of the experiment directory to remove
     :param format: not used by the visible code
     :return: None; the command output is captured but not returned
     """
     # NOTE(review): id is interpolated unquoted into an ``rm -rf`` command
     # line (an empty string would target the whole experiment directory);
     # confirm callers validate it.
     result = None
     if id is not None:
         try:
             result = Shell.ssh(cluster, "rm -rf experiment/{ID}".format(ID=id))
         except Exception:
             # best effort: a failed removal is ignored on purpose
             pass
Ejemplo n.º 8
0
 def list(cls, cluster, id=None, format=None):
     """
     List the remote directory ``experiment/<id>`` on a cluster.

     Nothing is executed when ``id`` is None. If the remote command fails
     the captured result is reset to None.

     :param cluster: host name handed to ``Shell.ssh``
     :param id: name of the experiment directory to list
     :param format: not used by the visible code
     :return: None; the listing is captured but not returned
     """
     # NOTE(review): the split listing is stored in ``result`` but never
     # returned by the visible code - confirm whether a return is missing.
     result = None
     if id is not None:
         try:
             result = Shell.ssh(cluster, "ls experiment/{ID}".format(ID=id))
             result = result.split("\n")
         except Exception:
             # a failed listing is treated as "no result"
             result = None
Ejemplo n.º 9
0
    def info(cls, cluster, format='json', all=False):
        """
        Query ``sinfo`` on a cluster and return the partition table.

        Every entry of the parsed table is extended with the ``cluster``
        name and an ``updated`` timestamp.

        :param cluster: host name handed to ``Shell.ssh``
        :param format: 'json' for a JSON string; any other value is passed
                       to ``dict_printer`` as its ``output`` argument
        :param all: if True request all sinfo columns (``%all``), otherwise
                    only partition, avail, timelimit, nodes, state, nodelist
        :return: the formatted table
        """
        columns = '%all' if all else '%P|%a|%l|%D|%t|%N'
        result = Shell.ssh(cluster, 'sinfo --format="' + columns + '"')

        # drop everything printed before the table header, if one is found
        rows = result.splitlines()
        header_at = next(
            (pos for pos, row in enumerate(rows) if 'PARTITION|AVAIL|' in row),
            None)
        if header_at is not None:
            result = "\n".join(rows[header_at:])

        table = TableParser(strip=False).to_dict(result)

        # tag each entry with its origin and the time it was read
        for key in table.keys():
            entry = table[key]
            entry['cluster'] = cluster
            entry['updated'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        if format != 'json':
            column_order = ['cluster',
                            'partition',
                            'avail',
                            'timelimit',
                            'nodes',
                            'state',
                            'nodelist',
                            'updated']
            return dict_printer(table, order=column_order, output=format)

        return json.dumps(table, indent=4, separators=(',', ': '))
Ejemplo n.º 10
0
    def nodes(self, host, refresh=True):
        """
        Return the output of ``pbsnodes -a`` parsed into a dict.

        The result maps each node name to a dict of its attributes. The
        ``status`` attribute is split into a nested dict, ``jobs`` into a
        list, and a ``note`` starting with ``{`` or ``[`` is evaluated with
        ``literal_eval``. Lines that cannot be parsed are skipped.

        The parsed data is cached in ``self.pbs_nodes_data``; the cache is a
        single value and is not keyed by ``host``.

        :param host: the name of the host as specified in the .ssh/config file
        :param refresh: if False, returns the cached value when one exists
                        if True, issues a new command and refreshes the cache
        :return: information of the pbsnodes command in a dict
        :raises RuntimeError: if the pbsnodes command cannot be executed
        """
        manager_host = self.manager(host)
        if self.pbs_nodes_data is None or refresh:
            try:
                result = Shell.ssh(manager_host, "pbsnodes", "-a")
            except Exception:
                raise RuntimeError(
                    "can not execute pbs nodes on host {0}".format(
                        manager_host))

            pbsinfo = {}
            # node records are separated by an empty line
            for node in result.split("\n\n"):
                pbs_data = [line.strip() for line in node.split("\n")]
                name = pbs_data[0]
                if name == "":
                    continue

                entry = pbsinfo[name] = {u'name': name}
                for element in pbs_data[1:]:
                    try:
                        # split on the first separator only, so a value that
                        # itself contains " = " is kept instead of dropped
                        (attribute, value) = element.split(" = ", 1)
                        if attribute == 'status':
                            status = entry[attribute] = {}
                            for item in value.split(","):
                                # values may contain '=' themselves
                                (a, v) = item.split("=", 1)
                                status[a] = v
                        elif attribute == 'jobs':
                            entry[attribute] = value.split(',')
                        elif attribute == 'note' and (
                                value.strip().startswith("{")
                                or value.strip().startswith("[")):
                            entry[attribute] = literal_eval(value)
                        else:
                            entry[attribute] = value
                    except Exception:
                        # best effort: an unparsable line is skipped
                        pass
            self.pbs_nodes_data = pbsinfo

        return self.pbs_nodes_data
Ejemplo n.º 11
0
    def nodes(self, host, refresh=True):
        """
        Return the output of ``pbsnodes -a`` parsed into a dict.

        The result maps each node name to a dict of its attributes. The
        ``status`` attribute is split into a nested dict, ``jobs`` into a
        list, and a ``note`` starting with ``{`` or ``[`` is evaluated with
        ``literal_eval``. Lines that cannot be parsed are skipped.

        The parsed data is cached in ``self.pbs_nodes_data``; the cache is a
        single value and is not keyed by ``host``.

        :param host: the name of the host as specified in the .ssh/config file
        :param refresh: if False, returns the cached value when one exists
                        if True, issues a new command and refreshes the cache
        :return: information of the pbsnodes command in a dict
        :raises RuntimeError: if the pbsnodes command cannot be executed
        """
        manager_host = self.manager(host)
        if self.pbs_nodes_data is None or refresh:
            try:
                result = Shell.ssh(manager_host, "pbsnodes", "-a")
            except Exception:
                raise RuntimeError(
                    "can not execute pbs nodes on host {0}".format(manager_host))

            pbsinfo = {}
            # node records are separated by an empty line
            for node in result.split("\n\n"):
                pbs_data = [line.strip() for line in node.split("\n")]
                name = pbs_data[0]
                if name == "":
                    continue

                entry = pbsinfo[name] = {u'name': name}
                for element in pbs_data[1:]:
                    try:
                        # split on the first separator only, so a value that
                        # itself contains " = " is kept instead of dropped
                        (attribute, value) = element.split(" = ", 1)
                        if attribute == 'status':
                            status = entry[attribute] = {}
                            for item in value.split(","):
                                # values may contain '=' themselves
                                (a, v) = item.split("=", 1)
                                status[a] = v
                        elif attribute == 'jobs':
                            entry[attribute] = value.split(',')
                        elif attribute == 'note' and (
                                value.strip().startswith("{")
                                or value.strip().startswith("[")):
                            entry[attribute] = literal_eval(value)
                        else:
                            entry[attribute] = value
                    except Exception:
                        # best effort: an unparsable line is skipped
                        pass
            self.pbs_nodes_data = pbsinfo

        return self.pbs_nodes_data
Ejemplo n.º 12
0
    def queue(cls, cluster, format='json', job=None):
        """
        Query ``squeue`` on a cluster and return the job table.

        Every entry of the parsed table is extended with the ``cluster``
        name and an ``updated`` timestamp.

        :param cluster: host name handed to ``Shell.ssh``
        :param format: 'json' for a JSON string; any other value is passed
                       to ``dict_printer`` as its ``output`` argument
        :param job: optional job id (all digits) or job name to filter on
        :return: the formatted table, or the caught exception object if
                 anything failed (the exception is returned, not raised)
        """
        try:
            pieces = ['squeue ']
            if job is not None:
                # digits select by job id (-j), anything else by name (-n)
                selector = ' -j {} ' if job.isdigit() else ' -n {} '
                pieces.append(selector.format(job))
            pieces.append('--format=%all')
            result = Shell.ssh(cluster, ''.join(pieces))

            # drop everything printed before the table header, if found
            rows = result.splitlines()
            header_at = next(
                (pos for pos, row in enumerate(rows) if 'ACCOUNT|GRES|' in row),
                None)
            if header_at is not None:
                result = "\n".join(rows[header_at:])

            table = TableParser(strip=True).to_dict(result)

            # tag each entry with its origin and the time it was read
            for key in table.keys():
                entry = table[key]
                entry['cluster'] = cluster
                entry['updated'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            if format != 'json':
                column_order = ['cluster',
                                'jobid',
                                'partition',
                                'name',
                                'user',
                                'st',
                                'time',
                                'nodes',
                                'nodelist',
                                'updated']
                return dict_printer(table, order=column_order, output=format)

            return json.dumps(table, indent=4, separators=(',', ': '))
        except Exception as ex:
            return ex
Ejemplo n.º 13
0
 def output(cls, cluster, id=None, format=None):
     """
     Collect the contents of ``experiment/<id>/*.out`` from a cluster.

     :param cluster: host name handed to ``Shell.ssh``
     :param id: experiment id to read; see the review note below for the
                behaviour when it is None
     :param format: not used by the visible code
     :return: a list with one string per id; an entry is "" when reading
              that id failed
     """
     if id is None:
         # NOTE(review): this calls the builtin list() on the cluster name,
         # which yields its individual characters - presumably a listing of
         # the experiment ids on the cluster was intended; confirm.
         ids = list(cluster)
     else:
         ids = [id]

     result = []
     for i in ids:
         try:
             result.append(Shell.ssh(cluster, "cat experiment/{}/*.out".format(i)))
         except Exception:
             # best effort: an unreadable experiment yields an empty entry
             result.append("")
     return result
Ejemplo n.º 14
0
    def jobstatus(self, host, jobid, kind='dict'):
        """
        Return the status of one job as reported by ``qstat -x``.

        For every ``kind`` other than 'xml' the XML is converted with
        ``self.qstat_xml_to_dict`` and the job's entry is extended with
        ``cm_jobid`` and ``cm_Variable_list``.

        :param host: The host on which the job is running
        :param jobid: The jobid as specified by the queuing system
        :param kind: The output can be returned as dict, xml, and yaml
        :return: the raw XML string for 'xml', a YAML string for 'yaml',
                 otherwise the dict
        """
        raw_xml = Shell.ssh(self.manager(host), "qstat", "-x", jobid).rstrip()

        if kind == 'xml':
            return raw_xml

        status = self.qstat_xml_to_dict(raw_xml)
        key = unicode(jobid)
        status[key][u"cm_jobid"] = self.jobid
        status[key]["cm_Variable_list"] = self.variable_list(status)

        if kind == 'yaml':
            return yaml.dump(status, default_flow_style=False)
        return status
Ejemplo n.º 15
0
    def jobstatus(self, host, jobid, kind='dict'):
        """
        Return the status of one job as reported by ``qstat -x``.

        For every ``kind`` other than 'xml' the XML is converted with
        ``self.qstat_xml_to_dict`` and the job's entry is extended with
        ``cm_jobid`` and ``cm_Variable_list``.

        :param host: The host on which the job is running
        :param jobid: The jobid as specified by the queuing system
        :param kind: The output can be returned as dict, xml, and yaml
        :return: the raw XML string for 'xml', a YAML string for 'yaml',
                 otherwise the dict
        """
        manager = self.manager(host)
        raw_xml = Shell.ssh(manager, "qstat", "-x", jobid).rstrip()

        if kind == 'xml':
            return raw_xml

        status = self.qstat_xml_to_dict(raw_xml)
        key = unicode(jobid)
        status[key][u"cm_jobid"] = self.jobid
        status[key]["cm_Variable_list"] = self.variable_list(status)

        return (yaml.dump(status, default_flow_style=False)
                if kind == 'yaml' else status)
Ejemplo n.º 16
0
    def qstat(self, host, user=True, format='dict'):
        """
        Execute ``qstat -x`` on the manager of a host and return the result.

        With ``format='dict'`` the XML is converted with
        ``OpenPBS.qstat_xml_to_dict``, optionally restricted to the jobs
        owned by the user, and each job is extended with ``cm_jobid`` and,
        when it has a ``Variable_list``, ``cm_Variable_list``.

        :param host: The host as specified in ~/.ssh/config
        :param user:  If True, only returns information for the user
                      If False, all jobs for all users are returned
        :param format: 'dict' for the parsed dict, 'xml' for the raw XML
        :return: a dict for format 'dict', the XML string for format 'xml'
        :raises TypeError: if ``format`` is neither 'dict' nor 'xml'
        """
        username = self.username(host)
        manager_host = self.manager(host)
        xml_data = Shell.ssh(manager_host, "qstat", "-x").rstrip()

        if format == "xml":
            # the raw XML cannot be filtered, so only warn when a
            # restriction to the user was actually requested
            if user:
                print("WARNING: "
                      "restrictiong xml data for a user not supported.")
            # returned as-is: wrapping the string in dict() would raise
            return xml_data

        if format != 'dict':
            # same exception type the previous dict(None) fall-through raised
            raise TypeError("unsupported format: {0!r}".format(format))

        data = OpenPBS.qstat_xml_to_dict(xml_data)
        selected_data = {}
        for jobid in data:
            # owner is the part before the first '@' of "owner@host"
            owner = data[jobid]['Job_Owner'].partition('@')[0]
            if not user or owner == username:
                selected_data[unicode(jobid)] = data[unicode(jobid)]

        for jobid in selected_data:
            job = selected_data[unicode(jobid)]
            job[u"cm_jobid"] = jobid
            if "Variable_list" in job:
                job[u"cm_Variable_list"] = self.variable_list(
                    selected_data, jobid)
        return dict(selected_data)
Ejemplo n.º 17
0
    def qstat(self, host, user=True, format='dict'):
        """
        Execute ``qstat -x`` on the manager of a host and return the result.

        With ``format='dict'`` the XML is converted with
        ``OpenPBS.qstat_xml_to_dict``, optionally restricted to the jobs
        owned by the user, and each job is extended with ``cm_jobid`` and,
        when it has a ``Variable_list``, ``cm_Variable_list``.

        :param host: The host as specified in ~/.ssh/config
        :param user:  If True, only returns information for the user
                      If False, all jobs for all users are returned
        :param format: 'dict' for the parsed dict, 'xml' for the raw XML
        :return: a dict for format 'dict', the XML string for format 'xml'
        :raises TypeError: if ``format`` is neither 'dict' nor 'xml'
        """
        username = self.username(host)
        manager_host = self.manager(host)
        xml_data = Shell.ssh(manager_host, "qstat", "-x").rstrip()

        if format == "xml":
            # the raw XML cannot be filtered, so only warn when a
            # restriction to the user was actually requested
            if user:
                print("WARNING: "
                      "restrictiong xml data for a user not supported.")
            # returned as-is: wrapping the string in dict() would raise
            return xml_data

        if format != 'dict':
            # same exception type the previous dict(None) fall-through raised
            raise TypeError("unsupported format: {0!r}".format(format))

        data = OpenPBS.qstat_xml_to_dict(xml_data)
        selected_data = {}
        for jobid in data:
            # owner is the part before the first '@' of "owner@host"
            owner = data[jobid]['Job_Owner'].partition('@')[0]
            if not user or owner == username:
                selected_data[unicode(jobid)] = data[unicode(jobid)]

        for jobid in selected_data:
            job = selected_data[unicode(jobid)]
            job[u"cm_jobid"] = jobid
            if "Variable_list" in job:
                job[u"cm_Variable_list"] = self.variable_list(selected_data, jobid)
        return dict(selected_data)
Ejemplo n.º 18
0
 def pbsnodes_data(manager_host):
     """
     Run ``pbsnodes -l -n`` on the manager host and return its output.

     :param manager_host: host name handed to ``Shell.ssh``
     :return: the output converted to ``str`` with its last character removed
     """
     listing = Shell.ssh(manager_host, "pbsnodes", "-l", "-n")
     # drop the final character of the textual output
     return str(listing)[:-1]
Ejemplo n.º 19
0
 def pbsnodes_data(manager_host):
     """
     Run ``pbsnodes -l -n`` on the manager host and return its output.

     :param manager_host: host name handed to ``Shell.ssh``
     :return: the output converted to ``str`` with its last character removed
     """
     text = str(Shell.ssh(manager_host, "pbsnodes", "-l", "-n"))
     # drop the final character of the textual output
     return text[:-1]
Ejemplo n.º 20
0
    def do_network(self, args, arguments):
        """
        ::

            Usage:
                network get fixed [ip] [--cloud=CLOUD] FIXED_IP
                network get floating [ip] [--cloud=CLOUD] FLOATING_IP_ID
                network reserve fixed [ip] [--cloud=CLOUD] FIXED_IP
                network unreserve fixed [ip] [--cloud=CLOUD] FIXED_IP
                network associate floating [ip] [--cloud=CLOUD] [--group=GROUP] [--instance=INS_ID_OR_NAME] [FLOATING_IP]
                network disassociate floating [ip] [--cloud=CLOUD] [--group=GROUP] [--instance=INS_ID_OR_NAME] [FLOATING_IP]
                network create floating [ip] [--cloud=CLOUD] [--pool=FLOATING_IP_POOL]
                network delete floating [ip] [--cloud=CLOUD] FLOATING_IP...
                network list floating pool [--cloud=CLOUD]
                network list floating [ip] [--cloud=CLOUD] [--instance=INS_ID_OR_NAME] [IP_OR_ID]
                network create cluster --group=demo_group
                network -h | --help

            Options:
                -h                          help message
                --cloud=CLOUD               Name of the IaaS cloud e.g. india_openstack_grizzly.
                --group=GROUP               Name of the group in Cloudmesh
                --pool=FLOATING_IP_POOL     Name of Floating IP Pool
                --instance=INS_ID_OR_NAME   ID or Name of the vm instance

            Arguments:
                IP_OR_ID        IP Address or ID of IP Address
                FIXED_IP        Fixed IP Address, e.g. 10.1.5.2
                FLOATING_IP     Floating IP Address, e.g. 192.1.66.8
                FLOATING_IP_ID  ID associated with Floating IP, e.g. 185c5195-e824-4e7b-8581-703abec4bc01

            Examples:
                $ network get fixed ip --cloud=india 10.1.2.5
                $ network get fixed --cloud=india 10.1.2.5
                $ network get floating ip --cloud=india 185c5195-e824-4e7b-8581-703abec4bc01
                $ network get floating --cloud=india 185c5195-e824-4e7b-8581-703abec4bc01
                $ network reserve fixed ip --cloud=india 10.1.2.5
                $ network reserve fixed --cloud=india 10.1.2.5
                $ network unreserve fixed ip --cloud=india 10.1.2.5
                $ network unreserve fixed --cloud=india 10.1.2.5
                $ network associate floating ip --cloud=india --instance=albert-001 192.1.66.8
                $ network associate floating --cloud=india --instance=albert-001
                $ network associate floating --cloud=india --group=albert_group
                $ network disassociate floating ip --cloud=india --instance=albert-001 192.1.66.8
                $ network disassociate floating --cloud=india --instance=albert-001 192.1.66.8
                $ network create floating ip --cloud=india --pool=albert-f01
                $ network create floating --cloud=india --pool=albert-f01
                $ network delete floating ip --cloud=india 192.1.66.8 192.1.66.9
                $ network delete floating --cloud=india 192.1.66.8 192.1.66.9
                $ network list floating ip --cloud=india
                $ network list floating --cloud=india
                $ network list floating --cloud=india 192.1.66.8
                $ network list floating --cloud=india --instance=323c5195-7yy34-4e7b-8581-703abec4b
                $ network list floating pool --cloud=india
                $ network create cluster --group=demo_group

        """
        # pprint(arguments)
        # Get the cloud parameter OR read default
        cloudname = arguments["--cloud"] or Default.get_cloud()

        if cloudname is None:
            Console.error("Default cloud has not been set!"
                          "Please use the following to set it:\n"
                          "cm default cloud=CLOUDNAME\n"
                          "or provide it via the --cloud=CLOUDNAME argument.")
            return ""

        # Fixed IP info
        if arguments["get"] \
                and arguments["fixed"]:
            fixed_ip = arguments["FIXED_IP"]
            result = Network.get_fixed_ip(cloudname,
                                          fixed_ip_addr=fixed_ip)
            Console.msg(result)

        # Floating IP info
        elif arguments["get"] \
                and arguments["floating"]:
            floating_ip_id = arguments["FLOATING_IP_ID"]
            result = Network.get_floating_ip(cloudname,
                                             floating_ip_or_id=floating_ip_id)
            Console.msg(result)

        # Reserve a fixed ip
        elif arguments["reserve"] \
                and arguments["fixed"]:
            fixed_ip = arguments["FIXED_IP"]
            result = Network.reserve_fixed_ip(cloudname=cloudname,
                                              fixed_ip_addr=fixed_ip)
            if result is not None:
                Console.ok("Reserve fixed ip address [{}] complete.".format(fixed_ip))

        # Un-Reserve a fixed ip
        elif arguments["unreserve"] \
                and arguments["fixed"]:
            fixed_ip = arguments["FIXED_IP"]
            result = Network.unreserve_fixed_ip(cloudname=cloudname,
                                                fixed_ip_addr=fixed_ip)
            if result is not None:
                Console.ok("Un-Reserve fixed ip address [{}] complete.".format(fixed_ip))

        # Associate floating IP
        elif arguments["associate"] \
                and arguments["floating"]:

            # Get all command-line arguments
            group_name = arguments["--group"]
            instance_id = arguments["--instance"]
            floating_ip = arguments["FLOATING_IP"]

            print(floating_ip)

            # group supplied
            if group_name is not None:
                """
                Group name has been provided.
                Assign floating IPs to all vms in the group
                and return
                """
                # Get the group information
                group = Group.get_info(name=group_name,
                                       cloud=cloudname,
                                       output="json")
                if group is not None:
                    # Convert from str to json
                    group = json.loads(group)
                    # For each vm in the group
                    # Create and assign a floating IP
                    for item in group:
                        instance_id = group[item]["value"]
                        # Get the instance dict
                        instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                                  instance_id=instance_id)
                        # Instance not found
                        if instance_dict is None:
                            Console.error("Instance [{}] not found in the cloudmesh database!"
                                          .format(instance_id))
                            return ""

                        # Get the instance name
                        instance_name = instance_dict["name"]
                        floating_ip = Network.create_assign_floating_ip(cloudname=cloudname,
                                                                        instance_name=instance_name)
                        if floating_ip is not None:
                            Console.ok("Created and assigned Floating IP [{}] to instance [{}]."
                                       .format(floating_ip, instance_name))
                            # Refresh VM in db
                            self.refresh_vm(cloudname)
                else:
                    Console.error("No group [{}] in the Cloudmesh database."
                                  .format(group_name))
                    return ""

            # floating-ip not supplied, instance-id supplied
            elif len(floating_ip)==0 and instance_id is not None:
                """
                Floating IP has not been provided, instance-id provided.
                Generate one from the pool, and assign to vm
                and return
                """
                instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                          instance_id=instance_id)
                # Instance not found
                if instance_dict is None:
                    Console.error("Instance [{}] not found in the cloudmesh database!"
                                  .format(instance_id))
                    return ""

                instance_name = instance_dict["name"]
                floating_ip = Network.create_assign_floating_ip(cloudname=cloudname,
                                                                instance_name=instance_name)
                if floating_ip is not None:
                    Console.ok("Created and assigned Floating IP [{}] to instance [{}]."
                               .format(floating_ip, instance_name))

            # instance-id & floating-ip supplied
            elif instance_id is not None:
                """
                Floating IP & Instance ID have been provided
                Associate the IP to the instance
                and return
                """
                instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                          instance_id=instance_id)
                floating_ip = floating_ip[0]

                # Instance not found
                if instance_dict is None:
                    Console.error("Instance [{}] not found in the cloudmesh database!"
                                  .format(instance_id))
                    return ""

                instance_name = instance_dict["name"]
                result = Network.associate_floating_ip(cloudname=cloudname,
                                                       instance_name=instance_name,
                                                       floating_ip=floating_ip)
                if result is not None:
                    Console.ok("Associated Floating IP [{}] to instance [{}]."
                               .format(floating_ip, instance_name))

            # Invalid parameters
            else:
                Console.error("Please provide at least one of [--group] OR [--instance] parameters.\n"
                              "You can also provide [FLOATING_IP] AND [--instance] parameters.\n"
                              "See 'cm network --help' for more info.")
                return ""

            # Refresh VM in db
            self.refresh_vm(cloudname)

        elif arguments["disassociate"] \
                and arguments["floating"]:

            # Get all command-line arguments
            group_name = arguments["--group"]
            instance_id = arguments["--instance"]
            floating_ip = arguments["FLOATING_IP"]

            # group supplied
            if group_name is not None:
                """
                Group name has been provided.
                Remove floating IPs of all vms in the group
                and return
                """
                # Get the group information
                group = Group.get_info(name=group_name,
                                       cloud=cloudname,
                                       output="json")
                if group is not None:
                    # Convert from str to json
                    group = json.loads(group)
                    # For each vm in the group
                    # Create and assign a floating IP
                    for item in group:
                        instance_id = group[item]["value"]
                        # Get the instance dict
                        instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                                  instance_id=instance_id)
                        # Instance not found
                        if instance_dict is None:
                            Console.error("Instance [{}] not found in the cloudmesh database!"
                                          .format(instance_id))
                            return ""

                        # Get the instance name
                        instance_name = instance_dict["name"]
                        floating_ip = instance_dict["floating_ip"]

                        # Floating ip argument invalid
                        if floating_ip is None:
                            Console.error("Instance[{}] does not have a floating_ip."
                                          .format(instance_name))
                            return ""

                        result = Network.disassociate_floating_ip(cloudname=cloudname,
                                                                  instance_name=instance_name,
                                                                  floating_ip=floating_ip)
                        if result is not None:
                            Console.ok("Disassociated Floating IP [{}] from instance [{}]."
                                       .format(floating_ip, instance_name))
                else:
                    Console.error("No group [{}] in the Cloudmesh database."
                                  .format(group_name))
                    return ""

            # floating-ip not supplied, instance-id supplied
            elif len(floating_ip)==0 and instance_id is not None:
                """
                Floating IP has not been provided, instance-id provided.
                Remove floating ip allocated to vm
                and return
                """
                instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                          instance_id=instance_id)
                # Instance not found
                if instance_dict is None:
                    Console.error("Instance [{}] not found in the cloudmesh database!"
                                  .format(instance_id))
                    return ""

                instance_name = instance_dict["name"]
                floating_ip = instance_dict["floating_ip"]

                # Floating ip argument invalid
                if floating_ip is None:
                    Console.error("Instance[{}] does not have a floating_ip."
                                  .format(instance_name))
                    return ""

                result = Network.disassociate_floating_ip(cloudname=cloudname,
                                                          instance_name=instance_name,
                                                          floating_ip=floating_ip)
                if result is not None:
                    Console.ok("Disassociated Floating IP [{}] from instance [{}]."
                               .format(floating_ip, instance_name))

            # instance-id & floating-ip supplied
            elif instance_id is not None:
                """
                Floating IP & Instance ID have been provided
                Remove the IP from the instance
                and return
                """
                instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                          instance_id=instance_id)
                floating_ip = floating_ip[0]
                
                # Instance not found
                if instance_dict is None:
                    Console.error("Instance [{}] not found in the cloudmesh database!"
                                  .format(instance_id))
                    return ""

                instance_name = instance_dict["name"]
                _floating_ip = instance_dict["floating_ip"]

                # Floating ip argument invalid
                if _floating_ip != floating_ip:
                    Console.error("Invalid floating_ip [{}] for instance [{}]."
                                  .format(floating_ip, instance_name))
                    return ""

                result = Network.disassociate_floating_ip(cloudname=cloudname,
                                                          instance_name=instance_name,
                                                          floating_ip=floating_ip)
                if result is not None:
                    Console.ok("Disassociated Floating IP [{}] from instance [{}]."
                               .format(floating_ip, instance_name))

            # Invalid parameters
            else:
                Console.error("Please provide at least one of [--group] OR [--instance] parameters.\n"
                              "You can also provide [FLOATING_IP] AND [--instance] parameters.\n"
                              "See 'cm network --help' for more info.")
                return ""

            # Refresh VM in db
            self.refresh_vm(cloudname)

        # Create new floating ip under floating pool
        elif arguments["create"] \
                and arguments["floating"]:
            floating_pool = arguments["--pool"]
            result = Network.create_floating_ip(cloudname=cloudname,
                                                floating_pool=floating_pool)
            if result is not None:
                Console.ok("Created new floating IP [{}]".format(result))
            else:
                Console.error("Failed to create floating IP! Please check arguments.")

        # Delete a floating ip address
        elif arguments["delete"] \
                and arguments["floating"]:
            floating_ips = arguments["FLOATING_IP"]

            for floating_ip in floating_ips:
                result = Network.delete_floating_ip(cloudname=cloudname,
                                                    floating_ip_or_id=floating_ip)

                if result is not None:
                    Console.ok(result)
                else:
                    Console.error("Failed to delete floating IP address!")

        # Floating IP Pool List
        elif arguments["list"] \
                and arguments["floating"] \
                and arguments["pool"]:
            result = Network.list_floating_ip_pool(cloudname)
            Console.msg(result)

        # Floating IP list [or info]
        elif arguments["list"] \
                and arguments["floating"]:

            ip_or_id = arguments["IP_OR_ID"]
            instance_id = arguments["--instance"]

            # Refresh VM in db
            self.refresh_vm(cloudname)

            # If instance id is supplied
            if instance_id is not None:
                instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                          instance_id=instance_id)
                # Instance not found
                if instance_dict is None:
                    Console.error("Instance [{}] not found in the cloudmesh database!"
                                  .format(instance_id))
                    return ""

                # Read the floating_ip from the dict
                ip_or_id = instance_dict["floating_ip"]

                if ip_or_id is None:
                    Console.error("Instance with ID [{}] does not have a floating IP address!"
                                  .format(instance_id))
                    return ""

            # If the floating ip or associated ID is supplied
            if ip_or_id is not None:
                result = Network.get_floating_ip(cloudname,
                                                 floating_ip_or_id=ip_or_id)

                if result is not None:
                    Console.msg(result)
                else:
                    Console.error("Floating IP not found! Please check your arguments.")
                    return ""
            # Retrieve the full list
            else:
                result = Network.list_floating_ip(cloudname)
                Console.msg(result)

        # Create a virtual cluster
        elif arguments["cluster"] and \
                arguments["create"]:

            group_name = arguments["--group"] or \
                         Default.get("group", cloud=cloudname)

            # Get the group information
            group = Group.get_info(name=group_name,
                                   cloud=cloudname,
                                   output="json")
            if group is not None:
                # Convert from str to json
                group = json.loads(group)

                # var contains pub key of all vms
                public_keys = ""
                login_users = []
                login_ips = []

                # For each vm in the group
                # Create and assign a floating IP
                for item in group:
                    instance_id = group[item]["value"]
                    # Get the instance dict
                    instance_dict = Network.get_instance_dict(cloudname=cloudname,
                                                              instance_id=instance_id)
                    # Instance not found
                    if instance_dict is None:
                        Console.error("Instance [{}] not found in the cloudmesh database!"
                                      .format(instance_id))
                        return ""

                    # Get the instance name
                    instance_name = instance_dict["name"]
                    floating_ip = instance_dict["floating_ip"]

                    # If vm does not have floating ip, then create
                    if floating_ip is None:
                        floating_ip = Network.create_assign_floating_ip(cloudname=cloudname,
                                                                        instance_name=instance_name)
                        if floating_ip is not None:
                            Console.ok("Created and assigned Floating IP [{}] to instance [{}]."
                                       .format(floating_ip, instance_name))
                            # Refresh VM in db
                            self.refresh_vm(cloudname)

                    # Get the login user for this machine
                    user = raw_input("Enter the login user for VM {} : ".format(instance_name))
                    passphrase = getpass.getpass("Enter the passphrase key on VM {} : ".format(instance_name))

                    # create list for second iteration
                    login_users.append(user)
                    login_ips.append(floating_ip)

                    login_args = [
                        user + "@" + floating_ip,
                    ]

                    keygen_args = [
                        "ssh-keygen -t rsa -f ~/.ssh/id_rsa -N " + passphrase
                    ]

                    cat_pubkey_args = [
                        "cat ~/.ssh/id_rsa.pub"
                    ]

                    generate_keypair = login_args + keygen_args
                    result = Shell.ssh(*generate_keypair)

                    #print("***** Keygen *****")
                    #print(result)

                    cat_public_key = login_args + cat_pubkey_args
                    result = Shell.ssh(*cat_public_key)
                    public_keys += "\n" + result

                    #print("***** id_rsa.pub *****")
                    #print(result)

                #print("***** public keys *****")
                #print(public_keys)

                for user, ip in zip(login_users, login_ips):
                    arguments = [
                        user + "@" + ip,
                        "echo '" + public_keys + "' >> ~/.ssh/authorized_keys"
                    ]

                    # copy the public key contents to auth_keys
                    result = Shell.ssh(*arguments)

                Console.ok("Virtual cluster creation successfull.")
            else:
                Console.error("No group [{}] in the Cloudmesh database."
                              .format(group_name))
                return ""

        return ""
Ejemplo n.º 21
0
 def qstat(host):
     """
     Run ``qstat`` on the given host through ``Shell.ssh``.

     :param host: the host handed to ``Shell.ssh``
     :return: the command output with trailing whitespace removed
     """
     output = Shell.ssh(host, "qstat")
     return output.rstrip()
Ejemplo n.º 22
0
    def run(cls, cluster, cmd, **kwargs):
        """
        Generate an sbatch script for ``cmd``, copy it to ``cluster`` and
        submit it.

        The script is written locally below ``~/.cloudmesh``, copied with
        ``Shell.scp`` into a per-run remote experiment directory, submitted
        with ``sbatch`` through ``Shell.ssh``, and the collected data is
        handed to ``cls.add_db``.

        :param cluster: the cluster name; used for ``cls.read_config``,
                        ``Shell.ssh`` and as host part of the scp target
        :param cmd: the command placed after ``srun -l`` in the script
        :param kwargs: optional overrides keyed by the flags ``-t``, ``-N``,
                       ``-p``, ``-o`` and ``-D``; a missing or falsy value
                       keeps the default
        :return: the dict describing the submission; ``id`` holds the job id
                 parsed from the sbatch output or None if none was found
        """
        # debug trace
        print ("CCCC>", cluster)

        #
        # TODO: the script count is a variable in the database. We should
        # test whether the script file already exists and, if it does,
        # increase the counter until we find a name that is free; that value
        # becomes the new counter and is stored in the db.
        #
        # A helper such as get_script_name(directory, prefix, counter) would
        # do; there may be something similar in the old cloudmesh.
        #

        config = cls.read_config(cluster)
        cls.incr()
        data = {
            "cluster": cluster,
            "count": cls.counter(),
            "username": config["credentials"]["username"],
            "remote_experiment_dir": config["default"]["experiment_dir"],
            "queue": config["default"]["queue"],
            "id": None,
            "nodes": 1,
            "tasks_per_node": 1,
        }
        data["script_base_name"] = "{username}-{count}".format(**data)
        data["script_name"] = "{username}-{count}.sh".format(**data)
        data["script_output"] = "{username}-{count}.out".format(**data)
        # Formatted twice: the second pass expands any placeholders contained
        # in the configured directory itself (presumably e.g. {username} --
        # confirm against the configuration files).
        data["remote_experiment_dir"] = \
            "{remote_experiment_dir}/{count}".format(**data).format(**data)

        # Defaults for the sbatch options.
        # NOTE(review): '-t' carries tasks_per_node here, while srun/sbatch
        # use '-t' for the time limit -- confirm the intended flag.
        option_mapping = {'-t': '{tasks_per_node}'.format(**data),
                          '-N': '{nodes}'.format(**data),
                          '-p': '{queue}'.format(**data),
                          '-o': '{script_output}'.format(**data),
                          '-D': '{remote_experiment_dir}'.format(**data)}

        # Values supplied by the caller win over the defaults. Iterate over a
        # copy of the keys since the mapping is updated inside the loop.
        for flag in list(option_mapping):
            option_mapping[flag] = kwargs.get(flag) or option_mapping[flag]

        # Add the account option only if a real project is configured.
        try:
            project = config["credentials"]["project"]
            if project.lower() not in ["tbd", "none"]:
                option_mapping["-A"] = project
        except (KeyError, TypeError, AttributeError):
            # no usable project entry; submit without an account
            pass

        # The final option values are stored under their flag names, too.
        data.update(option_mapping)

        # create the options for the script
        options = "".join('#SBATCH {} {}\n'.format(flag, value)
                          for flag, value in option_mapping.items())

        data["command"] = cmd
        data["options"] = options

        script = textwrap.dedent(
            """
            #! /bin/sh
            {options}
            echo '#CLOUDMESH: BATCH ENVIRONMENT'
            echo 'BASIL_RESERVATION_ID:' $BASIL_RESERVATION_ID
            echo 'SLURM_CPU_BIND:' $SLURM_CPU_BIND
            echo 'SLURM_JOB_ID:' $SLURM_JOB_ID
            echo 'SLURM_JOB_CPUS_PER_NODE:' $SLURM_JOB_CPUS_PER_NODE
            echo 'SLURM_JOB_DEPENDENCY:' $SLURM_JOB_DEPENDENCY
            echo 'SLURM_JOB_NAME:' $SLURM_JOB_NAME
            echo 'SLURM_JOB_NODELIST:' $SLURM_JOB_NODELIST
            echo 'SLURM_JOB_NUM_NODES:' $SLURM_JOB_NUM_NODES
            echo 'SLURM_MEM_BIND:' $SLURM_MEM_BIND
            echo 'SLURM_TASKS_PER_NODE:' $SLURM_TASKS_PER_NODE
            echo 'MPIRUN_NOALLOCATE:' $MPIRUN_NOALLOCATE
            echo 'MPIRUN_NOFREE:' $MPIRUN_NOFREE
            echo 'SLURM_NTASKS_PER_CORE:' $SLURM_NTASKS_PER_CORE
            echo 'SLURM_NTASKS_PER_NODE:' $SLURM_NTASKS_PER_NODE
            echo 'SLURM_NTASKS_PER_SOCKET:' $SLURM_NTASKS_PER_SOCKET
            echo 'SLURM_RESTART_COUNT:' $SLURM_RESTART_COUNT
            echo 'SLURM_SUBMIT_DIR:' $SLURM_SUBMIT_DIR
            echo 'MPIRUN_PARTITION:' $MPIRUN_PARTITION
            srun -l echo '#CLOUDMESH: Starting'
            srun -l {command}
            srun -l echo '#CLOUDMESH: Test ok'
            """
        ).format(**data).replace("\r\n", "\n").strip()

        cls.create_remote_dir(cluster, data["remote_experiment_dir"])

        _from = Config.path_expand('~/.cloudmesh/{script_name}'.format(**data))
        _to = '{cluster}:{remote_experiment_dir}'.format(**data)
        data["from"] = _from
        data["to"] = _to
        data["script"] = script
        # debug trace of the copy source and target
        print(_from)
        print(_to)

        # write the script to local
        with open(_from, 'w') as local_file:
            local_file.write(script)

        # copy to remote host
        # NOTE: the local copy of the script is not removed afterwards.
        Shell.scp(_from, _to)

        # run the sbatch command
        cmd = 'sbatch {remote_experiment_dir}/{script_name}'.format(**data)
        data["cmd"] = cmd

        print ("CMD>", cmd)
        result = Shell.ssh(cluster, cmd)

        data["output"] = result

        # Find the job id: the first token after the marker. Lines carrying
        # extra text around the id (e.g. "... 42 on cluster x") are accepted,
        # lines without a numeric id are skipped instead of raising.
        marker = "Submitted batch job"
        for line in result.split("\n"):
            print ("LLL>", line)
            if marker in line:
                tokens = line.split(marker, 1)[1].split()
                if tokens and tokens[0].isdigit():
                    data["id"] = int(tokens[0])
                    break

        #
        # HACK, should not depend on Model.py
        #
        # Here is what we have in data and want to store. The '-' options
        # are obviously wrong as names and need to become full names:
        #
        #   {'-D': '/N/u/gvonlasz/experiment/3',
        #    '-N': '1',
        #    '-o': 'gvonlasz-3.out',
        #    '-p': 'delta',
        #    '-t': '1',
        #    'cluster': 'india',
        #    'cmd': 'sbatch /N/u/gvonlasz/experiment/3/gvonlasz-3.sh',
        #    'command': 'uname',
        #    'count': 3,
        #    'from': '/Users/big/.cloudmesh/gvonlasz-3.sh',
        #    'id': 1346,
        #    'options': '#SBATCH -t 1\n#SBATCH -o gvonlasz-3.out\n'
        #               '#SBATCH -N 1\n#SBATCH -p delta\n'
        #               '#SBATCH -D /N/u/gvonlasz/experiment/3\n',
        #    'output': 'Submitted batch job 1346',
        #    'queue': 'delta',
        #    'remote_experiment_dir': '/N/u/gvonlasz/experiment/3',
        #    'script': "#! /bin/sh\n#SBATCH -t 1\n...\nsrun -l uname\n"
        #              "srun -l echo '#CLOUDMESH: Test ok'",
        #    'script_base_name': 'gvonlasz-3',
        #    'script_name': 'gvonlasz-3.sh',
        #    'script_output': 'gvonlasz-3.out',
        #    'to': 'india:/N/u/gvonlasz/experiment/3',
        #    'username': '******'}
        #
        # We also want to store part of the .out file:
        #
        #   BASIL_RESERVATION_ID:
        #   SLURM_CPU_BIND:
        #   SLURM_JOB_ID: 1351
        #   SLURM_JOB_CPUS_PER_NODE: 12
        #   SLURM_JOB_DEPENDENCY:
        #   SLURM_JOB_NAME: gvonlasz-8.sh
        #   SLURM_JOB_NODELIST: d001
        #   SLURM_JOB_NUM_NODES: 1
        #   SLURM_MEM_BIND:
        #   SLURM_TASKS_PER_NODE: 12
        #   MPIRUN_NOALLOCATE:
        #   MPIRUN_NOFREE:
        #   SLURM_NTASKS_PER_CORE:
        #   SLURM_NTASKS_PER_NODE:
        #   SLURM_NTASKS_PER_SOCKET:
        #   SLURM_RESTART_COUNT:
        #   SLURM_SUBMIT_DIR: /N/u/gvonlasz
        #   MPIRUN_PARTITION:
        #
        # so maybe we want to use some of the names here as they reflect the
        # environment variables.
        #

        #
        # add data to database
        #
        cls.add_db(**data)

        return data
Ejemplo n.º 23
0
 def test(cls, cluster, time):
     """
     Run a minimal ``srun`` job on the cluster that echoes the test marker.

     :param cluster: the cluster handed to ``Shell.ssh``
     :param time: the value inserted after the ``-t`` option of ``srun``
     :return: whatever ``Shell.ssh`` returns for the command
     """
     command = "srun -n1 -t {} echo '#CLOUDMESH: Test ok'".format(time)
     return Shell.ssh(cluster, command)
Ejemplo n.º 24
0
 def qstat(host):
     """
     Execute ``qstat`` on ``host`` via ``Shell.ssh``.

     :param host: the host handed to ``Shell.ssh``
     :return: the output of the command, right-stripped
     """
     raw = Shell.ssh(host, "qstat")
     stripped = raw.rstrip()
     return stripped
Ejemplo n.º 25
0
 def create_remote_dir(cls, cluster, dir):
     """
     Create a directory on the cluster with ``mkdir -p`` through ``Shell.ssh``.

     :param cluster: the cluster handed to ``Shell.ssh``
     :param dir: the directory path inserted into the ``mkdir -p`` command
     """
     mkdir_command = "mkdir -p {dir}".format(dir=dir)
     Shell.ssh(cluster, mkdir_command)
Ejemplo n.º 26
0
class Experiment(object):
    @classmethod
    def rm(cls, cluster, id=None, format=None):
        """
        Remove experiment data on the cluster with ``rm -rf`` through
        ``Shell.ssh``.

        :param cluster: the cluster handed to ``Shell.ssh``
        :param id: the experiment id; if given only ``experiment/<id>`` is
                   removed, otherwise everything below ``experiment/``
        :param format: not used by this method
        :return: for a given id the output of the remote command as
                 returned by ``Shell.ssh``; without an id that output split
                 into lines; None if anything went wrong
        """
        try:
            if id is not None:
                return Shell.ssh(cluster,
                                 "rm -rf experiment/{ID}".format(ID=id))
            return Shell.ssh(cluster, "rm -rf experiment/*").split("\n")
        except Exception:
            # Best effort: a failing remote call is reported to the caller
            # only through the None result.
            return None

    @classmethod
    def list(cls, cluster, id=None, format=None):
        data = {
            "CLUSTER": cluster,
            "ID": id,
        }
        result = None
        if id is not None:
            try:
                result = Shell.ssh(cluster, "ls experiment/{ID}".format(**data))