예제 #1
0
    def ExistingSecurityGroup(self, args):
        ''' Look up the security group called args.nsg_name.

        Lists the security groups of args.region through the AWS CLI and
        scans the returned "SecurityGroups" list for a matching "GroupName".
        On a match the group's "GroupId" is stored in args.nsg_id.

        Returns 0 if the group was found; 1 if the name is unset, the AWS
        command failed, or no group carries that name.
        '''

        trace(2, "\"%s\"" % (args.nsg_name))

        # an unset name shows up as "", None or the literal string "None"
        if args.nsg_name in ("", None, "None"):
            error("NetworkSecurityGroup name is \"%s\"" % args.nsg_name)
            return 1

        # No filtering on the AWS side: every group in the region is fetched
        # and the search happens locally.
        cmd = "".join([
            "aws ec2 describe-security-groups ",
            " --region %s" % args.region,  # us-west-2
        ])

        retcode, output, errval = self.DoCmd(cmd)
        if retcode != 0:
            error("Problems describing security groups")
            return 1

        groups = json.loads(output)["SecurityGroups"]

        # linear scan, first match wins
        for position, group in enumerate(groups):
            if group["GroupName"] != args.nsg_name:
                continue
            args.nsg_id = group["GroupId"]
            debug(
                2, "%2d %-12s \"%s\"" %
                (position, group["GroupId"], group["GroupName"]))
            return 0

        trace(2, "Did not find security group: \"%s\"" % args.nsg_name)
        return 1
예제 #2
0
    def GetIPSetupCorrectly(self, args):
        ''' Called after 'running' status to read back the VM's public address.

        Runs "aws ec2 describe-instances" for args.vm_id in args.region and
        stores the instance's 'PublicDnsName' in args.vm_ip. On AWS the
        address changes across stop/start, so it has to be re-read.

        The 'KeyName' reported for the instance is cross-checked against
        args.key_name.

        Returns 0 on success; 1 if the describe command fails or the key
        names disagree.
        '''

        cmd = "aws ec2 describe-instances"
        cmd += " --instance-id %s" % args.vm_id
        cmd += " --region %s" % args.region  # us-west-2

        retcode, output, errval = self.DoCmd(cmd)
        if (retcode != 0):
            # without this check a failed command ends up as a JSON decode
            # exception on its (empty or non-JSON) output
            error("Problems describing instance \"%s\"" % args.vm_id)
            return 1

        # The 'describe-instances' record is large; only two fields of the
        # first instance of the first reservation are needed here.

        decoded_output = json.loads(output)
        instance = decoded_output['Reservations'][0]['Instances'][0]

        args.vm_ip = instance['PublicDnsName']
        key_name = instance['KeyName']

        debug(1, "ip: %s keyname: \"%s\"" % (args.vm_ip, key_name))

        # The SSH key name was passed in when the VM was built; verify the VM
        # reports the same one. This should never differ, but check for safety.

        if (key_name != args.key_name):  # cross-check
            # format the message here, like every other error() call does,
            # instead of passing the values as extra positional arguments
            error("args.key_name:\"%s\" != version vm thinks its using:\"%s\"" %
                  (args.key_name, key_name))
            return 1

        return 0
예제 #3
0
    def DoCmdNoError(self, cmd):
        ''' ali specific blocking command -- returns command output, doesn't report error.

        Runs 'cmd' through the shell, waits for it to finish and returns the
        tuple (returncode, stdout, errval).

        Only stdout is piped, so the stderr value from communicate() is None.
        When the command fails and there is no stderr text, the "Message"
        field of the JSON object at the start of stdout is returned as
        errval. If stdout contains no '}' at all, the returned output is "".
        '''

        debug(1, cmd)
        self.Log(cmd)

        child = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        output, errval = child.communicate()  # stdout data, stderr (None: not piped)
        debug(3, output)  # full output for trace

        # ali error output is in json format -- kind of...
        # {
        #     "Message": "The specified InstanceId does not exist.",
        #     "Code": "InvalidInstanceId.NotFound"
        # }
        # Detail of Server Exception:
        #
        # HTTP Status: 404 Error:InvalidInstanceId.NotFound The specified InstanceId does not exist. RequestID: ...

        # 'not errval' rather than 'errval == ""': stderr is not piped, so
        # errval is None and a comparison against "" would never be true.
        if child.returncode != 0 and not errval:
            # Popen hands back bytes on Python 3; search/parse a text copy and
            # leave the returned 'output' untouched
            text = output
            if not isinstance(text, str):
                text = text.decode("utf-8", "replace")

            pos = text.find('}')
            if (pos == -1):
                return (child.returncode, "", errval)

            jsonbuf = text[:pos + 1]  # only the stuff before the first '}'
            try:
                errval = json.loads(jsonbuf)['Message']
            except (ValueError, KeyError, TypeError):
                # Best effort: the leading text was not the expected error
                # object. This function must not raise or report, so keep
                # errval as it was.
                pass

        return (child.returncode, output, errval
                )  # pass back retcode, stdout, stderr
예제 #4
0
    def CreateVM(self, args):
        ''' Creates a new AWS VM. 'args' holds parameters.

        Steps, in order:
          1. refuse to run if args.vm_id already names an instance
          2. check the ".pem" ssh key file via CheckSSHKeyFilePath()
          3. CreateNSG()  -- sets args.nsg_id
          4. GetImageId() -- sets args.image_id
          5. "aws ec2 run-instances"; args.vm_id is taken from its output
          6. "aws ec2 create-tags" to name the instance args.vm_name
          7. WaitTillRunning() if tagging succeeded

        After run-instances succeeds, args is always written with
        ArgSaveToFile(), whether or not tagging/waiting succeeded.

        Returns 0 on success, non-zero on failure.
        '''

        if args.vm_id not in ("None", None):
            error(
                "Instance \"%s\" already exists, run 'deleteVM' first, or 'clean' if stale arg list"
                % args.vm_id)
            return 1

        args.vm_ip = ""  # make sure IP address is clear

        # ssh key file, builds path from options, checks existence

        retcode = self.CheckSSHKeyFilePath(args, ".pem")
        if (retcode != 0):
            return (retcode)

        # security group, create if needed, does nothing if already exists
        # consider moving this step outside this VM create so that better
        # reflects real VM timing?

        self.Inform("CreateNSG")
        if (self.CreateNSG(args) != 0):  # sets args.nsg_id
            return 1
        trace(2, "nsg_id: \"%s\" %s" % (args.nsg_name, args.nsg_id))

        # look up image-name, return region specific image id

        self.Inform("GetImageId")
        if (self.GetImageId(args) != 0):
            return 1
        trace(2, "image_id: \"%s\" %s" % (args.image_name, args.image_id))

        # with security group and image id, we can now create the instance

        self.Inform("run-instances")
        cmd = "aws ec2 run-instances"  # build the AWS command to create an instance
        cmd += " --image-id %s" % args.image_id  # aws image identifier via self.GetImageId()
        cmd += " --instance-type %s" % args.instance_type  # t2.micro
        cmd += " --region %s" % args.region  # us-west-2
        cmd += " --key-name %s" % args.key_name  # my-security-key
        cmd += " --security-group-ids %s" % args.nsg_id  # Security Group

        retcode, output, errval = self.DoCmd(cmd)  # call the AWS command
        if (retcode != 0):  # check for return code
            error("Problems creating VM \"%s\"" % args.vm_name)
            return 1  # nothing to delete, can return

        # decode the JSON output

        decoded_output = json.loads(output)

        trace(3, json.dumps(decoded_output, indent=4, sort_keys=True))

        args.vm_id = decoded_output['Instances'][0]['InstanceId']
        args.vm_ip = ""  # no IP until we see it running

        # Name the instance. Done here instead of in the run-instances call
        # because getting spaces/quotes right was tricky in the bash code this
        # was originally written in.

        self.Inform("create-tags")

        cmd = "aws ec2 create-tags"
        cmd += " --resource %s" % args.vm_id
        cmd += " --tags Key=Name,Value=%s" % args.vm_name  # unique time-stamped name
        # the instance lives in args.region; without this the CLI falls back
        # to its configured default region and cannot find the instance id
        cmd += " --region %s" % args.region

        retcode, output, errval = self.DoCmd(cmd)

        # wait till the instance is up and running, pingable and ssh-able

        if (retcode == 0):
            retcode = self.WaitTillRunning(args, "running", TIMEOUT_1)

        # save the vm id and the other fields set above, even on failure

        self.ArgSaveToFile(args)

        debug(2, "createVM returning %d" % retcode)
        return retcode  # 0: success, non-zero: failure
예제 #5
0
    def CreateSecurityGroup(self, args):
        ''' Creates a security group and saves its id in args.nsg_id.

        If args.vpcid is unset, the first VPC reported by
        "aws ec2 describe-vpcs" for args.region is used and stored in
        args.vpcid. The group is created as args.nsg_name, tagged with that
        name, and given the ingress rules listed below.

        Returns 0 on success. A failing describe/create/tag command returns
        its return code immediately. A failing ingress rule does not stop
        the remaining rules; the last non-zero return code is returned.
        '''

        trace(2, "\"%s\" %s" % (args.nsg_name, args.nsg_id))

        # Get the user's VPC id if we don't have it

        if args.vpcid in ("", None, "None"):
            cmd = "aws ec2 describe-vpcs"
            cmd += " --region %s" % args.region
            retcode, output, errval = self.DoCmd(cmd)  # call the AWS command
            if (retcode != 0):
                return retcode
            decoded_output = json.loads(output)
            debug(2, json.dumps(decoded_output, indent=4, sort_keys=True))
            vpcs = decoded_output["Vpcs"]
            if not vpcs:
                # nothing to attach the group to; fail instead of IndexError
                debug(1, "no VPC found in region %s" % args.region)
                return 1
            args.vpcid = vpcs[0]["VpcId"]
            debug(1, "args.vpcid <--- %s" % args.vpcid)

        # create the security group, with a meaningful description

        desc = "NSG Generated for %s" % args.vm_name

        cmd = "aws ec2 create-security-group"
        cmd += " --group-name %s" % args.nsg_name
        cmd += " --description \"%s\"" % desc
        cmd += " --vpc-id %s" % args.vpcid
        cmd += " --region %s" % args.region

        retcode, output, errval = self.DoCmd(cmd)  # call the AWS command
        if (retcode != 0):  # check for return code
            return retcode

        # get the groupid of the new security group

        decoded_output = json.loads(output)
        debug(2, json.dumps(decoded_output, indent=4, sort_keys=True))
        args.nsg_id = decoded_output["GroupId"]
        debug(1, "args.nsg_id <--- %s" % args.nsg_id)

        # tag new group with our group name

        cmd = "aws ec2 create-tags"
        cmd += " --resource %s" % args.nsg_id
        cmd += " --tags Key=Name,Value=%s" % args.nsg_name
        cmd += " --region %s" % args.region  # the group lives in args.region

        retcode, output, errval = self.DoCmd(cmd)  # call the AWS command
        if (retcode != 0):  # check for return code
            return retcode

        # Security rules -- kept as a plain list so they are easy to change.
        # Slow, but this code is rarely used; understandability matters more.
        # As of 1/2018 there are no egress rules.
        # For the icmp entry the port fields are presumably the ICMP type
        # (8, echo request) and code (-1, any) -- see the AWS CLI docs.

        ingress = [
            {
                "IpProtocol": "tcp",
                "ToPort": 22,
                "FromPort": 22,
                "CidrIp": "0.0.0.0/0",
                "Description": "For SSH",
            },
            {
                "IpProtocol": "tcp",
                "ToPort": 443,
                "FromPort": 443,
                "CidrIp": "0.0.0.0/0",
                "Description": "For SSL",
            },
            {
                "IpProtocol": "tcp",
                "ToPort": 5000,
                "FromPort": 5000,
                "CidrIp": "0.0.0.0/0",
                "Description": "For NVIDIA DIGITS6",
            },
            {
                "IpProtocol": "icmp",
                "ToPort": -1,
                "FromPort": 8,
                "CidrIp": "0.0.0.0/0",
                "Description": "To allow to be pinged",
            },
        ]

        outer_retcode = 0
        for rule in ingress:
            # both values must be passed as one tuple to '%'; the rules have
            # no "Name" field, so the description identifies the rule
            self.Inform("CreateNSG rule %s.%s" % (args.nsg_name,
                                                  rule["Description"]))

            cmd = "aws ec2 authorize-security-group-ingress"
            cmd += " --group-id %s" % args.nsg_id
            cmd += " --region %s" % args.region
            cmd += " --ip-permissions '[{"  # mini-embedded json like
            cmd += " \"IpProtocol\":\"%s\"," % rule["IpProtocol"]
            # KEEP 'To' before 'From' - no effect for tcp, but required for
            # how Wildcard ICMP type is defined
            cmd += " \"ToPort\":%s," % rule["ToPort"]
            cmd += " \"FromPort\":%s," % rule["FromPort"]
            cmd += " \"IpRanges\": [{"
            cmd += " \"CidrIp\":\"%s\"," % rule["CidrIp"]
            cmd += " \"Description\":\"%s\"" % rule["Description"]
            cmd += " }]"
            cmd += " }]'"

            retcode, output, errval = self.DoCmd(cmd)  # call the AWS command
            if (retcode != 0):
                outer_retcode = retcode  # keep any non-zero return code

        return outer_retcode
예제 #6
0
    def CreateVM(self, args):
        ''' Creates a new Google Cloud VM. 'args' holds parameters.

        Steps, in order:
          1. refuse to run if args.vm_id already names an instance
          2. check the public (".pub") and private ("") ssh key files
          3. build the "ssh-keys" metadata from args.user and the public key
          4. optionally attach GPUs (args.accelerator_type/_count), which
             also appends a "-<count>x<type>gpu" suffix to args.vm_name
          5. run "gcloud ... instances create"
          6. store args.vm_id / args.vm_ip from its JSON output and save args
          7. remove the IP from the ssh known_hosts file
          8. verify the returned VM name, sleep, then WaitTillRunning()

        Returns 0 only if the VM is up, has a public IP and can be ssh'ed
        into; non-zero otherwise.
        '''

        if args.vm_id not in ("None", None):
            error(
                "Instance \"%s\" already exists, run 'deleteVM' first, or 'clean' if stale arg list"
                % args.vm_id)
            return 1

        # make sure our persistent IP address is clear

        args.vm_ip = ""

        # public ssh key file: builds path from options, checks existence.
        # This sets args.key_file to the ".pub" file (e.g. "id_rsa.pub").

        retcode = self.CheckSSHKeyFilePath(args, ".pub")
        if (retcode != 0):
            return (retcode)
        keyfile_pub = args.key_file

        # Everywhere other than createVM the private key file is needed for
        # the local ssh'ing, so leave args.key_file pointing at that one.

        retcode = self.CheckSSHKeyFilePath(args, "")
        if (retcode != 0):
            return (retcode)

        # Metadata consists of the user name and the ssh public key.
        #
        # Where azure is passed the name of the public ssh key file, with
        # Google the entire public key string is passed in the metadata.
        #
        # Example:
        #     metadata = "ssh-keys=<user>:ssh-rsa AAAAB3Nza...<key data> <comment>"
        #
        # Note: the id_rsa.pub file already starts with "ssh-rsa AAAAB3...",
        #       so "ssh-rsa" does not need to be added here.

        with open(keyfile_pub, "r") as f:
            ssh_rsa_data = f.read()

        metadata = "ssh-keys=%s:%s" % (args.user, ssh_rsa_data)

        # With Google there is no need to create a network security group
        # here; it is called "network firewall rules" and is added after the
        # VM is created.
        #
        # GPUs can be specified at VM init time. With other CSPs the
        # number/type of GPUs is a function of the "instance_type".

        accelerator = None  # don't assign gpus
        accelerator_count = 0  # used for delay before ping below
        if (args.accelerator_type not in (None, "", "None")
                and args.accelerator_count > 0):
            accelerator = "%s,count=%d" % (args.accelerator_type,
                                           args.accelerator_count)
            accelerator_count = args.accelerator_count

            # If adding GPUs, add additional info to the VM name.
            #
            # Accelerator types look like "nvidia-tesla-p100" - too long for
            # a VM name, which is limited to 61 chars - so only what follows
            # the last '-' is used.
            #
            # Remember with google, names must be lowercase numbers/letters.

            if "gpu" not in args.vm_name:  # haven't added "gpu" yet
                gpu_kind = args.accelerator_type.rsplit("-", 1)[-1]
                args.vm_name += "-%dx%sgpu" % (args.accelerator_count,
                                               gpu_kind)

        # Create the VM

        self.Inform("CreateVM")
        cmd = "gcloud --format=\"json\" beta compute"
        cmd += " --project \"%s\" " % args.project  # "my-project"
        cmd += "instances create \"%s\"" % args.vm_name  # "pbradstr-Fri-2018Mar02-181931"
        cmd += " --zone \"%s\"" % args.region  # "us-west1-b"
        cmd += " --quiet"  # reduces noise output
        cmd += " --machine-type \"%s\"" % args.instance_type  # "n1-standard-1"
        cmd += " --subnet \"%s\"" % args.subnet  # default
        cmd += " --metadata \"%s\"" % metadata
        cmd += " --maintenance-policy \"%s\"" % args.maintenance_policy  # "TERMINATE"
        cmd += " --service-account \"%s\"" % args.service_account
        # cmd += " --scopes %s" % args.scopes   # left disabled, as in the original
        if accelerator is not None:  # optional if we want GPUs
            cmd += " --accelerator type=%s" % accelerator  # nvidia-tesla-p100,count=1"
        cmd += " --min-cpu-platform \"%s\"" % args.min_cpu_platform  # "Automatic"
        cmd += " --image \"%s\"" % args.image_name  # "nvidia-gpu-cloud-image-20180227"
        cmd += " --image-project \"%s\"" % args.image_project  # "nvidia-ngc-public"
        cmd += " --boot-disk-size %d" % args.boot_disk_size  # 32, in GB
        cmd += " --boot-disk-type \"%s\"" % args.boot_disk_type  # "pd-standard"
        cmd += " --boot-disk-device-name \"%s\"" % args.vm_name  # assume same as VM name

        # To break big command into individual options per line for debugging
        # echo $V | sed -e $'s/ --/\\\n --/g'

        # execute the command

        rc, output, errval = self.DoCmd(cmd)
        if (rc != 0):  # check for return code
            error("Problems creating VM \"%s\"" % args.vm_name)
            return rc

        # Pull the vm id and the public IP address out of the returned
        # information.
        #
        # NOTE: with gcp the IP address is part of the 'create' output, so
        #       there is no need to poll for it.

        decoded_output = json.loads(output)
        trace(3, json.dumps(decoded_output, indent=4, sort_keys=True))

        # FYI: [0] is used because the json output is a list that could hold
        #      more than one instance; our request is for a single one.

        created = decoded_output[0]
        args.vm_id = created['id']  # may not actually need the ID, all vm_name based
        args.vm_ip = created['networkInterfaces'][0]['accessConfigs'][0][
            'natIP']

        # Save the vm id and other fields now: whether or not the VM ends up
        # fully running, the id is needed to delete it later.

        self.ArgSaveToFile(args)

        # Google reuses IP addresses a lot. An old IP with a new VM that is
        # still listed in known_hosts causes problems on the first ssh (which
        # happens right away in WaitTillRunning), so remove it now. It is also
        # removed when the VM is deleted, but that may have been skipped.
        # (TODO: is this step needed on other CSPs?)

        self.DeleteIPFromSSHKnownHostsFile(args)

        # quick sanity check -- verify the name returned from the create
        # command is the same as we were given

        returned_name = created["name"]
        if (returned_name != args.vm_name):
            error(
                "sanity check: vm name returned \"%s\" != vm_name \"%s\" given to create command"
                % (returned_name, args.vm_name))
            # the original built this dump and threw it away; emit it so the
            # mismatch can be diagnosed
            debug(1, json.dumps(decoded_output, indent=4, sort_keys=True))
            return 1

        # Seeing an error here on gcloud only where
        #
        #      1) VM is up in gcloud web page, and can ssh into it from there
        #      2) the first ping in WaitTillRunning succeeds
        #      3) the ssh in WaitTillRunning fails with a timeout
        #      4) any further ping or ssh fails
        #      5) see #1
        #
        # A delay before the first ping seems to work around the problem.
        # 5 seconds is not enough (30% error rate); 10 seconds seems to work,
        # at least with "n1-standard-1" instances and no gpus.
        #
        # Adding an additional 10 seconds per GPU. Empirical value.

        delay = 10 + (accelerator_count * 10)
        debug(
            0,
            "WORKAROUND: external network connect - sleep for %d seconds before ping"
            % (delay))
        time.sleep(delay)  # wait a few seconds before ANY command to vm

        # Another sanity check -- gcp returns from create only once the vm is
        # up and running, so this should take little time, but it confirms
        # that we can ping and ssh into the vm. (rc is known to be 0 here:
        # a failed create already returned above.)

        rc = self.WaitTillRunning(args, "RUNNING", TIMEOUT_1)

        # returns 0 only if VM is fully up and running, we have its public IP
        # and can ssh into it

        debug(2, "createVM returning %d" % rc)
        return rc  # 0: success, non-zero: failure