Example #1
 def tearDownClass( cls ):
     ctx = Context( os.environ[ 'CGCLOUD_ZONE' ], os.environ[ 'CGCLOUD_NAMESPACE' ] )
     # Only clean up if the context is using the default test namespace. If another namespace
     # is configured, we can't assume that all resources were created by the test and that
     # they can therefore be removed.
     if cls.cleanup and ctx.namespace == cls.namespace:
         ctx.cleanup()
     super( CgcloudTestCase, cls ).tearDownClass( )
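The guard above means resources are destroyed only when the active namespace is the throw-away one that setUpClass generated for this test run (see Example #8). A hypothetical way to keep resources around for post-mortem debugging, using only the cleanup flag this method already checks:

     # Hypothetical: disable teardown so a failed test's instances can be inspected manually.
     CgcloudTestCase.cleanup = False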
Example #2
 def launchCluster(cls, instanceType, keyName, clusterName, spotBid=None):
     ctx = Context(availability_zone='us-west-2a', namespace=cls._toNameSpace(clusterName))
     profileARN = cls._getProfileARN(ctx)
     # the security group name is used as the cluster identifier
     cls._createSecurityGroup(ctx, clusterName)
     bdm = cls._getBlockDeviceMapping(ec2_instance_types[instanceType])
     leaderRepo, leaderTag = cls.dockerInfo().rsplit(':', 1)
     leaderData = {'role': 'leader', 'tag': leaderTag,
                   'args': leaderArgs.format(name=clusterName), 'repo': leaderRepo}
     userData = awsUserData.format(**leaderData)
     kwargs = {'key_name': keyName, 'security_groups': [clusterName],
               'instance_type': instanceType,
               'user_data': userData, 'block_device_map': bdm,
               'instance_profile_arn': profileARN}
     if not spotBid:
         logger.info('Launching non-preemptable leader')
         create_ondemand_instances(ctx.ec2, image_id=coreOSAMI,
                                   spec=kwargs, num_instances=1)
     else:
         logger.info('Launching preemptable leader')
         # force generator to evaluate
         list(create_spot_instances(ec2=ctx.ec2, price=spotBid, image_id=coreOSAMI,
                                    clusterName=clusterName, spec=kwargs, num_instances=1))
     return cls._getLeader(clusterName=clusterName, wait=True)
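A hypothetical invocation of this classmethod; the enclosing provisioner class is not named in the snippet, and every argument value below is a placeholder:

     # Placeholders throughout -- substitute a real key pair, cluster name and instance type.
     leader = SomeProvisioner.launchCluster(instanceType='m3.large',
                                            keyName='my-keypair',
                                            clusterName='test-cluster',
                                            spotBid='0.50')  # omit spotBid to launch on demand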
Example #3
 def run( self, options ):
     zone = options.availability_zone
     namespace = options.namespace
     ctx = None
     try:
         ctx = Context( availability_zone=zone, namespace=namespace )
     except ValueError as e:
         raise UserError( cause=e )
     except:
         # print the namespace without __me__ substituted
         log.error( "An error occurred. Using zone '%s' and namespace '%s'", zone, namespace )
         raise
     else:
         # print the namespace with __me__ substituted
         log.info( "Using zone '%s' and namespace '%s'", ctx.availability_zone, ctx.namespace )
         return self.run_in_ctx( options, ctx )
     finally:
         if ctx is not None: ctx.close( )
Example #4
 def _buildContext(cls, clusterName, zone=None):
     if zone is None:
         zone = getCurrentAWSZone()
         if zone is None:
             raise RuntimeError(
                 'Could not determine availability zone. Ensure that one of the following '
                 'is true: the --zone flag is set, the TOIL_AWS_ZONE environment variable '
                 'is set, ec2_region_name is set in the .boto file, or that '
                 'you are running on EC2.')
     return Context(availability_zone=zone, namespace=cls._toNameSpace(clusterName))
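getCurrentAWSZone is not defined in this snippet. A minimal sketch of a resolver that follows part of the order named in the error message (the TOIL_AWS_ZONE environment variable, then the EC2 metadata service) might look like this; the .boto lookup is omitted and the sketch is an assumption, not the library's actual implementation:

 import os

 def getCurrentAWSZone():
     # Prefer an explicit environment variable, then fall back to instance metadata.
     # running_on_ec2() and get_instance_metadata() are the same helpers used in Example #8.
     zone = os.environ.get('TOIL_AWS_ZONE')
     if zone:
         return zone
     if running_on_ec2():
         return get_instance_metadata()['placement']['availability-zone']
     return None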
Example #5
 def run(self, options):
     zone = options.availability_zone
     namespace = options.namespace
     ctx = None
     try:
         ctx = Context(availability_zone=zone, namespace=namespace)
     except ValueError as e:
         raise UserError(cause=e)
     except:
         # print the namespace without __me__ substituted
         log.error("An error occurred. Using zone '%s' and namespace '%s'",
                   zone, namespace)
         raise
     else:
         # print the namespace with __me__ substituted
         log.info("Using zone '%s' and namespace '%s'",
                  ctx.availability_zone, ctx.namespace)
         return self.run_in_ctx(options, ctx)
     finally:
         if ctx is not None: ctx.close()
Example #6
 def __init__(self, config, batchSystem):
     self.instanceMetaData = get_instance_metadata()
     self.clusterName = self.instanceMetaData['security-groups']
     self.ctx = Context(availability_zone='us-west-2a', namespace=self._toNameSpace(self.clusterName))
     self.spotBid = None
     assert config.preemptableNodeType or config.nodeType
     if config.preemptableNodeType is not None:
         nodeBidTuple = config.preemptableNodeType.split(':', 1)
         self.spotBid = nodeBidTuple[1]
         self.instanceType = ec2_instance_types[nodeBidTuple[0]]
     else:
         self.instanceType = ec2_instance_types[config.nodeType]
     self.batchSystem = batchSystem
     self.leaderIP = self.instanceMetaData['local-ipv4']
     self.keyName = list(self.instanceMetaData['public-keys'].keys())[0]
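The preemptable node type is expected in the form 'instanceType:spotBid', which the split(':', 1) above takes apart. A quick illustration with placeholder values:

     nodeBidTuple = 'c3.8xlarge:0.62'.split(':', 1)
     # nodeBidTuple[0] -> 'c3.8xlarge' (looked up in ec2_instance_types)
     # nodeBidTuple[1] -> '0.62'       (the spot bid, kept as a string here)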
Example #7
 def run():
     log.info("Entering main loop.")
     ctx = Context(availability_zone=options.availability_zone,
                   namespace=options.namespace)
     throttle = LocalThrottle(min_interval=options.interval)
     for i in itertools.count():
         throttle.throttle()
         try:
             log.info("Starting run %i.", i)
             Agent(ctx, options).run()
             log.info("Completed run %i.", i)
         except (SystemExit, KeyboardInterrupt):
             log.info('Terminating.')
             break
         except:
             log.exception('Abandoning run due to exception')
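LocalThrottle is not shown in this snippet. A minimal sketch of such a rate limiter, assuming min_interval is a number of seconds and throttle() blocks until at least that long has passed since the previous call (an assumption, not the project's actual class):

 import time

 class LocalThrottle(object):
     def __init__(self, min_interval):
         self.min_interval = min_interval
         self.last = 0.0

     def throttle(self):
         # Sleep just long enough to keep successive iterations at least min_interval apart.
         remaining = self.min_interval - (time.time() - self.last)
         if remaining > 0:
             time.sleep(remaining)
         self.last = time.time()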
Example #8
 def setUpClass(cls):
     super(CgcloudTestCase, cls).setUpClass()
     if running_on_ec2():
         os.environ.setdefault(
             'CGCLOUD_ZONE',
             get_instance_metadata()['placement']['availability-zone'])
     # Using the d32 encoding of a binary string that starts with a 4-byte, big-endian time
     # stamp yields compact names whose lexicographical order matches their chronological
     # order. We add the process ID so tests can run concurrently in child processes under
     # the pytest-xdist plugin.
     suffix = aws_d32.encode(pack('>II', int(time.time()), os.getpid()))
     assert len(suffix) == test_namespace_suffix_length
     cls.__namespace = '/test/%s/' % suffix
     os.environ.setdefault('CGCLOUD_NAMESPACE', cls.__namespace)
     cls.ctx = Context(availability_zone=os.environ['CGCLOUD_ZONE'],
                       namespace=os.environ['CGCLOUD_NAMESPACE'])
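pack('>II', ...) produces an 8-byte big-endian string, 4 bytes of Unix timestamp followed by 4 bytes of process ID, which is why the encoded suffixes sort by creation time first. A quick check using only the standard struct module (aws_d32 itself is a cgcloud codec and is not reproduced here):

     from struct import pack
     raw = pack('>II', 1500000000, 4242)  # placeholder timestamp and PID
     assert len(raw) == 8                 # aws_d32.encode() then turns these 8 bytes into a short suffix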
Example #9
 def _getLeader(cls, clusterName, wait=False):
     ctx = Context(availability_zone='us-west-2a', namespace=cls._toNameSpace(clusterName))
     instances = cls.__getNodesInCluster(ctx, clusterName, both=True)
     instances.sort(key=lambda x: x.launch_time)
     leader = instances[0]  # assume leader was launched first
     if wait:
         logger.info("Waiting for leader to enter 'running' state...")
         wait_transition(leader, {'pending'}, 'running')
         logger.info('... leader is running')
         cls._waitForIP(leader)
         leaderIP = leader.ip_address
         cls._waitForSSHPort(leaderIP)
         # wait here so docker commands can be used reliably afterwards
         cls._waitForDockerDaemon(leaderIP)
         cls._waitForAppliance(leaderIP)
     return leader
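_waitForIP and the other _waitFor* helpers are not shown. A minimal sketch of a polling helper in their spirit, assuming a boto instance object that exposes update() and ip_address (an assumption, not the project's actual code):

 import time

 def waitForIP(instance, poll_interval=5):
     # Poll EC2 until the instance has been assigned a public IP address.
     while not instance.ip_address:
         time.sleep(poll_interval)
         instance.update()
     return instance.ip_address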
Example #10
 def destroyCluster(cls, clusterName):
     def expectedShutdownErrors(e):
         return e.status == 400 and 'dependent object' in e.body

     ctx = Context(availability_zone='us-west-2a', namespace=cls._toNameSpace(clusterName))
     instances = cls.__getNodesInCluster(ctx, clusterName, both=True)
     spotIDs = cls._getSpotRequestIDs(ctx, clusterName)
     if spotIDs:
         ctx.ec2.cancel_spot_instance_requests(request_ids=spotIDs)
     if instances:
         cls._deleteIAMProfiles(instances=instances, ctx=ctx)
         cls._terminateInstance(instances=instances, ctx=ctx)
     logger.info('Deleting security group...')
     for attempt in retry_ec2(retry_after=30, retry_for=300, retry_while=expectedShutdownErrors):
         with attempt:
             ctx.ec2.delete_security_group(name=clusterName)
     logger.info('... Successfully deleted security group')