def apply_workers( ):
    log.info( '=== Performing %s on workers ===', operation )
    instances = first_worker.list( leader_instance_id=leader.instance_id )
    # Pair each worker object (the first worker followed by clones of it) with
    # the ID of one of the listed instances, and apply the operation in parallel.
    papply( apply_worker,
            pool_size=pool_size,
            seq=zip( concat( first_worker, clones( ) ),
                     (i.id for i in instances) ) )
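# papply(), concat() and clones() are helpers defined elsewhere in this
# codebase. As a minimal, hypothetical sketch (the signature and behavior are
# assumptions, not the real implementation), papply() can be thought of as a
# capped thread pool that unpacks each tuple in seq as the positional
# arguments of the given function:

from multiprocessing.pool import ThreadPool

def papply( f, seq, pool_size=8, callback=None ):
    # Apply f to each argument tuple in parallel, capping concurrency at
    # pool_size, and optionally feed each result to a callback.
    seq = list( seq )
    pool = ThreadPool( min( pool_size, max( 1, len( seq ) ) ) )
    try:
        for result in pool.map( lambda args: f( *args ), seq ):
            if callback is not None:
                callback( result )
    finally:
        pool.close( )
        pool.join( )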
def addNodes(self, numNodes=1, preemptable=False):
    instanceSpec = dict(image_id=self.ami[preemptable],
                        key_name=self._keyName,
                        user_data=self._userData(),
                        instance_type=self.instanceType[preemptable],
                        instance_profile_arn=self._instanceProfileArn(),
                        security_group_ids=self._securityGroupIds,
                        ebs_optimized=self.ebsOptimized,
                        dry_run=False)
    if preemptable:
        requests = self._ec2.request_spot_instances(price=self.spotBid,
                                                    count=numNodes,
                                                    # TODO: spread nodes over availability zones
                                                    placement=self._availabilityZone(),
                                                    placement_group=None,
                                                    launch_group=None,
                                                    availability_zone_group=None,
                                                    **instanceSpec)
        instances = wait_for_spot_instances(self._ec2, requests)
    else:
        reservation = self._ec2.run_instances(min_count=numNodes,
                                              max_count=numNodes,
                                              **instanceSpec)
        instances = reservation.instances

    # Wait for all nodes concurrently using a thread pool. The pool size is capped, though.
    # FIXME: It may be more efficient (and AWS-friendly) to request the state of all
    # instances in a single request.
    #
    def wait_running(instance):
        wait_transition(instance, from_states={'pending'}, to_state='running')

    assert len(instances) == numNodes
    papply(wait_running, instances)

    # If the batch system is scalable, we can use it to wait for the nodes to join the cluster.
    if isinstance(self.batchSystem, AbstractScalableBatchSystem):
        while instances:
            # getNodes() maps the address of each node known to the batch system to
            # information about that node. Treat an instance as joined once its
            # address (assumed here to be its private IP) shows up among the keys.
            addresses = set(self.batchSystem.getNodes().keys())
            instances = [i for i in instances
                         if i.private_ip_address not in addresses]
            time.sleep(10)
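# wait_transition() is likewise defined elsewhere. A minimal sketch, assuming
# a boto2-style Instance whose update() call refreshes the object and returns
# its current state string; the interval parameter is a hypothetical addition:

import time

def wait_transition(instance, from_states, to_state, interval=5):
    # Poll until the instance leaves from_states, then verify it actually
    # reached to_state rather than some other state (e.g. 'terminated').
    state = instance.update()
    while state in from_states:
        time.sleep(interval)
        state = instance.update()
    if state != to_state:
        raise RuntimeError('Instance %s entered state %s instead of %s'
                           % (instance.id, state, to_state))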
def apply_workers( ):
    log.info( '=== Performing %s on workers ===', operation )
    workers = first_worker.list( leader_instance_id=leader.instance_id,
                                 wait_ready=wait_ready )
    # zip() creates the singleton tuples that papply() expects
    papply( f, seq=zip( workers ), pool_size=pool_size, callback=callback )
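# For reference, zip() over a single iterable wraps each element in a
# 1-tuple, which papply() then unpacks as the sole positional argument of f:
#
#     >>> list( zip( [ 'worker-1', 'worker-2' ] ) )
#     [('worker-1',), ('worker-2',)]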