def __init__(self, provider_config, cluster_name):
    """Initialize the AWS node provider.

    Sets up two EC2 clients (one retrying, one fail-fast), the batched
    tag-update synchronization primitives, and the node cache.

    Args:
        provider_config: Provider section of the cluster config; reads
            "region", and optionally "cache_stopped_nodes" (default True)
            and "aws_credentials".
        cluster_name: Name of the cluster this provider manages.
    """
    NodeProvider.__init__(self, provider_config, cluster_name)
    # Whether stopped nodes may be reused instead of terminated/created.
    self.cache_stopped_nodes = provider_config.get("cache_stopped_nodes",
                                                   True)
    aws_credentials = provider_config.get("aws_credentials")
    # Client with retries for normal operation.
    self.ec2 = make_ec2_client(
        region=provider_config["region"],
        max_retries=BOTO_MAX_RETRIES,
        aws_credentials=aws_credentials)
    # Zero-retry client for calls where errors should surface immediately.
    self.ec2_fail_fast = make_ec2_client(
        region=provider_config["region"],
        max_retries=0,
        aws_credentials=aws_credentials)
    # Try availability zones round-robin, starting from random offset
    self.subnet_idx = random.randint(0, 100)
    # Tags that we believe to actually be on EC2.
    self.tag_cache = {}
    # Tags that we will soon upload.
    self.tag_cache_pending = defaultdict(dict)
    # Number of threads waiting for a batched tag update.
    self.batch_thread_count = 0
    # Signaled when the current tag batch has been uploaded; starts set
    # so the first batch does not block.
    self.batch_update_done = threading.Event()
    self.batch_update_done.set()
    # Signaled when a new batch may begin accumulating; starts set.
    self.ready_for_new_batch = threading.Event()
    self.ready_for_new_batch.set()
    self.tag_cache_lock = threading.Lock()
    self.count_lock = threading.Lock()
    # Cache of node objects from the last nodes() call. This avoids
    # excessive DescribeInstances requests.
    self.cached_nodes = {}
def mock_init(self, provider_config, cluster_name):
    """Stand-in initializer that also tracks head-node creation.

    Adds the ``_head_created`` attribute so callers can detect whether
    the provider has created the head node.
    """
    NodeProvider.__init__(self, provider_config, cluster_name)
    # Flag used to detect if the provider has created the head.
    self._head_created = False
    self.namespace = provider_config["namespace"]
    self.cluster_name = cluster_name
def __init__(self, provider_config, cluster_name):
    """Initialize the Azure node provider.

    Tries Azure CLI profile authentication first; if the CLI reports
    that no account is logged in, falls back to Managed Service
    Identity (MSI) credentials.

    Args:
        provider_config: Provider section of the cluster config; may
            contain "subscription_id".
        cluster_name: Name of the cluster this provider manages.
    """
    NodeProvider.__init__(self, provider_config, cluster_name)
    kwargs = {}
    if "subscription_id" in provider_config:
        kwargs["subscription_id"] = provider_config["subscription_id"]
    try:
        self.compute_client = get_client_from_cli_profile(
            client_class=ComputeManagementClient, **kwargs)
        self.network_client = get_client_from_cli_profile(
            client_class=NetworkManagementClient, **kwargs)
        self.resource_client = get_client_from_cli_profile(
            client_class=ResourceManagementClient, **kwargs)
    except CLIError as e:
        # Only the specific "not logged in" error triggers the MSI
        # fallback; any other CLI error is re-raised.
        if str(e) != "Please run 'az login' to setup account.":
            raise
        else:
            logger.info("CLI profile authentication failed. Trying MSI")
            credentials = MSIAuthentication()
            self.compute_client = ComputeManagementClient(
                credentials=credentials, **kwargs)
            self.network_client = NetworkManagementClient(
                credentials=credentials, **kwargs)
            self.resource_client = ResourceManagementClient(
                credentials=credentials, **kwargs)
    self.lock = RLock()
    # cache node objects
    self.cached_nodes = {}
def __init__(self, provider_config, cluster_name):
    """Set up EC2 clients, tag-update machinery, and node caches."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    self.cache_stopped_nodes = provider_config.get("cache_stopped_nodes",
                                                   True)

    region = provider_config["region"]
    credentials = provider_config.get("aws_credentials")
    # One client with retries for normal operation, and one with zero
    # retries for calls that should fail fast.
    self.ec2 = make_ec2_client(
        region=region,
        max_retries=BOTO_MAX_RETRIES,
        aws_credentials=credentials)
    self.ec2_fail_fast = make_ec2_client(
        region=region, max_retries=0, aws_credentials=credentials)

    # Try availability zones round-robin, starting from random offset.
    self.subnet_idx = random.randint(0, 100)

    # tag_cache holds tags we believe to actually be on EC2;
    # tag_cache_pending holds tags that we will soon upload.
    self.tag_cache = {}
    self.tag_cache_pending = {}
    self.tag_cache_lock = threading.Lock()
    self.tag_cache_update_event = threading.Event()
    self.tag_cache_kill_event = threading.Event()
    # Background worker that pushes pending tags to EC2.
    self.tag_update_thread = threading.Thread(
        target=self._node_tag_update_loop)
    self.tag_update_thread.start()

    # Cache of node objects from the last nodes() call. This avoids
    # excessive DescribeInstances requests.
    self.cached_nodes = {}
def __init__(self, provider_config: dict, cluster_name: str):
    """Initialize the GCP provider and construct its API clients."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    self.lock = RLock()
    self._construct_clients()
    # Node objects from the most recent nodes() call, cached to avoid
    # issuing excessive DescribeInstances-style requests.
    self.cached_nodes: Dict[str, GCPNode] = {}
def __init__(self, provider_config, cluster_name):
    """Create the provider and a Compute Engine API client."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    self.lock = RLock()
    self.compute = discovery.build("compute", "v1")
    # Cache nodes returned by the most recent nodes() call so we do not
    # issue excessive describe requests against the API.
    self.cached_nodes = {}
def __init__(self, provider_config, cluster_name):
    """Initialize the node provider and build the GCE client handle."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    self.lock = RLock()
    self.compute = discovery.build("compute", "v1")
    # Node-object cache refreshed by nodes(); avoids repeated describe
    # calls for instances we already know about.
    self.cached_nodes = {}
def __init__(self, provider_config, cluster_name):
    """Initialize the provider, retaining only the compute client."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    self.lock = RLock()
    # The helper returns three clients; only the compute client (the
    # third) is kept here.
    _, _, self.compute = construct_clients_from_provider_config(
        provider_config)
    # Node objects from the last nodes() call, cached to avoid
    # excessive DescribeInstances-style requests.
    self.cached_nodes = {}
def __init__(self, provider_config, cluster_name):
    """Set up the EC2 resource handle and lookup caches."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    region = provider_config["region"]
    self.ec2 = boto3.resource("ec2", region_name=region)
    # Node objects from the last nodes() call; avoids extra
    # DescribeInstances round-trips.
    self.cached_nodes = {}
    # IP lookups are cached forever; IPs are assumed to never change
    # once assigned.
    self.internal_ip_cache = {}
    self.external_ip_cache = {}
def __init__(self, provider_config, cluster_name):
    """Build the GCE client and initialize node/IP caches."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    self.compute = discovery.build("compute", "v1")
    # IP lookups are cached indefinitely; assigned IPs are assumed to
    # never change.
    self.internal_ip_cache = {}
    self.external_ip_cache = {}
    # Node objects from the latest nodes() call; avoids excessive
    # describe requests.
    self.cached_nodes = {}
def __init__(self, provider_config, cluster_name):
    """Initialize the Staroid-backed provider from config/env settings."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    self.__cached = {}
    token = provider_config["access_token"]
    account = provider_config["account"]
    self.__star = Staroid(access_token=token, account=account)
    # SKE name and region may come from config or from the environment.
    self.__ske = self._get_config_or_env(
        provider_config, "ske", "STAROID_SKE")
    self.__ske_region = self._get_config_or_env(
        provider_config, "ske_region", "STAROID_SKE_REGION")
def __init__(self, provider_config, cluster_name):
    """Fetch GCP credentials and construct the compute client."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    self.lock = RLock()
    credentials = fetch_gcp_credentials_from_provider_config(
        provider_config)
    self.compute = _create_compute(credentials)
    # Nodes from the last nodes() call, cached to avoid excessive
    # describe requests.
    self.cached_nodes = {}
def __init__(self, provider_config, cluster_name):
    """Create the compute API client and per-provider caches."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    self.compute = discovery.build("compute", "v1")
    # Cache of node objects from the last nodes() call. Avoids
    # hammering the API with repeated describe requests.
    self.cached_nodes = {}
    # Once assigned, instance IPs are assumed immutable, so lookups
    # are cached for the provider's lifetime.
    self.internal_ip_cache = {}
    self.external_ip_cache = {}
def __init__(self, provider_config, cluster_name):
    """Create a retry-configured EC2 resource and lookup caches."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    boto_config = Config(retries=dict(max_attempts=BOTO_MAX_RETRIES))
    self.ec2 = boto3.resource(
        "ec2",
        region_name=provider_config["region"],
        config=boto_config)
    # Node objects from the last nodes() call; avoids excessive
    # DescribeInstances requests.
    self.cached_nodes = {}
    # IPs are assumed to never change once assigned, so lookups are
    # cached permanently.
    self.internal_ip_cache = {}
    self.external_ip_cache = {}
def __init__(self, provider_config, cluster_name):
    """Set up the EC2 resource, AZ round-robin offset, and caches."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    retry_config = Config(retries={'max_attempts': BOTO_MAX_RETRIES})
    self.ec2 = boto3.resource(
        "ec2",
        region_name=provider_config["region"],
        config=retry_config)
    # Availability zones are tried round-robin, starting from a random
    # offset.
    self.subnet_idx = random.randint(0, 100)
    # Node objects from the last nodes() call; avoids extra
    # DescribeInstances calls.
    self.cached_nodes = {}
    # IP lookups cached forever; IPs assumed stable once assigned.
    self.internal_ip_cache = {}
    self.external_ip_cache = {}
def __init__(self, provider_config, cluster_name):
    """Authenticate with Azure and build the management clients."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    sub_id = provider_config["subscription_id"]
    # The shared token cache credential is excluded; the remaining
    # default credential sources are tried in order.
    credential = DefaultAzureCredential(
        exclude_shared_token_cache_credential=True)
    self.compute_client = ComputeManagementClient(credential, sub_id)
    self.network_client = NetworkManagementClient(credential, sub_id)
    self.resource_client = ResourceManagementClient(credential, sub_id)
    self.lock = RLock()
    # cache node objects
    self.cached_nodes = {}
def __init__(self, provider_config, cluster_name):
    """Pick between per-cluster state and coordinator-managed state."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    if not cluster_name:
        # LocalNodeProvider with a coordinator server.
        self.use_coordinator = True
        self.state = OnPremCoordinatorState(
            "/tmp/coordinator.lock", "/tmp/coordinator.state",
            provider_config["list_of_node_ips"])
    else:
        self.use_coordinator = False
        self.state = ClusterState(
            "/tmp/cluster-{}.lock".format(cluster_name),
            "/tmp/cluster-{}.state".format(cluster_name),
            provider_config,
        )
def __init__(self, provider_config, cluster_name):
    """Initialize the EC2 handle, subnet offset, and caches."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    conf = Config(retries={'max_attempts': BOTO_MAX_RETRIES})
    self.ec2 = boto3.resource(
        "ec2", region_name=provider_config["region"], config=conf)
    # Try availability zones round-robin, starting from random offset.
    self.subnet_idx = random.randint(0, 100)
    # Caches: node objects from the last nodes() call (avoids excessive
    # DescribeInstances requests) and IP lookups (IPs are assumed to
    # never change once assigned).
    self.cached_nodes = {}
    self.internal_ip_cache = {}
    self.external_ip_cache = {}
def __init__(self, provider_config, cluster_name):
    """Initialize the fake multi-node provider with a single head node.

    Raises:
        RuntimeError: if RAY_FAKE_CLUSTER is not set in the environment.
    """
    NodeProvider.__init__(self, provider_config, cluster_name)
    if "RAY_FAKE_CLUSTER" not in os.environ:
        raise RuntimeError(
            "FakeMultiNodeProvider requires ray to be started with "
            "RAY_FAKE_CLUSTER=1 ray start ...")
    head_tags = {
        TAG_RAY_NODE_KIND: NODE_KIND_HEAD,
        TAG_RAY_USER_NODE_TYPE: FAKE_HEAD_NODE_TYPE,
        TAG_RAY_NODE_NAME: FAKE_HEAD_NODE_ID,
        TAG_RAY_NODE_STATUS: STATUS_UP_TO_DATE,
    }
    # The fake cluster starts out containing only the head node.
    self._nodes = {FAKE_HEAD_NODE_ID: {"tags": head_tags}}
    self._next_node_id = 0
def __init__(self, provider_config, cluster_name):
    """Choose per-cluster local state or coordinator-managed state."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    if cluster_name:
        self.use_coordinator = False
        self.state = ClusterState(
            get_lock_path(cluster_name),
            get_state_path(cluster_name),
            provider_config,
        )
    else:
        # LocalNodeProvider with a coordinator server.
        self.use_coordinator = True
        self.state = OnPremCoordinatorState(
            "/tmp/coordinator.lock", "/tmp/coordinator.state",
            provider_config["list_of_node_ips"])
def __init__(self, provider_config, cluster_name):
    """Create the EC2 resource and start the tag-update worker."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    retry_conf = Config(retries={"max_attempts": BOTO_MAX_RETRIES})
    self.ec2 = boto3.resource(
        "ec2", region_name=provider_config["region"], config=retry_conf)

    # Try availability zones round-robin, starting from random offset.
    self.subnet_idx = random.randint(0, 100)

    # tag_cache: tags we believe to actually be on EC2.
    # tag_cache_pending: tags that we will soon upload.
    self.tag_cache = {}
    self.tag_cache_pending = {}
    self.tag_cache_lock = threading.Lock()
    self.tag_cache_update_event = threading.Event()
    self.tag_cache_kill_event = threading.Event()
    # Background worker that flushes pending tags to EC2.
    self.tag_update_thread = threading.Thread(
        target=self._node_tag_update_loop)
    self.tag_update_thread.start()

    # Cache of node objects from the last nodes() call; avoids
    # excessive DescribeInstances requests.
    self.cached_nodes = {}
def __init__(self, provider_config, cluster_name):
    """Authenticate via the Azure CLI profile, falling back to MSI."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    client_kwargs = {}
    if "subscription_id" in provider_config:
        client_kwargs["subscription_id"] = provider_config[
            "subscription_id"]
    try:
        self.compute_client = get_client_from_cli_profile(
            client_class=ComputeManagementClient, **client_kwargs)
        self.network_client = get_client_from_cli_profile(
            client_class=NetworkManagementClient, **client_kwargs)
    except Exception:
        # Any CLI-profile failure falls back to managed-service
        # identity; both clients are rebuilt with MSI credentials.
        logger.info("CLI profile authentication failed. Trying MSI",
                    exc_info=True)
        msi_credentials = MSIAuthentication()
        self.compute_client = ComputeManagementClient(
            credentials=msi_credentials, **client_kwargs)
        self.network_client = NetworkManagementClient(
            credentials=msi_credentials, **client_kwargs)
    self.lock = RLock()
    # cache node objects
    self.cached_nodes = {}
def __init__(self, provider_config, cluster_name):
    """Initialize local cluster state backed by files under /tmp."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    lock_file = "/tmp/cluster-{}.lock".format(cluster_name)
    state_file = "/tmp/cluster-{}.state".format(cluster_name)
    self.state = ClusterState(lock_file, state_file, provider_config)
def __init__(self, provider_config, cluster_name):
    """Create the provider with a regional EC2 resource handle."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    region = provider_config["region"]
    self.ec2 = boto3.resource("ec2", region_name=region)
def __init__(self, provider_config, cluster_name):
    """Record the cluster name and the configured namespace."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    # Namespace is taken straight from the provider config.
    self.namespace = provider_config["namespace"]
    self.cluster_name = cluster_name
def __init__(self, provider_config, cluster_name):
    """Store the coordinator server address from the provider config."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    self.coordinator_address = provider_config["coordinator_address"]
def __init__(self, provider_config, cluster_name):
    """Set up per-cluster state persisted in /tmp files."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    self.state = ClusterState(
        "/tmp/cluster-{}.lock".format(cluster_name),
        "/tmp/cluster-{}.state".format(cluster_name),
        provider_config)
def __init__(self, provider_config, cluster_name):
    """Initialize the provider with an empty node table."""
    NodeProvider.__init__(self, provider_config, cluster_name)
    # NOTE(review): presumably keyed by node id — confirm against callers.
    self.nodes = {}