def authenticate_credentials(self, key):  # pylint:disable=arguments-differ
    """Resolve a base64-encoded `token<SEP>uuid` pair into an ephemeral user.

    The key is expected to decode to two parts joined by
    `RedisEphemeralTokens.SEPARATOR`: the raw token value and the token uuid.
    Raises `exceptions.AuthenticationFailed` on any malformed or invalid input;
    a failed token check also clears the stored token (single-use semantics —
    presumably intentional, verify against RedisEphemeralTokens).
    """
    try:
        credentials = base64.b64decode(key).decode('utf-8')
        auth_parts = credentials.split(RedisEphemeralTokens.SEPARATOR)
    except (TypeError, UnicodeDecodeError, binascii.Error):
        raise exceptions.AuthenticationFailed(
            'Invalid basic header. Credentials not correctly base64 encoded.')

    if len(auth_parts) != 2:
        raise exceptions.AuthenticationFailed(
            'Invalid token header. Token should contain token and uuid.')

    token, token_uuid = auth_parts
    ephemeral_token = RedisEphemeralTokens(token_uuid)
    if not ephemeral_token:
        raise exceptions.AuthenticationFailed('Invalid token.')

    # Capture the scope before the check: a failed check clears the token.
    scope = ephemeral_token.scope
    if not ephemeral_token.check_token(token=token):
        ephemeral_token.clear()
        raise exceptions.AuthenticationFailed('Invalid token header')

    return EphemeralUser(scope=scope), None
def run(cls):
    """Probe every Redis-backed subsystem and report the unhealthy ones.

    Returns a dict mapping a subsystem key to its failing health Result.
    When every check passes, returns `{'REDIS': Result()}` as an overall
    healthy marker (matching the previous behavior of this check).
    """
    checks = (
        ('REDIS_EPH_TOKENS', RedisEphemeralTokens),
        ('REDIS_SESSIONS', RedisSessions),
        ('REDIS_TTL', RedisTTL),
        ('REDIS_TO_STREAM', RedisToStream),
        ('REDIS_CONTAINERS', RedisJobContainers),
    )
    results = {}
    for key, redis_db in checks:
        result = cls.redis_health(redis_db.connection())
        if not result.is_healthy:
            results[key] = result
    # An empty dict means everything is healthy; surface a single OK result.
    return results or {'REDIS': Result()}
def post(self, request, *args, **kwargs):
    """Exchange a valid ephemeral-token scope for the experiment owner's API token.

    Returns 403 unless the requesting ephemeral user carries exactly the
    scope generated for this experiment and the experiment is currently
    scheduled, starting, or running.
    """
    ephemeral_scope = request.user.scope
    if ephemeral_scope is None:
        return Response(status=status.HTTP_403_FORBIDDEN)

    experiment = self.get_object()
    active_statuses = {ExperimentLifeCycle.SCHEDULED,
                       ExperimentLifeCycle.STARTING,
                       ExperimentLifeCycle.RUNNING}
    if experiment.last_status not in active_statuses:
        return Response(status=status.HTTP_403_FORBIDDEN)

    expected_scope = RedisEphemeralTokens.get_scope(user=experiment.user.id,
                                                    model='experiment',
                                                    object_id=experiment.id)
    # Order-insensitive scope comparison.
    if sorted(ephemeral_scope) != sorted(expected_scope):
        return Response(status=status.HTTP_403_FORBIDDEN)

    token, _ = Token.objects.get_or_create(user=experiment.user)
    return Response({'token': token.key}, status=status.HTTP_200_OK)
def start_experiment(experiment):
    """Schedule and launch an experiment on Kubernetes via its framework spawner.

    Flow: mark the experiment SCHEDULED, resolve the docker image (built or
    default), build the spawner, start it, and hand the response to
    `handle_experiment`. Any failure marks the experiment FAILED with a
    message and traceback; this function returns None in all cases.
    """
    # Update experiment status to show that its started
    experiment.set_status(ExperimentLifeCycle.SCHEDULED)

    project = experiment.project
    group = experiment.experiment_group

    job_docker_image = None  # This will force the spawners to use the default docker image
    if experiment.specification.build:
        try:
            image_name, image_tag = get_image_info(
                build_job=experiment.build_job)
        except (ValueError, AttributeError):
            # Missing/invalid build info: fail the experiment and bail out early.
            _logger.error('Could not start the experiment.', exc_info=True)
            experiment.set_status(ExperimentLifeCycle.FAILED,
                                  message='Image info was not found.')
            return
        job_docker_image = '{}:{}'.format(image_name, image_tag)
        _logger.info('Start experiment with built image `%s`', job_docker_image)
    else:
        _logger.info('Start experiment with default image.')

    spawner_class = get_spawner_class(experiment.specification.framework)

    # Scope for the ephemeral token handed to the experiment's sidecar/jobs.
    token_scope = RedisEphemeralTokens.get_scope(experiment.user.id,
                                                 'experiment',
                                                 experiment.id)
    error = {}
    try:
        # Use spawners to start the experiment
        spawner = spawner_class(
            project_name=project.unique_name,
            experiment_name=experiment.unique_name,
            experiment_group_name=group.unique_name if group else None,
            project_uuid=project.uuid.hex,
            experiment_group_uuid=group.uuid.hex if group else None,
            experiment_uuid=experiment.uuid.hex,
            persistence_config=experiment.persistence_config,
            outputs_refs_experiments=experiment.outputs_refs_experiments,
            outputs_refs_jobs=experiment.outputs_refs_jobs,
            original_name=experiment.original_unique_name,
            cloning_strategy=experiment.cloning_strategy,
            spec=experiment.specification,
            k8s_config=conf.get('K8S_CONFIG'),
            namespace=conf.get('K8S_NAMESPACE'),
            in_cluster=True,
            job_docker_image=job_docker_image,
            use_sidecar=True,
            token_scope=token_scope)
        response = spawner.start_experiment()
        handle_experiment(experiment=experiment,
                          spawner=spawner,
                          response=response)
    except ApiException as e:
        # Kubernetes API rejected the resources (bad spec, quota, auth, ...).
        _logger.error(
            'Could not start the experiment, please check your polyaxon spec.',
            exc_info=True)
        error = {
            'raised': True,
            'traceback': traceback.format_exc(),
            'message': 'Could not start the experiment, encountered a Kubernetes ApiException.'
        }
    except VolumeNotFoundError as e:
        _logger.error(
            'Could not start the experiment, please check your volume definitions.',
            exc_info=True)
        error = {
            'raised': True,
            'traceback': traceback.format_exc(),
            'message': 'Could not start the experiment, '
                       'encountered a volume definition problem, %s.' % e
        }
    except Exception as e:
        # Catch-all boundary: any other failure also marks the experiment FAILED.
        _logger.error(
            'Could not start the experiment, please check your polyaxon spec',
            exc_info=True)
        error = {
            'raised': True,
            'traceback': traceback.format_exc(),
            'message': 'Could not start the experiment encountered an {} exception.'.format(
                e.__class__.__name__)
        }
    finally:
        # Only mark FAILED when one of the handlers above actually fired.
        if error.get('raised'):
            experiment.set_status(ExperimentLifeCycle.FAILED,
                                  message=error.get('message'),
                                  traceback=error.get('traceback'))
def _create_job(
        self,  # pylint:disable=arguments-differ
        task_type,
        command=None,
        args=None,
        env_vars=None,
        resources=None,
        node_selector=None,
        affinity=None,
        tolerations=None,
        replicas=1,
        restart_policy='Never'):
    """Build the replica spec (template + replicas + restart policy) for a task type.

    Assembles volumes/mounts (persistence, output refs, shm, auth context),
    validates secret/configmap refs, and delegates pod construction to the
    resource manager. Returns a dict consumable by the KF operator resource.
    """
    # Mint a scoped ephemeral token for the pod only when a scope was set.
    ephemeral_token = None
    if self.token_scope:
        ephemeral_token = RedisEphemeralTokens.generate_header_token(
            scope=self.token_scope)

    resource_name = self.resource_manager.get_kf_resource_name(
        task_type=task_type)
    labels = self.resource_manager.get_labels(task_type=task_type)

    # Set and validate volumes: start from the persistence volumes, then
    # append refs, shm and auth-context volumes in that order.
    volumes, volume_mounts = get_pod_volumes(
        persistence_outputs=self.persistence_config.outputs,
        persistence_data=self.persistence_config.data)
    extra_volume_sets = (
        get_pod_refs_outputs_volumes(
            outputs_refs=self.outputs_refs_jobs,
            persistence_outputs=self.persistence_config.outputs),
        get_pod_refs_outputs_volumes(
            outputs_refs=self.outputs_refs_experiments,
            persistence_outputs=self.persistence_config.outputs),
        get_shm_volumes(),
    )
    for extra_volumes, extra_mounts in extra_volume_sets:
        volumes += extra_volumes
        volume_mounts += extra_mounts

    # Auth context mounts are kept separately: they are also passed to the
    # pod template as init_context_mounts.
    context_volumes, context_mounts = get_auth_context_volumes()
    volumes += context_volumes
    volume_mounts += context_mounts

    # Validate secret and configmap refs
    secret_refs = validate_secret_refs(self.spec.secret_refs)
    configmap_refs = validate_configmap_refs(self.spec.configmap_refs)

    pod_template_spec = self.resource_manager.get_pod_template_spec(
        resource_name=resource_name,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=labels,
        env_vars=env_vars,
        command=command,
        args=args,
        ports=self.ports,
        init_env_vars=self.get_init_env_vars(),
        persistence_outputs=self.persistence_config.outputs,
        persistence_data=self.persistence_config.data,
        outputs_refs_jobs=self.outputs_refs_jobs,
        outputs_refs_experiments=self.outputs_refs_experiments,
        secret_refs=secret_refs,
        configmap_refs=configmap_refs,
        resources=resources,
        ephemeral_token=ephemeral_token,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        init_context_mounts=context_mounts,
        restart_policy=restart_policy)
    return {
        'replicas': replicas,
        'restartPolicy': restart_policy,
        'template': pod_template_spec
    }
def _create_job(self,
                task_type,
                task_idx,
                add_service,
                command=None,
                args=None,
                env_vars=None,
                resources=None,
                annotations=None,
                node_selector=None,
                affinity=None,
                tolerations=None,
                max_restarts=None):
    """Create (or update) the pod — and optionally the service — for one task replica.

    Builds volumes/mounts, a reconcile URL for the sidecar, and the task pod
    via the resource manager, then applies them to the cluster.
    Returns a dict with the created pod (and service, when `add_service`)
    serialized with `to_dict()`.
    NOTE(review): `annotations` is accepted but not used in this body —
    presumably consumed by an override or kept for signature parity; verify.
    """
    # Mint a scoped ephemeral token for the pod only when a scope was set.
    ephemeral_token = None
    if self.token_scope:
        ephemeral_token = RedisEphemeralTokens.generate_header_token(
            scope=self.token_scope)

    resource_name = self.resource_manager.get_resource_name(
        task_type=task_type, task_idx=task_idx)
    job_uuid = self.get_job_uuids(task_type=task_type, task_idx=task_idx)
    reconcile_url = get_experiment_reconcile_url(self.experiment_name,
                                                 job_uuid)
    labels = self.get_labels(task_type=task_type,
                             task_idx=task_idx,
                             job_uuid=job_uuid)

    # Set and validate volumes: persistence first, then refs and shm.
    volumes, volume_mounts = get_pod_volumes(
        persistence_outputs=self.persistence_config.outputs,
        persistence_data=self.persistence_config.data)
    extra_volume_sets = (
        get_pod_refs_outputs_volumes(
            outputs_refs=self.outputs_refs_jobs,
            persistence_outputs=self.persistence_config.outputs),
        get_pod_refs_outputs_volumes(
            outputs_refs=self.outputs_refs_experiments,
            persistence_outputs=self.persistence_config.outputs),
        get_shm_volumes(),
    )
    for extra_volumes, extra_mounts in extra_volume_sets:
        volumes += extra_volumes
        volume_mounts += extra_mounts

    # Auth context mounts are also handed to the pod as init_context_mounts.
    context_volumes, context_mounts = get_auth_context_volumes()
    volumes += context_volumes
    volume_mounts += context_mounts

    pod = self.resource_manager.get_task_pod(
        task_type=task_type,
        task_idx=task_idx,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=labels,
        env_vars=env_vars,
        command=command,
        args=args,
        ports=self.ports,
        init_env_vars=self.get_init_env_vars(),
        persistence_outputs=self.persistence_config.outputs,
        persistence_data=self.persistence_config.data,
        outputs_refs_jobs=self.outputs_refs_jobs,
        outputs_refs_experiments=self.outputs_refs_experiments,
        secret_refs=self.spec.secret_refs,
        config_map_refs=self.spec.config_map_refs,
        resources=resources,
        ephemeral_token=ephemeral_token,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        init_context_mounts=context_mounts,
        reconcile_url=reconcile_url,
        max_restarts=max_restarts,
        restart_policy=get_pod_restart_policy(max_restarts))
    pod_resp, _ = self.create_or_update_pod(name=resource_name,
                                            body=pod,
                                            reraise=True)
    results = {'pod': pod_resp.to_dict()}

    if add_service:
        service = services.get_service(namespace=self.namespace,
                                       name=resource_name,
                                       labels=labels,
                                       ports=self.ports,
                                       target_ports=self.ports)
        service_resp, _ = self.create_or_update_service(name=resource_name,
                                                        body=service,
                                                        reraise=True)
        results['service'] = service_resp.to_dict()
    return results
def _create_job(self,
                task_type,
                task_idx,
                add_service,
                command=None,
                args=None,
                env_vars=None,
                resources=None,
                node_selector=None,
                affinity=None,
                tolerations=None,
                restart_policy='Never'):
    """Create (or update) the pod — and optionally the service — for one task replica.

    Builds the sidecar args, volumes/mounts and the task pod via the pod
    manager, then applies them to the cluster. Returns a dict with the
    created pod (and service, when `add_service`) serialized via `to_dict()`.
    """
    # Fix: only generate an ephemeral token when a scope is actually set,
    # consistent with the other spawners' _create_job implementations;
    # previously a token was minted unconditionally, even for an empty scope.
    ephemeral_token = None
    if self.token_scope:
        ephemeral_token = RedisEphemeralTokens.generate_header_token(
            scope=self.token_scope)

    job_name = self.pod_manager.get_job_name(task_type=task_type,
                                             task_idx=task_idx)
    sidecar_args = get_sidecar_args(pod_id=job_name)
    labels = self.pod_manager.get_labels(task_type=task_type,
                                         task_idx=task_idx)

    # Set and validate volumes: persistence first, then output refs and shm.
    volumes, volume_mounts = get_pod_volumes(
        persistence_outputs=self.persistence_config.outputs,
        persistence_data=self.persistence_config.data)
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=self.outputs_refs_jobs,
        persistence_outputs=self.persistence_config.outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=self.outputs_refs_experiments,
        persistence_outputs=self.persistence_config.outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    shm_volumes, shm_volume_mounts = get_shm_volumes()
    volumes += shm_volumes
    volume_mounts += shm_volume_mounts

    # Validate secret and configmap refs
    secret_refs = validate_secret_refs(self.spec.secret_refs)
    configmap_refs = validate_configmap_refs(self.spec.configmap_refs)

    pod = self.pod_manager.get_pod(
        task_type=task_type,
        task_idx=task_idx,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=labels,
        env_vars=env_vars,
        command=command,
        args=args,
        sidecar_args=sidecar_args,
        persistence_outputs=self.persistence_config.outputs,
        persistence_data=self.persistence_config.data,
        outputs_refs_jobs=self.outputs_refs_jobs,
        outputs_refs_experiments=self.outputs_refs_experiments,
        secret_refs=secret_refs,
        configmap_refs=configmap_refs,
        resources=resources,
        ephemeral_token=ephemeral_token,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        restart_policy=restart_policy)
    pod_resp, _ = self.create_or_update_pod(name=job_name, data=pod)
    results = {'pod': pod_resp.to_dict()}

    if add_service:
        service = services.get_service(namespace=self.namespace,
                                       name=job_name,
                                       labels=labels,
                                       ports=self.pod_manager.ports,
                                       target_ports=self.pod_manager.ports)
        service_resp, _ = self.create_or_update_service(name=job_name,
                                                        data=service)
        results['service'] = service_resp.to_dict()
    return results
def test_objects(self):
    """Exercise the lifecycle of RedisEphemeralTokens: empty, generated, checked."""
    # A bare token has a key and redis key, but no persisted state yet.
    empty_token = RedisEphemeralTokens()
    assert empty_token.key is not None
    assert empty_token.redis_key == RedisEphemeralTokens.KEY_EPHEMERAL_TOKENS.format(
        empty_token.key)
    assert empty_token.get_state() is None
    assert empty_token.salt is None
    assert empty_token.ttl is None
    assert empty_token.scope is None

    # A generated token carries state, salt, the configured TTL and a scope.
    generated = RedisEphemeralTokens.generate(
        scope=empty_token.get_scope(1, 'experiment', 1))
    assert generated.get_state() is not None
    assert generated.salt is not None
    assert generated.ttl == conf.get(TTL_EPHEMERAL_TOKEN)
    assert generated.scope == generated.get_scope(1, 'experiment', 1)

    # A wrong value fails the check and the token is deleted.
    assert generated.check_token('foo') is False
    assert generated.get_state() is None

    # A None value also fails the check and deletes the token.
    generated = RedisEphemeralTokens.generate(
        scope=generated.get_scope(1, 'experiment', 1))
    assert generated.check_token(None) is False
    assert generated.get_state() is None

    # The correct value passes the check, then the token is consumed.
    generated = RedisEphemeralTokens.generate(
        scope=generated.get_scope(1, 'experiment', 1))
    correct_value = RedisEphemeralTokens.make_token(generated)
    assert generated.check_token(correct_value) is True
    assert generated.get_state() is None
    assert generated.salt is None
    assert generated.ttl is None
    assert generated.scope is None