def list_operations(status=None, previous_search_token=None, limit=None):  # noqa: E501
    """lists operations

    see all operations for the user # noqa: E501

    :param status: filter by status
    :type status: dict | bytes
    :param previous_search_token: continue previous search (pagination chunks)
    :type previous_search_token: dict | bytes
    :param limit: maximum number of records to return per page
    :type limit: int

    :rtype: OperationSearchResult
    """
    try:
        return workflow_service.list_operations(
            status, continue_token=previous_search_token, limit=limit)
    except BadParam as e:
        return (f'Bad parameter: {e.param}, {e}', e.status)
    except OperationException as e:
        log.error('Unhandled remote exception while retrieving workflows',
                  exc_info=e)
        return '', e.status
def create_topic(self):
    """ Connects to cloudharness Events and creates a new topic

    Return:
        The result of the topic creation; True if the topic already exists.
    """
    ## Connect to kafka
    admin_client = KafkaAdminClient(
        bootstrap_servers=self._get_bootstrap_servers(),
        client_id=self._get_client_id())

    ## Create topic
    new_topic = NewTopic(name=self.topic_id, num_partitions=1,
                         replication_factor=1)
    try:
        result = admin_client.create_topics(new_topics=[new_topic],
                                            validate_only=False)
        log.info(f"Created new topic {self.topic_id}")
        return result
    except TopicAlreadyExistsError:
        # topic already exists, "no worries", proceed
        return True
    except Exception as e:
        log.error(f"Error creating the new Topics --> {e}", exc_info=True)
        raise EventGeneralException from e
def produce(self, message: dict):
    ''' Write a message to the current topic

        Params:
            message: dict with message to be published.
        Return:
            The future returned by producer.send if the message was queued
            correctly; raises an event exception otherwise.
    '''
    producer = KafkaProducer(
        bootstrap_servers=self._get_bootstrap_servers(),
        value_serializer=lambda x: dumps(x).encode('utf-8'))
    try:
        return producer.send(self.topic_id, value=message)
    except KafkaTimeoutError:
        try:
            # it could be that the topic wasn't created yet;
            # let's try to create it and resend the message
            self.create_topic()
            return producer.send(self.topic_id, value=message)
        except KafkaTimeoutError as e:
            log.error("Not able to fetch topic metadata", exc_info=True)
            raise EventTopicProduceException from e
    except Exception as e:
        log.error(f"Error producing to topic {self.topic_id} --> {e}",
                  exc_info=True)
        raise EventGeneralException from e
    finally:
        producer.close()
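# --- Illustrative usage sketch (not part of the library source). It assumes an
# EventClient class exposing the create_topic/produce methods above and taking
# a topic_id constructor argument, as suggested by the send_event snippet
# further below; the topic name follows the workflows.cdc.jobs example used there.
client = EventClient(topic_id="workflows.cdc.jobs")
client.create_topic()  # idempotent: TopicAlreadyExistsError is swallowed above
future = client.produce({"operation": "create", "uid": "job-1"})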
def keycloak_user_id():
    try:
        return get_auth_client().get_current_user().get("id", None)
    except Exception:
        log.error("Auth client error: cannot retrieve the current user",
                  exc_info=True)
        return None
def handle_exception(e: Exception):
    data = {"description": str(e), "type": type(e).__name__}
    try:
        if not get_current_configuration().is_sentry_enabled():
            data['trace'] = traceback.format_exc()
    except:
        logging.error("Error checking sentry configuration", exc_info=True)
        data['trace'] = traceback.format_exc()
    logging.error(str(e), exc_info=True)
    return json.dumps(data), 500
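# --- Minimal registration sketch (an assumption, not the project's confirmed
# wiring): with a plain Flask app, handle_exception can be installed as a
# catch-all error handler so any unhandled exception returns the JSON payload
# above with a 500 status.
from flask import Flask

app = Flask(__name__)
app.register_error_handler(Exception, handle_exception)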
def consume_all(self, group_id='default') -> list:
    ''' Return a list of messages published in the topic '''
    consumer = self._get_consumer(group_id)
    try:
        # poll returns a dict of {TopicPartition: [records]}; flatten it so
        # messages from every partition are returned
        return [record.value
                for records in consumer.poll(10000).values()
                for record in records]
    except Exception as e:
        log.error(
            f"Error trying to consume all from topic {self.topic_id} --> {e}",
            exc_info=True)
        raise EventTopicConsumeException from e
    finally:
        consumer.close()
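# --- Illustrative consumer-side sketch (assumes the same EventClient class as
# above; the topic and group id are example values only):
client = EventClient(topic_id="workflows.cdc.jobs")
for payload in client.consume_all(group_id="example-group"):
    log.info("Consumed message: %s", payload)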
def execute(self, timeout=None):
    self.persisted = self.submit()
    start_time = time.time()
    while not self.persisted.is_finished():
        time.sleep(POLLING_WAIT_SECONDS)
        log.debug(f"Polling argo workflow {self.persisted.name}")
        self.persisted = argo.get_workflow(self.persisted.name)
        log.debug(
            f"Polling succeeded for {self.persisted.name}. Current phase: {self.persisted.status}"
        )
        if timeout and time.time() - start_time > timeout:
            log.error("Timeout exceeded while polling for results")
            return self.persisted
    return self.persisted
def start_handlers(self):
    """ Start consuming incoming messages """
    self._init_handlers()

    # use a sleep loop so we don't hog the CPU
    nap_time = 30
    try:
        while True:
            time.sleep(nap_time)  # sleep xx seconds
            log.debug("Running...")
    except Exception:
        log.error(
            'Notification Controller threw an error, stopping handlers.',
            exc_info=True)
    finally:
        self.stop_handlers()
def wrapper(self, *args, **kwargs):
    result = func(self, *args, **kwargs)
    if isinstance(result, tuple):
        obj = result[0]
    else:
        obj = result
    try:
        EventClient.send_event(message_type=message_type,
                               operation=operation,
                               func_name=func,
                               func_args=args,
                               func_kwargs=kwargs,
                               uid=uid,
                               obj=obj)
    except Exception:
        logger.error('send_event error.', exc_info=True)
    return result
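# --- Hypothetical sketch of the enclosing decorator factory (the name
# send_event_decorator and its exact signature are assumptions; this only
# illustrates how the inner `wrapper` above would typically be produced and applied).
import functools


def send_event_decorator(message_type, operation, uid="id"):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            # ... same body as the `wrapper` function above ...
            return func(self, *args, **kwargs)
        return wrapper
    return decorator

# Illustrative application to a CRUD method:
# @send_event_decorator(message_type="jobs", operation="create")
# def create_job(self, job): ...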
def delete_operation(name):  # noqa: E501
    """deletes operation by name

    delete operation by its name # noqa: E501

    :param name:
    :type name: str

    :rtype: None
    """
    try:
        workflow_service.delete_operation(name)
    except OperationNotFound:
        return (f'{name} not found', 404)
    except OperationException as e:
        log.error(f'Unhandled remote exception while deleting workflow {name}',
                  exc_info=e)
        return 'Unexpected error', e.status
def get_operation(name):  # noqa: E501
    """get operation by name

    retrieves an operation by its name # noqa: E501

    :param name:
    :type name: str

    :rtype: List[Operation]
    """
    try:
        return workflow_service.get_operation(name)
    except OperationNotFound:
        return (f'{name} not found', 404)
    except OperationException as e:
        log.error(
            f'Unhandled remote exception while retrieving workflow {name}',
            exc_info=e)
        return 'Unexpected error', e.status
def delete_topic(self) -> bool:
    log.debug("Deleting topic " + self.topic_id)
    ## Connect to kafka
    admin_client = KafkaAdminClient(
        bootstrap_servers=self._get_bootstrap_servers(),
        client_id=self._get_client_id())
    ## Delete topic
    try:
        admin_client.delete_topics([self.topic_id])
        return True
    except UnknownTopicOrPartitionError as e:
        log.error(f"Topic {self.topic_id} does not exist.")
        raise EventTopicDeleteException from e
    except Exception as e:
        log.error(f"Error deleting the Topic {self.topic_id} --> {e}",
                  exc_info=True)
        raise EventGeneralException from e
def submit_sync():  # noqa: E501
    """Send a synchronous operation

    # noqa: E501

    :rtype: str
    """
    task = tasks.CustomTask(
        'download-file', 'workflows-extract-download',
        url='https://github.com/MetaCell/cloud-harness/blob/master/README.md')
    op = operations.DistributedSyncOperation('test-sync-op-', task)
    try:
        workflow = op.execute()
        return workflow.raw.to_dict()
    except Exception as e:
        log.error('Error submitting sync operation', exc_info=True)
        return 'Error submitting operation: %s' % e, 500
def get_user(userid: str) -> User:
    try:
        client = AuthClient()
        kc_user = client.get_user(userid)
    except KeycloakGetError as e:
        if e.response_code == 404:
            raise UserNotFound(userid)
        raise Exception("Unhandled Keycloak exception") from e
    except KeycloakError as e:
        raise Exception("Unhandled Keycloak exception") from e

    user = map_user(kc_user)
    try:
        current_user = client.get_current_user()
        if not current_user or current_user['id'] != userid:
            user.email = None
    except:  # user not provided
        log.error("Error checking user", exc_info=True)
        user.email = None
    return user
def send(operation, context):
    notification_app = apps.get_configuration('notifications')
    notification = notification_app["notification"]["operations"][operation]

    for c in notification["channels"]:
        channel = notification_app["notification"]["channels"][c]
        for b in channel["backends"]:
            if b == "email":
                channel_backend = NotificationEmailBackend
            elif b == "console":
                channel_backend = NotificationConsoleBackend
            try:
                if channel["adapter"].lower() == "email":
                    NotificationEmailAdapter(
                        notification=notification,
                        channel=channel,
                        backend=channel_backend).send(context=context)
                else:
                    raise NotImplementedError
            except Exception:
                logger.error('Sending notification error.', exc_info=True)
def _consume_task(self, app=None, group_id=None, handler=None):
    log.info(
        f'Kafka consumer thread started, listening for messages in queue: {self.topic_id}'
    )
    while True:
        try:
            self.consumer = self._get_consumer(group_id)
            for message in self.consumer:
                try:
                    handler(event_client=self, app=app, message=message.value)
                except Exception as e:
                    log.error(
                        f"Error during execution of the consumer Topic {self.topic_id} --> {e}",
                        exc_info=True)
            self.consumer.close()
        except Exception as e:
            log.error(
                f"Error during execution of the consumer Topic {self.topic_id} --> {e}",
                exc_info=True)
            time.sleep(15)
class Config(object):
    DEBUG = False
    TESTING = False
    CSRF_ENABLED = True
    SQLALCHEMY_TRACK_MODIFICATIONS = False
    SECRET_KEY = 'this-really-needs-to-be-changed'
    SENTRY_POSTGRES_APP = None
    SENTRY_APP = None
    try:
        SENTRY_POSTGRES_APP = conf.get_application_by_filter(
            name='sentry')[0].postgres
        SENTRY_APP = conf.get_application_by_filter(name='sentry')[0].name
        SQLALCHEMY_DATABASE_URI = (
            f'postgresql+psycopg2://{SENTRY_POSTGRES_APP.user}:{SENTRY_POSTGRES_APP.password}'
            f'@{SENTRY_POSTGRES_APP.name}:{SENTRY_POSTGRES_APP.port}/{SENTRY_POSTGRES_APP.initialdb}'
        )
    except:
        log.error("Cannot configure SENTRY")
"""Setup for the repository.""" import os from cloudharness import log from flask_sqlalchemy import SQLAlchemy from open_alchemy import init_yaml from .config import Config # Construct models try: db = SQLAlchemy() SPEC_FILE = os.path.join(Config.OPENAPI_DIR, Config.OPENAPI_FILE) MODELS_FILENAME = os.path.join(Config.BASE_DIR, "repository", "models.py") init_yaml(SPEC_FILE, base=db.Model, models_filename=MODELS_FILENAME) except: log.error("An error occurred while initializing the database", exc_info=True) "" def setup_db(app): global db db.init_app(app) db.create_all() from .repository.fixtures import create_fixtures create_fixtures(app) return db
import uuid

from cloudharness import log as logger

import workspaces.repository as repos
import workspaces.service.events as events
from workspaces.service.model_service import WorkspaceService

try:
    from cloudharness.workflows import operations, tasks
    from cloudharness.workflows.argo import get_workflows
except Exception:
    logger.error(
        "Cannot start the workflows module. This is probably related to a problem with the kubectl configuration",
        exc_info=True)

ttl_strategy: dict = {
    'secondsAfterCompletion': 60 * 60,
    'secondsAfterSuccess': 60 * 20,
    'secondsAfterFailure': 60 * 60 * 24 * 7  # one week
}


def delete_resource(workspace_resource, pvc_name, resource_path: str):
    logger.info(
        f"Delete workspace resource with id: {workspace_resource.id}, path: {resource_path}"
    )
    shared_directory = f"{pvc_name}:/project_download"
    delete_task = tasks.CommandBasedTask(
        name="osb-delete-resource",
log.info("Topic name is: " + topic_name) assert len(sys.argv) > 1, 'Specify read path' shared_directory = sys.argv[1] log.info("Sending content of directory `{}` to event queue topic `{}`".format( shared_directory, topic_name)) assert os.path.exists(shared_directory), shared_directory + " does not exist." for file_path in glob.glob(f"{shared_directory}/*"): log.info("File `{}`".format(file_path)) size = os.path.getsize(file_path) if size > MAX_FILE_SIZE: log.warning( f"{file_path} size is {size}, which is greater than the maximum of {MAX_FILE_SIZE}." "The content will not be sent to the queue") notify_queue(topic_name, {file_path: "Error: size exceeded"}) log.info("Sending content for file `{}`".format(file_path)) try: with open(file_path) as f: content = f.read() except Exception as e: log.error("Error reading file " + file_path + " " + str(e)) continue notify_queue(topic_name, {os.path.basename(file_path): content})
import os

import jwt
import json
import requests

from keycloak import KeycloakAdmin
from keycloak.exceptions import KeycloakAuthenticationError

from cloudharness import log
from cloudharness.middleware import get_authentication_token

try:
    from cloudharness.utils.config import CloudharnessConfig as conf, ALLVALUES_PATH
    from cloudharness.applications import get_configuration
except:
    log.error(
        "Error on cloudharness configuration. Check that the values file %s is available in your deployment.",
        ALLVALUES_PATH, exc_info=True)


class AuthSecretNotFound(Exception):
    def __init__(self, secret_name):
        Exception.__init__(self, f"Secret {secret_name} not found.")


def get_api_password() -> str:
    name = "api_user_password"
    AUTH_SECRET_PATH = os.environ.get("AUTH_SECRET_PATH",
                                      "/opt/cloudharness/resources/auth")
    try:
        with open(os.path.join(AUTH_SECRET_PATH, name)) as fh:
            return fh.read()
    except:
        # if no secrets folder or file exists
def send_event(message_type, operation, obj, uid="id",
               func_name=None, func_args=None, func_kwargs=None,
               topic_id=None):
    """
    Send a CDC (change data capture) event into a topic
    The topic name is generated from the current app and message type
    e.g. workflows.cdc.jobs

    Params:
        message_type: the type of the message (relates to the object type), e.g. jobs
        operation: the operation on the object, e.g. create / update / delete
        obj: the object itself
        uid: the unique identifier attribute of the object
        func_name: the caller function name, defaults to None
        func_args: the caller function "args", defaults to None
        func_kwargs: the caller function "kwargs", defaults to None
        topic_id: the topic_id to use, generated when None, defaults to None
    """
    if not topic_id:
        topic_id = EventClient.gen_topic_id(topic_type="cdc",
                                            message_type=message_type)
    ec = EventClient(topic_id=topic_id)
    try:
        if not isinstance(obj, dict):
            if hasattr(obj, "to_dict"):
                resource = obj.to_dict()
            else:
                resource = vars(obj)
        else:
            # the object is already a plain dict
            resource = obj
        resource_id = resource.get(uid)

        try:
            # try to get the current user
            user = get_auth_client().get_current_user()
        except KeycloakGetError:
            user = {}

        # serialize only the func args that can be serialized
        fargs = []
        for a in func_args or ():
            try:
                fargs.append(loads(dumps(a)))
            except Exception:
                # argument can't be serialized
                pass

        # serialize only the func kwargs that can be serialized
        fkwargs = []
        for kwa, kwa_val in (func_kwargs or {}).items():
            try:
                fkwargs.append({kwa: loads(dumps(kwa_val))})
            except Exception:
                # keyword argument can't be serialized
                pass

        # send the message
        ec.produce({
            "meta": {
                "app_name": CURRENT_APP_NAME,
                "user": user,
                "func": str(func_name),
                "args": fargs,
                "kwargs": fkwargs,
                "description": f"{message_type} - {resource_id}",
            },
            "message_type": message_type,
            "operation": operation,
            "uid": resource_id,
            "resource": resource
        })
        log.info(
            f"sent cdc event {message_type} - {operation} - {resource_id}")
    except Exception:
        log.error('send_event error.', exc_info=True)
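# --- Illustrative call sketch (the job dict is invented for the example; the
# message_type / operation values follow the docstring above). As in the
# `wrapper` snippet earlier, the function is invoked via EventClient.send_event;
# with a plain dict the resource is forwarded as-is and "id" is used as the uid.
job = {"id": "42", "name": "example-job", "status": "created"}
EventClient.send_event(message_type="jobs", operation="create", obj=job)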
def change_pod_manifest(self: KubeSpawner):
    """
    Application Hook to change the manifest of the notebook image
    before spawning it.

    Args:
        self (KubeSpawner): the spawner

    Returns:
        -
    """

    # get the workspace cookie to determine the workspace id
    def get_from_cookie(cookie_name):
        cookie = self.handler.request.cookies.get(cookie_name, None)
        if cookie is None:
            raise Exception(
                "Required cookie not found. Check that the cookie named '%s' is set." % cookie_name)
        return cookie.value

    try:
        workspace_id = get_from_cookie('workspaceId')
        volume_name = f'workspace-{workspace_id}'
        log.info('Mapping to volume %s', volume_name)

        # We found the workspace id and now we try to mount the
        # workspace persistent volume claim
        ws_pvc = {
            'name': volume_name,
            'persistentVolumeClaim': {
                'claimName': volume_name,
                'spec': {
                    'accessModes': ['ReadWriteOnce', 'ReadOnlyMany']
                }
            }
        }

        # add the volume to the pod
        if not [v for v in self.volumes if v['name'] == volume_name]:
            self.volumes.append(ws_pvc)

        workspace_owner = get_from_cookie('workspaceOwner')

        # Add labels to use for affinity
        labels = {'workspace': str(workspace_id), 'user': self.user.name}

        self.common_labels = labels
        self.extra_labels = labels

        self.pod_affinity_required.append(affinity_spec('user', self.user.name))
        write_access = has_user_write_access(workspace_id, self.user,
                                             workspace_owner)
        if write_access:
            # Pods with write access must be on the same node
            self.pod_affinity_required.append(
                affinity_spec('workspace', workspace_id))

        if not [v for v in self.volume_mounts if v['name'] == volume_name]:
            self.volume_mounts.append({
                'name': volume_name,
                'mountPath': '/opt/workspace',
                'readOnly': not write_access
            })
    except Exception as e:
        log.error('Change pod manifest failed due to an error.', exc_info=True)