# NOTE: the enclosing decorator is reconstructed; the original excerpt showed
# only the inner wrapper. The decorator name `timeit` is an assumption.
def timeit(func):
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        rv = func(*args, **kwargs)
        end_time = time.perf_counter()
        run_time = end_time - start_time
        msg = f'The function {func.__name__} completed in {run_time:.4f} ' \
              f'secs.'
        log_event(event_name='Time Measurement',
                  message=msg,
                  functionName=func.__name__,
                  runTime=run_time)
        return rv
    return wrapper
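# Minimal usage sketch (not from the original source; the decorated function
# below is hypothetical): every call gets a 'Time Measurement' log event.
#
# @timeit
# def build_report(rows: int) -> None:
#     ...  # runtime of each call is logged via log_event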
def upload_dataframe_to_gcs(dataframe: Any,
                            bucket_name: str,
                            object_name: str,
                            metadata: Dict[str, Any] = None,
                            **kwargs) -> bool:
    """
    Upload a data frame to Google Cloud Storage as a csv file artifact.

    Args:
        dataframe (Any): The data frame to upload.
        bucket_name (str): The bucket that will contain the artifact.
        object_name (str): The name of the artifact.
        metadata (Dict[str, Any], optional): The metadata of the artifact.
            Defaults to None.
        **kwargs: Additional keyword arguments forwarded to the data
            frame's to_csv method.

    Returns:
        bool: True if the data frame was uploaded, False otherwise.
    """
    log_metadata = {
        'funcName': 'upload_dataframe_to_gcs',
        'eventGroup': 'Google Cloud Storage',
        'environment': Environments.INFRA
    }
    try:
        with tmpf.NamedTemporaryFile(mode='r+', suffix='.csv') as tf:
            dataframe.to_csv(tf, **kwargs)
            # Flush the write buffer so upload_artifact, which reopens the
            # file by name, sees the complete csv on disk.
            tf.flush()
            result = upload_artifact(
                bucket_name, object_name, tf.name, metadata)
            return result
    except (AttributeError, OSError) as e:
        log_event(event_name='File Creation Error',
                  message='Could not create the csv file.',
                  description=str(e),
                  severity=LogSeverities.ERROR,
                  **log_metadata)
        return False
    except Exception as ex:
        msg = 'Could not upload the data frame due to an unexpected error.'
        log_event(event_name='Artifact Uploading Error',
                  message=msg,
                  description=str(ex),
                  severity=LogSeverities.ERROR,
                  bucketName=bucket_name,
                  objectName=object_name,
                  **log_metadata)
        return False
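# Usage sketch (the DataFrame, bucket, and object names are hypothetical;
# extra kwargs such as index=False are forwarded to DataFrame.to_csv):
#
# import pandas as pd
#
# df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
# ok = upload_dataframe_to_gcs(df, 'my-app_reports', 'daily/report.csv',
#                              metadata={'source': 'etl'}, index=False)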
def create_bucket(
    bucket_name: str,
    app_name: str,
    storage_class: StorageClasses = StorageClasses.STANDARD,
) -> bool:
    """
    Create a new bucket in Google Cloud Storage.

    Args:
        bucket_name (str): The name of the bucket.
        app_name (str): The name of the application using this function.
            This name is used to create a unique name for the bucket.
        storage_class (str): The storage class of the bucket.
            Possible options are: STANDARD, NEARLINE, COLDLINE.
            Defaults to StorageClasses.STANDARD.

    Returns:
        bool: True if the bucket was created, False otherwise.
    """
    unique_name = app_name + '_' + bucket_name
    log_metadata = {
        'bucketName': unique_name,
        'funcName': 'create_bucket',
        'eventGroup': 'Google Cloud Storage',
        'environment': Environments.INFRA,
    }
    try:
        new_bucket = _gcs_client.bucket(unique_name)
        # storage_class is a property on google-cloud-storage buckets, so
        # it must be assigned, not called.
        new_bucket.storage_class = storage_class.name.upper()
        _gcs_client.create_bucket(new_bucket)
        log_event(event_name='Bucket Created',
                  message='A new bucket was created',
                  storageClass=storage_class.name.lower(),
                  **log_metadata)
        return True
    except Conflict as ce:
        log_event(event_name='Bucket Error',
                  message=str(ce),
                  severity=LogSeverities.ERROR,
                  **log_metadata)
        return False
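# Usage sketch (app and bucket names are hypothetical): creates a bucket
# named 'trainer_models' with the NEARLINE storage class.
#
# created = create_bucket('models', 'trainer',
#                         storage_class=StorageClasses.NEARLINE)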
def change_db(self, db_name: str):
    """Change the db instance and the current db name.

    Args:
        db_name (str): The requested db name.
    """
    # Capture the old name before switching; otherwise 'oldDB' would log
    # the new name.
    old_db_name = self._curr_db_name
    self._db = self._client.get_database(name=db_name)
    self._curr_db_name = db_name
    metadata = {
        'oldDB': old_db_name,
        'newDB': db_name,
        'className': 'MongoHandler',
        'funcName': 'change_db',
        'eventGroup': 'Mongo'
    }
    log_event(event_name='DB Changed',
              message='A user requested to change the db.',
              **metadata)
def create_collection(self, col_name: str, **options) -> bool:
    """Create a new collection if it does not already exist.

    Args:
        col_name (str): The name of the new collection.
        **options: Options for the collection creation. For more
            information, see [Collection Creation](https://docs.mongodb.com/manual/reference/method/db.createCollection/#db.createCollection).

    Returns:
        bool: True if the collection was created, else False.
    """
    metadata = {
        'collName': col_name,
        'className': 'MongoHandler',
        'funcName': 'create_collection',
        'eventGroup': 'Mongo'
    }
    try:
        self._db.create_collection(col_name, **options)
        log_event(event_name='Collection Created',
                  message='A new collection was created.',
                  **metadata)
        return True
    except CollectionInvalid as cie:
        log_event(event_name='Collection Error',
                  message=str(cie),
                  severity=LogSeverities.ERROR,
                  **metadata)
        return False
    except (ConnectionFailure, ServerSelectionTimeoutError) as err:
        msg = f'A connection error has occurred while trying to create '\
              f'a collection.\nError message: {err}'
        print(msg)
        return False
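# Usage sketch (`handler` is a hypothetical MongoHandler instance): the
# extra options are forwarded to PyMongo's create_collection, e.g. to make
# a capped collection.
#
# handler.create_collection('events', capped=True, size=1_000_000)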
def download_artifact(bucket_name: str, object_name: str, generation: int,
                      dest_dir: str, dest_file_name: str) -> bool:
    """
    Download an object from Google Cloud Storage and save it as a local
    file.

    Args:
        bucket_name (str): The bucket that contains the artifact.
        object_name (str): An 'object' is a placeholder for the file
            itself. In general, the object name is the path of the
            artifact inside GCS. It can be a directory-like name
            (e.g my/gcp/object) or a file-like name (e.g my_object).
        generation (int): The generation of the object. For more
            information see [object versioning](https://cloud.google.com/storage/docs/object-versioning).
        dest_dir (str): The local directory that will contain the
            artifact.
        dest_file_name (str): The artifact name on the local file system.

    Returns:
        bool: True if the artifact was downloaded, False otherwise.
    """
    dest_full_path = os.path.abspath(os.path.join(dest_dir, dest_file_name))
    server_ip = socket.gethostbyname(socket.gethostname())
    log_metadata = {
        'funcName': 'download_artifact',
        'eventGroup': 'Google Cloud Storage',
        'environment': Environments.INFRA,
    }
    try:
        bucket = _gcs_client.get_bucket(bucket_name)
        blob = bucket.get_blob(object_name=object_name,
                               generation=generation)
        if blob is None:
            log_event(event_name='Artifact Downloading Error',
                      message='The requested object does not exist.',
                      severity=LogSeverities.WARNING,
                      objectName=object_name,
                      **log_metadata)
            return False
        blob.download_to_filename(dest_full_path)
        log_event(event_name='Artifact Download',
                  message='Artifact downloading completed successfully.',
                  objectName=object_name,
                  bucketName=bucket_name,
                  objectGeneration=generation,
                  localFileLocation=dest_full_path,
                  localServerIP=server_ip,
                  **log_metadata)
        return True
    except NotFound as nfe:
        msg = 'Could not download the artifact.'
        log_event(event_name='Artifact Downloading Error',
                  message=msg,
                  description=str(nfe),
                  severity=LogSeverities.ERROR,
                  **log_metadata)
        # The original fell through here and implicitly returned None;
        # return False to honor the declared bool return type.
        return False
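# Usage sketch (bucket, object, and generation values are hypothetical):
#
# ok = download_artifact('my-app_models', 'runs/42/model.pkl',
#                        generation=1610000000000000,
#                        dest_dir='/tmp/models',
#                        dest_file_name='model.pkl')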
def delete_collection(self, col_name: str) -> bool:
    """Delete a collection if it exists.

    Args:
        col_name (str): The name of the collection.

    Returns:
        bool: True if the collection was deleted, else False.
    """
    metadata = {
        'collName': col_name,
        'className': 'MongoHandler',
        'funcName': 'delete_collection',
        'eventGroup': 'Mongo'
    }
    try:
        result = self._db.drop_collection(col_name)
        if 'errmsg' in result:
            log_event(event_name='Collection Error',
                      message=result['errmsg'],
                      severity=LogSeverities.WARNING,
                      **metadata)
            return False
        log_event(event_name='Collection Deleted',
                  message='Collection was deleted.',
                  **metadata)
        return True
    except (ConnectionFailure, ServerSelectionTimeoutError) as err:
        msg = f'A connection error has occurred while trying to delete '\
              f'the collection.\nError message: {err}'
        print(msg)
        return False
def get_collection(self, col_name: str) -> Collection:
    """Get a collection from the current db.

    Args:
        col_name (str): The collection name.

    Returns:
        Collection: The collection instance.
    """
    collection = self._db.get_collection(col_name)
    metadata = {
        'collName': col_name,
        'className': 'MongoHandler',
        'funcName': 'get_collection',
        'eventGroup': 'Mongo'
    }
    log_event(event_name='Collection Changed',
              message='A new collection was requested.',
              severity=LogSeverities.DEBUG,
              **metadata)
    return collection
def upload_artifact(bucket_name: str, object_name: str, file_path: str,
                    metadata: Dict[str, Any] = None) -> bool:
    """
    Upload an artifact to Google Cloud Storage under the given bucket.

    An "artifact" can be any type of file of any size. Each artifact can
    be saved with its own metadata.

    Args:
        bucket_name (str): The bucket that contains the artifact.
        object_name (str): An 'object' is a placeholder for the file
            itself. In general, the object name is the path of the
            artifact inside GCS. It can be a directory-like name
            (e.g my/gcp/object) or a file-like name (e.g my_object).
        file_path (str): The location of the file to upload in the file
            system.
        metadata (Dict[str, Any], optional): The metadata of the artifact.
            Defaults to None.

    Returns:
        bool: True if the file was uploaded, False otherwise.
    """
    log_metadata = {
        'funcName': 'upload_artifact',
        'eventGroup': 'Google Cloud Storage',
        'environment': Environments.INFRA,
    }
    try:
        bucket = _gcs_client.get_bucket(bucket_name)
        blob = bucket.blob(object_name)
        # metadata is a property on google-cloud-storage blobs, so it must
        # be assigned, not called.
        blob.metadata = metadata
        with open(file_path, 'rb') as f:
            blob.upload_from_file(f)
        log_event(event_name='Artifact Upload',
                  message='Artifact uploading completed successfully.',
                  bucketName=bucket_name,
                  objectName=object_name,
                  **log_metadata)
        return True
    except NotFound as nfe:
        msg = 'The requested bucket was not found.'
        log_event(event_name='Artifact Uploading Error',
                  message=msg,
                  description=str(nfe),
                  severity=LogSeverities.ERROR,
                  bucketName=bucket_name,
                  **log_metadata)
        return False
    except GoogleCloudError as gce:
        msg = 'An error occurred while trying to upload the file.'
        log_event(event_name='Artifact Uploading Error',
                  message=msg,
                  description=str(gce),
                  severity=LogSeverities.ERROR,
                  objectName=object_name,
                  **log_metadata)
        return False
    except FileNotFoundError as fnfe:
        log_event(event_name='Artifact Uploading Error',
                  message=str(fnfe),
                  severity=LogSeverities.ERROR,
                  filePath=file_path,
                  **log_metadata)
        return False
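# Usage sketch (bucket, object, and local path are hypothetical): uploads
# a local file and attaches custom metadata to the resulting object.
#
# ok = upload_artifact('my-app_models', 'runs/42/model.pkl',
#                      '/tmp/model.pkl', metadata={'runId': '42'})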
def download_artifacts_bunch(bucket_name: str, local_directory_path: str,
                             data_cloud_path: str = None,
                             activate_recursive_download: bool = False,
                             activate_parallel_download: bool = False
                             ) -> bool:
    """
    Download a bunch of artifacts from Google Cloud Storage.

    Args:
        bucket_name (str): The bucket that stores the artifacts.
        local_directory_path (str): The local directory that will contain
            the downloaded artifacts. If it does not exist, it is created
            automatically.
        data_cloud_path (str, optional): A path to a subdirectory of the
            bucket or a wildcard path for a group of files. For example:

            * folder/to/download
            * *.txt

            If not supplied, all files under the requested bucket will be
            downloaded. Defaults to None.
        activate_recursive_download (bool, optional): True for downloading
            the artifacts of subfolders of the specified cloud path or
            bucket. Defaults to False.
        activate_parallel_download (bool, optional): True for using
            multithreaded download. Use this only if there is a large
            amount of artifacts to download. Defaults to False.

    Returns:
        bool: True if the artifacts were downloaded, False otherwise.
    """
    log_metadata = {
        'funcName': 'download_artifacts_bunch',
        'eventGroup': 'Google Cloud Storage',
        'environment': Environments.INFRA,
    }
    server_ip = socket.gethostbyname(socket.gethostname())
    if not os.path.exists(local_directory_path):
        try:
            os.mkdir(local_directory_path)
        except OSError as ose:
            log_event(event_name='Directory Creation Error',
                      message='Could not create the destination directory',
                      description=str(ose),
                      severity=LogSeverities.ERROR,
                      localDirectoryPath=local_directory_path,
                      **log_metadata)
            return False
    url = f'gs://{bucket_name}'
    if data_cloud_path:
        url = url + f'/{data_cloud_path}'
    download_command = ['gsutil', 'cp', url, local_directory_path]
    if activate_parallel_download:
        # -m is a global gsutil option, so it goes right after 'gsutil'
        # and before the 'cp' command.
        index = download_command.index('gsutil')
        download_command.insert(index + 1, '-m')
    if activate_recursive_download:
        index = download_command.index('cp')
        download_command.insert(index + 1, '-r')
    log_metadata.update({
        'bucketName': bucket_name,
        'dataCloudLocation': data_cloud_path,
        'isRecursiveDownload': activate_recursive_download,
        'isParallelDownload': activate_parallel_download
    })
    try:
        # check=True raises CalledProcessError on a non-zero exit code;
        # without it, a failed gsutil run would still be logged as a
        # success.
        subprocess.run(download_command, check=True)
        log_event(event_name='Artifacts Bunch Download',
                  message='Artifacts downloading completed successfully.',
                  localDirectoryPath=local_directory_path,
                  localServerIP=server_ip,
                  **log_metadata)
        return True
    except Exception as e:
        msg = 'An unexpected error occurred while trying to download the ' \
              'artifacts.'
        log_event(event_name='Artifacts Downloading Error',
                  message=msg,
                  description=str(e),
                  severity=LogSeverities.ERROR,
                  **log_metadata)
        return False
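# Usage sketch (bucket and paths are hypothetical): downloads every .txt
# object in the bucket in parallel via `gsutil -m cp`.
#
# ok = download_artifacts_bunch('my-app_data', '/tmp/data',
#                               data_cloud_path='*.txt',
#                               activate_parallel_download=True)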
def update_collection_schema(self, col_name: str, schema: Dict[str, Any],
                             validation_level: str = 'strict',
                             validation_action: str = 'error') -> bool:
    """Apply a validation schema to a specified collection.

    For more information, see [Schema Validation](https://docs.mongodb.com/manual/core/schema-validation/).

    Args:
        col_name (str): The collection name.
        schema (Dict[str, Any]): Specifies validation rules or expressions
            for the collection.
        validation_level (str): Determines how strictly MongoDB applies
            the validation rules to existing documents during an update.
            Defaults to 'strict'.
        validation_action (str): Determines whether to error on invalid
            documents or just warn about the violations but allow invalid
            documents to be inserted. Defaults to 'error'.

    Returns:
        bool: True for success, False otherwise.
    """
    metadata = {
        'collName': col_name,
        'className': 'MongoHandler',
        'funcName': 'update_collection_schema',
        'validator': schema,
        'validationLevel': validation_level,
        'validationAction': validation_action,
        'eventGroup': 'Mongo'
    }
    try:
        result = self._db.command('collMod', col_name,
                                  validator=schema,
                                  validationLevel=validation_level,
                                  validationAction=validation_action)
        if 'errmsg' in result:
            log_event(event_name='Collection Error',
                      message=result['errmsg'],
                      severity=LogSeverities.WARNING,
                      **metadata)
            return False
        log_event(event_name='Collection Schema Updated',
                  message='The new schema was applied.',
                  **metadata)
        return True
    except (ConnectionFailure, ServerSelectionTimeoutError) as err:
        msg = f'A connection error has occurred while trying to update '\
              f'the collection schema.\nError message: {err}'
        print(msg)
        return False
    except OperationFailure as ope:
        msg = 'The operation has failed, the schema was not updated.'
        log_event(event_name='Collection Error',
                  message=msg,
                  description=str(ope),
                  severity=LogSeverities.ERROR,
                  **metadata)
        return False
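# Usage sketch (`handler` and the collection name are hypothetical): applies
# a standard MongoDB $jsonSchema validator requiring a string 'name' field.
#
# schema = {
#     '$jsonSchema': {
#         'bsonType': 'object',
#         'required': ['name'],
#         'properties': {'name': {'bsonType': 'string'}}
#     }
# }
# ok = handler.update_collection_schema('users', schema)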