def wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        rv = func(*args, **kwargs)
        end_time = time.perf_counter()
        run_time = end_time - start_time
        msg = f'The function {func.__name__} completed in {run_time:.4f} secs.'
        log_event(event_name='Time Measurement',
                  message=msg,
                  functionName=func.__name__,
                  runTime=run_time)

        return rv
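For context, this wrapper is the inner function of a timing decorator. A minimal sketch of the enclosing decorator, assuming the name measure_time and the usual functools.wraps pattern (neither appears in the original, and log_event is the logging helper used throughout these examples):

import functools
import time

def measure_time(func):  # hypothetical name for the enclosing decorator
    @functools.wraps(func)  # preserve func.__name__ so the logged name stays accurate
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        rv = func(*args, **kwargs)
        run_time = time.perf_counter() - start_time
        log_event(event_name='Time Measurement',
                  message=f'The function {func.__name__} completed in {run_time:.4f} secs.',
                  functionName=func.__name__,
                  runTime=run_time)
        return rv
    return wrapper

@measure_time
def slow_add(a, b):  # illustrative target function
    time.sleep(0.1)
    return a + b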
Example #2
def upload_dataframe_to_gcs(dataframe: Any,
                            bucket_name: str,
                            object_name: str,
                            metadata: Dict[str, Any] = None,
                            **kwargs) -> bool:
    """
    Upload a data frame to Google Cloud Storage as a CSV file artifact.

    Args:
        dataframe (Any): The data frame to upload.
        bucket_name (str): the bucket that will contain the artifact.
        object_name (str): The name of the artifact.
        metadata (Dict[str, Any], optional): The metadata of the artifact.
        Defaults to None.

    Returns:
        bool: True if the dataframe was uploaded, false otherwise.
    """
    log_metadata = {
        'funcName': 'upload_dataframe_to_gcs',
        'eventGroup': 'Google Cloud Storage',
        'environment': Environments.INFRA
    }

    try:
        with tmpf.NamedTemporaryFile(mode='r+', suffix='.csv') as tf:
            dataframe.to_csv(tf, **kwargs)
            tf.flush()  # flush buffered CSV data so upload_artifact reads a complete file from tf.name
            result = upload_artifact(
                bucket_name, object_name, tf.name, metadata)

        return result
    except (AttributeError, OSError) as e:
        log_event(event_name='File Creation Error',
                  message='Could not create the csv file.',
                  description=str(e),
                  severity=LogSeverities.ERROR,
                  **log_metadata)

        return False
    except Exception as ex:
        msg = 'Could not upload the data frame due to an unexpected error.'
        log_event(event_name='Artifact Uploading Error',
                  message=msg,
                  description=str(ex),
                  severity=LogSeverities.ERROR,
                  bucketName=bucket_name,
                  objectName=object_name,
                  **log_metadata)

        return False
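A possible call site, assuming a pandas DataFrame; the bucket and object names are placeholders, and extra keyword arguments are forwarded to DataFrame.to_csv:

import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
uploaded = upload_dataframe_to_gcs(df,
                                   bucket_name='my-app_reports',    # placeholder
                                   object_name='daily/report.csv',  # placeholder
                                   metadata={'source': 'example'},
                                   index=False)                     # forwarded to to_csv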
Example #3
def create_bucket(
    bucket_name: str,
    app_name: str,
    storage_class: StorageClasses = StorageClasses.STANDARD,
) -> bool:
    """
     Create a new bucket in Google Cloud Storage.

     Args:
         bucket_name (str): The name of the bucket.
         app_name (str): The name of the application using this function.
         This name is used to create a unique name for the bucket.
         storage_class (str): The storage class of the bucket.
         Possible options are: STANDARD, NEARLINE, COLDLINE.
         Defaults to StorageClasses.STANDARD.

     Returns:
         bool: True if the bucket was created, false otherwise.
     """
    unique_name = app_name + '_' + bucket_name
    log_metadata = {
        'bucketName': unique_name,
        'funcName': 'create_bucket',
        'eventGroup': 'Google Cloud Storage',
        'environment': Environments.INFRA,
    }

    try:
        new_bucket = _gcs_client.bucket(unique_name)
        new_bucket.storage_class = storage_class.name.upper()  # storage_class is a property, not a method
        _gcs_client.create_bucket(new_bucket)
        log_event(event_name='Bucket Created',
                  message='A new bucket was created',
                  storageClass=storage_class.name.lower(),
                  **log_metadata)

        return True
    except Conflict as ce:
        log_event(event_name='Bucket Error',
                  message=str(ce),
                  severity=LogSeverities.ERROR,
                  **log_metadata)

        return False
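A hedged usage sketch; the app and bucket names are placeholders. Because the function prefixes the bucket name with the app name, the bucket here is created as 'analytics_raw-data':

created = create_bucket(bucket_name='raw-data',    # placeholder
                        app_name='analytics',      # placeholder
                        storage_class=StorageClasses.NEARLINE)
if not created:
    # A Conflict (bucket already exists) is logged rather than raised.
    pass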
    def change_db(self, db_name: str):
        """Change the db instance and the current db name.

        Args:
            db_name (str): The requested db name.
        """
        old_db_name = self._curr_db_name  # capture the previous name before it is overwritten
        self._db = self._client.get_database(name=db_name)
        self._curr_db_name = db_name
        metadata = {
            'oldDB': old_db_name,
            'newDB': db_name,
            'className': 'MongoHandler',
            'funcName': 'change_db',
            'eventGroup': 'Mongo'
        }

        log_event(event_name='DB Changed',
                  message='A user requested to change the db.',
                  **metadata)
    def create_collection(self, col_name: str, **options) -> bool:
        """Create a new collection if not already exists.

        Args:
            col_name (str): The name of the new collection
            **options: Options for the collection creation.
            For more information, see [Collection Creation](https://docs.mongodb.com/manual/reference/method/db.createCollection/#db.createCollection).

        Returns:
           bool: True if the collection was created else False.
        """
        metadata = {
            'collName': col_name,
            'className': 'MongoHandler',
            'funcName': 'create_collection',
            'eventGroup': 'Mongo'
        }

        try:
            self._db.create_collection(col_name, **options)

            log_event(event_name='Collection Created',
                      message='A new collection was created.',
                      **metadata)

            return True
        except CollectionInvalid as cie:
            log_event(event_name='Collection Error',
                      message=str(cie),
                      severity=LogSeverities.ERROR,
                      **metadata)

            return False
        except (ConnectionFailure, ServerSelectionTimeoutError) as err:
            msg = f'A connection error has occurred while trying to create '\
                f'a collection.\nError message: {err}'
            print(msg)

            return False
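A possible call, assuming an already-constructed MongoHandler instance (its constructor is not shown in these examples); extra options are passed straight through to PyMongo's create_collection:

ok = handler.create_collection('events',      # 'handler' and 'events' are placeholders
                               capped=True,   # example option: a capped collection
                               size=1048576)  # 1 MiB cap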
Example #6
def download_artifact(bucket_name: str, object_name: str, generation: int,
                      dest_dir: str, dest_file_name: str) -> bool:
    """
    Download an object from Google Cloud Storage and save it as a local file.

    Args:
        bucket_name (str): The bucket that contains the artifact.
        object_name (str): An 'object' is a placeholder for the file itself.
        In general the object name is the path of the artifact inside GCS.
        It can be a directory-like name (e.g. my/gcp/object) or a file-like
        name (e.g. my_object).
        generation (int): The generation of the object.
        For more information see [object versioning](https://cloud.google.com/storage/docs/object-versioning).
        dest_dir (str): The local directory that will contain the artifact.
        dest_file_name (str): The artifact name on the local file system.

    Returns:
        bool: True if the artifact was downloaded, false otherwise.
    """
    dest_full_path = os.path.abspath(os.path.join(dest_dir, dest_file_name))
    server_ip = socket.gethostbyname(socket.gethostname())
    log_metadata = {
        'funcName': 'download_artifact',
        'eventGroup': 'Google Cloud Storage',
        'environment': Environments.INFRA,
    }

    try:
        bucket = _gcs_client.get_bucket(bucket_name)
        blob = bucket.get_blob(object_name=object_name, generation=generation)

        if blob is None:
            log_event(event_name='Artifact Downloading Error',
                      message='The requested object does not exist.',
                      severity=LogSeverities.WARNING,
                      objectName=object_name,
                      **log_metadata)

            return False

        blob.download_to_filename(dest_full_path)
        log_event(event_name='Artifact Download',
                  message='Artifact downloading completed successfully.',
                  objectName=object_name,
                  bucketName=bucket_name,
                  objectGeneration=generation,
                  localFileLocation=dest_full_path,
                  localServerIP=server_ip,
                  **log_metadata)

        return True
    except NotFound as nfe:
        msg = 'Could not download the artifact.'
        log_event(event_name='Artifact Downloading Error',
                  message=msg,
                  description=str(nfe),
                  severity=LogSeverities.ERROR,
                  **log_metadata)

        return False
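A hedged usage sketch; the bucket, object, and generation values are placeholders (a real generation number would normally come from listing object versions or from an earlier upload):

ok = download_artifact(bucket_name='analytics_raw-data',  # placeholder
                       object_name='daily/report.csv',    # placeholder
                       generation=1612345678901234,       # placeholder generation
                       dest_dir='/tmp/artifacts',
                       dest_file_name='report.csv')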
    def delete_collection(self, col_name: str) -> bool:
        """Deletes a collection if exists.

        Args:
            col_name (str): The name of the collection.

        Returns:
            bool: True if the collection was deleted else False.
        """
        metadata = {
            'collName': col_name,
            'className': 'MongoHandler',
            'funcName': 'delete_collection',
            'eventGroup': 'Mongo'
        }

        try:
            result = self._db.drop_collection(col_name)

            if 'errmsg' in result:
                log_event(event_name='Collection Error',
                          message=result['errmsg'],
                          severity=LogSeverities.WARNING,
                          **metadata)

                return False

            log_event(event_name='Collection Deleted',
                      message='Collection was deleted.',
                      **metadata)

            return True
        except (ConnectionFailure, ServerSelectionTimeoutError) as err:
            msg = f'A connection error has occurred while trying to delete '\
                f'the collection.\nError message: {err}'
            print(msg)

            return False
    def get_collection(self, col_name: str) -> Collection:
        """Get a collection from the current db.

        Args:
            col_name (str): The collection name.

        Returns:
            Collection: The collection instance.
        """
        collection = self._db.get_collection(col_name)
        metadata = {
            'collName': col_name,
            'className': 'MongoHandler',
            'funcName': 'get_collection',
            'eventGroup': 'Mongo'
        }

        log_event(event_name='Collection Changed',
                  message='A new collection was requested.',
                  severity=LogSeverities.DEBUG,
                  **metadata)

        return collection
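The returned pymongo Collection can then be used directly; a small illustrative sketch (handler, collection name, and documents are placeholders):

events = handler.get_collection('events')
events.insert_one({'type': 'login', 'user': 'alice'})  # illustrative document
doc = events.find_one({'type': 'login'})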
Example #9
def upload_artifact(bucket_name: str,
                    object_name: str,
                    file_path: str,
                    metadata: Dict[str, Any] = None) -> bool:
    """
    Upload an artifact to Google Cloud Storage.
    An "artifact" can be any type of file of any size.
    Each artifact can be saved with its own metadata.

    Args:
        bucket_name (str): The bucket that contains the artifact.
        object_name (str): An 'object' is a placeholder for the file itself.
        In general the object name is the path of the artifact inside GCS.
        It can be a directory-like name (e.g. my/gcp/object) or a file-like
        name (e.g. my_object).
        file_path (str): The location of the file to upload in the file system.
        metadata (Dict[str, Any], optional): The metadata of the artifact.
        Defaults to None.

    Returns:
         bool: True if the file was uploaded, false otherwise.
    """
    log_metadata = {
        'funcName': 'upload_artifact',
        'eventGroup': 'Google Cloud Storage',
        'environment': Environments.INFRA,
    }

    try:
        bucket = _gcs_client.get_bucket(bucket_name)
        blob = bucket.blob(object_name)
        blob.metadata = metadata  # metadata is a property on Blob, not a callable

        with open(file_path, 'rb') as f:
            blob.upload_from_file(f)

        log_event(event_name='Artifact Upload',
                  message='Artifact uploading completed successfully.',
                  bucketName=bucket_name,
                  objectName=object_name,
                  **log_metadata)

        return True
    except NotFound as nfe:
        msg = 'The requested bucket was not found.'
        log_event(event_name='Artifact Uploading Error',
                  message=msg,
                  description=str(nfe),
                  severity=LogSeverities.ERROR,
                  bucketName=bucket_name,
                  **log_metadata)

        return False
    except GoogleCloudError as gce:
        msg = 'An error occurred while trying to upload the file.'
        log_event(event_name='Artifact Uploading Error',
                  message=msg,
                  description=str(gce),
                  severity=LogSeverities.ERROR,
                  objectName=object_name,
                  **log_metadata)

        return False
    except FileNotFoundError as fnfe:
        log_event(event_name='Artifact Uploading Error',
                  message=str(fnfe),
                  severity=LogSeverities.ERROR,
                  filePath=file_path,
                  **log_metadata)

        return False
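A possible call, assuming the file already exists on the local file system (all names are placeholders):

ok = upload_artifact(bucket_name='analytics_raw-data',     # placeholder
                     object_name='models/2021/model.pkl',  # placeholder
                     file_path='/tmp/model.pkl',           # placeholder
                     metadata={'trainedBy': 'example-job'})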
Example #10
def download_artifacts_bunch(bucket_name: str,
                             local_directory_path: str,
                             data_cloud_path: str = None,
                             activate_recursive_download: bool = False,
                             activate_parallel_download: bool = False) -> bool:
    """
    Download a bunch of artifacts from Google Cloud Storage.

    Args:
        bucket_name (str): The bucket that stores the artifacts.
        local_directory_path (str): The local directory that will contain the
        downloaded artifacts. If it does not exist, it is created automatically.
        data_cloud_path (str, optional): A path to a subdirectory of the
        bucket or a wild card path for a group of files.
        For example:
        * /folder/to/download
        * /*.txt
        If not supplied all files under the requested bucket will be
        downloaded. Defaults to None.
        activate_recursive_download (bool, optional): True for downloading the
        artifacts of subfolders of the specified cloud path or bucket.
        Defaults to False.
        activate_parallel_download (bool, optional): True for using
        multithreaded download. Use this only if there is a large amount of
        artifacts to download. Defaults to False.

    Returns:
        bool: True if the artifacts were downloaded, false otherwise.
    """
    log_metadata = {
        'funcName': 'download_artifacts_bunch',
        'eventGroup': 'Google Cloud Storage',
        'environment': Environments.INFRA,
    }
    server_ip = socket.gethostbyname(socket.gethostname())

    if not os.path.exists(local_directory_path):
        try:
            os.mkdir(local_directory_path)
        except OSError as ose:
            log_event(event_name='Directory Creation Error',
                      message='Could not create the destination directory.',
                      description=str(ose),
                      severity=LogSeverities.ERROR,
                      localDirectoryPath=local_directory_path,
                      **log_metadata)

            return False  # bail out only when the directory could not be created

    url = f'gs://{bucket_name}'

    if data_cloud_path:
        url = url + f'/{data_cloud_path}'

    download_command = ['gsutil', 'cp', url, local_directory_path]

    if activate_parallel_download:
        index = download_command.index('gsutil', 0, len(download_command))
        download_command.insert(index + 1, '-m')
    if activate_recursive_download:
        index = download_command.index('cp', 0, len(download_command))
        download_command.insert(index + 1, '-r')

    log_metadata.update({
        'bucketName': bucket_name,
        'dataCloudLocation': data_cloud_path,
        'isRecursiveDownload': activate_recursive_download,
        'isParallelDownload': activate_parallel_download
    })

    try:
        subprocess.run(download_command, check=True)  # check=True makes a failed gsutil run raise instead of silently passing
        log_event(event_name='Artifacts Bunch Download',
                  message='Artifacts downloading completed successfully.',
                  localDirectoryPath=local_directory_path,
                  localServerIP=server_ip,
                  **log_metadata)

        return True
    except Exception as e:
        msg = 'An unexpected error occurred while trying to download the ' +\
            'artifacts.'
        log_event(event_name='Artifacts Downloading Error',
                  message=msg,
                  description=str(e),
                  severity=LogSeverities.ERROR,
                  **log_metadata)

        return False
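For instance, with both flags enabled and a wildcard cloud path (all values below are placeholders), the command assembled above ends up equivalent to gsutil -m cp -r gs://analytics_raw-data/reports/*.csv /tmp/artifacts:

ok = download_artifacts_bunch(bucket_name='analytics_raw-data',    # placeholder
                              local_directory_path='/tmp/artifacts',
                              data_cloud_path='reports/*.csv',     # placeholder wildcard
                              activate_recursive_download=True,
                              activate_parallel_download=True)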
    def update_collection_schema(self,
                                 col_name: str,
                                 schema: Dict[str, Any],
                                 validation_level: str = 'strict',
                                 validation_action: str = 'error') -> bool:
        """Apply a validation schema for a specified collection.
        For more information, see [Schema Validation](https://docs.mongodb.com/manual/core/schema-validation/).

        Args:
            col_name (str): The collection name.
            schema (Dict[str, Any]): Specifies validation rules or expressions
            for the collection.
            validation_level (str): Determines how strictly MongoDB applies
            the validation rules to existing documents during an update.
            Defaults to 'strict'.
            validation_action (str): Determines whether to error on invalid
            documents or just warn about the violations but allow invalid
            documents to be inserted. Defaults to 'error'.

        Returns:
            bool: True for success, False otherwise.
        """
        metadata = {
            'collName': col_name,
            'className': 'MongoHandler',
            'funcName': 'update_collection_schema',
            'validator': schema,
            'validationLevel': validation_level,
            'validationAction': validation_action,
            'eventGroup': 'Mongo'
        }

        try:
            result = self._db.command('collMod',
                                      col_name,
                                      validator=schema,
                                      validationLevel=validation_level,
                                      validationAction=validation_action)

            if 'errmsg' in result:
                log_event(event_name='Collection Error',
                          message=result['errmsg'],
                          severity=LogSeverities.WARNING,
                          **metadata)

                return False

            log_event(event_name='Collection Schema Updated',
                      message='The new schema was applied.',
                      **metadata)

            return True
        except (ConnectionFailure, ServerSelectionTimeoutError) as err:
            msg = f'A connection error has occurred while trying to update '\
                f'the collection schema.\nError message: {err}'
            print(msg)

            return False
        except OperationFailure as ope:
            msg = 'The operation has failed, the schema was not updated.'
            log_event(event_name='Collection Error',
                      message=msg,
                      description=str(ope),
                      severity=LogSeverities.ERROR,
                      **metadata)

            return False
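A hedged example of a $jsonSchema validator that could be passed to this method (handler, collection, and field names are illustrative only):

schema = {
    '$jsonSchema': {
        'bsonType': 'object',
        'required': ['type', 'timestamp'],
        'properties': {
            'type': {'bsonType': 'string'},
            'timestamp': {'bsonType': 'date'}
        }
    }
}
ok = handler.update_collection_schema('events', schema,
                                      validation_level='moderate',
                                      validation_action='warn')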