Example #1
def initialize_db():
    # Lazily creates the Firestore client; assumes a module-level "db = None"
    # and the firebase_admin / firestore imports from the source project.
    global db
    get_env_creds()
    if not db:
        firebase_admin.initialize_app()
        db = firestore.client()
    return db
Example #2
def gcs_file_upload(bucket_name, file_type, file_path):
    """Uploads a blob to the bucket."""
    # file_path = "local/path/to/file"
    # GCS_BUCKET_NAME = "bucketname"
    # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
    try:
        # the source and destination file names are kept the same
        get_env_creds()
        storage_client = storage.Client()
        bucket = storage_client.bucket(bucket_name)
        file_name = file_from_path(file_path)
        blob = bucket.blob(file_type + "/" + file_name)
        blob.upload_from_filename(file_path)
        print("File {} has been uploaded".format(file_name))
    except NameError:
        print("Please set the environment variable {}".format(GCS_BUCKET_NAME))
Example #3
def deploy(model_type, model_location, model_path, **kwargs):
    '''
    model_path is a path inside the cloud bucket, not a local path
    '''
    try:
        get_env_creds()
        file_name = get_filename_noext(model_path)
        model_name = filter_alpha(file_name)
        date = get_date_for_id()
        model_id = "{}-{}".format(model_name, date)

        region = kwargs.get(GCP_REGION, os.environ.get(
            GCP_REGION, GCP_DEFAULT_REGION))
        auth = kwargs.get(CLOUD_RUN_AUTH, os.environ.get(
            CLOUD_RUN_AUTH, CLOUD_RUN_DEFAULT_AUTH))

        local_model_path = gcs_download_file(
            model_path) if model_location == CLOUD else model_path

        if model_location == LOCAL:
            upload(MODELS, local_model_path)

        if model_type == SKLEARN:
            get_container_files(model_type)
            add_env_to_dockerfile()
            gcp_auth()
            gcp_setproject()
            upload_model_metadata(local_model_path, model_id, model_type)
            build_container(model_id)
            deploy_container(model_id, region, auth)
        elif model_type == TF:
            get_container_files(model_type)
            # append ".pb" so the .pb model file is always picked up
            add_modelfilename_to_dockerfile(
                file_name+".pb", model_type)
            gcp_auth()
            gcp_setproject()
            upload_model_metadata(local_model_path, model_id, model_type)
            # cloud build yaml file has build + deploy
            build_container(model_id)
            deploy_container(model_id, region, auth)
        cleanup_postdeploy()
    except Exception as exc:
        raise Exception(DEPLOY_ERROR) from exc
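A hypothetical invocation of deploy, assuming the module-level constants (SKLEARN, CLOUD, GCP_REGION) from the source project, where GCP_REGION is taken to hold the keyword/environment-variable name; the model path and region values are placeholders:

# Deploy an sklearn model that already lives in the bucket, overriding the region.
deploy(SKLEARN, CLOUD, "models/classifier.pkl", **{GCP_REGION: "us-central1"})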
Example #4
def gcs_list_files(bucket_name, prefix, delimiter=None):
    """Lists all the blobs in the bucket that begin with the prefix.

    This can be used to list all blobs in a "folder", e.g. "public/".

    The delimiter argument can be used to restrict the results to only the
    "files" in the given "folder". Without the delimiter, the entire tree under
    the prefix is returned. For example, given these blobs:

        a/1.txt
        a/b/2.txt

    If you just specify prefix = 'a', you'll get back:

        a/1.txt
        a/b/2.txt

    However, if you specify prefix='a' and delimiter='/', you'll get back:

        a/1.txt

    Additionally, the same request will return blobs.prefixes populated with:

        a/b/
    """
    # Note: Client.list_blobs requires at least package version 1.17.0.
    # https://cloud.google.com/storage/docs/listing-objects
    get_env_creds()
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)

    blobs = storage_client.list_blobs(bucket,
                                      prefix=prefix,
                                      delimiter=delimiter)

    directory = prefix + (delimiter if delimiter else "")
    print("Files in {}:".format(directory))
    # Iterating the response is also what populates blobs.prefixes when a delimiter is set.
    for blob in blobs:
        print(" -> {}".format(blob.name))
    if delimiter:
        print("Sub-folders in {}:".format(directory))
        for sub_prefix in blobs.prefixes:
            print(" -> {}".format(sub_prefix))
Example #5
def submit_pyspark_job(spark_job_path, **kwargs):
    """Adds the GCS output path to the PySpark job file, uploads it, and submits it to Dataproc."""
    get_env_creds()
    project_id = kwargs.get(PROJECT_ID, os.environ[PROJECT_ID])
    region = kwargs.get(GCP_REGION,
                        os.environ.get(GCP_REGION, GCP_DEFAULT_REGION))
    cluster_name = kwargs.get(GCP_DATAPROC_CLUSTER,
                              os.environ[GCP_DATAPROC_CLUSTER])
    bucket_name = kwargs.get(GCS_BUCKET_NAME, os.environ[GCS_BUCKET_NAME])

    _, dataproc_job_client = set_cluster_clients()

    spark_bucket_output_path = kwargs.get(
        OUTPUT_PATH_ARG, "gs://{}/{}".format(bucket_name, DATA))

    mod_job_path = add_outputpath_to_job(spark_job_path,
                                         spark_bucket_output_path)
    gcs_file_upload(bucket_name, DATAPROC, mod_job_path)

    submit_dataproc_pyspark_job(dataproc_job_client, mod_job_path,
                                spark_bucket_output_path, project_id, region,
                                cluster_name, bucket_name)

    os.remove(mod_job_path)
Example #6
def gcs_download_file(source_file_path):
    """Downloads a blob from the bucket."""
    # GCS_BUCKET_NAME = "your-bucket-name"
    # source_file_path = "models/storage-object-name"
    # destination_file_name = "local/path/to/file"

    try:
        get_env_creds()
        storage_client = storage.Client()
        bucket = storage_client.bucket(os.environ[GCS_BUCKET_NAME])
        blob = bucket.blob(source_file_path)
        # [-1] also handles object names that contain no "/"
        model_file_name = source_file_path.rsplit('/', 1)[-1]
        # currently will only download to current directory
        set_cwd()
        destination_file_name = os.getcwd() + "/" + model_file_name
        blob.download_to_filename(destination_file_name)

        print("Blob {} downloaded to {}.".format(source_file_path,
                                                 destination_file_name))
        return destination_file_name
    except FileNotFoundError:
        print(
            "The file either doesn't exist in the bucket or hasn't been specified"
        )
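A hypothetical call to gcs_download_file; it assumes the GCS_BUCKET_NAME environment variable already points at the bucket, and the object path is a placeholder:

# Downloads models/classifier.pkl from the configured bucket into the current directory.
local_model_path = gcs_download_file("models/classifier.pkl")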
Example #7
def list_files(list_type, **kwargs):
    """Lists the files under the given prefix in the configured GCS bucket."""
    get_env_creds()
    bucket_name = kwargs.get(GCS_BUCKET_NAME, os.environ[GCS_BUCKET_NAME])
    gcs_list_files(bucket_name, list_type)
Example #8
def upload(file_type, file_path, **kwargs):
    """Uploads a local file to the configured GCS bucket under the given folder prefix."""
    get_env_creds()
    bucket_name = kwargs.get(GCS_BUCKET_NAME, os.environ[GCS_BUCKET_NAME])
    gcs_file_upload(bucket_name, file_type, file_path)