def __exit__(self, *exc_details):
    """Upload files for datastore.

    Context-manager exit hook: for every configured data reference that is
    marked as an upload, pushes the local file or directory on the compute
    to its target datastore. ADLS Gen1 datastores go through the dataprep
    engine API; all other datastores use the Datastore upload helpers.

    :param exc_details: exception details forwarded by the ``with`` statement
        (ignored — upload happens regardless of an in-flight exception).
    :return: None
    """
    # Imported lazily so the azureml.core / dataprep dependencies are only
    # required when the context manager actually exits.
    from azureml.core.datastore import Datastore
    from azureml.data._dataprep_helper import dataprep
    module_logger.debug("Enter __exit__ function of datastore cmgr")
    for key, value in self._config.items():
        df_config, force_read = self._to_data_reference_config(value)
        # Only entries configured for upload are processed here.
        if self._is_upload(df_config):
            self._validate_config(df_config, key)
            ds = Datastore(workspace=self._workspace,
                           name=df_config.data_store_name)
            if os.path.isdir(df_config.path_on_compute):
                # Directory upload path.
                if self._is_datastore_adlsgen1(ds):
                    # ADLS Gen1 is not supported by Datastore.upload, so the
                    # dataprep engine API is used instead.
                    module_logger.debug(
                        "AzureDataLake Gen1 used as Datastore for upload dir."
                    )
                    dataprep().api.engineapi.api.get_engine_api(
                    ).upload_directory(
                        dataprep().api.engineapi.typedefinitions.
                        UploadDirectoryMessageArguments(
                            base_path=df_config.path_on_compute,
                            folder_path=df_config.path_on_compute,
                            destination=dataprep(
                            ).api._datastore_helper._to_stream_info_value(
                                ds, df_config.path_on_data_store),
                            force_read=force_read,
                            overwrite=df_config.overwrite,
                            # NOTE(review): uploads are serialized — presumably
                            # a deliberate throttle; confirm before raising.
                            concurrent_task_count=1))
                else:
                    ds.upload(src_dir=df_config.path_on_compute,
                              target_path=df_config.path_on_data_store,
                              overwrite=df_config.overwrite)
            elif os.path.isfile(df_config.path_on_compute):
                # Single-file upload path.
                if self._is_datastore_adlsgen1(ds):
                    module_logger.debug(
                        "AzureDataLake Gen1 used as Datastore for upload file."
                    )
                    dataprep().api.engineapi.api.get_engine_api(
                    ).upload_file(
                        dataprep().api.engineapi.typedefinitions.
                        UploadFileMessageArguments(
                            # base_path anchors the relative layout of the
                            # uploaded file at its parent directory.
                            base_path=os.path.dirname(
                                df_config.path_on_compute),
                            local_path=df_config.path_on_compute,
                            destination=dataprep(
                            ).api._datastore_helper._to_stream_info_value(
                                ds, df_config.path_on_data_store),
                            force_read=force_read,
                            overwrite=df_config.overwrite))
                else:
                    ds.upload_files(
                        files=[df_config.path_on_compute],
                        target_path=df_config.path_on_data_store,
                        overwrite=df_config.overwrite)
            # NOTE(review): a path that is neither an existing file nor an
            # existing directory is silently skipped — confirm intended.
    module_logger.debug("Exit __exit__ function of datastore cmgr")
def upload_dataset(self, dataset_name: str, local_folder: str,
                   datastore_name: str = None, overwrite: bool = False,
                   tags: dict = None) -> "FileDataset":
    '''
    Uploads data from a local directory into an AzureML Datastore that
    points to Azure Data lake, then registers the files as a dataset.

    Args:
        dataset_name (str): The name of the dataset to register
        local_folder (str): The location of the local directory to take files from
        datastore_name (str): The name of a Datastore that will contain the dataset;
            defaults to the configured datastore when omitted
        overwrite (bool): Whether to overwrite files already on the datastore
        tags (dict): Optional tags attached to the registered dataset
    Returns:
        FileDataset: The registered dataset, containing the files
    '''
    if not datastore_name:
        # No datastore name is given, so we'll take the default one
        datastore_name = self.__datastore_path

    # Connecting data store
    datastore = Datastore(self.__workspace, name=datastore_name)

    # TODO : check type of datastore
    datastore.upload(src_dir=local_folder, target_path=dataset_name,
                     overwrite=overwrite, show_progress=True)

    datastore_paths = [(datastore, dataset_name)]
    file_ds = Dataset.File.from_files(path=datastore_paths)

    file_ds = file_ds.register(workspace=self.__workspace,
                               name=dataset_name,
                               description=dataset_name,
                               tags=tags,
                               create_new_version=True)
    # Bug fix: the registered dataset promised by the docstring was
    # previously never returned (and the annotation wrongly said DataFrame).
    return file_ds
# In[ ]: # Default datastore (Azure file storage) def_file_store = ws.get_default_datastore() print("Default datastore's name: {}".format(def_file_store.name)) def_blob_store = Datastore(ws, "workspaceblobstore") print("Blobstore's name: {}".format(def_blob_store.name)) # In[ ]: # Upload the raw training data to the blob storage def_blob_store.upload(src_dir=data_location, target_path='nyc-taxi-raw-features', overwrite=True, show_progress=True) raw_train_data = DataReference(datastore=def_blob_store, data_reference_name="nyc_taxi_raw_features", path_on_datastore="nyc-taxi-raw-features/nyc-taxi-sample-data.csv") print("DataReference object created") # ### Create the Process Training Data Pipeline Step # The intermediate data (or output of a Step) is represented by PipelineData object. PipelineData can be produced by one step and consumed in another step by providing the PipelineData object as an output of one step and the input of one or more steps. # # The process training data pipeline step takes the raw_train_data DataReference object as input, and it will output an intermediate PipelineData object that holds the processed training data with the new engineered features for datetime components: hour of the day, and day of the week. # # Review and run the cell below to construct the PipelineData objects and the PythonScriptStep pipeline step:
#%% first save the files to disk if (not os.path.exists("./Upload")): os.mkdir("./Upload") os.mkdir("./Upload/Data") os.mkdir("./Upload/Model") df_pca.to_csv("./Upload/Data/data.csv", index=False) pickle.dump( pca_model, open( "./Upload/Model/model.pkl", "wb" ) ) #%% now you can upload that directory to blobstorage # I use the date to diferentiate the different versions blob_path = f"Campus_Recruitment/{datetime.now().strftime('%Y-%m-%d')}"# if None will upload to root local_path = "./Upload/Data" blob_store.upload(src_dir=local_path, target_path=blob_path, overwrite=True, show_progress=True) #%% # ** Register the data as a dataset ** # %% now that the data is up on the blobstore we can register it as a dataset # to keep track of its versions and make it easily acessible dataset = Dataset.File.from_files( blob_store.path(blob_path + "/data.csv") ) dataset.register(ws, name="Campus_Recruitment_PCA_Training_Data", create_new_version=True) #%% # ** Upload and register the model as a Model ** #%% model = Model.register(workspace=ws,
# split the data using scikit-learn
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=float(test_size), random_state=101)

# join train and train label; same for test, so each split is one dataframe
train = pd.concat([X_train, y_train], axis=1)
validation = pd.concat([X_test, y_test], axis=1)

# make sure folder_name was passed in as an argument; everything below
# (local staging in "files" and the datastore upload) needs it
if folder_name is not None:
    # local staging directory for the CSVs before upload
    os.makedirs("files", exist_ok=True)
    # Bug fix: the message previously claimed folder_name was created,
    # but the directory actually created is "files".
    print("files directory created")

    # set the target path of the datastore to hold
    # test and validation datasets, one dated folder per day
    current_folder = str(datetime.now().date())
    target_path = os.path.join(folder_name, current_folder)

    train_file = os.path.join("files", train_file_name)
    val_file = os.path.join("files", val_file_name)

    # save the dataframes to the local drive to then upload the contents
    # of the folder in one call
    train.to_csv(train_file, header=True, index=False)
    validation.to_csv(val_file, header=True, index=False)

    datastore.upload("files", target_path=target_path,
                     overwrite=True, show_progress=False)