Beispiel #1
0
    def __enter__(self):
        """Prepare local paths for every configured data reference.

        Every value in ``self._config`` is converted with
        ``self._to_data_reference_config`` and then handled as follows:

        * ``self._is_upload`` is true: if ``path_on_compute`` is set, its
          (normalized) parent directory is created.
        * otherwise a target path is built from the datastore name,
          optionally joined with ``path_on_compute`` and normalized. When
          ``self._is_download`` is true the configuration is validated and
          the files are downloaded into that path; when it is false only
          the target directory is created.

        :return: None. Nothing is returned, so ``with ... as x`` binds
            ``None``.
        """
        module_logger.debug("Enter __enter__ function of datastore cmgr")
        from azureml.core import Datastore, Dataset
        import warnings

        zero_files_template = "Downloaded 0 files from datastore {} with path {}."

        for key, value in self._config.items():
            reference, _ = self._to_data_reference_config(value)

            # Upload reference: only the parent directory has to exist locally.
            if self._is_upload(reference):
                compute_path = reference.path_on_compute
                if compute_path:
                    parent_dir = os.path.normpath(os.path.dirname(compute_path))
                    if parent_dir:
                        _safe_mkdirs(parent_dir)
                continue

            # The target path always starts with the datastore name and the
            # user cannot override that. Normalizing the joined path lets a
            # path_on_compute containing up-level references ("../") work
            # instead of raising because the path is not normalized.
            store_name = reference.data_store_name
            local_root = store_name
            if reference.path_on_compute:
                local_root = os.path.normpath(
                    os.path.join(store_name, reference.path_on_compute))

            # Neither upload nor download: just make sure the directory exists.
            if not self._is_download(reference):
                _safe_mkdirs(local_root)
                continue

            self._validate_config(reference, key)
            datastore = Datastore(workspace=self._workspace,
                                  name=reference.data_store_name)

            if not self._is_datastore_adlsgen1(datastore):
                downloaded = datastore.download(
                    target_path=local_root,
                    prefix=reference.path_on_data_store,
                    overwrite=reference.overwrite)
                if downloaded == 0:
                    warnings.warn(zero_files_template.format(
                        datastore.name, reference.path_on_data_store))
                continue

            # ADLS Gen1 datastores are downloaded through a file dataset.
            _log_and_print(
                "AzureDataLake Gen1 used as Datastore for download"
            )
            if reference.path_on_data_store is None:
                # An unset path is replaced by the empty string before use.
                reference.path_on_data_store = ""
            remote_path = reference.path_on_data_store
            file_dataset = Dataset.File.from_files((datastore, remote_path))
            file_dataset.download(
                os.path.join(local_root, remote_path),
                overwrite=reference.overwrite)

        module_logger.debug("Exit __enter__ function of datastore cmgr")
Beispiel #2
0
#%% Print every entry of the workspace's datastores
for store in ws.datastores:
    print(store)

#%% Look up a datastore by its name
blob_store = Datastore(ws, "workspaceblobstore")
print(f"\nConnected to blobstore: {blob_store.name}")

#%%
# ** Downloading training data from the Blobstore **
#%% Fetch the files from the blobstore onto the local machine (or compute instance)
blob_path = "Campus_Recruitment/Raw_Data"  # if None, the whole blob is downloaded
local_path = "./"

blob_store.download(
    prefix=blob_path,
    target_path=local_path,
    show_progress=True,
    overwrite=True,
)

#%% Read the downloaded CSV into a DataFrame and preview its first ten rows
df = pd.read_csv(
    "./Campus_Recruitment/Raw_Data/datasets_596958_1073629_Placement_Data_Full_Class.csv"
)
df.head(10)

#%%
# ** Transform the data for use with our model **
#%% Encode selected text columns as integer codes
# Series.map with a dict turns any value missing from the dict into NaN.
df["male"] = df["gender"].map({"M": 1, "F": 0})
df["education_board_is_central"] = df["ssc_b"].map({"Central": 1, "Others": 0})
# hsc_s is overwritten in place with its numeric code.
df["hsc_s"] = df["hsc_s"].map({"Commerce": 0, "Science": 1, "Arts": 2})
df["workex_bool"] = df["workex"].map({"Yes": 1, "No": 0})

#%% drops nan values in columns we are using for the pca