def __enter__(self) -> None:
    """Download files for datastore.

    For every entry in ``self._config``:
      * upload references  -> pre-create the local directory that will
        receive the files to upload;
      * download references -> pull the files from the datastore onto the
        compute target;
      * anything else      -> just create the target directory.

    NOTE(review): no value is returned, so ``with ... as x`` binds ``None``.

    :return:
    """
    module_logger.debug("Enter __enter__ function of datastore cmgr")
    # Imported lazily — presumably to avoid paying the azureml.core import
    # cost unless the context manager is actually entered.
    from azureml.core import Datastore, Dataset
    for key, value in self._config.items():
        df_config, _ = self._to_data_reference_config(value)
        if self._is_upload(df_config):
            # Upload mode: only ensure the local directory tree for
            # path_on_compute exists; no data is moved here.
            if df_config.path_on_compute:
                dir_to_create = os.path.normpath(
                    os.path.dirname(df_config.path_on_compute))
                if dir_to_create:
                    _safe_mkdirs(dir_to_create)
        else:
            # Download / mount-style references land under a directory named
            # after the datastore, optionally extended by path_on_compute.
            target_path = df_config.data_store_name
            if df_config.path_on_compute:
                target_path = os.path.join(df_config.data_store_name,
                                           df_config.path_on_compute)
                # The target_path is always set using the data store name with no way
                # for the user to overwrite this behavior. The user might attempt to use ../ in
                # the path on compute as a solution but this throws an exception
                # because the path is not normalized.
                # Normalizing the path to allow the user to use up-level references.
                target_path = os.path.normpath(target_path)
            if self._is_download(df_config):
                self._validate_config(df_config, key)
                ds = Datastore(workspace=self._workspace,
                               name=df_config.data_store_name)
                if self._is_datastore_adlsgen1(ds):
                    # ADLS Gen1 is handled via a FileDataset rather than a
                    # direct datastore download.
                    _log_and_print(
                        "AzureDataLake Gen1 used as Datastore for download"
                    )
                    if df_config.path_on_data_store is None:
                        df_config.path_on_data_store = ""
                    # NOTE(review): path_on_data_store is joined into the
                    # destination here (unlike the branch below, where it is
                    # passed as a prefix) — presumably to mirror the blob
                    # layout; confirm against the FileDataset.download docs.
                    Dataset.File.from_files(
                        (ds, df_config.path_on_data_store)).download(
                        os.path.join(target_path,
                                     df_config.path_on_data_store),
                        overwrite=df_config.overwrite)
                else:
                    count = ds.download(
                        target_path=target_path,
                        prefix=df_config.path_on_data_store,
                        overwrite=df_config.overwrite)
                    if count == 0:
                        # Surface "nothing matched the prefix" to the user;
                        # an empty download is usually a misconfigured path.
                        import warnings
                        warnings.warn(
                            "Downloaded 0 files from datastore {} with path {}."
                            .format(ds.name, df_config.path_on_data_store))
            else:
                # Neither upload nor download: just make sure the target
                # directory exists (e.g. for mount-style usage).
                _safe_mkdirs(target_path)
    module_logger.debug("Exit __enter__ function of datastore cmgr")
#%% List all blobstores for store in ws.datastores: print(store) #%% connect to a datastore by name blob_store = Datastore(ws, "workspaceblobstore") print("\n" + f"Connected to blobstore: {blob_store.name}") #%% # ** Downloading training data from the Blobstore ** #%% download the file from the blobstore to your local machine (or compute instance) blob_path = "Campus_Recruitment/Raw_Data"# if None will download whole blob local_path = "./" blob_store.download(target_path=local_path, prefix=blob_path, overwrite=True, show_progress=True) #%% Load the data into memory df = pd.read_csv("./Campus_Recruitment/Raw_Data/datasets_596958_1073629_Placement_Data_Full_Class.csv") df[:10] #%% # ** Transform the data for use with our model ** #%% transform some column values into numeric and categorical df['male'] = df['gender'].map({'M':1, 'F':0}) df['education_board_is_central'] = df['ssc_b'].map({'Central':1, 'Others':0}) df['hsc_s'] = df['hsc_s'].map({'Commerce':0 , 'Science':1 , 'Arts':2}) df['workex_bool'] = df['workex'].map({'Yes':1, 'No':0}) #%% drops nan values in columns we are using for the pca