def ConnectToAzure():
    """Connect to the Azure ML workspace, compute target, datastore and experiment.

    Populates the module-level globals ``az_workspace``, ``az_computetarget``,
    ``az_datastore`` and ``az_experiment``, printing each resource name as it
    is resolved.
    """
    global az_workspace, az_computetarget, az_datastore, az_experiment

    # Workspace config is expected in ./azureml/config.json; the file can be
    # generated from the Azure portal while browsing the workspace.
    az_workspace = Workspace.from_config()
    print("Workspace:", az_workspace.name)

    # The compute target must belong to this workspace, and targets are limited
    # by the workspace region (cross-workspace targets may become possible later).
    az_computetarget = ComputeTarget(workspace=az_workspace, name="AzPytrch-NC6")
    print("Compute Target:", az_computetarget.name)

    # The datastore must be backed by a storage account owned by the workspace.
    az_datastore = Datastore.get_default(az_workspace)
    print("Datastore:", az_datastore.name)

    az_experiment = Experiment(workspace=az_workspace, name='616_Final')
    print("Experiment:", az_experiment.name)
def register_dataset(
    aml_workspace: Workspace,
    dataset_name: str,
    datastore_name: str,
    file_path: str = "COVID19Articles.csv",
) -> Dataset:
    """Build a tabular dataset from a delimited file and register it.

    Uses the named datastore when ``datastore_name`` is truthy, otherwise the
    workspace default. Registration always creates a new version.

    Returns the registered :class:`Dataset`.
    """
    datastore = (
        Datastore.get(aml_workspace, datastore_name)
        if datastore_name
        else Datastore.get_default(aml_workspace)
    )
    tabular = Dataset.Tabular.from_delimited_files(path=(datastore, file_path))
    return tabular.register(
        workspace=aml_workspace,
        name=dataset_name,
        create_new_version=True,
    )
def main():
    """Load survey data, engineer features, and register the result as a dataset."""
    # Workspace coordinates are read from the foundation env file.
    env = Env()
    env.read_env("Azure_ML/foundation.env")
    workspace = Workspace(
        env("AZURE_SUBSCRIPTION_ID"),
        env("RESOURCE_GROUP"),
        env("WORKSPACE_NAME"),
    )
    # NOTE(review): an external blob datastore could be used instead via
    # Datastore.get(workspace, env("SOME_EXTERNAL_BLOB_DATASTORE_NAME")).
    datastore = Datastore.get_default(workspace)

    features = feat.generate_features(prep.load_data())
    Dataset.Tabular.register_pandas_dataframe(
        features,
        (datastore, 'azure-ml-datasets'),
        'survey_data_with_all_features',
    )
def create_sample_data_csv(aml_workspace: Workspace,
                           datastore_name: str,
                           file_name: str = "COVID19Articles.csv",
                           for_scoring: bool = False):
    """Download the sample CSV and upload it to a workspace datastore.

    Fetches ``file_name`` from the public Sollivance blob container, optionally
    drops the 'cluster' column (when preparing scoring input), writes the CSV
    to the local working directory, and uploads it to the named datastore (or
    the workspace default when ``datastore_name`` is falsy).
    """
    base_url = (
        "https://solliancepublicdata.blob.core.windows.net"
        "/ai-in-a-day/lab-02/"
    )
    df = pd.read_csv(base_url + file_name)
    # Scoring input must not carry the 'cluster' column.
    if for_scoring:
        df = df.drop(columns=['cluster'])
    df.to_csv(file_name, index=False)

    if datastore_name:
        datastore = Datastore.get(aml_workspace, datastore_name)
    else:
        datastore = Datastore.get_default(aml_workspace)
    datastore.upload_files(
        files=[file_name],
        overwrite=True,
        show_progress=False,
    )
def register_dataset_to_store(ws, df, name):
    """Register *df* as a tabular dataset in the workspace's default datastore."""
    TabularDatasetFactory.register_pandas_dataframe(
        df, Datastore.get_default(ws), name=name
    )
# COMMAND ---------- subscription_id = "f80606e5-788f-4dc3-a9ea-2eb9a7836082" resource_group = "rg-synapse-training" workspace_name = "mlworkspace-training" experiment_name = "satraining-nyc_taxi-20210525085738" ws = Workspace(subscription_id = subscription_id, resource_group = resource_group, workspace_name = workspace_name) experiment = Experiment(ws, experiment_name) # COMMAND ---------- df = spark.sql("SELECT * FROM tab_nyctaxi") datastore = Datastore.get_default(ws) dataset = TabularDatasetFactory.register_spark_dataframe(df, datastore, name = experiment_name + "-dataset") # COMMAND ---------- dataset = Dataset.get_by_name(ws, name='satraining-nyc_taxi-20210525085738-dataset') #dataset.to_pandas_dataframe() # COMMAND ---------- # MAGIC %md # MAGIC Class SynapseCompute: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information. # MAGIC # MAGIC 'linksyn-spark': id: /subscriptions/f80606e5-788f-4dc3-a9ea-2eb9a7836082/resourceGroups/rg-synapse-training/providers/Microsoft.MachineLearningServices/workspaces/mlworkspace-training/computes/linksyn-spark, # MAGIC name: linksyn-spark, # MAGIC tags: None,
def __init__(self, workspace):
    """Bind the workspace and its default datastore used to store data."""
    self.workspace = workspace
    self.datastore = Datastore.get_default(workspace)