def ConnectToAzure():
    """Connect to the Azure ML workspace, compute target, datastore and experiment.

    Results are published through the module-level globals
    ``az_workspace``, ``az_computetarget``, ``az_datastore`` and
    ``az_experiment`` rather than returned.
    """
    global az_workspace, az_computetarget, az_datastore, az_experiment

    # Workspace config is expected at ./azureml/config.json; it can be
    # downloaded from the Azure portal while browsing the workspace.
    az_workspace = Workspace.from_config()
    print("Workspace:", az_workspace.name)

    # The compute target must belong to the workspace, and compute targets
    # are limited by the workspace region (cross-workspace targets may be
    # possible in the future).
    az_computetarget = ComputeTarget(workspace=az_workspace, name="AzPytrch-NC6")
    print("Compute Target:", az_computetarget.name)

    # The datastore must be backed by a storage account of the workspace.
    az_datastore = Datastore.get_default(az_workspace)
    print("Datastore:", az_datastore.name)

    # Experiment used to group the training runs.
    az_experiment = Experiment(workspace=az_workspace, name='616_Final')
    print("Experiment:", az_experiment.name)
def register_dataset(
    aml_workspace: Workspace,
    dataset_name: str,
    datastore_name: str,
    file_path: str = "COVID19Articles.csv",
) -> Dataset:
    """Build a tabular dataset from a delimited file on a datastore and
    register it in the workspace.

    Falls back to the workspace default datastore when *datastore_name*
    is empty/falsy. The dataset is registered as a new version under
    *dataset_name* and the registered dataset is returned.
    """
    datastore = (
        Datastore.get(aml_workspace, datastore_name)
        if datastore_name
        else Datastore.get_default(aml_workspace)
    )
    tabular = Dataset.Tabular.from_delimited_files(path=(datastore, file_path))
    return tabular.register(
        workspace=aml_workspace,
        name=dataset_name,
        create_new_version=True,
    )
# Esempio n. 3
def main():
    """Load the survey data, engineer its features and register the result
    as a tabular dataset on the workspace default datastore."""
    # Workspace credentials are read from environment variables declared
    # in Azure_ML/foundation.env.
    env = Env()
    env.read_env("Azure_ML/foundation.env")
    workspace = Workspace(env("AZURE_SUBSCRIPTION_ID"),
                          env("RESOURCE_GROUP"),
                          env("WORKSPACE_NAME"))
    # datastore = Datastore.get(workspace, env("SOME_EXTERNAL_BLOB_DATASTORE_NAME"))
    datastore = Datastore.get_default(workspace)

    # Feature engineering over the raw survey data.
    features = feat.generate_features(prep.load_data())

    Dataset.Tabular.register_pandas_dataframe(
        features,
        (datastore, 'azure-ml-datasets'),
        'survey_data_with_all_features')
def create_sample_data_csv(aml_workspace: Workspace,
                           datastore_name: str,
                           file_name: str = "COVID19Articles.csv",
                           for_scoring: bool = False):
    """Download the sample COVID-19 articles CSV, write it locally and
    upload it to a datastore.

    When *for_scoring* is true the 'cluster' column is dropped before
    saving, producing label-free scoring input. Uses the named datastore
    when *datastore_name* is given, otherwise the workspace default.
    """
    base_url = ("https://solliancepublicdata.blob.core.windows.net"
                "/ai-in-a-day/lab-02/")
    frame = pd.read_csv(base_url + file_name)
    if for_scoring:
        # Scoring data must not carry the target label.
        frame = frame.drop(columns=['cluster'])
    frame.to_csv(file_name, index=False)

    if datastore_name:
        datastore = Datastore.get(aml_workspace, datastore_name)
    else:
        datastore = Datastore.get_default(aml_workspace)
    datastore.upload_files(files=[file_name],
                           overwrite=True,
                           show_progress=False)
# Esempio n. 5
def register_dataset_to_store(ws, df, name):
    """Register *df* as a pandas tabular dataset, under *name*, on the
    default datastore of workspace *ws*."""
    default_store = Datastore.get_default(ws)
    TabularDatasetFactory.register_pandas_dataframe(df, default_store, name=name)
# Esempio n. 6
# COMMAND ----------

# Databricks/Synapse notebook cell: identify the Azure ML workspace and
# the experiment that groups the NYC-taxi training runs.
# NOTE(review): hard-coded subscription/resource identifiers — fine for a
# throwaway notebook, but should come from config or env vars otherwise.
subscription_id = "f80606e5-788f-4dc3-a9ea-2eb9a7836082"
resource_group = "rg-synapse-training"
workspace_name = "mlworkspace-training"
experiment_name = "satraining-nyc_taxi-20210525085738"

ws = Workspace(subscription_id = subscription_id, resource_group = resource_group, workspace_name = workspace_name)
experiment = Experiment(ws, experiment_name)

# COMMAND ----------

# Pull the taxi table from the Spark metastore and register it as a
# tabular dataset on the workspace default datastore.
df = spark.sql("SELECT * FROM tab_nyctaxi")

datastore = Datastore.get_default(ws)
dataset = TabularDatasetFactory.register_spark_dataframe(df, datastore, name = experiment_name + "-dataset")

# COMMAND ----------

# Re-fetch the dataset by its registered name (rebinds `dataset`; the
# name matches experiment_name + "-dataset" above).
dataset =  Dataset.get_by_name(ws, name='satraining-nyc_taxi-20210525085738-dataset')
#dataset.to_pandas_dataframe()

# COMMAND ----------

# MAGIC %md
# MAGIC Class SynapseCompute: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
# MAGIC 
# MAGIC 'linksyn-spark': id: /subscriptions/f80606e5-788f-4dc3-a9ea-2eb9a7836082/resourceGroups/rg-synapse-training/providers/Microsoft.MachineLearningServices/workspaces/mlworkspace-training/computes/linksyn-spark,
# MAGIC  name: linksyn-spark,
# MAGIC  tags: None,
# Esempio n. 7
 def __init__(self, workspace):
     """Bind the storage resources used for data handling.

     Keeps a reference to *workspace* and resolves its default datastore.
     """
     self.workspace = workspace
     self.datastore = Datastore.get_default(workspace)