Example #1
import json
import os
import sys

from azureml.core import Experiment, Model, Run


def downloadPickles(ws, modelName, outputPath="./pickles", modelVer=None):
    if modelVer == 'best':
        bestModel = None
        maxAcc = -1
        for model in Model.list(ws, modelName, ["accuracy"]):
            modelAcc = float(model.tags["accuracy"])
            if modelAcc > maxAcc:
                bestModel = model
                maxAcc = modelAcc

        print(f"### Best model with highest accuracy of {maxAcc} found")

        if not bestModel:
            model = Model(ws, modelName)
            print("### WARNING! No best model found, using latest instead")
    elif modelVer is not None:
        model = Model(ws, modelName, version=modelVer)
    else:
        model = Model(ws, modelName)

    print(f"### Using model version {model.version}")
    # Echoing this magic string sets an output variable in the Azure DevOps pipeline
    # Set AZML_MODEL_VER for use by subsequent steps
    print(f"##vso[task.setvariable variable=AZML_MODEL_VER]{model.version}")

    # These special tags let us get back to the run that created the model
    try:
        runId = model.tags['aml-runid']
        experimentName = model.tags['aml-experiment']
    except KeyError:
        print(
            "### ERROR! Model missing `aml-runid` and `aml-experiment` tags, can't continue!"
        )
        sys.exit(1)

    exp = Experiment(workspace=ws, name=experimentName)
    run = Run(exp, runId)
    if run.status != "Completed":
        print(f'### ERROR! Run {runId} did not complete!')
        return

    print(f'### Will download from run {runId} in {experimentName}')

    # Now we can get all the files created with the run, grab all the .pkls
    for f in run.get_file_names():
        if f.endswith('.pkl'):
            output_file_path = os.path.join(outputPath, f.split('/')[-1])
            print('### Downloading from {} to {} ...'.format(
                f, output_file_path))
            run.download_file(name=f, output_file_path=output_file_path)

    # Add some extra metadata, handy to have
    metadata = {
        'name': model.name,
        'version': model.version,
        'tags': model.tags
    }
    with open(f"{outputPath}/metadata.json", 'w') as metadata_file:
        print(f"### Storing metadata in {outputPath}/metadata.json")
        json.dump(metadata, metadata_file)
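A minimal usage sketch for the function above, assuming an Azure ML config.json is available locally; the model name "my-model" is hypothetical:

import os
from azureml.core import Workspace

ws = Workspace.from_config()
os.makedirs("./pickles", exist_ok=True)  # default outputPath of the function above
downloadPickles(ws, "my-model", modelVer="best")  # "my-model" is a hypothetical name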
Example #2
def download_run_outputs_by_prefix(
        blobs_prefix: Path, destination: Path, run: Run
) -> None:
    """
    Download all the blobs from the run's default output directory: DEFAULT_AML_UPLOAD_DIR ("outputs") that
    have a given prefix (folder structure). When saving, the prefix string will be stripped off. For example,
    if blobs_prefix = "foo", and the run has a file "outputs/foo/bar.csv", it will be downloaded to destination/bar.csv.
    If there is in addition a file "foo.txt", that file will be skipped.
    :param blobs_prefix: The prefix for all files in "outputs" that should be downloaded.
    :param destination: Local path to save the downloaded blobs to.
    :param run: The AzureML run to download the files from.
    """
    prefix_str = str((fixed_paths.DEFAULT_AML_UPLOAD_DIR / blobs_prefix).as_posix())
    logging.info(
        f"Downloading multiple files from run {run.id}: {prefix_str} -> {str(destination)}"
    )
    # There is a download_files function, but it can time out when downloading several large checkpoint
    # files (120s timeout for all files).
    for file in run.get_file_names():
        if file.startswith(prefix_str):
            target_path = file[len(prefix_str):]
            if target_path.startswith("/"):
                target_path = target_path[1:]
                logging.info(f"Downloading {file}")
                run.download_file(
                    file, str(destination / target_path), _validate_checksum=True
                )
            else:
                logging.warning(
                    f"Skipping file {file}, because the desired prefix {prefix_str} is not aligned with "
                    f"the folder structure"
                )
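A possible invocation of the function above, assuming the surrounding codebase provides the fixed_paths module it references; the experiment name, run ID, and prefix are hypothetical:

from pathlib import Path
from azureml.core import Experiment, Run, Workspace

ws = Workspace.from_config()
run = Run(Experiment(ws, "my-experiment"), "my-run-id")  # hypothetical names
# Download everything under outputs/checkpoints/ into /tmp/checkpoints
download_run_outputs_by_prefix(Path("checkpoints"), Path("/tmp/checkpoints"), run=run)

Example #3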
def download_model_file(run_id: str, remote_file: str, local_file: str) -> None:
    """Download the model.pt file for the corresponding run_id to the local
    directory. Then it is uploaded to Azure when run. Could be other ways
    to acomplish this (copy directly from run to run perhaps)"""
    ws = Workspace.from_config()
    experiment = Experiment(workspace=ws, name="hm-2016")
    run = Run(experiment, run_id)
    print(remote_file)
    run.download_file(remote_file, local_file)
Example #4
def download_model(workspace, experiment_name, run_id, input_location,
                   output_location):
    """
    Download the pretrained model
    Input:
         workspace: workspace to access the experiment
         experiment_name: Name of the experiment in which model is saved
         run_id: Run Id of the experiment in which model is pre-trained
         input_location: Input location in a RUN Id
         output_location: Location for saving the model
    """
    experiment = Experiment(workspace=workspace, name=experiment_name)
    # Download the model on which evaluation needs to be done
    run = Run(experiment, run_id=run_id)
    run.download_file(input_location, output_location)
    logger.info("Successfully downloaded model")
def get_job_log_file(run: Run, index: Optional[int] = None) -> str:
    """
    Reads the job log file (70_driver_log.txt or std_log.txt) of the given job. If an index is provided, get
    the matching file from a multi-node job.
    :return: The contents of the job log file.
    """
    assert run.status == RunStatus.COMPLETED
    files = run.get_file_names()
    suffix = (f"_{index}" if index is not None else "") + ".txt"
    file1 = "azureml-logs/70_driver_log" + suffix
    file2 = "user_logs/std_log" + suffix
    if file1 in files:
        file = file1
    elif file2 in files:
        file = file2
    else:
        raise ValueError(f"No log file ({file1} or {file2}) present in the run. Existing files: {files}")
    downloaded = tempfile.NamedTemporaryFile().name
    run.download_file(name=file, output_file_path=downloaded)
    return Path(downloaded).read_text()
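A possible call to get_job_log_file, with hypothetical experiment and run names; note the function asserts that the run has completed:

from azureml.core import Experiment, Run, Workspace

ws = Workspace.from_config()
run = Run(Experiment(ws, "my-experiment"), "my-run-id")  # hypothetical names
print(get_job_log_file(run))           # single-node job
print(get_job_log_file(run, index=0))  # node 0 of a multi-node job

Example #6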
def download_model(ws, experiment_name, run_id, input_location, output_location):
    """Download the pretrained model

    Args:
         ws: workspace to access the experiment
         experiment_name: Name of the experiment in which model is saved
         run_id: Run Id of the experiment in which model is pre-trained
         input_location: Input location in a RUN Id
         output_location: Location for saving the model
    """
    experiment = Experiment(workspace=ws, name=experiment_name)
    # Download the model on which evaluation needs to be done
    run = Run(experiment, run_id=run_id)
    if input_location.endswith(".h5"):
        run.download_file(input_location, output_location)
    elif input_location.endswith(".ckpt"):
        run.download_files(prefix=input_location,
                           output_directory=output_location)
    else:
        raise NameError(f"{input_location}'s path extension not supported")
    print("Successfully downloaded model")
Example #7
def download_run_output_file(blob_path: Path, destination: Path, run: Run) -> Path:
    """
    Downloads a single file from the run's default output directory: DEFAULT_AML_UPLOAD_DIR ("outputs").
    For example, if blob_path = "foo/bar.csv", then the run result file "outputs/foo/bar.csv" will be downloaded
    to <destination>/bar.csv (the directory will be stripped off).
    :param blob_path: The name of the file to download.
    :param destination: Local path to save the downloaded blob to.
    :param run: The AzureML run to download the file from.
    :return: Destination path of the downloaded file.
    """
    blobs_prefix = str((fixed_paths.DEFAULT_AML_UPLOAD_DIR / blob_path).as_posix())
    destination = destination / blob_path.name
    logging.info(
        f"Downloading single file from run {run.id}: {blobs_prefix} -> {str(destination)}"
    )
    try:
        run.download_file(blobs_prefix, str(destination), _validate_checksum=True)
    except Exception as ex:
        raise ValueError(
            f"Unable to download file '{blobs_prefix}' from run {run.id}"
        ) from ex
    return destination
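A possible call to the function above, reusing the docstring's own "foo/bar.csv" example; the experiment and run names are hypothetical, and fixed_paths comes from the surrounding codebase:

from pathlib import Path
from azureml.core import Experiment, Run, Workspace

ws = Workspace.from_config()
run = Run(Experiment(ws, "my-experiment"), "my-run-id")  # hypothetical names
local_csv = download_run_output_file(Path("foo/bar.csv"), Path("/tmp"), run)
print(local_csv)  # /tmp/bar.csv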
Example #8
    sys.exit(0)

run_id = config["run_id"]
experiment_name = config["experiment_name"]
exp = Experiment(workspace=ws, name=experiment_name)

run = Run(experiment=exp, run_id=run_id)
print(run.get_file_names())
print("Run ID for last run: {}".format(run_id))
model_local_dir = "model"
os.makedirs(model_local_dir, exist_ok=True)

# Download Model to Project root directory
model_name = "arima_model.pkl"
run.download_file(name="./outputs/" + model_name,
                  output_file_path="./model/" + model_name)
print("Downloaded model {} to Project root directory".format(model_name))
os.chdir("./model")
model = Model.register(
    model_path=model_name,  # this points to a local file
    model_name=model_name,  # this is the name the model is registered as
    tags={
        "area": "robberies",
        "type": "forecasting",
        "run_id": run_id
    },
    description="Time series forecasting model for Adventure Works dataset",
    workspace=ws,
)
os.chdir("..")
print("Model registered: {} \nModel Description: {} \nModel Version: {}".format(
    model.name, model.description, model.version))
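Example #9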
minimum_rmse_runid = None
minimum_rmse = None

for run in experiment.get_runs():
    run_metrics = run.get_metrics()
    run_details = run.get_details()
    # each logged metric becomes a key in this returned dict
    run_rmse = run_metrics["rmse"]
    run_id = run_details["runId"]

    if minimum_rmse is None or run_rmse < minimum_rmse:
        minimum_rmse = run_rmse
        minimum_rmse_runid = run_id

print("Best run_id: " + minimum_rmse_runid)
print("Best run_id rmse: " + str(minimum_rmse))

from azureml.core import Run
best_run = Run(experiment=experiment, run_id=minimum_rmse_runid)
print(best_run.get_file_names())

best_run.download_file(name="model_alpha_0.1.pkl")

model = best_run.register_model(model_name='diabetes',
                                model_path='model_alpha_0.1.pkl')
print(model.name, model.id, model.version, sep='\t')
Example #10
minimum_rmse_runid = None
minimum_rmse = None

for run in experiment.get_runs():
    run_metrics = run.get_metrics()
    run_details = run.get_details()
    # each logged metric becomes a key in this returned dict
    run_rmse = run_metrics["rmse"]
    run_id = run_details["runId"]

    if minimum_rmse is None or run_rmse < minimum_rmse:
        minimum_rmse = run_rmse
        minimum_rmse_runid = run_id

# Get a reference to the best run
from azureml.core import Run
best_run = Run(experiment=experiment, run_id=minimum_rmse_runid)
print(best_run.get_file_names())

# Download the model
model_file_name = best_run.get_file_names()[0]
best_run.download_file(name=model_file_name)

# Load the model in Python
import joblib
clf = joblib.load(model_file_name)

print("The process has finished successfully!")
# Use the classifier on new data (for reference)
# y_predecido = clf.predict(X)
# Compare X with y and compute the rmse
# rmse = math.sqrt(mean_squared_error(y_true=y_realdelosdatos, y_pred=y_predecido))
Example #11
import joblib

from azureml.automl.core.shared.constants import MODEL_PATH
from azureml.core import Dataset, Experiment, Run

train_experiment_name = '<<train_experiment_name>>'
train_run_id = '<<train_run_id>>'
target_column_name = '<<target_column_name>>'
test_dataset_name = '<<test_dataset_name>>'

run = Run.get_context()
ws = run.experiment.workspace

# Get the AutoML run object from the experiment name and the workspace
train_experiment = Experiment(ws, train_experiment_name)
automl_run = Run(experiment=train_experiment, run_id=train_run_id)

# Download the trained model from the artifact store
automl_run.download_file(name=MODEL_PATH, output_file_path='model.pkl')

# get the input dataset by name
test_dataset = Dataset.get_by_name(ws, name=test_dataset_name)

X_test_df = test_dataset.drop_columns(
    columns=[target_column_name]).to_pandas_dataframe().reset_index(drop=True)
y_test_df = test_dataset.with_timestamp_columns(None).keep_columns(
    columns=[target_column_name]).to_pandas_dataframe()

fitted_model = joblib.load('model.pkl')

y_pred, X_trans = fitted_model.rolling_evaluation(X_test_df, y_test_df.values)

# Add predictions, actuals, and horizon relative to rolling origin to the test feature data
assign_dict = {
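Example #12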
print("run_id:", run_id)
print("experiment name:", experiment_name)
print("ws:", ws)

exp = Experiment(workspace=ws, name=experiment_name)

run = Run(experiment=exp, run_id=run_id)
print(run.get_file_names())
print('Run ID for last run: {}'.format(run_id))
model_local_dir = "model"
os.makedirs(model_local_dir, exist_ok=True)

# Download Model to Project root directory
model_name = 'model.pkl'
run.download_file(name='./outputs/' + model_name,
                  output_file_path='./model/' + model_name)
print('Downloaded model {} to Project root directory'.format(model_name))
os.chdir('./model')
model = Model.register(
    model_path=model_name,  # this points to a local file
    model_name=model_name,  # this is the name the model is registered as
    tags={
        'area': "predictive maintenance",
        'type': "automl",
        'run_id': run_id
    },
    description="Model for predictive maintenance dataset",
    workspace=ws)
os.chdir('..')
print(
    'Model registered: {} \nModel Description: {} \nModel Version: {}'.format(
        model.name, model.description, model.version))
Example #13
    sys.exit(0)

run_id = config["run_id"]
experiment_name = config["experiment_name"]
exp = Experiment(workspace=ws, name=experiment_name)

run = Run(experiment=exp, run_id=run_id)
print(run.get_file_names())
print('Run ID for last run: {}'.format(run_id))
model_local_dir="./model"
os.makedirs(model_local_dir,exist_ok=True)

# Download Model to Project root directory
model_name= 'sklearn_regression_model.pkl'
run.download_file(name = './outputs/'+model_name, 
                       output_file_path = model_local_dir+model_name)
print('Downloaded model {} to Project root directory'.format(model_name))

model = Model.register(model_path=model_local_path,  # this points to a local file
                       model_name=model_name,  # this is the name the model is registered as
                       tags={'area': "diabetes", 'type': "regression", 'run_id': run_id},
                       description="Regression model for diabetes dataset",
                       workspace=ws)

print('Model registered: {} \nModel Description: {} \nModel Version: {}'.format(model.name, model.description, model.version))

# Remove the evaluate.json as we no longer need it
# os.remove("aml_config/evaluate.json")

# Writing the registered model details to /aml_config/model.json
model_json = {}
Example #14
cli_auth = AzureCliAuthentication()

# Get workspace
ws = Workspace.from_config(auth=cli_auth)

# Get the latest evaluation result
try:
    with open("run_id.json") as f:
        config = json.load(f)
    if not config["run_id"]:
        raise Exception(
            "No new model to register as production model perform better")
except:
    print("No new model to register as production model perform better")
    # raise Exception('No new model to register as production model perform better')
    sys.exit(0)

run_id = config["run_id"]
experiment_name = config["experiment_name"]
exp = Experiment(workspace=ws, name=experiment_name)

run = Run(experiment=exp, run_id=run_id)

os.makedirs('../mnist-tf/ckpt', exist_ok=True)

for f in run.get_file_names():
    if f.startswith('outputs/model'):
        output_file_path = os.path.join('../mnist-tf/ckpt/', f.split('/')[-1])
        print('Downloading from {} to {} ...'.format(f, output_file_path))
        run.download_file(name=f, output_file_path=output_file_path)
Example #15
filename = "finalmodel2.pkl"

joblib.dump(value=model, filename=filename)
run.upload_file(name=model_name, path_or_stream=filename)
run.complete()


from azureml.core import Run

runid='062ae84f-1f94-43ed-bcf5-377a93b14006'
run = Run(experiment=experiment, run_id=runid)
print(run.get_file_names())


# Download the model file from the run (adjust the names below as needed)
run.download_file(name="finalmodel2.pkl")

model = run.register_model(model_name='finalmodel2.pkl',
                           model_path='finalmodel2.pkl')
print(model.name, model.id, model.version, sep='\t')

aml_location = 'outputs/{0}'.format(saved_model_name)

user_folder = '/dbfs/tmp/{0}/'.format(user_name)

# Create objects with folder names
temporary_zip_path = os.path.join(
    user_folder, 'trained_model_zip')  # Folder to zipped model file
temporary_model_path = os.path.join(
    user_folder, 'trained_model')  # Folder for unzipped model files

# Make the temporary directories
os.makedirs(temporary_zip_path, exist_ok=True)
os.makedirs(temporary_model_path, exist_ok=True)

# Download the zip file from AML service to the 'trained_model_zip' folder
best_run.download_file(aml_location, temporary_zip_path)

# Unpack archive to the 'trained_model' folder
shutil.unpack_archive(os.path.join(temporary_zip_path, saved_model_name),
                      temporary_model_path)

# COMMAND ----------

# MAGIC %md
# MAGIC ### Register the model to AML service
# MAGIC
# MAGIC In order to deploy a model in AML, we 'register' the model with the service. This gives us model traceability in the service.
# MAGIC
# MAGIC There are 3 different pieces to deploying an AML model.
# MAGIC 1. `Model` objects store the model files for scoring and metadata about the model
# MAGIC 1. `Image` objects refer to Docker images that were built with a registered `model`