Example 1
import json
import os
import sys

from azureml.core import Experiment, Model, Run


def downloadPickles(ws, modelName, outputPath="./pickles", modelVer=None):
    if modelVer == 'best':
        bestModel = None
        maxAcc = -1
        for model in Model.list(ws, modelName, ["accuracy"]):
            modelAcc = float(model.tags["accuracy"])
            if modelAcc > maxAcc:
                bestModel = model
                maxAcc = modelAcc

        print(f"### Best model with highest accuracy of {maxAcc} found")

        if not bestModel:
            model = Model(ws, modelName)
            print("### WARNING! No best model found, using latest instead")
    elif modelVer is not None:
        model = Model(ws, modelName, version=modelVer)
    else:
        model = Model(ws, modelName)

    print(f"### Using model version {model.version}")
    # Echoing this magic string sets an output variable in an Azure DevOps pipeline.
    # Set AZML_MODEL_VER for use by subsequent steps
    print(f"##vso[task.setvariable variable=AZML_MODEL_VER]{model.version}")

    # These special tags let us get back to the run that created the model
    try:
        runId = model.tags['aml-runid']
        experimentName = model.tags['aml-experiment']
    except KeyError:
        print(
            "### ERROR! Model missing `aml-runid` and `aml-experiment` tags, can't continue!"
        )
        sys.exit(1)

    exp = Experiment(workspace=ws, name=experimentName)
    run = Run(exp, runId)
    if run.status != "Completed":
        print(f'### ERROR! Run {runId} did not complete!')
        return

    print(f'### Will download from run {runId} in {experimentName}')

    # Now we can list all the files created with the run and grab all the .pkl files
    os.makedirs(outputPath, exist_ok=True)
    for f in run.get_file_names():
        if f.endswith('.pkl'):
            output_file_path = os.path.join(outputPath, f.split('/')[-1])
            print(f'### Downloading from {f} to {output_file_path} ...')
            run.download_file(name=f, output_file_path=output_file_path)

    # Add some extra metadata; it's handy to have later
    metadata = {
        'name': model.name,
        'version': model.version,
        'tags': model.tags
    }
    with open(f"{outputPath}/metadata.json", 'w') as metadata_file:
        print(f"### Storing metadata in {outputPath}/metadata.json")
        json.dump(metadata, metadata_file)
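A minimal usage sketch for the helper above (the workspace config file and the model name "my-model" are assumptions, not part of the example):

from azureml.core import Workspace

ws = Workspace.from_config()
downloadPickles(ws, "my-model", outputPath="./pickles", modelVer="best")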
Example 2
import logging
from pathlib import Path

from azureml.core import Run

# `fixed_paths` is a project-local module; it defines DEFAULT_AML_UPLOAD_DIR ("outputs").


def download_run_outputs_by_prefix(
        blobs_prefix: Path, destination: Path, run: Run
) -> None:
    """
    Download all the blobs from the run's default output directory: DEFAULT_AML_UPLOAD_DIR ("outputs") that
    have a given prefix (folder structure). When saving, the prefix string will be stripped off. For example,
    if blobs_prefix = "foo", and the run has a file "outputs/foo/bar.csv", it will be downloaded to destination/bar.csv.
    If there is in addition a file "foo.txt", that file will be skipped.
    :param blobs_prefix: The prefix for all files in "outputs" that should be downloaded.
    :param destination: Local path to save the downloaded blobs to.
    :param run: The AzureML run to download the files from.
    """
    prefix_str = str((fixed_paths.DEFAULT_AML_UPLOAD_DIR / blobs_prefix).as_posix())
    logging.info(
        f"Downloading multiple files from run {run.id}: {prefix_str} -> {str(destination)}"
    )
    # There is a download_files function, but it can time out when downloading several
    # large checkpoint files (120 s timeout for all files).
    for file in run.get_file_names():
        if file.startswith(prefix_str):
            target_path = file[len(prefix_str):]
            if target_path.startswith("/"):
                target_path = target_path[1:]
                logging.info(f"Downloading {file}")
                run.download_file(
                    file, str(destination / target_path), _validate_checksum=True
                )
            else:
                logging.warning(
                    f"Skipping file {file}, because the desired prefix {prefix_str} is not aligned with "
                    f"the folder structure"
                )
Example 3
import os
import re

from azureml.core import Run


def register_model(run_id, experiment):
    best_run = Run(experiment=experiment, run_id=run_id)
    files = best_run.get_file_names()
    r = re.compile('outputs.*')
    model_paths = [f for f in files if r.match(f)]
    path, model_name = os.path.split(model_paths[0])

    best_run.register_model(model_name=model_name,
                            model_path=model_paths[0])
    return path
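Usage might look like this (the workspace ws, the experiment name, and the run id are placeholders):

from azureml.core import Experiment

exp = Experiment(workspace=ws, name="my-experiment")
model_folder = register_model("my-run-id", exp)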
Example 4

import tempfile
from pathlib import Path
from typing import Optional

from azureml._restclient.constants import RunStatus
from azureml.core import Run


def get_job_log_file(run: Run, index: Optional[int] = None) -> str:
    """
    Reads the job log file (70_driver_log.txt or std_log.txt) of the given job. If an index is
    provided, get the matching file from a multi-node job.
    :param run: The completed AzureML run to read the log file from.
    :param index: Optional node index for multi-node jobs.
    :return: The contents of the job log file.
    """
    assert run.status == RunStatus.COMPLETED
    files = run.get_file_names()
    suffix = (f"_{index}" if index is not None else "") + ".txt"
    file1 = "azureml-logs/70_driver_log" + suffix
    file2 = "user_logs/std_log" + suffix
    if file1 in files:
        file = file1
    elif file2 in files:
        file = file2
    else:
        raise ValueError(f"No log file ({file1} or {file2}) present in the run. Existing files: {files}")
    downloaded = tempfile.NamedTemporaryFile().name
    run.download_file(name=file, output_file_path=downloaded)
    return Path(downloaded).read_text()
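Hypothetical usage: fetch and inspect the rank-0 driver log of a completed multi-node run.

log_text = get_job_log_file(run, index=0)
print(log_text[:500])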
Example 5
    model_name = "model_alpha_" + str(alpha) + ".pkl"
    filename = "outputs/" + model_name

    joblib.dump(value=model, filename=filename)
    run.upload_file(name=model_name, path_or_stream=filename)
    run.complete()

minimum_rmse_runid = None
minimum_rmse = None

for run in experiment.get_runs():
    run_metrics = run.get_metrics()
    run_details = run.get_details()
    # each logged metric becomes a key in this returned dict
    run_rmse = run_metrics["rmse"]
    run_id = run_details["runId"]

    if minimum_rmse is None or run_rmse < minimum_rmse:
        minimum_rmse = run_rmse
        minimum_rmse_runid = run_id

print("Best run_id: " + minimum_rmse_runid)
print("Best run_id rmse: " + str(minimum_rmse))

best_run = Run(experiment=experiment, run_id=minimum_rmse_runid)
print(best_run.get_file_names())
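A possible follow-up, assuming the best run uploaded at least one .pkl artifact (the model name "best-rmse-model" is a placeholder): register the model straight from the winning run so it can be deployed later.

pkl_files = [f for f in best_run.get_file_names() if f.endswith(".pkl")]
best_run.register_model(model_name="best-rmse-model", model_path=pkl_files[0])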
Example 6
    run_details = run.get_details()
    # each logged metric becomes a key in this returned dict
    run_rmse = run_metrics["rmse"]
    run_id = run_details["runId"]

    if minimum_rmse is None or run_rmse < minimum_rmse:
        minimum_rmse = run_rmse
        minimum_rmse_runid = run_id

# Get a reference to the run that produced the best model
from azureml.core import Run
best_run = Run(experiment=experiment, run_id=minimum_rmse_runid)
print(best_run.get_file_names())

# Download the model
model_file_name = best_run.get_file_names()[0]
best_run.download_file(name=model_file_name)

# Load the model in Python
import joblib
clf = joblib.load(model_file_name)

print("El proceso ha finalizado correctamente!!")
#Uso del clasificador para nuevos datos (referencia)
#y_predecido = clf.predict(X)
#Comparar x con y y calcular rmse
#rmse = math.sqrt(mean_squared_error(y_true=y_realdelosdatos, y_pred=y_predecido))
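A runnable version of the commented sketch above (X_new and y_new are hypothetical stand-ins for held-out evaluation data):

import math
from sklearn.metrics import mean_squared_error

y_pred = clf.predict(X_new)
rmse = math.sqrt(mean_squared_error(y_true=y_new, y_pred=y_pred))
print(f"RMSE on new data: {rmse:.4f}")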
Example 7
import json
import os
import sys

from azureml.core import Experiment, Run, Workspace
from azureml.core.authentication import AzureCliAuthentication
cli_auth = AzureCliAuthentication()

# Get workspace
ws = Workspace.from_config(auth=cli_auth)

# Get the latest evaluation result
try:
    with open("run_id.json") as f:
        config = json.load(f)
    if not config["run_id"]:
        raise Exception(
            "No new model to register as production model perform better")
except:
    print("No new model to register as production model perform better")
    # raise Exception('No new model to register as production model perform better')
    sys.exit(0)

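# run_id.json is assumed (from the keys read below) to look roughly like:
# {"run_id": "<azureml-run-id>", "experiment_name": "<experiment-name>"}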
run_id = config["run_id"]
experiment_name = config["experiment_name"]
exp = Experiment(workspace=ws, name=experiment_name)

run = Run(experiment=exp, run_id=run_id)

os.makedirs('../mnist-tf/ckpt', exist_ok=True)

for f in run.get_file_names():
    if f.startswith('outputs/model'):
        output_file_path = os.path.join('../mnist-tf/ckpt/', f.split('/')[-1])
        print('Downloading from {} to {} ...'.format(f, output_file_path))
        run.download_file(name=f, output_file_path=output_file_path)
        run_id = run.id  # use the public attribute rather than the private _run_id
        last = False

print()
print(f"Last Run ID: {run_id}")

# Register the last run_id as an environment variable so it can be used in %sh commands
os.environ['RUN_ID_LAST'] = run_id
# print(os.getenv('RUN_ID_LAST'))

# COMMAND ----------

import mlflow.sklearn
# run_id = "434c0e56-307a-4e40-a170-343530c77386"
run = Run(experiment, run_id=run_id)
run.get_file_names()

# COMMAND ----------

run.download_files(output_directory=f"/tmp/{run_id}")
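# (To fetch only a subset, download_files also accepts a prefix filter, e.g.
# run.download_files(prefix="outputs/", output_directory=f"/tmp/{run_id}").)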

# COMMAND ----------

# MAGIC %sh
# MAGIC ls -l /tmp/$RUN_ID_LAST/random_forest_model

# COMMAND ----------

model_uri_aml = f"/tmp/{run_id}/random_forest_model"
model = mlflow.sklearn.load_model(model_uri_aml)

# COMMAND ----------

# MAGIC %md
# MAGIC Each time we ran the models, we stored a zip file with the trained model in AML. We can now retrieve the trained model of the particular run that we want to deploy. We'll copy the relevant `id` from above and retrieve the Run object.

# COMMAND ----------

best_run_id = '6d670807-6477-4ea6-a98b-84069c888346'
best_run = Run(experiment, best_run_id)

# COMMAND ----------

# MAGIC %md
# MAGIC #### 4. Download the model locally and unzip

# COMMAND ----------

# First, let's list the filenames
best_run.get_file_names()

# COMMAND ----------

# In this case, we're looking for the 'outputs/{MODEL_NAME}Model.zip' file
import shutil
import os

saved_model_name = 'model.zip'  # Saved model filename in AML Service - leave out the 'outputs/' folder at the beginning
aml_location = 'outputs/{0}'.format(saved_model_name)

user_folder = '/dbfs/tmp/{0}/'.format(user_name)

# Create objects with folder names
temporary_zip_path = os.path.join(
    user_folder, 'trained_model_zip')  # Folder for the zipped model file