def fetch_run_for_experiment(experiment_to_recover: Experiment,
                             run_id_or_number: str) -> Run:
    """
    :param experiment_to_recover: an experiment
    :param run_id_or_number: a string representing the Run ID or Run Number of one of the runs of the experiment
    :return: the run matching run_id_or_number; raises an exception if not found
    """
    available_runs = list(experiment_to_recover.get_runs())
    try:
        run_number = int(run_id_or_number)
        for run in available_runs:
            if run.number == run_number:
                return run
    except ValueError:
        # will be raised if run_id_or_number does not represent a number
        pass
    try:
        return get_run(experiment=experiment_to_recover,
                       run_id=run_id_or_number,
                       rehydrate=True)
    except Exception:
        available_ids = ", ".join([run.id for run in available_runs])
        raise Exception(
            "Run {} not found for experiment: {}. Available runs are: {}".format(
                run_id_or_number, experiment_to_recover.name, available_ids))
Example #2
def cancel_running_and_queued_jobs() -> None:
    environ = os.environ
    print("Authenticating")
    auth = ServicePrincipalAuthentication(
        tenant_id='72f988bf-86f1-41af-91ab-2d7cd011db47',
        service_principal_id=environ["APPLICATION_ID"],
        service_principal_password=environ["APPLICATION_KEY"])
    print("Getting AML workspace")
    workspace = Workspace.get(name="InnerEye-DeepLearning",
                              auth=auth,
                              subscription_id=environ["SUBSCRIPTION_ID"],
                              resource_group="InnerEye-DeepLearning")
    branch = environ["BRANCH"]
    print(f"Branch: {branch}")
    if not branch.startswith("refs/pull/"):
        print("This branch is not a PR branch, hence not cancelling anything.")
        exit(0)
    experiment_name = branch.replace("/", "_")
    print(f"Experiment: {experiment_name}")
    experiment = Experiment(workspace, name=experiment_name)
    print(f"Retrieved experiment {experiment.name}")
    for run in experiment.get_runs(include_children=True, properties={}):
        assert isinstance(run, Run)
        status_suffix = f"'{run.status}' run {run.id} ({run.display_name})"
        if run.status in (RunStatus.COMPLETED, RunStatus.FAILED,
                          RunStatus.FINALIZING, RunStatus.CANCELED,
                          RunStatus.CANCEL_REQUESTED):
            print(f"Skipping {status_suffix}")
        else:
            print(f"Cancelling {status_suffix}")
            run.cancel()
def cancel_all_runs(exp_name, run_id=None):

    ws = get_workspace()

    exp = Experiment(ws, exp_name)

    if run_id:
        r = get_run(experiment=exp, run_id=run_id, rehydrate=True)

        # check the returned run type and status
        print(type(r), r.get_status())

        # you can cancel a run if it hasn't completed or failed
        if r.get_status() not in ['Completed', 'Failed']:
            r.cancel()
    else:
        # if you don't know the run id, you can list all runs under an experiment
        for r in exp.get_runs():
            run = get_run(experiment=exp, run_id=r.id, rehydrate=True)
            for c in run.get_children():
                for gc in c.get_children():
                    if gc.get_status() == "Running" or gc.get_status(
                    ) == "Queued":
                        print(gc.id, gc.get_status())
                        gc.cancel()
                if c.get_status() == "Running" or c.get_status() == "Queued":
                    print(c.id, c.get_status())
                    c.cancel()
            if r.get_status() == "Running" or r.get_status() == "Queued":
                print(r.id, r.get_status())
                r.cancel()
Example #4
def toAzure():
    import azureml.core
    from azureml.core import Workspace
    from azureml.core import Experiment
    import shutil, os, glob
    from azureml.core.authentication import InteractiveLoginAuthentication

    with open("outputs/_experiment-name_.txt", "r", encoding="utf-8") as file:
        experiment_name = file.readline().strip()  # strip a possible trailing newline

    try:
        ws = Workspace.get(
            name="sparknlp",
            subscription_id="bc5674c1-2f09-4eff-8497-b97f5466158f",
            resource_group="datascientists")

    except Exception:

        interactive_auth = InteractiveLoginAuthentication(
            tenant_id="55574e46-daf5-45bd-8659-de00e36fb97c", force=True)
        ws = Workspace.get(
            name="sparknlp",
            subscription_id="bc5674c1-2f09-4eff-8497-b97f5466158f",
            resource_group="datascientists",
            auth=interactive_auth)

    experiment = Experiment(workspace=ws, name=experiment_name)

    notebooks = glob.glob("*.ipynb")
    for nb in notebooks:
        shutil.copy(nb, "outputs/_notebooks/CopyOf_" + nb)

    run = experiment.start_logging()
    print(
        f"Uploading the content of your '{experiment_name}' to Azure Cloud...")

    run.complete()
    runs = experiment.get_runs()

    print(f"Your {len(list(runs))}. run was uploaded.")
    print(
        """You can view your logs on Microsoft Azure Machine Learning Studio. To view the 
details of your last run, click the link below:""")

    runs = experiment.get_runs()
    return list(runs)[0]
Example #5
 def get_run_and_download_pytest(branch: str,
                                 number: int) -> Optional[Path]:
     experiment = Experiment(workspace,
                             name=to_azure_friendly_string(branch))
     runs = [run for run in experiment.get_runs() if run.number == number]
     if len(runs) != 1:
         raise ValueError(
             f"Expected exactly 1 run with number {number} in experiment "
             f"{experiment.name}, but found {len(runs)}"
         )
     return download_pytest_result(runs[0], output_dir)
def cancel_runs_in_experiment(ws, experiment):
    failed_experiment = Experiment(ws, experiment)
    all_runs = failed_experiment.get_runs()
    for idx, run in enumerate(all_runs):
        try:
            if run.status == 'Running':
                run = Run(failed_experiment, run.id)
                print('Canceling run: ', run)
                run.cancel()
        except Exception as e:
            print('Canceling run failed due to ', e)
Example #7
def show_git_versions(ctx):
    """
    List all experiment runs and their git version
    """

    ws = get_workspace(config)

    exp = Experiment(ws, config["experiment_name"])

    versions = [(run.id, run.get_properties()["azureml.git.commit"])
                for run in exp.get_runs()]

    print(tabulate(versions, headers=["Run ID", "Git Version"]))
Example #8
def fetch_run_for_experiment(experiment_to_recover: Experiment, run_id: str) -> Run:
    """
    :param experiment_to_recover: an experiment
    :param run_id: a string representing the Run ID of one of the runs of the experiment
    :return: the run matching run_id; raises an exception if not found
    """
    try:
        return get_run(experiment=experiment_to_recover, run_id=run_id, rehydrate=True)
    except Exception:
        available_runs = experiment_to_recover.get_runs()
        available_ids = ", ".join([run.id for run in available_runs])
        raise Exception(
            "Run {} not found for experiment: {}. Available runs are: {}".format(
                run_id, experiment_to_recover.name, available_ids))
Example #9
class ExperimentStorage:
    def __init__(self, workspace: Workspace, experiment_id: str):
        self.experiment_id = experiment_id
        self.experiment = Experiment(workspace, experiment_id)

    def download_output(self, experiment_run=None):
        if experiment_run is None:
            experiment_run: Run = next(self.experiment.get_runs())
        model_path = os.path.join(TRAINED_MODELS_PATH, self.experiment_id)
        logger.info(f"Downloading results in {model_path}")
        os.makedirs(model_path, exist_ok=True)
        experiment_run.download_files("outputs/",
                                      model_path,
                                      append_prefix=False)
Example #10
def fetch_runs(experiment: Experiment, filters: List[str]) -> List[Run]:
    """
    Fetch the runs in an experiment.
    :param experiment: the experiment to fetch runs from
    :param filters: a list of run statuses to include. Must be a subset of [Running, Completed, Failed, Canceled].
    :return: the list of runs in the experiment
    """
    exp_runs = list(experiment.get_runs())

    if len(filters) != 0:
        if set(filters).issubset({"Running", "Completed", "Failed", "Canceled"}):
            exp_runs = [run for run in exp_runs if run.status in filters]

    return exp_runs
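A short, hypothetical call of fetch_runs, assuming a configured workspace; the experiment name and status filters below are illustrative only.

from azureml.core import Experiment, Workspace

ws = Workspace.from_config()                          # assumes a local azureml config.json
exp = Experiment(workspace=ws, name="my-experiment")  # placeholder experiment name
# Keep only runs that are still running or already completed.
for run in fetch_runs(exp, ["Running", "Completed"]):
    print(run.id, run.status)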
def test_registered_model_metric(get_ws_config):
    try:
        with open("aml_config/run_id.json") as f:
            config = json.load(f)
            new_model_run_id = config["run_id"]
            if new_model_run_id != "":
                experiment_name = config["experiment_name"]
                exp = Experiment(workspace=ws, name=experiment_name)
                model_list = Model.list(
                    ws, tags={"area": "predictive maintenance"})
                production_model = model_list[0]
                run_list = exp.get_runs()
                new_model_run = Run(exp, run_id=new_model_run_id)
                new_model_metric = new_model_run.get_metrics().get('accuracy')
                assert new_model_metric > 0.85, "New model accuracy should be above 85%"
    except FileNotFoundError:
        print("No new model registered to test")
Example #12
 def getOperationStatus(self, operationVerb, operationId, userId,
                        subscriptionId):
     experimentName = subscriptionId
     exp = Experiment(self._workspace, experimentName)
     operationName = self.GetOperationNameByVerb(operationVerb)
     tags = {
         'userId': userId,
         'operationId': operationId,
         'operationName': operationName,
         'subscriptionId': subscriptionId
     }
     runs = exp.get_runs(type='azureml.PipelineRun', tags=tags)
     try:
         run = next(runs)
         result = {'operationId': operationId, 'status': run.status}
         return result
     except StopIteration:
         raise LunaUserException(
             HTTPStatus.NOT_FOUND,
             'Operation "{}" with id {} does not exist.'.format(
                 operationVerb, operationId))
Example #13
 def listAllOperations(self, operationVerb, userId, subscriptionId):
     experimentName = subscriptionId
     operationName = self.GetOperationNameByVerb(operationVerb)
     exp = Experiment(self._workspace, experimentName)
     tags = {
         'userId': userId,
         'operationName': operationName,
         'subscriptionId': subscriptionId
     }
     runs = exp.get_runs(type='azureml.PipelineRun', tags=tags)
     resultList = []
     while True:
         try:
             run = next(runs)
             result = {
                 'operationId': run.tags["operationId"],
                 'status': run.status
             }
             resultList.append(result)
         except StopIteration:
             break
     return resultList
Example #14
    def get_run(self, ws_name, run_name):
        if not "." in run_name:
            errors.general_error(
                "Azure ML run name must be of the form: exper.runname")

        ws = self.get_aml_ws(ws_name)
        console.diag("after get_aml_ws() call")

        exper_name, run_part = run_name.split(".")
        experiment = Experiment(ws, name=exper_name)
        runs = experiment.get_runs(properties={"xt_run_name": run_name})
        console.diag("after experiment.get_runs() call")

        runs = list(runs)
        console.diag("after list(runs), len={}".format(len(runs)))

        # run_number = int(run_part[3:])
        # target_run = None

        #runs = [run for run in runs if run.number == run_number]
        target_run = runs[0] if len(runs) else None

        return target_run
Example #15
 def listAllOperationOutputs(self, operationNoun, userId, subscriptionId):
     operationName = self.GetOperationNameByNoun(operationNoun)
     experimentName = subscriptionId
     exp = Experiment(self._workspace, experimentName)
     tags = {
         'userId': userId,
         'operationName': operationName,
         'subscriptionId': subscriptionId
     }
     runs = exp.get_runs(type='azureml.PipelineRun', tags=tags)
     results = []
     while True:
         try:
             run = next(runs)
             output, outputType = self.getOperationOutput(
                 operationNoun,
                 run.tags["operationId"],
                 userId,
                 subscriptionId,
                 downloadFiles=False)
             if output:
                 if outputType == "model" or outputType == "endpoint":
                     results.append(output)
                 elif outputType == "json":
                     results.append({
                         "operationId": run.tags["operationId"],
                         "output": result
                     })
                 elif outputType == "file":
                     results.append({
                         "operationId": run.tags["operationId"],
                         "outputType": "file"
                     })
         except StopIteration:
             break
     return results
Example #16
def build_results_dataframe_from_azml():

    from azureml.core import Workspace, Experiment

    workspace = Workspace.get(sharedconfig.workspace_name)

    experiment = Experiment(workspace, sharedconfig.experiment_name)

    runs = [run for run in experiment.get_runs() if run.status == "Completed"]

    results = []
    for run in tqdm(runs):
        tags = {
            k: v
            for k, v in run.get_tags().items() if not k.startswith("_")
        }
        tags["num_nodes"] = int(tags["num_nodes"])
        tags["iter"] = int(tags["iter"])
        tags["ims_per_gpu"] = int(tags["ims_per_gpu"])
        tags["fps"], tags["dfps"], _ = get_driver0_fps(run)

        results.append(tags)

    return pd.DataFrame(results)
Example #17
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

# Register a model from a successful run in a given experiment
from azureml.core.model import Model
from azureml.core import Experiment
from azureml.core import Run
from workspace import get_workspace

workspace = get_workspace()

# Select the experiment
experiment = Experiment(workspace=workspace, name='test_experiment_1')

# Get all the runs for the experiment. Returns a generator, which yields the
# runs in reverse chronological order - i.e. the latest run first. Here, we
# simply select the latest run
runs = experiment.get_runs()
run = next(runs)

# Register the model from the run with a name, some tags and some properties
run.register_model(model_name='model_from_test_experiment_1',
                   tags={'tag1': 'v1'},
                   properties={'property1': 'p1'},
                   model_path='outputs/churn-model-2.pkl')
Example #18
args = parser.parse_args()
file_prefix = args.file_prefix
granularity = args.granularity

# Setup AML
subscription_id = os.environ['AML_SUBSCRIPTION']
resource_group = os.environ['AML_RESOURCE_GROUP']
workspace_name = os.environ['AML_WORKSPACE']

ws = Workspace(subscription_id, resource_group, workspace_name)
experiment_name = 'forecast_automl_' + file_prefix + '_' + granularity

# Register the model from last best run
print('registering the latest model for {0}'.format(experiment_name))
exp = Experiment(workspace=ws, name=experiment_name)
run_generator = exp.get_runs()
run_latest = next(run_generator)
if run_latest.get_status() != 'Completed' or run_latest.type != 'automl':
    raise Exception('the last run is not completed or is not automl')

run_id = run_latest.get_details()['runId']
automl_run = AutoMLRun(exp, run_id)
best_run, fitted_model = automl_run.get_output()
model_name = experiment_name.replace('-', '').replace('_', '').lower()
# Register a model
model = best_run.register_model(model_name=model_name,
                                model_path='outputs/model.pkl')
# Get existing model
#model=Model(ws, model_name)

# Figure out the run's dependencies
Example #19
                                                and args.local_dir):
    print(
        'Must specify both remote_dir and local_dir to sync files from Experiment'
    )
    sys.exit()

# Get the AzureML Workspace the Experiment is running in
ws = Workspace.get(name=args.workspace,
                   subscription_id=args.subscription,
                   resource_group=args.resource_group)

# Find the Experiment
experiment = Experiment(workspace=ws, name=args.experiment)

# Find the Run
runs = [r for r in experiment.get_runs()]

if len(runs) == 0:
    print("No runs found in Experiment '{}'".format(args.experiment))
    sys.exit()

run = runs[0]
if args.run is not None:
    try:
        run = next(r for r in runs if r.id == args.run)
    except StopIteration:
        print("Run id '{}' not found in Experiment '{}'".format(
            args.run, args.experiment))
        sys.exit()

# Optionally start synchronizing files from Run
Example #20
def get_run_by_tags(tags):
    exp = Experiment(ws, experimentName)
    runs = exp.get_runs(type='azureml.PipelineRun', tags=tags)
    run = next(runs)
    print(run.status)
    return run
Example #21
from azure.common.client_factory import get_client_from_cli_profile
from azure.mgmt.resource import SubscriptionClient
from azureml.core import Experiment
from azureml.core import Workspace
from azureml.core.authentication import AzureCliAuthentication
from azureml.tensorboard import Tensorboard

cli_auth = AzureCliAuthentication()
subscription_client = get_client_from_cli_profile(SubscriptionClient)
subscription_id = next(
    subscription_client.subscriptions.list()).subscription_id

ws = Workspace(
    subscription_id=subscription_id,
    resource_group="ds_envs_RG",
    workspace_name="ds_envs_ws",
    auth=cli_auth,
)
experiment_name = "my_experiment"
run_id = "my_experiment_1603471452_ed6739ca"
experiment = Experiment(workspace=ws, name=experiment_name)
run = [i for i in experiment.get_runs() if i.id == run_id][0]
tb = Tensorboard([run])
tb.start(start_browser=True)
input("Press Enter to continue...")
tb.stop()
Example #22
def getMetrics(ws, experiment_name, tags={}):
    experiment = Experiment(workspace=ws, name=experiment_name)
    for run in experiment.get_runs(tags=tags):
        print(run.get_metrics())
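A hedged usage sketch for getMetrics; the experiment name and the tag key/value pair are made up, simply to show how get_runs(tags=...) narrows the listing.

from azureml.core import Workspace

ws = Workspace.from_config()                 # assumes a local azureml config.json
# Print metrics only for runs tagged with stage=dev (hypothetical tag).
getMetrics(ws, "my-experiment", tags={"stage": "dev"})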
Example #23
import argparse

from azureml.core import Experiment, Workspace
parser = argparse.ArgumentParser()
parser.add_argument('experiment')
parser.add_argument('--workspace-config', default="azureml_config.json")

args = parser.parse_args()
print(args)


def stop_run(r):
    status = r.get_status()
    print(f"Stopping {r.type}, {r.id}, {status}")

    if status == 'Running':
        if 'cancel' in dir(r):
            r.cancel()
        else:
            r.complete()

    for c in r.get_children():
        stop_run(c)


ws = Workspace.from_config(path=args.workspace_config)
print('=' * 40)
print(ws)

exp = Experiment(ws, args.experiment)
for run in exp.get_runs():
    stop_run(run)
Example #24
    def getOperationOutput(self,
                           operationNoun,
                           operationId,
                           userId,
                           subscriptionId,
                           downloadFiles=True):
        operationName = self.GetOperationNameByNoun(operationNoun)

        if operationName == 'train':

            tags = [['userId', userId], ['modelId', operationId],
                    ['subscriptionId', subscriptionId]]
            models = Model.list(self._workspace, tags=tags)
            if len(models) == 0:
                return None, None
            model = models[0]
            result = {
                'id': operationId,
                'description': model.description,
                'created_time': model.created_time
            }
            return result, "model"

        if operationName == 'deploy':

            tags = [['userId', userId], ['endpointId', operationId],
                    ['subscriptionId', subscriptionId]]
            endpoints = Webservice.list(self._workspace, tags=tags)
            if len(endpoints) == 0:
                return None, None
            endpoint = endpoints[0]
            primaryKey, secondaryKey = endpoint.get_keys()
            result = {
                'id': operationId,
                'description': endpoint.description,
                'created_time': endpoint.created_time,
                'scoring_uri': endpoint.scoring_uri,
                'primary_key': primaryKey,
                'secondary_key': secondaryKey
            }

            return result, "endpoint"

        tags = {
            'userId': userId,
            'operationId': operationId,
            'operationName': operationName,
            'subscriptionId': subscriptionId
        }

        experimentName = subscriptionId
        exp = Experiment(self._workspace, experimentName)
        runs = exp.get_runs(type='azureml.PipelineRun', tags=tags)
        try:
            run = next(runs)
            child_runs = run.get_children()
            child_run = next(child_runs)
            outputType = self._utils.GetOutputType(operationName)
            if outputType == 'json':
                with tempfile.TemporaryDirectory() as tmp:
                    path = os.path.join(tmp, 'output.json')
                    files = child_run.download_file('/outputs/output.json',
                                                    path)
                    with open(path) as file:
                        return json.load(file), "json"
            elif outputType == 'file':
                if downloadFiles:
                    tmp = tempfile.TemporaryDirectory().name
                    path = os.path.join(tmp, "outputs")
                    zip_file_path = os.path.join(
                        tmp, "output_{}.zip".format(operationId))
                    files = child_run.download_files("/outputs",
                                                     path,
                                                     append_prefix=False)
                    zipf = zipfile.ZipFile(zip_file_path, "w",
                                           zipfile.ZIP_DEFLATED)
                    self.zipdir(path, zipf, "outputs")
                    zipf.close()
                    return zip_file_path, "file"
                else:
                    return "file", "file"
        except StopIteration:
            return None, None
Example #25
    config = json.load(f)

new_model_run_id = config["run_id"]
experiment_name = config["experiment_name"]
exp = Experiment(workspace=ws, name=experiment_name)

try:
    # Get the most recently registered model; we assume that is the model in production.
    # Download it and compare it with the recently trained model by running tests with the same data set.
    model_list = Model.list(ws)
    production_model = next(
        filter(
            lambda x: x.created_time == max(model.created_time
                                            for model in model_list),
            model_list))
    production_model_run_id = production_model.tags.get('run_id')
    run_list = exp.get_runs()
    # Get the run history for both production model and newly trained model and compare mse
    production_model_run = Run(exp, run_id=production_model_run_id)
    new_model_run = Run(exp, run_id=new_model_run_id)

    production_model_metric = production_model_run.get_metrics().get(
        'accuracy')
    new_model_metric = new_model_run.get_metrics().get('accuracy')
    print(
        'Current Production model accuracy: {}, New trained model accuracy: {}'
        .format(production_model_metric, new_model_metric))

    promote_new_model = False
    # For accuracy, higher is better.
    if new_model_metric > production_model_metric:
        promote_new_model = True
        print('New trained model performs better, thus it will be registered')
Example #26
class AzureMLTrainer(trainer.Trainer):
    is_connected: bool = False
    __config_file: str = '.azureml/config.json'
    __workspace: Workspace = None
    __experiment: Experiment = None
    __current_experiment_name: str
    __current_run: Run = None
    __logger: Logger = None
    __vm_size_list: list = None

    def __init__(self, experiment_name: str, aml_workspace: Workspace, aml_run: Run = None):
        '''
        Initializes a new connected Trainer that will persist and log all runs on AzureML workspace
        Args:
            experiment_name (str): The name of the experiment that will be seen on AzureML
            aml_workspace (Workspace): The connected workspace on AzureML
        '''
        self.__workspace = aml_workspace
        self.__logger = logging.getLogger()
        if aml_run is not None:
            self.__current_run = aml_run
            self.__experiment = aml_run.experiment
            self.__current_experiment_name = aml_run.experiment.name
        else:
            self.__current_experiment_name = experiment_name
            self.__experiment = Experiment(workspace=self.__workspace, name=experiment_name)


    @classmethod
    def CreateFromContext(cls):
        '''
        Creates a Trainer, based on the current Run context.  This will only work when used in an Estimator
        Returns: 
            AzureMLTrainer: an instance of AzureMLTrainer allowing the user to work connected.
        '''   
        run = Run.get_context()
        return cls(run.experiment.name, run.experiment.workspace, run)


    def new_run(self, description: str = None, copy_folder: bool = True, metrics: dict = None) -> Run:
        '''
        This will begin a new interactive run on the existing AzureML Experiment.  If a previous run is still active, it will be completed.
        Args:
            description (str): An optional description that will be added to the run metadata
            copy_folder (bool): Indicates if the output folder should be snapshotted and persisted
            metrics (dict): The metrics that should be logged in the run already
        Returns:
            Run: the AzureML Run object that can be used for further access and custom logic
        '''
        if(self.__current_run is not None):
            self.__current_run.complete()
        if(copy_folder):
            self.__current_run = self.__experiment.start_logging()
        else:
            self.__current_run = self.__experiment.start_logging(snapshot_directory = None)

        if(metrics is not None):
            for k, v in metrics.items():
                self.__current_run.log(k, v)

        if(description is not None):
            self.__current_run.log('Description', description)
        
        return self.__current_run

    def add_tuning_result(self, run_index: int, train_score: float, test_score: float, sample_count: int, durations:np.array, parameters: dict, estimator):
        '''
        This adds the results of a cross-validation fold to the child run in a Grid Search
        Args:
            train_score (float): The given score of the training data
            test_score (float): The given score of the test data
            sample_count (int): The number of samples that were part of a fold
            durations (np.array): The different durations of the Grid Search
            parameters (dict): The parameter combinations that have been tested in this cross validation fold
            estimator (model): The actual fitted estimator / model that was trained in this fold
        '''
        _child_run = self.__current_run.child_run('Gridsearch' + str(run_index))
        self.__current_run.log_row('Trainscore', score = train_score)
        self.__current_run.log_row('Testscore', score = test_score)

        _table = {
            'Testing score': test_score,
            'Training score': train_score
            }

        for k in parameters.keys():
            v = parameters[k]
            if(v is None):
                v = 'None'
            _child_run.log(k, v)
            _table[k] = v
        
        self.__current_run.log_row('Results', '', **_table)
        _child_run.complete()


    def get_best_model(self, metric_name:str, take_highest:bool = True):
        '''
        Tags and returns the best model of the experiment, based on the given metric
        Args:
            metric_name (str): The name of the metric, such as accuracy
            take_highest (bool): In case of accuracy and score, this is typically True.  In case you want to get the model based on the lowest error, you can use False
        Returns:
            Run: the best run, which will be labeled as best run
        '''
        runs = {}
        run_metrics = {}
        for r in tqdm(self.__experiment.get_runs()):
            metrics = r.get_metrics()
            if metric_name in metrics.keys():
                runs[r.id] = r
                run_metrics[r.id] = metrics
        if take_highest:
            best_run_id = max(run_metrics, key = lambda k: run_metrics[k][metric_name])
        else:
            best_run_id = min(run_metrics, key = lambda k: run_metrics[k][metric_name])
        best_run = runs[best_run_id]
        best_run.tag('Best run')
        return best_run

    def get_azureml_experiment(self):
        '''
        Gives access to the AzureML experiment object
        Returns:
            Experiment: the existing experiment
        '''
        return self.__experiment
        
    def complete_run(self, fitted_model, metrics_to_log: dict = None, upload_model: bool = True):
        '''
        Saves all results of the active Run and completes it
        Args:
            fitted_model (model): The already fitted model to be tested.  Sklearn and Keras models have been tested
            metrics_to_log (dict): The metrics that should be logged with the model to the run
            upload_model (bool): This will upload the model (pkl file or json) to AzureML run (defaults to True)
        '''
        is_keras = 'keras' in str(type(fitted_model))

        if(metrics_to_log is not None):
            for k, v in metrics_to_log.items():
                self._log_metrics(k, v)
        
        if upload_model:
            # Save the model to the outputs directory for capture
            if(is_keras):
                model_folder_name = 'outputs/model'
                fitted_model.save(model_folder_name)
                files_to_upload = dict()
            else:
                model_file_name = 'outputs/model.pkl'
                joblib.dump(value = fitted_model, filename = model_file_name)

        self._complete_run()

    def evaluate_classifier(self, fitted_model, X_test: np.array, y_test: np.array, show_roc: bool = False, save_curves_as_image: bool = False,
                             class_names: np.array = None, finish_existing_run: bool = True, upload_model: bool = True, return_predictions: bool = False) -> np.array:

        '''
        Will predict and evaluate a model against a test set and save all results to the active Run on AzureML
        Args:
            fitted_model (model): The already fitted model to be tested.  Sklearn and Keras models have been tested
            X_test (np.array): The test set to calculate the predictions with
            y_test (np.array): The output test set to evaluate the predictions against
            show_roc (bool): This will upload the ROC curve to the run in case of a binary classifier
            save_curves_as_image (bool): This will save the training & loss curves as images
            class_names (np.array): The class names that will be linked to the Confusion Matrix.  If not provided, the unique values of the y_test matrix will be used
            finish_existing_run (bool): Will complete the existing run on AzureML (defaults to True)
            upload_model (bool): This will upload the model (pkl file) to AzureML run (defaults to True)
            return_predictions (bool): If true, the y_pred values will be returned
        Returns: 
            np.array: The predicted (y_pred) values against the model
        '''
        is_keras = 'keras' in str(type(fitted_model))
        
        # Predict X_test with model
        if(is_keras):
            if 'predict_classes' in dir(fitted_model):
                y_pred = fitted_model.predict_classes(X_test)
            else:
                y_pred = fitted_model.predict(X_test)
                y_pred = np.argmax(y_pred, axis=1)
            self.add_training_plots(fitted_model, save_image=save_curves_as_image)
        else:
            y_pred = fitted_model.predict(X_test)

        if class_names is None:
            class_names = np.char.mod('%d', sorted(np.unique(y_test)))

        # Print classification report
        print(metrics.classification_report(y_test, y_pred))

        # Confusion matrix
        cf = metrics.confusion_matrix(y_test, y_pred)
        self._log_confmatrix(cf, class_names)

        # Accuracy
        accuracy = metrics.accuracy_score(y_test, y_pred) * 100
        self._log_metrics('accuracy', accuracy, description='')

        if(show_roc == True):
            # Verify that we are having a binary classifier
            if(len(class_names)!=2):
                raise AttributeError('Showing a ROC curve is only possible for binary classifier, not for multi class')
            self.__log_roc_curve(y_pred, y_test)

        if (finish_existing_run):
            self.complete_run(fitted_model, upload_model = upload_model)

        if return_predictions:  
            return y_pred

    def add_training_plots(self, fitted_model, metrics=None, save_image: bool = False):
        '''
        Add the training plots to the Run history
        Args:
            fitted_model (Keras model): the fitted model that contains the training history
            metrics (list): the metrics that should be tracked to the run.  If None, all available metrics will be taken
        
        '''
        history = fitted_model.history
        if metrics is None:
            metrics = history.history.keys()

        for metric in metrics:
            if(metric in history.history.keys()):
                self.__current_run.log_table(f'Plot {metric}', {metric: history.history[metric]})

                if(save_image and not metric.startswith('val_') and f'val_{metric}' in history.history.keys()):
                    plt.plot(history.history[metric])
                    plt.plot(history.history[f'val_{metric}'])
                    plt.title(f'model {metric}')
                    plt.ylabel(metric)
                    plt.xlabel('epoch')
                    plt.legend(['train', 'test'], loc='upper left')
                    #plt.show()
                    self.__current_run.log_image(f'model {metric}', plot=plt)
                    plt.close()

    def evaluate_image_classifier(self, fitted_model, X_test: np.array, y_test: np.array, show_roc: bool = False, failed_classifications_to_save: int = 0, image_shape = None, save_curves_as_image: bool = False,
                                class_names: np.array = None, finish_existing_run: bool = True, upload_model: bool = True, return_predictions: bool = False) -> np.array:

        '''
        Will predict and evaluate a model against a test set and save all results to the active Run on AzureML
        Args:
            fitted_model (model): The already fitted model to be tested.  Sklearn and Keras models have been tested
            X_test (np.array): The test set to calculate the predictions with
            y_test (np.array): The output test set to evaluate the predictions against
            show_roc (bool): This will upload the ROC curve to the run in case of a binary classifier
            failed_classifications_to_save (int): If greater than 0, this amount of incorrectly classified images will be tracked to the Run
            image_shape ((int, int, int)): Indicates if images should be reshaped before saving them
            class_names (np.array): The class names that will be used in the description.  If not provided, the unique values of the y_test matrix will be used
            finish_existing_run (bool): Will complete the existing run on AzureML (defaults to True)
            upload_model (bool): This will upload the model (pkl file) to AzureML run (defaults to True)
        Returns: 
            np.array: The predicted (y_pred) values against the model
        ''' 
        from arcus.ml.images import explorer
        
        y_pred = self.evaluate_classifier(fitted_model, X_test, y_test, show_roc=show_roc, save_curves_as_image=save_curves_as_image, class_names= class_names, finish_existing_run=False, upload_model=upload_model, return_predictions=True)
        if failed_classifications_to_save > 0:
            # Take incorrect classified images and save
            import random
            incorrect_predictions = [i for i, item in enumerate(y_pred) if item != y_test[i]]
            total_images = min(len(incorrect_predictions), failed_classifications_to_save)

            for i in random.sample(incorrect_predictions, total_images):
                pred_class = y_pred[i]
                act_class = y_test[i]
                if class_names is not None:
                    pred_class = class_names[pred_class]
                    act_class = class_names[act_class]
                if image_shape is not None:
                    # Reshape image before saving it
                    imgplot = explorer.show_image(X_test[i].reshape(image_shape), silent_mode=True)
                else:
                    imgplot = explorer.show_image(X_test[i], silent_mode=True)
                description = f'Predicted {pred_class} - Actual {act_class}'
                self.__current_run.log_image(description, plot=imgplot)

        if return_predictions:  
            return y_pred




    def __stack_images(self, img1: np.array, img2: np.array):
        ha,wa = img1.shape[:2]
        hb,wb = img2.shape[:2]
        max_width = np.max([wa, wb])
        total_height = ha+hb
        new_img = np.zeros(shape=(total_height, max_width, 3))
        new_img[:ha,:wa]=img1
        new_img[ha:hb+ha,:wb]=img2
        return new_img

    def __concat_images(self, image_list: np.array) -> np.array:
        output = None
        for i, img in enumerate(image_list):
            if i==0:
                output = img
            else:
                output = self.__stack_images(output, img)
        return output

 

    def save_image_outputs(self, X_test: np.array, y_test: np.array, y_pred: np.array, samples_to_save: int = 1) -> np.array:
        '''
        Will save image outputs to the run
        Args:
            X_test (np.array): The input images for the model
            y_test (np.array): The actual expected output images of the model
            y_pred (np.array): The predicted or calculated output images of the model
            samples_to_save (int): If greater than 0, this amount of input, output and generated image combinations will be tracked to the Run
        ''' 
        from arcus.ml.images import explorer

        if samples_to_save > 0:
            import random
            total_images = min(len(y_pred), samples_to_save)

            for i in random.sample(range(len(y_pred)), total_images):
                newimg = self.__concat_images([X_test[i], y_test[i], y_pred[i]])
                imgplot = explorer.show_image(newimg, silent_mode=True)
                self.__current_run.log_image(f'Image combo sample {i}', plot=imgplot)
                imgplot.close()

    def setup_training(self, training_name: str, overwrite: bool = False):
        '''
        Will initialize a new directory (using the given training_name) and add a training script and requirements file to run training
        Args:
            training_name (str): The name of a training.  This will be used to create a directory.  Can contain subdirectory
            overwrite (bool): Defines if the existing training files should be overwritten
        '''
        if not os.path.exists(training_name):
            os.makedirs(training_name)
        # Take default training script and copy to the new folder
        default_training_script_file = os.path.join(str(os.path.dirname(__file__)), 'resources/train.py')
        default_requirements_file = os.path.join(str(os.path.dirname(__file__)), 'resources/requirements.txt')
        dest_training_script_file = os.path.join(training_name, 'train.py')
        dest_requirements_file = os.path.join(training_name, 'requirements.txt')

        if overwrite or not(os.path.isfile(dest_training_script_file)):
            shutil.copy2(default_training_script_file, training_name)

        if overwrite or not(os.path.isfile(dest_requirements_file)):
            shutil.copy2(default_requirements_file, training_name)
        
    def start_training(self, training_name: str, environment_type: str = None, input_datasets: np.array = None, 
                        input_datasets_to_download: np.array = None, compute_target:str='local', gpu_compute: bool = False, 
                        script_parameters: dict = None, show_widget: bool = True, use_estimator: bool = False, **kwargs):
        ''' 
        Will start a new training, taking the training name as the folder of the run
        Args:
            training_name (str): The name of a training.  This will be used to create a directory.  Can contain subdirectory
            environment_type (str): either the name of an existing environment that will be taken as base, or one of these values (tensorflow, sklearn, pytorch).  
            input_datasets (np.array): An array of data set names that will be mounted on the compute in a directory of the dataset name
            input_datasets_to_download (np.array): An array of data set names that will be downloaded to the compute in a directory of the dataset name
            compute_target (str): The compute target (default = 'local') on which the training should be executed
            gpu_compute (bool): Indicates if GPU compute is required for this script or not
            script_parameters (dict): A dictionary of key/value parameters that will be passed as arguments to the training script
            show_widget (bool): Will display the live tracking of the submitted Run
        Returns:
            Run : the submitted run
        '''
        
        if use_estimator:
            print('Scheduling Estimator training')
            self._start_estimator_training(training_name, environment_type, input_datasets, input_datasets_to_download, compute_target, gpu_compute, script_parameters, show_widget, **kwargs)
        else:
            print('Scheduling ScriptRunConfig training')
            self._start_environment_training(training_name, environment_type, input_datasets, input_datasets_to_download, compute_target, gpu_compute, script_parameters, show_widget, **kwargs)
        
        if script_parameters is not None:
            for arg in script_parameters.keys():
                self.__current_run.log(arg.replace('--', ''), script_parameters[arg])

        print(self.__current_run.get_portal_url())

        if(show_widget):
            from azureml.widgets import RunDetails
            RunDetails(self.__current_run).show()
        return self.__current_run

    def _start_environment_training(self, training_name: str, environment_type: str = None, input_datasets: np.array = None, 
                                    input_datasets_to_download: np.array = None, compute_target:str='local', gpu_compute: bool = False, 
                                    script_parameters: dict = None, show_widget: bool = True, **kwargs):
        ''' 
        Will start a new training using ScriptRunConfig, taking the training name as the folder of the run
        Args:
            training_name (str): The name of a training.  This will be used to create a directory.  Can contain subdirectory
            environment_type (str): either the name of an existing environment that will be taken as base, or one of these values (tensorflow, sklearn, pytorch).  
            input_datasets (np.array): An array of data set names that will be mounted on the compute in a directory of the dataset name
            input_datasets_to_download (np.array): An array of data set names that will be downloaded to the compute in a directory of the dataset name
            compute_target (str): The compute target (default = 'local') on which the training should be executed
            gpu_compute (bool): Indicates if GPU compute is required for this script or not
            script_parameters (dict): A dictionary of key/value parameters that will be passed as arguments to the training script
            show_widget (bool): Will display the live tracking of the submitted Run
        '''
        from azureml.train.estimator import Estimator
        from azureml.core import Environment, ScriptRunConfig
        from azureml.core.runconfig import RunConfiguration
        from azureml.core.runconfig import DataReferenceConfiguration
        from azureml.core.runconfig import CondaDependencies
        from arcus.azureml.experimenting import train_environment as te

        # Check if directory exists
        if not(os.path.exists(training_name) and os.path.isdir(training_name)):
            raise FileNotFoundError(training_name)

        # Check compute target
        if compute_target != 'local':
            self.__check_compute_target(compute_target, gpu_compute)

        training_env = te.get_training_environment(self.__workspace, training_name, os.path.join(training_name, 'requirements.txt'), use_gpu=gpu_compute, include_prerelease=True, environment_type=environment_type)
        runconfig = RunConfiguration()

        # Add datasets
        datarefs = dict()
        
        scriptargs = list()
        if script_parameters is not None:
            for key in script_parameters.keys():
                scriptargs.append(key)
                scriptargs.append(script_parameters[key])

        if(input_datasets is not None):
            for ds in input_datasets:
                print(f'Adding mounting data reference for dataset {ds}')
                # scriptargs.append(ds)
                scriptargs.append(self.__workspace.datasets[ds].as_named_input(ds).as_mount(path_on_compute = ds))
#                datastore, path = self._get_data_reference(self.__workspace.datasets[ds])
#                datarefs[ds] = DataReferenceConfiguration(datastore_name=datastore, path_on_datastore = path, path_on_compute = '/' + ds, mode = 'mount', overwrite = False)
        if(input_datasets_to_download is not None):
            for ds in input_datasets_to_download:
                print(f'Adding download data reference for dataset {ds}')
                # scriptargs.append(ds)
                scriptargs.append(self.__workspace.datasets[ds].as_named_input(ds).as_download(path_on_compute = ds))



        scriptrunconfig = ScriptRunConfig(source_directory='./' + training_name, script="train.py", run_config=runconfig, 
                                            arguments=scriptargs)
        scriptrunconfig.run_config.target = compute_target
        scriptrunconfig.run_config.environment = training_env
        #scriptrunconfig.run_config.data_references = datarefs

        # Submit training
        self.__current_run = self.__experiment.submit(scriptrunconfig)
        


    def _get_data_reference(self, dataset: Dataset):
        import json
        j = json.loads(str(dataset).replace('FileDataset\n', ''))
        source = j['source'][0]
        sections = source.split("'")
        return sections[1], sections[3]

    def _start_estimator_training(self, training_name: str, estimator_type: str = None, input_datasets: np.array = None, input_datasets_to_download: np.array = None, compute_target:str='local', gpu_compute: bool = False, script_parameters: dict = None, show_widget: bool = True, **kwargs):
        ''' 
        Will start a new training using an Estimator, taking the training name as the folder of the run
        Args:
            training_name (str): The name of a training.  This will be used to create a directory.  Can contain subdirectory
            estimator_type (str): one of these values (tensorflow, sklearn, pytorch).
            input_datasets (np.array): An array of data set names that will be mounted on the compute in a directory of the dataset name
            input_datasets_to_download (np.array): An array of data set names that will be downloaded to the compute in a directory of the dataset name
            compute_target (str): The compute target (default = 'local') on which the training should be executed
            gpu_compute (bool): Indicates if GPU compute is required for this script or not
            script_parameters (dict): A dictionary of key/value parameters that will be passed as arguments to the training script
            show_widget (bool): Will display the live tracking of the submitted Run
        '''
        from azureml.train.estimator import Estimator

        # Check if directory exists
        if not(os.path.exists(training_name) and os.path.isdir(training_name)):
            raise FileNotFoundError(training_name)

        # Check compute target
        if compute_target != 'local':
            self.__check_compute_target(compute_target, gpu_compute)
            

        # Add datasets
        datasets = list()
        if(input_datasets is not None):
            for ds in input_datasets:
                datasets.append(self.__workspace.datasets[ds].as_named_input(ds).as_mount(path_on_compute=ds))
        if(input_datasets_to_download is not None):
            for ds in input_datasets_to_download:
                datasets.append(self.__workspace.datasets[ds].as_named_input(ds).as_download(path_on_compute=ds))

        # as mount - as download
        constructor_parameters = {
            'source_directory':training_name,
            'script_params':script_parameters,
            'inputs':datasets,
            'compute_target':compute_target,
            'entry_script':'train.py',
            'pip_requirements_file':'requirements.txt', 
            'use_gpu':gpu_compute,
            'use_docker':True}
        
        print('Creating estimator of type', estimator_type)

        if(estimator_type is None):
            # Using default Estimator
            estimator = Estimator(**constructor_parameters)
        elif(estimator_type == 'tensorflow'):
            from azureml.train.dnn import TensorFlow
            version_par = 'framework_version'
            if(not version_par in constructor_parameters.keys()):
                print('Defaulting to version 2.0 for TensorFlow')
                constructor_parameters[version_par] = '2.0'
            estimator = TensorFlow(**constructor_parameters)
        elif(estimator_type == 'sklearn'):
            from azureml.train.sklearn import SKLearn
            estimator = SKLearn(**constructor_parameters)
        elif(estimator_type == 'pytorch'):
            from azureml.train.dnn import PyTorch
            estimator = PyTorch(**constructor_parameters)

        # Submit training
        self.__current_run = self.__experiment.submit(estimator)

    # protected implementation methods
    def _log_metrics(self, metric_name: str, metric_value: float, description:str = None):
        print(metric_name, metric_value) 

        self.__current_run.log(metric_name, metric_value, description=description)

    
    def _complete_run(self):
        '''
        Completes the current run
        '''
        self.__current_run.complete()

    def _log_confmatrix(self, confusion_matrix: np.array, class_names: np.array):
        data = {}
        data['schema_type'] = 'confusion_matrix'
        data['schema_version'] = 'v1'
        data['data'] = {}
        data['data']['class_labels'] = class_names.tolist()
        data['data']['matrix'] = confusion_matrix.tolist()
        
        print(confusion_matrix)

        json_data = json.dumps(data)
        self.__current_run.log_confusion_matrix('Confusion matrix', json_data, description='')

    def _save_roc_curve(self, roc_auc: float, roc_plot: plt):
        self._log_metrics('roc_auc', roc_auc)
        self.__current_run.log_image('ROC Curve', plot=plt)

    def __check_compute_target(self, compute_target, use_gpu: bool):
        __vm_size = ''
        if isinstance(compute_target, AmlCompute):
            __vm_size = compute_target.vm_size
        elif isinstance(compute_target, str):
            compute = ComputeTarget(workspace=self.__workspace, name=compute_target)
            __vm_size = compute.vm_size

        if self.__vm_size_list is None:
            self.__vm_size_list = AmlCompute.supported_vmsizes(self.__workspace)
        
        vm_description = list(filter(lambda vmsize: str.upper(vmsize['name']) == str.upper(__vm_size), self.__vm_size_list))[0]
        if(use_gpu and vm_description['gpus'] == 0):
            raise errors.TrainingComputeException(f'gpu_compute was specified, but the target does not have GPUs: {vm_description} ')
        if(not (use_gpu) and vm_description['vCPUs'] == 0):
            raise errors.TrainingComputeException(f'cpu_compute was specified, but the target does not have CPUs: {vm_description} ')


    def __log_roc_curve(self, y_pred: np.array, y_test: np.array):
        '''Will upload the Receiver Operating Characteristic (ROC) Curve for binary classifiers

        Args:
            y_pred (np.array): The predicted values of the test set 
            y_test (np.array): The actual outputs of the test set

        Returns: 
            float: The ROC_AUC value
        '''
        # calculate the fpr and tpr for all thresholds of the classification
        fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred)
        roc_auc = metrics.auc(fpr, tpr)
        plt.cla()
        plt.title('Receiver Operating Characteristic')
        plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
        plt.legend(loc = 'lower right')
        plt.plot([0, 1], [0, 1],'r--')
        plt.xlim([0, 1])
        plt.ylim([0, 1])
        plt.ylabel('True Positive Rate')
        plt.xlabel('False Positive Rate')
        self._save_roc_curve(roc_auc, plt)
        plt.show(block=False)
        plt.close()
        return roc_auc
# MAGIC
# MAGIC Each `Run` object has a `get_metrics()` method that will retrieve our stored metrics. We can leverage the `get_runs()` method of the `Experiment` object to retrieve the run objects.
# MAGIC
# MAGIC We will then render a table to compare model performance.

# COMMAND ----------

# Download RMSE and R2 from AML Service
import pandas as pd

# Use list comprehension to retrieve records from experiment object.
run_results = pd.DataFrame.from_records([
    {
        "id": run.id,
        "RMSE": run.get_metrics().get('RMSE'),
        'R2': run.get_metrics().get('R2')
    }
    for run in experiment.get_runs()
    if run.get_metrics().get('RMSE') is not None
])

display(run_results[['id', 'RMSE', 'R2']])

# COMMAND ----------

# MAGIC %md
# MAGIC #### 3. Select Run with Model to deploy
# MAGIC
# MAGIC Each time we ran the models, we stored a zip file with the trained model in AML. We can now retrieve the trained model of the particular run that we want to deploy. We'll copy the relevant `id` from above and retrieve the Run object.

# COMMAND ----------

best_run_id = '6d670807-6477-4ea6-a98b-84069c888346'
best_run = Run(experiment, best_run_id)
Example #28
# Online run. Use dataset provided by training notebook.
else:
    print("Running in online mode...")
    experiment = run.experiment
    workspace = experiment.workspace
    dataset_path = run.input_datasets["dataset"]

# Download the model from the provided run.
print("Downloading model from run with id {}...".format(args.run_id))

# Locate the run that contains the model.
experiment_that_contains_model = Experiment(workspace=workspace,
                                            name=args.experiment_name)
run_that_contains_model = None
for experiment_run in experiment_that_contains_model.get_runs():
    if experiment_run.id == args.run_id:
        run_that_contains_model = experiment_run
        break
if run_that_contains_model is None:
    print("ERROR! Run not found!")
    exit(0)

# Download the model.
print("Downloading the model...")
output_directory = "model-" + args.run_id
run_that_contains_model.download_files(output_directory=output_directory)

# Instantiate the model with its weights.
print("Creating the model...")
model = GAPNet()
Example #29
    model.fit(X=X_train, y=y_train)
    y_pred = model.predict(X=X_test)
    rmse = math.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
    run.log("rmse", rmse)

    model_name = "model_alpha_" + str(alpha) + ".pkl"
    filename = "outputs/" + model_name

    joblib.dump(value=model, filename=filename)
    run.upload_file(name=model_name, path_or_stream=filename)
    run.complete()

minimum_rmse_runid = None
minimum_rmse = None

for run in experiment.get_runs():
    run_metrics = run.get_metrics()
    run_details = run.get_details()
    # each logged metric becomes a key in this returned dict
    run_rmse = run_metrics["rmse"]
    run_id = run_details["runId"]

    if minimum_rmse is None:
        minimum_rmse = run_rmse
        minimum_rmse_runid = run_id
    else:
        if run_rmse < minimum_rmse:
            minimum_rmse = run_rmse
            minimum_rmse_runid = run_id

print("Best run_id: " + minimum_rmse_runid)
Example #30
        arguments=[
            "--aoi",
            args.aoi_file,
            "--feature-file",
            args.feature_file,
            "--model-file",
            os.path.basename(model_file),
        ],
        max_run_duration_seconds=60 * 30,
        environment=load_azml_env(),
    )

    display_name = f"{args.output_prefix} {args.run_id} {args.model_file}"

    existing_runs = [
        run for run in experiment.get_runs() if run.display_name == display_name
    ]
    if len(existing_runs) == 0:
        print("no runs")
        run = experiment.submit(config)
        run.display_name = display_name
        run.wait_for_completion()
    else:
        print("run exists")
        run = existing_runs[0]

    output_dir = f"data/predictions/{args.output_prefix}_{args.run_id}"
    os.makedirs(output_dir, exist_ok=True)

    local_files = []
    for file in run.get_file_names():