import logging
import shutil
import tempfile
from pathlib import Path
from typing import List, Optional

import requests
from azureml.core import Experiment, Model

# Constants such as ENVIRONMENT_YAML_FILE_NAME, RUN_SCORING_SCRIPT or DEFAULT_DATA_FOLDER, and
# helpers such as SourceConfig, create_run_config, copy_image_file, repository_root_directory and
# choose_download_path are assumed to be imported from the InnerEye-DeepLearning codebase.


def download_conda_dependency_files(model: Model, dir_path: Path) -> List[Path]:
    """
    Identifies all the files with basename "environment.yml" in the model and downloads them
    to tmp_environment_001.yml, tmp_environment_002.yml etc. Normally there will be one of these
    if the model was built directly from a clone of the InnerEye-DeepLearning repo, or two if
    it was built from the user's own directory, which had InnerEye-DeepLearning as a submodule.
    :param model: model to search in
    :param dir_path: folder to write the tmp...yml files into
    :return: a list of the tmp...yml files created
    """
    url_dict = model.get_sas_urls()
    # The target folder may not exist yet, e.g. when a fresh "temp_for_scoring" folder is passed in.
    dir_path.mkdir(parents=True, exist_ok=True)
    downloaded: List[Path] = []
    for path, url in url_dict.items():
        if Path(path).name == ENVIRONMENT_YAML_FILE_NAME:
            target_path = dir_path / f"tmp_environment_{len(downloaded) + 1:03d}.yml"
            response = requests.get(url, allow_redirects=True)
            response.raise_for_status()
            target_path.write_bytes(response.content)
            # Remove additional information from the URL to make it more legible
            index = url.find("?")
            if index > 0:
                url = url[:index]
            logging.info(f"Downloaded {target_path} from {url}")
            downloaded.append(target_path)
    if not downloaded:
        logging.warning(f"No {ENVIRONMENT_YAML_FILE_NAME} files found in the model!")
    return downloaded
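

# Example (hypothetical): given a registered AzureML model and a scratch folder,
#     conda_files = download_conda_dependency_files(model, Path("/tmp/scratch"))
# would return e.g. [Path("/tmp/scratch/tmp_environment_001.yml"), ...].
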
def submit_for_inference(args: SubmitForInferenceConfig,
                         azure_config: AzureConfig) -> Optional[Path]:
    """
    Create and submit an inference to AzureML, and optionally download the resulting segmentation.
    :param azure_config: An object with all necessary information for accessing Azure.
    :param args: configuration, see SubmitForInferenceConfig
    :return: path to downloaded segmentation on local disc, or None if none.
    """
    logging.info(f"Building Azure configuration from {args.settings}")
    logging.info("Getting workspace")
    workspace = azure_config.get_workspace()
    logging.info("Identifying model")
    model = Model(workspace=workspace, id=args.model_id)
    model_id = model.id
    logging.info(f"Identified model {model_id}")
    source_directory = tempfile.TemporaryDirectory()
    source_directory_path = Path(source_directory.name)
    logging.info(
        f"Building inference run submission in {source_directory_path}")
    image_folder = source_directory_path / DEFAULT_DATA_FOLDER
    image = copy_image_file(args.image_file, image_folder, args.use_dicom)
    # Identify all the files with basename "environment.yml" in the model and download them.
    # These downloads should go into a temp folder that will most likely not be part of the model
    # itself, because the AzureML run will later download the model into the same folder structure,
    # and the file names might clash.
    temp_folder = source_directory_path / "temp_for_scoring"
    conda_files = download_conda_dependency_files(model, dir_path=temp_folder)
    if not conda_files:
        raise ValueError(
            "At least 1 Conda environment definition must exist in the model.")
    # Retrieve the name of the Python environment that the training run used. This environment should have been
    # registered. If no such environment exists, it will be re-created from the Conda files provided.
    python_environment_name = model.tags.get(PYTHON_ENVIRONMENT_NAME, "")
    # Copy the scoring script from the repository. This will start the model download from Azure, and invoke the
    # scoring script.
    entry_script = source_directory_path / Path(RUN_SCORING_SCRIPT).name
    shutil.copyfile(str(repository_root_directory(RUN_SCORING_SCRIPT)),
                    str(entry_script))
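    # Assemble the run's source configuration: the entry script, its command-line arguments,
    # and the Conda files that define the Python environment.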
    source_config = SourceConfig(
        root_folder=source_directory_path,
        entry_script=entry_script,
        script_params=[
            "--model-folder",
            ".",
            "--model-id",
            model_id,
            SCORE_SCRIPT,
            # The data folder must be given relative to the root folder of the AzureML job;
            # --image_files is then just the file name relative to that data folder.
            "--data_folder",
            image.parent.name,
            "--image_files",
            image.name,
            "--use_dicom",
            str(args.use_dicom)
        ],
        conda_dependencies_files=conda_files,
    )
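    # Build the AzureML run configuration; the registered Python environment is reused if it
    # exists, otherwise it is re-created from the Conda files.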
    run_config = create_run_config(azure_config,
                                   source_config,
                                   environment_name=python_environment_name)
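    # Submit the run; AzureML creates the experiment if it does not exist yet.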
    exp = Experiment(workspace=workspace, name=args.experiment_name)
    run = exp.submit(run_config)
    logging.info(f"Submitted run {run.id} in experiment {run.experiment.name}")
    logging.info(f"Run URL: {run.get_portal_url()}")
    if not args.keep_upload_folder:
        source_directory.cleanup()
        logging.info(f"Deleted submission directory {source_directory_path}")
    if args.download_folder is None:
        return None
    logging.info("Awaiting run completion")
    run.wait_for_completion()
    logging.info(f"Run has completed with status {run.get_status()}")
    download_path = choose_download_path(args.download_folder)
    logging.info(f"Attempting to download segmentation to {download_path}")
    run.download_file(DEFAULT_RESULT_IMAGE_NAME, str(download_path))
    if download_path.exists():
        logging.info(f"Downloaded segmentation to {download_path}")
    else:
        logging.warning("Segmentation NOT downloaded")
    return download_path
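

if __name__ == "__main__":
    # Minimal driver sketch, not part of the original module. How the two config objects are
    # populated depends on the surrounding InnerEye-DeepLearning code; the parsing/loading
    # helpers below are assumptions for illustration only.
    logging.basicConfig(level=logging.INFO)
    inference_config = SubmitForInferenceConfig.parse_args()  # assumed parsing helper
    azure_config = AzureConfig.from_yaml(inference_config.settings)  # assumed loader
    submit_for_inference(inference_config, azure_config)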