Exemple #1
0
def test_load_model_from_recipe(recipe_args, other_args):
    """
    Load a recipe, load the model for that recipe, and check that every
    metadata field of the recipe (except the temporarily skipped ones)
    equals the same field of the loaded model.
    """
    loaded_recipe = Zoo.load_recipe(**recipe_args, **other_args)
    loaded_model = Zoo.load_model_from_recipe(loaded_recipe, **other_args)
    actual_fields = loaded_model.dict()
    expected_fields = loaded_recipe.model_metadata.dict()

    # TODO temporary fix while model apis need to be updated
    skipped_fields = ("created", "modified", "release_version")

    for name in expected_fields:
        if name in skipped_fields:
            continue
        assert actual_fields[name] == expected_fields[name]
Exemple #2
0
def test_search_similar_models(model_args, other_args):
    """
    Search for models similar to a loaded reference model and check that each
    result shares its domain, sub domain, architecture and sub architecture.
    """
    reference = Zoo.load_model(**model_args, **other_args)
    matches = Zoo.search_similar_models(reference)
    assert len(matches) > 0

    shared_attrs = ("domain", "sub_domain", "architecture", "sub_architecture")
    for candidate in matches:
        assert candidate
        for attr in shared_attrs:
            assert getattr(candidate, attr) == getattr(reference, attr)
Exemple #3
0
def test_load_base_model_from_recipe(recipe_args, other_args):
    """
    Load the base model of a recipe and check its fields against the recipe
    arguments: the sparse_* fields must describe a base model, every other
    field (recipe_type excluded) must equal the requested value.
    """
    loaded_recipe = Zoo.load_recipe(**recipe_args, **other_args)
    base_model = Zoo.load_base_model_from_recipe(loaded_recipe, **other_args)
    base_fields = base_model.dict()

    # values a base model reports regardless of what the recipe asked for
    base_overrides = {"sparse_name": "base", "sparse_category": "none"}

    for name, requested in recipe_args.items():
        if name == "recipe_type":
            continue
        if name == "sparse_target":
            assert base_fields[name] is None
            continue
        assert base_fields[name] == base_overrides.get(name, requested)
Exemple #4
0
def test_search_optimized_models(model_args, other_args):
    """
    Search for optimized versions of a loaded reference model and check that
    each result agrees with the reference on all identifying attributes.
    """
    reference = Zoo.load_model(**model_args, **other_args)
    results = Zoo.search_optimized_models(reference)
    assert len(results) > 0

    shared_attrs = (
        "domain",
        "sub_domain",
        "architecture",
        "sub_architecture",
        "framework",
        "repo",
        "dataset",
        "training_scheme",
    )
    for candidate in results:
        assert candidate
        for attr in shared_attrs:
            assert getattr(candidate, attr) == getattr(reference, attr)
Exemple #5
0
def test_search_sparse_recipes_from_stub(model_stub, other_args):
    """
    Search sparse recipes by model stub and check that the model metadata of
    each recipe agrees with the model loaded from the same stub.
    """
    reference = Zoo.load_model_from_stub(model_stub, **other_args)
    found_recipes = Zoo.search_sparse_recipes(model_stub)
    assert len(found_recipes) > 0

    shared_attrs = (
        "domain",
        "sub_domain",
        "architecture",
        "sub_architecture",
        "framework",
        "repo",
        "dataset",
        "training_scheme",
    )
    for found in found_recipes:
        assert found
        for attr in shared_attrs:
            assert getattr(found.model_metadata, attr) == getattr(reference, attr)
Exemple #6
0
def test_quantize_model_post_training_mnist():
    """
    Quantize the first pytorch mnistnet model returned by the zoo search using
    MNIST validation samples, then run the module's _test_* checks against the
    quantized ONNX file. The temporary output file is always removed.
    """
    # Prepare model paths
    mnist_model_path = Zoo.search_models(
        domain="cv",
        sub_domain="classification",
        architecture="mnistnet",
        framework="pytorch",
    )[0].onnx_file.downloaded_path()
    # delete=False keeps the file on disk once the handle is closed so it can be
    # written by path; the finally block below removes it
    with tempfile.NamedTemporaryFile(suffix=".onnx", delete=False) as quant_file:
        quant_model_path = quant_file.name

    try:
        # Prepare sample validation dataset
        batch_size = 1
        val_dataset = MNISTDataset(train=False)
        input_dict = [{"input": img.numpy()} for (img, _) in val_dataset]
        data_loader = DataLoader(input_dict, None, batch_size)

        # Run calibration and quantization
        quantize_model_post_training(
            mnist_model_path, data_loader, quant_model_path, show_progress=False
        )

        # Verify that ResNet identity has no effect
        _test_resnet_identity_quant(quant_model_path, False, False)

        # Verify Convs and MatMuls are quantized
        _test_model_is_quantized(mnist_model_path, quant_model_path)

        # Verify quant model accuracy
        # initialize a new generator
        test_data_loader = DataLoader(input_dict, None, 1)
        _test_quant_model_output(
            mnist_model_path, quant_model_path, test_data_loader, [0], batch_size
        )
    finally:
        # Clean up even when one of the checks above fails
        os.remove(quant_model_path)
Exemple #7
0
def test_load_model_from_stub(stub, model_args, other_args):
    """
    Load a model from its stub, download it, and check that every attribute
    named in model_args that exists on the model has the expected value.
    The downloaded directory is removed even when an assertion fails.
    """
    model = Zoo.load_model_from_stub(stub, **other_args)
    model.download(overwrite=True)
    try:
        for key, expected in model_args.items():
            if key and hasattr(model, key):
                assert getattr(model, key) == expected
    finally:
        # always remove the download so a failing check does not leave it behind
        shutil.rmtree(model.dir_path)
Exemple #8
0
def dataloader_models(request) -> DataloaderModelFixture:
    """
    Build a DataloaderModelFixture from request.param, which unpacks into the
    zoo model arguments plus the input shapes, output shapes and data types.
    The model's ONNX file is resolved to its downloaded path.
    """
    zoo_kwargs, input_shapes, output_shapes, data_types = request.param
    onnx_path = Zoo.load_model(**zoo_kwargs).onnx_file.downloaded_path()
    return DataloaderModelFixture(
        onnx_path, input_shapes, output_shapes, data_types
    )
Exemple #9
0
def load_data(data_path: str) -> List[List[numpy.ndarray]]:
    """
    Loads data from given sparseZoo stub or directory with .npz files

    :param data_path: directory path to .npz files to load or SparseZoo stub
        (prefixed by 'zoo:')
    :return: list with one entry per loaded sample; each entry is the list of
        values stored in that sample
    :raises RuntimeError: if data_path is a directory that contains a file
        whose name does not end in .npz
    """
    if data_path.startswith("zoo:"):
        data_dir = Zoo.load_model_from_stub(data_path).data_inputs.downloaded_path()
    else:
        data_dir = data_path
        # compare the real suffix; a substring test would let names such as
        # "inputs.npz.bak" through
        invalid_files = [
            name for name in os.listdir(data_dir) if not name.endswith(".npz")
        ]
        if invalid_files:
            raise RuntimeError(
                f"All files in data directory {data_dir} must have a .npz extension "
                f"found {invalid_files}")

    samples = load_numpy_list(data_dir)
    # unwrap unloaded numpy files
    samples = [
        load_numpy(sample) if isinstance(sample, str) else sample
        for sample in samples
    ]

    # keep only the stored values of every sample, in their stored order
    return [list(sample.values()) for sample in samples]
Exemple #10
0
def create_model(args: Any, num_classes: int) -> Module:
    """
    Create a model through the ModelRegistry. When args.checkpoint_path is the
    literal "zoo", it is first replaced by the first .pth file downloaded for
    the SparseZoo recipe stub given in args.recipe_path.

    :param args: object with configuration for model classes
    :param num_classes: Integer representing the number of output classes
    :returns: A Module object representing the created model
    :raises ValueError: if args.checkpoint_path is "zoo" but args.recipe_path
        is not a stub prefixed by 'zoo:'
    """
    # only download once locally
    with torch_distributed_zero_first(args.local_rank):
        if args.checkpoint_path == "zoo":
            recipe_is_stub = args.recipe_path and args.recipe_path.startswith(
                "zoo:"
            )
            if not recipe_is_stub:
                raise ValueError(
                    "'zoo' provided as --checkpoint-path but a SparseZoo stub"
                    " prefixed by 'zoo:' not provided as --recipe-path")
            checkpoint_files = Zoo.download_recipe_base_framework_files(
                args.recipe_path, extensions=[".pth"]
            )
            args.checkpoint_path = checkpoint_files[0]

        model = ModelRegistry.create(
            args.arch_key,
            args.pretrained,
            args.checkpoint_path,
            args.pretrained_dataset,
            num_classes=num_classes,
            **args.model_kwargs,
        )
    print(f"created model: {model}")
    return model
Exemple #11
0
def test_search_sparse_models(model_args, other_args):
    """
    Search for sparse versions of a loaded reference model and check that each
    result is not a base model and agrees with the reference on all
    identifying attributes.
    """
    reference = Zoo.load_model(**model_args, **other_args)
    results = Zoo.search_sparse_models(reference)
    assert len(results) > 0

    shared_attrs = (
        "domain",
        "sub_domain",
        "architecture",
        "sub_architecture",
        "framework",
        "repo",
        "dataset",
        "training_scheme",
    )
    for candidate in results:
        assert candidate
        assert not candidate.is_base
        for attr in shared_attrs:
            assert getattr(candidate, attr) == getattr(reference, attr)
Exemple #12
0
def model_to_path(model: Union[str, Model, File]) -> str:
    """
    Resolve any supported model reference to a local file path.

    Accepted forms are a path to an ONNX file, a SparseZoo model stub prefixed
    by 'zoo:', a SparseZoo Model object (its main ONNX file is used), or a
    SparseZoo File object.

    :param model: the model reference to resolve
    :return: path of the model file on the local system
    :raises ValueError: if model is empty, resolves to something other than a
        string, or resolves to a path that does not exist
    """
    if not model:
        raise ValueError(
            "model must be a path, sparsezoo.Model, or sparsezoo.File")

    resolved = model
    is_stub = isinstance(resolved, str) and resolved.startswith("zoo:")
    if is_stub:
        # load SparseZoo Model from stub
        if sparsezoo_import_error is not None:
            raise sparsezoo_import_error
        resolved = Zoo.load_model_from_stub(resolved)

    # the `is not object` guards skip the branch when the name is bound to
    # plain `object`
    if Model is not object and isinstance(resolved, Model):
        # default to the main onnx file for the model
        resolved = resolved.onnx_file.downloaded_path()
    elif File is not object and isinstance(resolved, File):
        # get the downloaded_path -- will auto download if not on local system
        resolved = resolved.downloaded_path()

    if not isinstance(resolved, str):
        raise ValueError("unsupported type for model: {}".format(type(resolved)))
    if not os.path.exists(resolved):
        raise ValueError("model path must exist: given {}".format(resolved))

    return resolved
Exemple #13
0
def test_search_sparse_recipes(model_args, other_args, other_recipe_args):
    """
    Search sparse recipes for a loaded model and check that each recipe's
    model metadata agrees with the model. When a recipe_type was requested,
    each recipe must also have that type.
    """
    reference = Zoo.load_model(**model_args, **other_args)
    found_recipes = Zoo.search_sparse_recipes(reference, **other_recipe_args)
    assert len(found_recipes) > 0

    shared_attrs = (
        "domain",
        "sub_domain",
        "architecture",
        "sub_architecture",
        "framework",
        "repo",
        "dataset",
        "training_scheme",
    )
    for found in found_recipes:
        assert found
        for attr in shared_attrs:
            assert getattr(found.model_metadata, attr) == getattr(reference, attr)

        if "recipe_type" in other_recipe_args:
            assert found.recipe_type == other_recipe_args["recipe_type"]
Exemple #14
0
def test_search_models(model_args, other_args):
    """
    Search models with the given arguments and check that every result carries
    each requested attribute value, and that the page length is respected when
    one was passed.
    """
    found = Zoo.search_models(**model_args, **other_args)

    for candidate in found:
        for attr_name in model_args:
            assert getattr(candidate, attr_name) == model_args[attr_name]

    if "page_length" in other_args:
        assert len(found) <= other_args["page_length"]
Exemple #15
0
def modify_yolo_onnx_input_shape(
    model_path: str, image_shape: Tuple[int]
) -> Tuple[str, Optional[NamedTemporaryFile]]:
    """
    Produce a copy of a YOLOv3 ONNX model whose first graph input (dims 2 and 3)
    is set to the given image shape, rescaling dims 2 and 3 of every graph
    output by the same factor. The copy is written to a temporary file.

    :param model_path: file path to YOLOv3 ONNX model or SparseZoo stub of the
        model to be loaded
    :param image_shape: 2-tuple of the image shape to resize this yolo model to
    :return: tuple of the model file path to use and the NamedTemporaryFile
        backing it. When the input dims are not static integers, or already
        equal image_shape, the (possibly downloaded) original path is returned
        with None in place of the temporary file
    """
    original_model_path = model_path
    if model_path.startswith("zoo:"):
        # load SparseZoo Model from stub
        zoo_model = Zoo.load_model_from_stub(model_path)
        model_path = zoo_model.onnx_file.downloaded_path()
        print(f"Downloaded {original_model_path} to {model_path}")

    onnx_model = onnx.load(model_path)
    graph_input = onnx_model.graph.input[0]

    initial_x = get_tensor_dim_shape(graph_input, 2)
    initial_y = get_tensor_dim_shape(graph_input, 3)

    if not isinstance(initial_x, int) or not isinstance(initial_y, int):
        # model graph does not have static integer input shape
        return model_path, None

    if (initial_x, initial_y) == tuple(image_shape):
        # no shape modification needed
        return model_path, None

    target_x, target_y = image_shape[0], image_shape[1]
    scale_x = initial_x / target_x
    scale_y = initial_y / target_y
    set_tensor_dim_shape(graph_input, 2, target_x)
    set_tensor_dim_shape(graph_input, 3, target_y)

    for graph_output in onnx_model.graph.output:
        output_x = get_tensor_dim_shape(graph_output, 2)
        output_y = get_tensor_dim_shape(graph_output, 3)
        set_tensor_dim_shape(graph_output, 2, int(output_x / scale_x))
        set_tensor_dim_shape(graph_output, 3, int(output_y / scale_y))

    # the caller keeps tmp_file alive for as long as the saved model is needed
    tmp_file = NamedTemporaryFile()
    onnx.save(onnx_model, tmp_file.name)

    print(
        f"Overwriting original model shape {(initial_x, initial_y)} to {image_shape}\n"
        f"Original model path: {original_model_path}, new temporary model saved to "
        f"{tmp_file.name}"
    )

    return tmp_file.name, tmp_file
Exemple #16
0
def test_search_recipes(recipe_args, other_args):
    """
    Search recipes with the given arguments and check every result:
    recipe_type is compared on the recipe itself, every other argument on the
    recipe's model metadata. The page length is checked when one was passed.
    """
    found_recipes = Zoo.search_recipes(**recipe_args, **other_args)

    for found in found_recipes:
        for name, expected in recipe_args.items():
            holder = found if name == "recipe_type" else found.model_metadata
            assert getattr(holder, name) == expected

    if "page_length" in other_args:
        assert len(found_recipes) <= other_args["page_length"]
Exemple #17
0
def onnx_models_with_data(request) -> OnnxModelDataFixture:
    """
    Build an OnnxModelDataFixture for the zoo model described by request.param:
    the downloaded ONNX path plus the downloaded data paths whose names
    contain "sample-inputs" and "sample-outputs" (None when absent).
    """
    zoo_model = Zoo.load_model(**request.param)
    onnx_path = zoo_model.onnx_file.downloaded_path()
    downloaded = [entry.downloaded_path() for entry in zoo_model.data.values()]

    inputs_path = outputs_path = None
    # a later matching path replaces an earlier one
    for local_path in downloaded:
        if "sample-inputs" in local_path:
            inputs_path = local_path
            continue
        if "sample-outputs" in local_path:
            outputs_path = local_path

    return OnnxModelDataFixture(onnx_path, inputs_path, outputs_path)
Exemple #18
0
def onnx_repo_models(request) -> OnnxRepoModelFixture:
    """
    Build an OnnxRepoModelFixture from request.param, which unpacks into the
    zoo model arguments and a model name. The fixture receives the downloaded
    ONNX path, the name, and the downloaded data paths whose names contain
    "sample-inputs" and "sample-outputs" (None when absent).
    """
    zoo_kwargs, model_name = request.param
    zoo_model = Zoo.load_model(**zoo_kwargs)
    onnx_path = zoo_model.onnx_file.downloaded_path()
    downloaded = [entry.downloaded_path() for entry in zoo_model.data.values()]

    inputs_path = outputs_path = None
    # a later matching path replaces an earlier one
    for local_path in downloaded:
        if "sample-inputs" in local_path:
            inputs_path = local_path
            continue
        if "sample-outputs" in local_path:
            outputs_path = local_path

    return OnnxRepoModelFixture(onnx_path, model_name, inputs_path, outputs_path)
Exemple #19
0
def _parse_major_minor(version: str) -> Tuple[int, int]:
    """Return the leading (major, minor) integers of a dotted version string."""
    numbers = []
    for piece in version.split(".")[:2]:
        digits = ""
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        numbers.append(int(digits) if digits else 0)
    # treat a missing component as 0, e.g. "2" -> (2, 0)
    numbers.extend([0] * (2 - len(numbers)))
    return numbers[0], numbers[1]


def _load_model(args) -> Tuple[Any, List[str]]:
    """
    Load the model named by args.model_filepath for the engine in args.engine.

    A SparseZoo stub (prefixed by 'zoo:') is downloaded first and
    args.model_filepath is replaced by the local ONNX path. For the deepsparse
    and onnxruntime engines the path is then passed through
    overwrite_transformer_onnx_model_inputs with args.batch_size and
    args.max_sequence_length, and args.model_filepath is updated to its result.

    :param args: object exposing engine, num_cores, model_filepath, batch_size
        and max_sequence_length
    :return: tuple of the loaded model and the list of model input names; the
        list stays empty when the engine is neither deepsparse nor onnxruntime
    :raises ValueError: if num_cores is set for onnxruntime older than 1.7
    """
    if args.engine == ORT_ENGINE and ort_error is not None:
        raise ort_error

    # validation
    # compare versions numerically: as plain strings "1.10.0" sorts before "1.7"
    if (args.num_cores is not None and args.engine == ORT_ENGINE
            and _parse_major_minor(onnxruntime.__version__) < (1, 7)):
        raise ValueError(
            "overriding default num_cores not supported for onnxruntime < 1.7.0. "
            "If using an older build with OpenMP, try setting the OMP_NUM_THREADS "
            "environment variable")

    # load model from sparsezoo if necessary
    if args.model_filepath.startswith("zoo:"):
        zoo_model = Zoo.load_model_from_stub(args.model_filepath)
        downloaded_path = zoo_model.onnx_file.downloaded_path()
        print(
            f"downloaded sparsezoo model {args.model_filepath} to {downloaded_path}"
        )
        args.model_filepath = downloaded_path

    # scale static ONNX graph to desired image shape
    input_names = []
    if args.engine in [DEEPSPARSE_ENGINE, ORT_ENGINE]:
        args.model_filepath, input_names, _ = overwrite_transformer_onnx_model_inputs(
            args.model_filepath,
            batch_size=args.batch_size,
            max_length=args.max_sequence_length,
        )

    # load model
    # NOTE(review): for any other engine value `model` is never assigned and
    # the return below raises UnboundLocalError
    if args.engine == DEEPSPARSE_ENGINE:
        print(f"Compiling deepsparse model for {args.model_filepath}")
        model = compile_model(args.model_filepath, args.batch_size,
                              args.num_cores)
        print(f"Engine info: {model}")
    elif args.engine == ORT_ENGINE:
        print(f"loading onnxruntime model for {args.model_filepath}")

        sess_options = onnxruntime.SessionOptions()
        if args.num_cores is not None:
            sess_options.intra_op_num_threads = args.num_cores
        sess_options.log_severity_level = 3
        sess_options.graph_optimization_level = (
            onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL)
        model = onnxruntime.InferenceSession(args.model_filepath,
                                             sess_options=sess_options)

    return model, input_names
def analyzer_models_repo(request):
    """
    Resolve the ONNX path of the zoo model described by request.param and load
    the stored analyzer JSON for it from the test_analyzer_model_data
    directory. When GENERATE_TEST_FILES is set, the JSON file is written from
    a ModelAnalyzer of the model before it is read.

    :return: tuple of the downloaded ONNX model path and the loaded JSON dict
    """
    model_args, output_name = request.param
    output_path = os.path.join(
        RELATIVE_PATH, "test_analyzer_model_data", output_name
    )
    model_path = Zoo.load_model(**model_args).onnx_file.downloaded_path()

    if GENERATE_TEST_FILES:
        ModelAnalyzer(model_path).save_json(output_path)

    with open(output_path) as output_file:
        expected = dict(json.load(output_file))

    return model_path, expected
Exemple #21
0
def get_onnx_path_and_configs(
    model_path: str, ) -> Tuple[str, Optional[str], Optional[str]]:
    """
    Resolve a model reference to its ONNX file and config locations.

    :param model_path: path to onnx file, transformers sparsezoo stub,
        or directory containing `model.onnx`, `config.json`, and/or
        `tokenizer.json` files. If no `model.onnx` file is found in
        a model directory, an exception will be raised
    :return: tuple of ONNX file path, parent directory of config file
        if it exists, and parent directory of tokenizer config file if it
        exists. (Parent directories returned instead of absolute path
        for compatibility with transformers .from_pretrained() method)
    :raises ValueError: if model_path is a directory without the ONNX file, or
        is neither a file, a directory, nor a stub prefixed by 'zoo:'
    """
    # a plain file is taken to be the ONNX model itself
    if os.path.isfile(model_path):
        return model_path, None, None

    if os.path.isdir(model_path):
        dir_entries = os.listdir(model_path)

        if _MODEL_DIR_ONNX_NAME not in dir_entries:
            raise ValueError(
                f"{_MODEL_DIR_ONNX_NAME} not found in transformers model directory "
                f"{model_path}. Be sure that an export of the model is written to "
                f"{os.path.join(model_path, _MODEL_DIR_ONNX_NAME)}")

        config_dir = model_path if _MODEL_DIR_CONFIG_NAME in dir_entries else None
        tokenizer_dir = (
            model_path if _MODEL_DIR_TOKENIZER_NAME in dir_entries else None
        )
        return (
            os.path.join(model_path, _MODEL_DIR_ONNX_NAME),
            config_dir,
            tokenizer_dir,
        )

    if not model_path.startswith("zoo:"):
        raise ValueError(
            f"model_path {model_path} is not a valid file, directory, or zoo stub"
        )

    zoo_model = Zoo.load_model_from_stub(model_path)
    zoo_onnx_path = zoo_model.onnx_file.downloaded_path()

    config_dir = None
    tokenizer_dir = None
    for zoo_file in zoo_model.framework_files:
        if zoo_file.display_name == _MODEL_DIR_CONFIG_NAME:
            config_dir = _get_file_parent(zoo_file.downloaded_path())
        if "tokenizer" in zoo_file.display_name:
            tokenizer_dir = _get_file_parent(zoo_file.downloaded_path())

    return zoo_onnx_path, config_dir, tokenizer_dir
Exemple #22
0
def load_recipe_yaml_str(file_path: Union[str, Recipe]) -> str:
    """
    Loads a YAML recipe file to a string or
    extracts recipe from YAML front matter in a sparsezoo markdown recipe card.
    Recipes can also be provided as SparseZoo model stubs or Recipe
    objects.

    YAML front matter: https://jekyllrb.com/docs/front-matter/

    :param file_path: file path to recipe YAML file or markdown recipe card or
        stub to a SparseZoo model whose recipe will be downloaded and loaded.
        SparseZoo stubs should be preceded by 'zoo:', and can contain an optional
        '?recipe_type=<type>' parameter or include a `/<type>` subpath. Can also
        be a SparseZoo Recipe object. i.e. '/path/to/local/recipe.yaml',
        'zoo:model/stub/path', 'zoo:model/stub/path?recipe_type=transfer_learn',
        'zoo:model/stub/path/transfer_learn'
    :return: the recipe YAML configuration loaded as a string
    :raises ValueError: if the resolved file does not end in '.md' or '.yaml'
    :raises RuntimeError: if no YAML front matter is found in a markdown file
    """
    # strings are dispatched first so plain paths never touch the Recipe class
    if isinstance(file_path, str):
        if file_path.startswith("zoo:"):
            # download from zoo stub
            recipe = Zoo.download_recipe_from_stub(file_path)
            file_path = recipe.downloaded_path()
    elif isinstance(file_path, Recipe):
        # download and unwrap Recipe object
        file_path = file_path.downloaded_path()

    extension = file_path.lower().split(".")[-1]
    if extension not in ["md", "yaml"]:
        raise ValueError(
            "Unsupported file extension for recipe. Expected '.md' or '.yaml'. "
            "Received {}".format(file_path))

    with open(file_path, "r") as yaml_file:
        yaml_str = yaml_file.read()

    if extension != "md":
        return yaml_str

    # extract YAML front matter from markdown recipe card
    # adapted from
    # https://github.com/jonbeebe/frontmatter/blob/master/frontmatter
    yaml_delim = r"(?:---|\+\+\+)"
    yaml = r"(.*?)"
    re_pattern = r"^\s*" + yaml_delim + yaml + yaml_delim
    regex = re.compile(re_pattern, re.S | re.M)
    result = regex.search(yaml_str)
    if not result:
        raise RuntimeError(
            "Could not extract YAML front matter from recipe card:"
            " {}".format(file_path))
    return result.group(1)
Exemple #23
0
def fix_onnx_input_shape(
    model_path: str,
    image_shape: Optional[Tuple[int]],
) -> Tuple[str, Optional[NamedTemporaryFile], Tuple[int, ...]]:
    """
    Creates a new ONNX model from the given path that accepts the given input
    shape. If the given model already has the given input shape no modifications are
    made. Uses a tempfile to store the modified model file.

    :param model_path: file path to ONNX model or SparseZoo stub of the model
        to be loaded
    :param image_shape: 2-tuple of the image shape to resize this model to, or None if
        no resizing needed
    :return: 3-tuple of (1) the filepath to an onnx model reshaped to the given
        input shape, which is the (possibly downloaded) original path if no
        reshape was needed, (2) the NamedTemporaryFile holding the reshaped
        model, or None if no reshape was needed, and (3) the image shape to
        benchmark the returned model with: image_shape when the model was
        reshaped, otherwise dims 2 and 3 read from the model's first input
    """
    original_model_path = model_path
    if model_path.startswith("zoo:"):
        # load SparseZoo Model from stub
        model = Zoo.load_model_from_stub(model_path)
        model_path = model.onnx_file.downloaded_path()
        print(f"Downloaded {original_model_path} to {model_path}")

    model = onnx.load(model_path)
    model_input = model.graph.input[0]

    original_x = get_tensor_dim_shape(model_input, 2)
    original_y = get_tensor_dim_shape(model_input, 3)
    original_image_shape = (original_x, original_y)

    if image_shape is None or original_image_shape == tuple(image_shape):
        return model_path, None, original_image_shape  # no shape modification needed

    set_tensor_dim_shape(model_input, 2, image_shape[0])
    set_tensor_dim_shape(model_input, 3, image_shape[1])

    # the file is removed when tmp_file is closed, so the caller must keep the
    # returned object alive for as long as the saved model is needed
    tmp_file = NamedTemporaryFile()
    onnx.save(model, tmp_file.name)

    print(
        f"Overwriting original model shape {original_image_shape} to {image_shape}\n"
        f"Original model path: {original_model_path}, new temporary model saved to "
        f"{tmp_file.name}")

    return tmp_file.name, tmp_file, image_shape
Exemple #24
0
def _load_data(args, input_names) -> List[List[numpy.ndarray]]:
    if args.data_path.startswith("zoo:"):
        data_dir = Zoo.load_model_from_stub(
            args.data_path).data_inputs.downloaded_path()
    else:
        data_dir = args.data_path
        data_files = os.listdir(data_dir)
        if any(".npz" not in file_name for file_name in data_files):
            raise RuntimeError(
                f"All files in data directory {data_dir} must have a .npz extension "
                f"found {[name for name in data_files if '.npz' not in name]}")

    samples = load_numpy_list(data_dir)

    # unwrap unloaded numpy files
    samples = [
        load_numpy(sample) if isinstance(sample, str) else sample
        for sample in samples
    ]

    processed_samples = []
    warning_given = False
    for sample in samples:
        if not all(inp_name in sample for inp_name in
                   input_names) or len(input_names) != len(sample):
            if not warning_given:
                warnings.warn(
                    "input sample found whose input names do not match the model input "
                    "names, this may cause an exception during benchmarking")
                warning_given = True
            sample = list(sample.values())
        else:
            sample = [sample[inp_name] for inp_name in input_names]

        for idx, array in enumerate(sample):
            processed_array = numpy.zeros(
                [args.max_sequence_length, *array.shape[1:]],
                dtype=array.dtype,
            )
            if array.shape[0] < args.max_sequence_length:
                processed_array[:array.shape[0], ...] = array
            else:
                processed_array[:, ...] = array[:args.max_sequence_length, ...]
            sample[idx] = processed_array
        processed_samples.append(sample)
    return processed_samples
Exemple #25
0
def test_quantize_model_post_training_resnet50_imagenette():
    """
    Quantize the base pytorch ResNet-50 imagenette model from the zoo using
    Imagenette validation samples, then run the module's _test_* checks
    against the quantized ONNX file. The temporary output file is always
    removed.
    """
    # Prepare model paths
    resnet50_imagenette_path = Zoo.load_model(
        domain="cv",
        sub_domain="classification",
        architecture="resnet_v1",
        sub_architecture="50",
        framework="pytorch",
        repo="sparseml",
        dataset="imagenette",
        training_scheme=None,
        sparse_name="base",
        sparse_category="none",
        sparse_target=None,
    ).onnx_file.downloaded_path()
    # delete=False keeps the file on disk once the handle is closed so it can be
    # written by path; the finally block below removes it
    with tempfile.NamedTemporaryFile(suffix=".onnx", delete=False) as quant_file:
        quant_model_path = quant_file.name

    try:
        # Prepare sample validation dataset
        batch_size = 1
        val_dataset = ImagenetteDataset(
            train=False, dataset_size=ImagenetteSize.s320
        )
        input_dict = [{"input": img.numpy()} for (img, _) in val_dataset]
        data_loader = DataLoader(input_dict, None, batch_size)

        # Run calibration and quantization
        quantize_model_post_training(
            resnet50_imagenette_path,
            data_loader,
            quant_model_path,
            show_progress=False,
            run_extra_opt=False,
        )

        # Verify that ResNet identity optimization is successful and save output
        # for testing
        _test_resnet_identity_quant(quant_model_path, True, True)

        # Verify Convs and MatMuls are quantized
        _test_model_is_quantized(resnet50_imagenette_path, quant_model_path)

        # Verify quant model accuracy
        # initialize a new generator
        test_data_loader = DataLoader(input_dict, None, 1)
        _test_quant_model_output(
            resnet50_imagenette_path,
            quant_model_path,
            test_data_loader,
            [1],
            batch_size,
        )
    finally:
        # Clean up even when one of the checks above fails
        os.remove(quant_model_path)
Exemple #26
0
def test_onnx_node_sparsities():
    """
    Measure node sparsities for every moderately pruned pytorch mobilenet_v1
    imagenet model found in the zoo, and check the totals as well as the
    per-node measurements of the section and classifier nodes that are not
    excluded by name.
    """
    # runs through nearly all other onnx functions imported above as well
    search_kwargs = dict(
        domain="cv",
        sub_domain="classification",
        architecture="mobilenet_v1",
        dataset="imagenet",
        framework="pytorch",
        sparse_name="pruned",
        sparse_category="moderate",
        repo="sparseml",
    )
    models = Zoo.search_models(**search_kwargs)
    assert len(models) > 0

    # nodes whose name contains one of these are exempt from the sparsity range
    excluded_markers = (
        "depth",
        "sections.0",
        "sections_0",
        "sections.1",
        "sections_1",
        "output",
    )

    for model in models:
        onnx_path = model.onnx_file.downloaded_path()
        total, per_node = onnx_nodes_sparsities(onnx_path)

        assert len(per_node) == 28

        assert isinstance(total, SparsityMeasurement)
        assert total.sparsity > 0.5
        assert total.params_count == 4209088
        assert total.params_zero_count > 0.5 * total.params_count

        for node_name, measurement in per_node.items():
            assert isinstance(measurement, SparsityMeasurement)
            assert measurement.params_count > 0

            in_checked_group = "sections" in node_name or "classifier" in node_name
            if not in_checked_group:
                continue
            if any(marker in node_name for marker in excluded_markers):
                continue

            assert measurement.sparsity > 0.2
            assert measurement.sparsity < 0.95
            assert measurement.params_zero_count > 0
Exemple #27
0
    def create_zoo_model(
        key: str,
        pretrained: Union[bool, str] = True,
        pretrained_dataset: str = None,
    ) -> Model:
        """
        Look up the registry attributes for a model key and load the matching
        sparsezoo Model for the keras framework.

        :param key: the model key (name) to retrieve
        :param pretrained: when a string, it is parsed as the optimization
            description of the version to load; any other value selects the
            default description registered for the key. Default True
        :param pretrained_dataset: The dataset to load for the model; the
            default dataset registered for the key is used when None
        :return: the sparsezoo Model reference for the given model
        :raises ValueError: if key is not in the model registry
        """
        if key not in ModelRegistry._CONSTRUCTORS:
            raise ValueError(
                "key {} is not in the model registry; available: {}".format(
                    key, ModelRegistry._CONSTRUCTORS
                )
            )

        attributes = ModelRegistry._ATTRIBUTES[key]

        if isinstance(pretrained, str):
            optim_desc = pretrained
        else:
            optim_desc = attributes.default_desc
        optim_name, optim_category, optim_target = parse_optimization_str(
            optim_desc
        )

        if pretrained_dataset is None:
            dataset = attributes.default_dataset
        else:
            dataset = pretrained_dataset

        return Zoo.load_model(
            attributes.domain,
            attributes.sub_domain,
            attributes.architecture,
            attributes.sub_architecture,
            KERAS_FRAMEWORK,
            attributes.repo_source,
            dataset,
            None,
            optim_name,
            optim_category,
            optim_target,
        )
Exemple #28
0
def main():
    """
    Training script entry point with SparseZoo / SparseML integration.

    In order, this function:

    - parses the command line via ``_parse_args`` and derives the distributed
      settings (device, world size, rank) from the ``WORLD_SIZE`` environment
      variable and ``args.local_rank``
    - selects an AMP implementation (apex, native, or none)
    - resolves ``args.initial_checkpoint`` to a downloaded checkpoint file when
      it is ``"zoo"`` or starts with ``"zoo:"``
    - creates the model, optimizer, optional EMA copy, and optional DDP wrapper
    - builds the train/eval datasets, data loaders, and loss functions
    - wraps the optimizer in a ``ScheduledOptimizer`` driven by
      ``args.sparseml_recipe``; the recipe may disable the LR scheduler and
      override the number of epochs
    - trains and validates every epoch, saving checkpoints when a saver exists
      (it is only created when ``args.local_rank == 0``)
    - exports the model to ONNX into the output directory after the last epoch

    A ``KeyboardInterrupt`` raised inside the epoch loop is swallowed; the ONNX
    export is skipped in that case, but the best metric is still logged.

    :raises ValueError: if ``args.initial_checkpoint`` is ``"zoo"`` while
        ``args.sparseml_recipe`` does not start with ``"zoo:"``
    """
    setup_default_logging()
    args, args_text = _parse_args()

    args.prefetcher = not args.no_prefetcher
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1
    # single-process defaults; overwritten below when running distributed
    args.device = 'cuda:0'
    args.world_size = 1
    args.rank = 0  # global rank
    if args.distributed:
        args.device = 'cuda:%d' % args.local_rank
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()
        args.rank = torch.distributed.get_rank()
        _logger.info('Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
                     % (args.rank, args.world_size))
    else:
        _logger.info('Training with a single process on 1 GPUs.')
    assert args.rank >= 0

    # resolve AMP arguments based on PyTorch / Apex availability
    use_amp = None
    if args.amp:
        # for backwards compat, `--amp` arg tries apex before native amp
        if has_apex:
            args.apex_amp = True
        elif has_native_amp:
            args.native_amp = True
    if args.apex_amp and has_apex:
        use_amp = 'apex'
    elif args.native_amp and has_native_amp:
        use_amp = 'native'
    elif args.apex_amp or args.native_amp:
        _logger.warning("Neither APEX or native Torch AMP is available, using float32. "
                        "Install NVIDA apex or upgrade to PyTorch 1.6")

    # offset the seed by rank so each process gets a different random stream
    torch.manual_seed(args.seed + args.rank)

    ####################################################################################
    # Start - SparseML optional load weights from SparseZoo
    ####################################################################################
    if args.initial_checkpoint == "zoo":
        # Load checkpoint from base weights associated with given SparseZoo recipe
        if args.sparseml_recipe.startswith("zoo:"):
            args.initial_checkpoint = Zoo.download_recipe_base_framework_files(
                args.sparseml_recipe,
                extensions=[".pth.tar", ".pth"]
            )[0]
        else:
            raise ValueError(
                "Attempting to load weights from SparseZoo recipe, but not given a "
                "SparseZoo recipe stub.  When initial-checkpoint is set to 'zoo'. "
                "sparseml-recipe must start with 'zoo:' and be a SparseZoo model "
                f"stub. sparseml-recipe was set to {args.sparseml_recipe}"
            )
    elif args.initial_checkpoint.startswith("zoo:"):
        # Load weights from a SparseZoo model stub
        zoo_model = Zoo.load_model_from_stub(args.initial_checkpoint)
        # download_framework_files returns a list of downloaded paths, while
        # checkpoint_path below needs a single path; take the first match,
        # mirroring the recipe branch above
        args.initial_checkpoint = zoo_model.download_framework_files(
            extensions=[".pth"]
        )[0]
    ####################################################################################
    # End - SparseML optional load weights from SparseZoo
    ####################################################################################

    model = create_model(
        args.model,
        pretrained=args.pretrained,
        num_classes=args.num_classes,
        drop_rate=args.drop,
        drop_connect_rate=args.drop_connect,  # DEPRECATED, use drop_path
        drop_path_rate=args.drop_path,
        drop_block_rate=args.drop_block,
        global_pool=args.gp,
        bn_tf=args.bn_tf,
        bn_momentum=args.bn_momentum,
        bn_eps=args.bn_eps,
        scriptable=args.torchscript,
        checkpoint_path=args.initial_checkpoint)
    if args.num_classes is None:
        assert hasattr(model, 'num_classes'), 'Model must have `num_classes` attr if not set on cmd line/config.'
        args.num_classes = model.num_classes  # FIXME handle model default vs config num_classes more elegantly

    if args.local_rank == 0:
        _logger.info('Model %s created, param count: %d' %
                     (args.model, sum([m.numel() for m in model.parameters()])))

    data_config = resolve_data_config(vars(args), model=model, verbose=args.local_rank == 0)

    # setup augmentation batch splits for contrastive loss or split bn
    num_aug_splits = 0
    if args.aug_splits > 0:
        assert args.aug_splits > 1, 'A split of 1 makes no sense'
        num_aug_splits = args.aug_splits

    # enable split bn (separate bn stats per batch-portion)
    if args.split_bn:
        assert num_aug_splits > 1 or args.resplit
        model = convert_splitbn_model(model, max(num_aug_splits, 2))

    # move model to GPU, enable channels last layout if set
    model.cuda()
    if args.channels_last:
        model = model.to(memory_format=torch.channels_last)

    # setup synchronized BatchNorm for distributed training
    if args.distributed and args.sync_bn:
        assert not args.split_bn
        if has_apex and use_amp != 'native':
            # Apex SyncBN preferred unless native amp is activated
            model = convert_syncbn_model(model)
        else:
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        if args.local_rank == 0:
            _logger.info(
                'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using '
                'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.')

    if args.torchscript:
        assert not use_amp == 'apex', 'Cannot use APEX AMP with torchscripted model'
        assert not args.sync_bn, 'Cannot use SyncBatchNorm with torchscripted model'
        model = torch.jit.script(model)

    optimizer = create_optimizer(args, model)

    # setup automatic mixed-precision (AMP) loss scaling and op casting
    amp_autocast = suppress  # do nothing
    loss_scaler = None
    if use_amp == 'apex':
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
        loss_scaler = ApexScaler()
        if args.local_rank == 0:
            _logger.info('Using NVIDIA APEX AMP. Training in mixed precision.')
    elif use_amp == 'native':
        amp_autocast = torch.cuda.amp.autocast
        loss_scaler = NativeScaler()
        if args.local_rank == 0:
            _logger.info('Using native Torch AMP. Training in mixed precision.')
    else:
        if args.local_rank == 0:
            _logger.info('AMP not enabled. Training in float32.')

    # optionally resume from a checkpoint
    resume_epoch = None
    if args.resume:
        resume_epoch = resume_checkpoint(
            model, args.resume,
            optimizer=None if args.no_resume_opt else optimizer,
            loss_scaler=None if args.no_resume_opt else loss_scaler,
            log_info=args.local_rank == 0)

    # setup exponential moving average of model weights, SWA could be used here too
    model_ema = None
    if args.model_ema:
        # Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
        model_ema = ModelEmaV2(
            model, decay=args.model_ema_decay, device='cpu' if args.model_ema_force_cpu else None)
        if args.resume:
            load_checkpoint(model_ema.module, args.resume, use_ema=True)

    # setup distributed training
    if args.distributed:
        if has_apex and use_amp != 'native':
            # Apex DDP preferred unless native amp is activated
            if args.local_rank == 0:
                _logger.info("Using NVIDIA APEX DistributedDataParallel.")
            model = ApexDDP(model, delay_allreduce=True)
        else:
            if args.local_rank == 0:
                _logger.info("Using native Torch DistributedDataParallel.")
            model = NativeDDP(model, device_ids=[args.local_rank])  # can use device str in Torch >= 1.1
        # NOTE: EMA model does not need to be wrapped by DDP

    # setup learning rate schedule and starting epoch
    lr_scheduler, num_epochs = create_scheduler(args, optimizer)
    start_epoch = 0
    if args.start_epoch is not None:
        # a specified start_epoch will always override the resume epoch
        start_epoch = args.start_epoch
    elif resume_epoch is not None:
        start_epoch = resume_epoch
    if lr_scheduler is not None and start_epoch > 0:
        lr_scheduler.step(start_epoch)

    # create the train and eval datasets
    dataset_train = create_dataset(
        args.dataset, root=args.data_dir, split=args.train_split, is_training=True, batch_size=args.batch_size)
    dataset_eval = create_dataset(
        args.dataset, root=args.data_dir, split=args.val_split, is_training=False, batch_size=args.batch_size)

    # setup mixup / cutmix
    collate_fn = None
    mixup_fn = None
    mixup_active = args.mixup > 0 or args.cutmix > 0. or args.cutmix_minmax is not None
    if mixup_active:
        mixup_args = dict(
            mixup_alpha=args.mixup, cutmix_alpha=args.cutmix, cutmix_minmax=args.cutmix_minmax,
            prob=args.mixup_prob, switch_prob=args.mixup_switch_prob, mode=args.mixup_mode,
            label_smoothing=args.smoothing, num_classes=args.num_classes)
        # with the prefetcher mixup is applied in the collate function,
        # otherwise it is handed to train_one_epoch as mixup_fn
        if args.prefetcher:
            assert not num_aug_splits  # collate conflict (need to support deinterleaving in collate mixup)
            collate_fn = FastCollateMixup(**mixup_args)
        else:
            mixup_fn = Mixup(**mixup_args)

    # wrap dataset in AugMix helper
    if num_aug_splits > 1:
        dataset_train = AugMixDataset(dataset_train, num_splits=num_aug_splits)

    # create data loaders w/ augmentation pipeline
    train_interpolation = args.train_interpolation
    if args.no_aug or not train_interpolation:
        train_interpolation = data_config['interpolation']
    loader_train = create_loader(
        dataset_train,
        input_size=data_config['input_size'],
        batch_size=args.batch_size,
        is_training=True,
        use_prefetcher=args.prefetcher,
        no_aug=args.no_aug,
        re_prob=args.reprob,
        re_mode=args.remode,
        re_count=args.recount,
        re_split=args.resplit,
        scale=args.scale,
        ratio=args.ratio,
        hflip=args.hflip,
        vflip=args.vflip,
        color_jitter=args.color_jitter,
        auto_augment=args.aa,
        num_aug_splits=num_aug_splits,
        interpolation=train_interpolation,
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        collate_fn=collate_fn,
        pin_memory=args.pin_mem,
        use_multi_epochs_loader=args.use_multi_epochs_loader
    )

    loader_eval = create_loader(
        dataset_eval,
        input_size=data_config['input_size'],
        batch_size=args.validation_batch_size_multiplier * args.batch_size,
        is_training=False,
        use_prefetcher=args.prefetcher,
        interpolation=data_config['interpolation'],
        mean=data_config['mean'],
        std=data_config['std'],
        num_workers=args.workers,
        distributed=args.distributed,
        crop_pct=data_config['crop_pct'],
        pin_memory=args.pin_mem,
    )

    # setup loss function
    if args.jsd:
        assert num_aug_splits > 1  # JSD only valid with aug splits set
        train_loss_fn = JsdCrossEntropy(num_splits=num_aug_splits, smoothing=args.smoothing).cuda()
    elif mixup_active:
        # smoothing is handled with mixup target transform
        train_loss_fn = SoftTargetCrossEntropy().cuda()
    elif args.smoothing:
        train_loss_fn = LabelSmoothingCrossEntropy(smoothing=args.smoothing).cuda()
    else:
        train_loss_fn = nn.CrossEntropyLoss().cuda()
    validate_loss_fn = nn.CrossEntropyLoss().cuda()

    # setup checkpoint saver and eval metric tracking
    eval_metric = args.eval_metric
    best_metric = None
    best_epoch = None
    saver = None
    # stays '' on every process except local rank 0; the SparseML loggers and
    # the ONNX export below are both gated on it being non-empty
    output_dir = ''
    if args.local_rank == 0:
        output_base = args.output if args.output else './output'
        exp_name = '-'.join([
            datetime.now().strftime("%Y%m%d-%H%M%S"),
            args.model,
            str(data_config['input_size'][-1])
        ])
        output_dir = get_outdir(output_base, 'train', exp_name)
        decreasing = True if eval_metric == 'loss' else False
        saver = CheckpointSaver(
            model=model, optimizer=optimizer, args=args, model_ema=model_ema, amp_scaler=loss_scaler,
            checkpoint_dir=output_dir, recovery_dir=output_dir, decreasing=decreasing, max_history=args.checkpoint_hist)
        with open(os.path.join(output_dir, 'args.yaml'), 'w') as f:
            f.write(args_text)

    ####################################################################################
    # Start SparseML Integration
    ####################################################################################
    sparseml_loggers = (
        [PythonLogger(), TensorBoardLogger(log_path=output_dir)]
        if output_dir
        else None
    )
    manager = ScheduledModifierManager.from_yaml(args.sparseml_recipe)
    # from here on `optimizer` is the recipe-driven wrapper around the original
    optimizer = ScheduledOptimizer(
        optimizer,
        model,
        manager,
        steps_per_epoch=len(loader_train),
        loggers=sparseml_loggers
    )
    # override lr scheduler if recipe makes any LR updates
    if any("LearningRate" in str(modifier) for modifier in manager.modifiers):
        _logger.info("Disabling timm LR scheduler, managing LR using SparseML recipe")
        lr_scheduler = None
    if manager.max_epochs:
        _logger.info(
            f"Overriding max_epochs to {manager.max_epochs} from SparseML recipe"
        )
        num_epochs = manager.max_epochs or num_epochs
    ####################################################################################
    # End SparseML Integration
    ####################################################################################

    if args.local_rank == 0:
        _logger.info('Scheduled epochs: {}'.format(num_epochs))

    try:
        for epoch in range(start_epoch, num_epochs):
            if args.distributed and hasattr(loader_train.sampler, 'set_epoch'):
                loader_train.sampler.set_epoch(epoch)

            train_metrics = train_one_epoch(
                epoch, model, loader_train, optimizer, train_loss_fn, args,
                lr_scheduler=lr_scheduler, saver=saver, output_dir=output_dir,
                amp_autocast=amp_autocast, loss_scaler=loss_scaler, model_ema=model_ema, mixup_fn=mixup_fn)

            if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                if args.local_rank == 0:
                    _logger.info("Distributing BatchNorm running means and vars")
                distribute_bn(model, args.world_size, args.dist_bn == 'reduce')

            eval_metrics = validate(model, loader_eval, validate_loss_fn, args, amp_autocast=amp_autocast)

            if model_ema is not None and not args.model_ema_force_cpu:
                if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                    distribute_bn(model_ema, args.world_size, args.dist_bn == 'reduce')
                ema_eval_metrics = validate(
                    model_ema.module, loader_eval, validate_loss_fn, args, amp_autocast=amp_autocast, log_suffix=' (EMA)')
                # the EMA metrics replace the raw model metrics for scheduling,
                # the summary, and checkpoint selection below
                eval_metrics = ema_eval_metrics

            if lr_scheduler is not None:
                # step LR for next epoch
                lr_scheduler.step(epoch + 1, eval_metrics[eval_metric])

            # NOTE(review): on processes where output_dir is '' this path is
            # a bare 'summary.csv' relative to the working directory - confirm
            # that is intended for non-zero local ranks
            update_summary(
                epoch, train_metrics, eval_metrics, os.path.join(output_dir, 'summary.csv'),
                write_header=best_metric is None)

            if saver is not None:
                # save proper checkpoint with eval metric
                save_metric = eval_metrics[eval_metric]
                best_metric, best_epoch = saver.save_checkpoint(epoch, metric=save_metric)

        #################################################################################
        # Start SparseML ONNX Export
        #################################################################################
        if output_dir:
            _logger.info(
                f"training complete, exporting ONNX to {output_dir}/model.onnx"
            )
            exporter = ModuleExporter(model, output_dir)
            # sample input is a single random batch element shaped like the
            # configured input size
            exporter.export_onnx(torch.randn((1, *data_config["input_size"])))
        #################################################################################
        # End SparseML ONNX Export
        #################################################################################

    except KeyboardInterrupt:
        # deliberate: let the user stop training early and still report the
        # best metric below (the ONNX export above is skipped in that case)
        pass
    if best_metric is not None:
        _logger.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch))
Exemple #29
0
def test_download_recipe_base_framework_files(recipe_args, other_args):
    """Downloading a recipe's base framework files must yield at least one file."""
    downloaded = Zoo.download_recipe_base_framework_files(
        recipe_args,
        **other_args,
    )
    file_count = len(downloaded)
    assert file_count > 0
Exemple #30
0
def test_load_model(model_args, other_args):
    """Load a model from the zoo, download it, validate it, then remove its files."""
    load_kwargs = {**model_args, **other_args}
    loaded = Zoo.load_model(**load_kwargs)

    # force a fresh download so validation runs against newly fetched files
    loaded.download(overwrite=True)
    validate_downloaded_model(loaded, model_args, other_args)

    # clean up the downloaded model directory
    download_dir = loaded.dir_path
    shutil.rmtree(download_dir)