Example #1
    def run(self) -> None:
        from modelci.hub.deployer.dispatcher import serve

        for job in iter(self.job_queue.get, None):
            # finish flag received: exit the loop
            if job is self._queue_finish_flag:
                break
            # start a new container if container not started
            if job.container_name is None:
                container = serve(save_path=job.model.saved_path,
                                  device=job.device)
                container_name = container.name
                # remember to clean up the created container
                self._hold_container.put(container)
            else:
                container_name = job.container_name
            # change model status
            job.model.status = Status.RUNNING
            ModelService.update_model(job.model)

            profiler = Profiler(model_info=job.model,
                                server_name=container_name,
                                inspector=job.client)
            dpr = profiler.diagnose(device=job.device)
            ModelService.append_dynamic_profiling_result(job.model.id,
                                                         dynamic_result=dpr)

            # set model status to pass
            job.model.status = Status.PASS
            ModelService.update_model(job.model)

            if job.container_name is None:
                # stop and release the container started for this job
                self._hold_container.get().stop()
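A minimal driver sketch for the worker loop above, assuming `run` belongs to a Thread-based executor. Only `job_queue`, `_hold_container`, and `_queue_finish_flag` are taken from the snippet; the rest of the class is hypothetical:

import queue
import threading

class JobExecutor(threading.Thread):  # hypothetical host class for `run`
    def __init__(self):
        super().__init__(daemon=True)
        self.job_queue = queue.Queue()        # jobs consumed by `run`
        self._hold_container = queue.Queue()  # containers started by `run`
        self._queue_finish_flag = object()    # unique shutdown sentinel

    def submit(self, job):
        self.job_queue.put(job)

    def shutdown(self):
        # `run` breaks out of its `iter(self.job_queue.get, None)` loop
        # as soon as it sees the finish flag
        self.job_queue.put(self._queue_finish_flag)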
Example #2
def serve_by_task(args):
    model_bo = retrieve_model_by_task(task=args.task)
    serve(model_bo[0].saved_path, device=args.device, name=args.name, batch_size=args.bs)
    # TODO: check if the service is dispatched successfully
    # drop stale conversion/publication states before marking the model in service
    new_status = [item for item in model_bo[0].model_status
                  if item not in (ModelStatus.CONVERTED, ModelStatus.PUBLISHED)]
    new_status.append(ModelStatus.IN_SERVICE)
    model_bo[0].model_status = new_status
    ModelService.update_model(model_bo[0])
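A hypothetical invocation sketch: `args` is assumed to be an argparse Namespace carrying the `task`, `device`, `name`, and `bs` attributes read above; the values below are illustrative only.

from argparse import Namespace

# serve the first model registered for the given task on GPU 0
serve_by_task(Namespace(task='image_classification', device='cuda:0',
                        name='resnet50-serving', bs=8))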
Example #3
 def terminate(self):
     if self._task:
         # trigger graceful PyTorch Lightning shutdown by injecting a KeyboardInterrupt (as with ^C)
         self._task.set_exception(KeyboardInterrupt())
         model_train_curd.update(TrainingJobUpdate(_id=self._id, status=Status.FAIL))
         model_bo = ModelService.get_model_by_id(self.model_id)
         model_bo.model_status.remove(ModelStatus.TRAINING)
         model_bo.model_status.append(ModelStatus.DRAFT)
         ModelService.update_model(model_bo)
Example #4
def serve_by_name(args):
    model = args.model
    framework = Framework[args.framework.upper()]
    engine = Engine[args.engine.upper()]

    model_bo = retrieve_model(architecture=model, framework=framework, engine=engine)
    serve(model_bo[0].saved_path, device=args.device, name=args.name, batch_size=args.bs)

    # TODO: check if the service is dispatched successfully
    # drop stale conversion/publication states before marking the model in service
    new_status = [item for item in model_bo[0].model_status
                  if item not in (ModelStatus.CONVERTED, ModelStatus.PUBLISHED)]
    new_status.append(ModelStatus.IN_SERVICE)
    model_bo[0].model_status = new_status
    ModelService.update_model(model_bo[0])
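`serve_by_task` (Example #2) and `serve_by_name` repeat the same status transition. A sketch of a shared helper both could call; the helper name is ours, not part of the library:

def _mark_in_service(model_bo):
    """Replace CONVERTED/PUBLISHED with IN_SERVICE and persist the change."""
    model_bo.model_status = [
        status for status in model_bo.model_status
        if status not in (ModelStatus.CONVERTED, ModelStatus.PUBLISHED)
    ]
    model_bo.model_status.append(ModelStatus.IN_SERVICE)
    ModelService.update_model(model_bo)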
Example #5
    def start(self):
        def training_done_callback(future):
            model_train_curd.update(TrainingJobUpdate(_id=self._id, status=Status.PASS))
            # TODO: save to database and update model_status, engine
            print(self.export_model())

        self._task = self._executor.submit(self.trainer_engine.fit, self.model, **self._data_loader_kwargs)
        self._task.add_done_callback(training_done_callback)
        model_train_curd.update(TrainingJobUpdate(_id=self._id, status=Status.RUNNING))

        model_bo = ModelService.get_model_by_id(self.model_id)
        model_bo.model_status.remove(ModelStatus.DRAFT)
        model_bo.model_status.append(ModelStatus.TRAINING)
        ModelService.update_model(model_bo)
Example #6
def test_update_model():
    model = ModelService.get_models('ResNet50')[0]
    model.acc = 0.9
    model.weight.weight = bytes([123, 255])

    # check that the update succeeded
    assert ModelService.update_model(model)

    model_ = ModelService.get_models('ResNet50')[0]

    # check updated model
    assert abs(model_.acc - 0.9) < 1e-6
    assert model_.weight.weight == model.weight.weight
Example #7
def register_model(origin_model,
                   dataset: str,
                   metric: Dict[Metric, float],
                   task: Task,
                   inputs: List[IOShape],
                   outputs: List[IOShape],
                   model_input: Optional[List] = None,
                   architecture: str = None,
                   framework: Framework = None,
                   engine: Engine = None,
                   version: ModelVersion = None,
                   parent_model_id: Optional[str] = None,
                   convert: bool = True,
                   profile: bool = True,
                   model_status: List[ModelStatus] = None):
    """Upload a model to ModelDB.
    This function will upload the given model into the database with some variation. It may optionally generate a
        branch of models (i.e. model family) with different optimization techniques. Besides, a benchmark will be
        scheduled for each generated model, in order to gain profiling results for model selection strategies.
        In the `no_generate` model(i.e. `no_generate` flag is set to be `True`), `architecture`, `framework`, `engine`
        and `version` could be None. If any of the above arguments is `None`, all of them will be auto induced
        from the origin_model path. An `ValueError` will be raised if the mata info cannot be induced.

    TODO:
        This function contains overly complex logic and needs to be simplified.

    Arguments:
        origin_model: The uploaded model without optimization. When the `no_generate` flag is set, this parameter
            should be a str indicating the model file path.
        architecture (str): Model architecture name. Defaults to None.
        framework (Framework): Framework name. Defaults to None.
        version (ModelVersion): Model version. Defaults to None.
        dataset (str): Model testing dataset.
        metric (Dict[Metric, float]): Scoring metric and its corresponding score, used for model evaluation.
        task (Task): Model task type.
        inputs (Iterable[IOShape]): Model input tensors.
        outputs (Iterable[IOShape]): Model output tensors.
        model_input: Sample model input data.
            TODO: specify more model conversion related params
        engine (Engine): Model optimization engine. Defaults to `Engine.NONE`.
        parent_model_id (Optional[str]): The parent model ID of the current model, if this model is derived from a
            pre-existing one.
        model_status (List[ModelStatus]): The status of the current model in its lifecycle.
        convert (bool): Flag for generation of the model family. When set, `origin_model` should be a path to the
            saved model file. Defaults to `True`.
        profile (bool): Flag for profiling uploaded (including converted) models. Defaults to `True`.
    """
    from modelci.controller import job_executor
    from modelci.controller.executor import Job

    model_dir_list = list()

    # type and existence check
    if isinstance(origin_model, str):
        model_dir = Path(origin_model).absolute()
        assert model_dir.exists(), f'model weight does not exist at {origin_model}'

        if all([architecture, task, framework, engine, version]):
            # from explicit architecture, framework, engine and version
            ext = model_dir.suffix
            path = generate_path(architecture, task, framework, engine,
                                 version).with_suffix(ext)
            # copy to the destination folder unless the model is already there
            if path != model_dir:
                if ext:
                    path.parent.mkdir(parents=True, exist_ok=True)
                else:
                    path.mkdir(parents=True, exist_ok=True)

                # copy to the cached folder
                subprocess.call(['cp', str(model_dir), str(path)])
        else:  # from implicit extracted from path, check validity of the path later at registration
            path = model_dir
        model_dir_list.append(path)
    elif framework == Framework.PYTORCH and engine in (Engine.PYTORCH, Engine.NONE):
        # save original pytorch model
        pytorch_dir = generate_path(
            task=task,
            model_name=architecture,
            framework=framework,
            engine=engine,
            version=str(version),
        )
        pytorch_dir.parent.mkdir(parents=True, exist_ok=True)
        save_path_with_ext = pytorch_dir.with_suffix('.pth')
        torch.save(origin_model, str(save_path_with_ext))
        model_dir_list.append(save_path_with_ext)

    if convert:
        # TODO: generate from path name
        # generate model variant
        model_dir_list.extend(
            _generate_model_family(origin_model,
                                   architecture,
                                   task,
                                   framework,
                                   filename=str(version),
                                   inputs=inputs,
                                   outputs=outputs,
                                   model_input=model_input))

    # register
    for model_dir in model_dir_list:
        parse_result = parse_path(model_dir)
        architecture = parse_result['architecture']
        task = parse_result['task']
        framework = parse_result['framework']
        engine = parse_result['engine']
        version = parse_result['version']
        filename = parse_result['filename']

        if model_status is not None:
            model_bo_status = model_status
        elif engine == Engine.PYTORCH:
            model_bo_status = [ModelStatus.PUBLISHED]
        else:
            model_bo_status = [ModelStatus.CONVERTED]

        with open(str(model_dir), 'rb') as f:
            model = ModelBO(name=architecture,
                            task=task,
                            framework=framework,
                            engine=engine,
                            version=version,
                            dataset=dataset,
                            metric=metric,
                            parent_model_id=parent_model_id,
                            inputs=inputs,
                            outputs=outputs,
                            model_status=model_bo_status,
                            weight=Weight(f, filename=filename))

            ModelService.post_model(model)
        # TODO refresh
        model = ModelService.get_models(name=architecture,
                                        task=task,
                                        framework=framework,
                                        engine=engine,
                                        version=version)[0]
        if model.engine in (Engine.PYTORCH, Engine.TFS):
            parent_model_id = model.id
        # profile registered model
        if profile and engine != Engine.PYTORCH:
            file = tf.keras.utils.get_file(
                "grace_hopper.jpg",
                "https://storage.googleapis.com/download.tensorflow.org/example_images/grace_hopper.jpg"
            )
            test_img_bytes = cv2.imread(file)

            kwargs = {
                'repeat_data': test_img_bytes,
                'batch_size': 32,
                'batch_num': 100,
                'asynchronous': False,
                'model_info': model,
            }

            # drop stale conversion/publication states before profiling
            new_status = [
                item for item in model.model_status
                if item not in (ModelStatus.CONVERTED, ModelStatus.PUBLISHED)
            ]
            new_status.append(ModelStatus.PROFILING)
            model.model_status = new_status
            ModelService.update_model(model)

            if engine == Engine.TORCHSCRIPT:
                client = CVTorchClient(**kwargs)
            elif engine == Engine.TFS:
                client = CVTFSClient(**kwargs)
            elif engine == Engine.ONNX:
                client = CVONNXClient(**kwargs)
            elif engine == Engine.TRT:
                client = CVTRTClient(**kwargs)
            else:
                raise ValueError(f'No such serving engine: {engine}')

            job_cuda = Job(client=client, device='cuda:0', model_info=model)
            # job_cpu = Job(client=client, device='cpu', model_info=model)
            job_executor.submit(job_cuda)
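A hypothetical call sketch for the signature above; the `Metric`, `Task`, `IOShape`, and `ModelVersion` arguments are assumptions inferred from the type hints, not verified API usage:

import torch

# a trivial stand-in network; any torch.nn.Module would do here
my_model = torch.nn.Linear(224 * 224 * 3, 1000)

register_model(
    origin_model=my_model,
    dataset='ImageNet',
    metric={Metric.ACC: 0.76},        # assumed Metric member and score
    task=Task.IMAGE_CLASSIFICATION,   # assumed Task member
    inputs=[IOShape(shape=[-1, 3, 224, 224], dtype='float32', name='input')],   # assumed IOShape kwargs
    outputs=[IOShape(shape=[-1, 1000], dtype='float32', name='output')],
    architecture='ResNet50',
    framework=Framework.PYTORCH,
    engine=Engine.PYTORCH,
    version=ModelVersion('1'),        # assumed constructor
    convert=True,
    profile=False,
)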
Example #8
def register_model(
    model: MLModel,
    convert: bool = True,
    profile: bool = True,
) -> List[MLModel]:
    """Upload a model to ModelDB.
    This function will upload the given model into the database with some variation. It may optionally generate a
        branch of models (i.e. model family) with different optimization techniques. Besides, a benchmark will be
        scheduled for each generated model, in order to gain profiling results for model selection strategies.
        In the `no_generate` model(i.e. `no_generate` flag is set to be `True`), `architecture`, `framework`, `engine`
        and `version` could be None. If any of the above arguments is `None`, all of them will be auto induced
        from the origin_model path. An `ValueError` will be raised if the mata info cannot be induced.

    TODO:
        This function contains overly complex logic and needs to be simplified.

    Arguments:
        model: Required inputs for registering a model. All information is wrapped in this object.
        convert (bool): Flag for generation of the model family. Defaults to `True`.
        profile (bool): Flag for profiling uploaded (including converted) models. Defaults to `True`.
    """
    models = list()

    model_dir_list = list()
    model.model_status = [ModelStatus.PUBLISHED]
    models.append(save(model))

    # generate model family
    if convert:
        model_dir_list.extend(converter.generate_model_family(model))

    # register
    model_data = model.dict(exclude={'weight', 'id', 'model_status', 'engine'})
    for model_dir in model_dir_list:
        parse_result = parse_path_plain(model_dir)
        engine = parse_result['engine']

        model_cvt = MLModel(**model_data,
                            weight=model_dir,
                            engine=engine,
                            model_status=[ModelStatus.CONVERTED])
        models.append(save(model_cvt))

    # profile registered model
    if profile:
        from modelci.controller import job_executor
        from modelci.controller.executor import Job

        file = tf.keras.utils.get_file(
            "grace_hopper.jpg",
            "https://storage.googleapis.com/download.tensorflow.org/example_images/grace_hopper.jpg"
        )
        test_img_bytes = cv2.imread(file)

        kwargs = {
            'repeat_data': test_img_bytes,
            'batch_size': 32,
            'batch_num': 100,
            'asynchronous': False,
        }

        for model in models:
            model.model_status = [ModelStatus.PROFILING]
            ModelService.update_model(model)
            kwargs['model_info'] = model
            engine = model.engine

            if engine == Engine.TORCHSCRIPT:
                client = CVTorchClient(**kwargs)
            elif engine == Engine.TFS:
                client = CVTFSClient(**kwargs)
            elif engine == Engine.ONNX:
                client = CVONNXClient(**kwargs)
            elif engine == Engine.TRT:
                client = CVTRTClient(**kwargs)
            else:
                raise ValueError(f'No such serving engine: {engine}')

            job_cuda = Job(client=client, device='cuda:0', model_info=model)
            # job_cpu = Job(client=client, device='cpu', model_info=model)
            job_executor.submit(job_cuda)
            # job_executor.submit(job_cpu)

    return models
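A hypothetical usage sketch for the refactored API above: all metadata now travels on the `MLModel` object rather than as separate keyword arguments. The field names are inferred from the `model.dict(exclude=...)` call and from Example #7, so treat them as assumptions:

from pathlib import Path

saved_model_path = Path('resnet50.pth')  # illustrative path to serialized weights

model = MLModel(
    architecture='ResNet50',
    framework=Framework.PYTORCH,
    engine=Engine.PYTORCH,
    version=ModelVersion('1'),
    dataset='ImageNet',
    metric={Metric.ACC: 0.76},
    task=Task.IMAGE_CLASSIFICATION,
    inputs=[IOShape(shape=[-1, 3, 224, 224], dtype='float32', name='input')],
    outputs=[IOShape(shape=[-1, 1000], dtype='float32', name='output')],
    weight=saved_model_path,
)
registered = register_model(model, convert=True, profile=False)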