Example #1
def _docker_pull_progress(docker_api, docker_image_name, logger, raise_exception=True):
    logger.info('Docker image will be pulled', extra={'image_name': docker_image_name})

    try:
        layers_total = {}
        layers_current = {}
        progress = Progress('Pulling docker image', 1, is_size=True, ext_logger=logger)
        for line in docker_api.api.pull(docker_image_name, stream=True, decode=True):
            layer_id = line.get("id", None)
            progress_details = line.get("progressDetail", {})
            if "total" in progress_details and "current" in progress_details:
                layers_total[layer_id] = progress_details["total"]
                layers_current[layer_id] = progress_details["current"]
                total = sum(layers_total.values())
                current = sum(layers_current.values())
                if total > progress.total:
                    progress.set(current, total)
                    progress.report_progress()
                elif (current - progress.current) / total > 0.01:
                    progress.set(current, total)
                    progress.report_progress()

            #print(json.dumps(line, indent=4))
        logger.info('Docker image has been pulled', extra={'image_name': docker_image_name})
    except DockerException as e:
        if raise_exception is True:
            raise DockerException('Unable to pull image: see actual error above. '
                                  'Please, run the task again or contact support team.')
        else:
            logger.warning("Pulling step is skipped. Unable to pull image: {!r}.".format(str(e)))
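
All of these examples revolve around the SDK's Progress helper. As a point of reference, here is a minimal, hedged sketch of how it is typically driven (assuming the package is importable as supervisely_lib; the message and count are illustrative):

import supervisely_lib as sly

# Create a progress object with a human-readable message and a total iteration count,
# then report each finished unit of work so the task log shows advancement.
progress = sly.Progress('Processing items', total_cnt=100)
for _ in range(100):
    # ... one unit of work ...
    progress.iter_done_report()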
Example #2
def upload_video_project(dir, api, workspace_id, project_name=None, log_progress=True):
    project_fs = VideoProject.read_single(dir)
    if project_name is None:
        project_name = project_fs.name

    if api.project.exists(workspace_id, project_name):
        project_name = api.project.get_free_name(workspace_id, project_name)

    project = api.project.create(workspace_id, project_name, ProjectType.VIDEOS)
    api.project.update_meta(project.id, project_fs.meta.to_json())

    for dataset_fs in project_fs.datasets:
        dataset = api.dataset.create(project.id, dataset_fs.name)

        names, item_paths, ann_paths = [], [], []
        for item_name in dataset_fs:
            item_path, ann_path = dataset_fs.get_item_paths(item_name)
            names.append(item_name)
            item_paths.append(item_path)
            ann_paths.append(ann_path)

        progress_cb = None
        if log_progress:
            ds_progress = Progress('Uploading videos to dataset {!r}'.format(dataset.name), total_cnt=len(item_paths))
            progress_cb = ds_progress.iters_done_report

        item_infos = api.video.upload_paths(dataset.id, names, item_paths, progress_cb)
        item_ids = [item_info.id for item_info in item_infos]
        if log_progress:
            ds_progress = Progress('Uploading annotations to dataset {!r}'.format(dataset.name), total_cnt=len(item_paths))
            progress_cb = ds_progress.iters_done_report

        api.video.annotation.upload_paths(item_ids, ann_paths, project_fs.meta, progress_cb)

    return project.id, project.name
Example #3
    def run_inference(self):
        inference_mode = InferenceModeFactory.create(
            self._inference_mode_config, self._in_project.meta,
            self._single_image_inference)
        out_project = Project(
            os.path.join(TaskPaths.RESULTS_DIR, self._in_project.name),
            OpenMode.CREATE)
        out_project.set_meta(inference_mode.out_meta)

        progress_bar = Progress('Model applying: ',
                                self._in_project.total_items)
        for in_dataset in self._in_project:
            out_dataset = out_project.create_dataset(in_dataset.name)
            for in_item_name in in_dataset:
                # Use output project meta so that we get an annotation that is already in the context of the output
                # project (with added object classes etc).
                in_item_paths = in_dataset.get_item_paths(in_item_name)
                in_img = sly_image.read(in_item_paths.img_path)
                in_ann = Annotation.load_json_file(in_item_paths.ann_path,
                                                   inference_mode.out_meta)
                logger.trace('Will process image',
                             extra={
                                 'dataset_name': in_dataset.name,
                                 'image_name': in_item_name
                             })
                inference_annotation = inference_mode.infer_annotate(
                    in_img, in_ann)
                out_dataset.add_item_file(in_item_name,
                                          in_item_paths.img_path,
                                          ann=inference_annotation)

                progress_bar.iter_done_report()

        report_inference_finished()
Example #4
    def download_git_archive(self, ecosystem_item_id, app_id, version, save_path, log_progress=True, ext_logger=None):
        payload = {
            ApiField.ECOSYSTEM_ITEM_ID: ecosystem_item_id,
            ApiField.VERSION: version,
            "isArchive": True
        }
        if app_id is not None:
            payload[ApiField.APP_ID] = app_id

        response = self._api.post('ecosystem.file.download', payload, stream=True)
        if log_progress:
            if ext_logger is None:
                ext_logger = logger

            length = -1
            # Use the Content-Length header for the total size when the server provides it.
            if "Content-Length" in response.headers:
                length = int(response.headers['Content-Length'])
            progress = Progress("Downloading: ", length, ext_logger=ext_logger, is_size=True)

        mb1 = 1024 * 1024
        ensure_base_path(save_path)
        with open(save_path, 'wb') as fd:
            log_size = 0
            for chunk in response.iter_content(chunk_size=mb1):
                fd.write(chunk)
                log_size += len(chunk)
                if log_progress and log_size > mb1:
                    progress.iters_done_report(log_size)
                    log_size = 0
            # Report the remaining tail so the reported size matches the downloaded size.
            if log_progress and log_size > 0:
                progress.iters_done_report(log_size)
Example #5
def download_project(api, project_id, dest_dir, dataset_ids=None, log_progress=False, batch_size=10):
    dataset_ids = set(dataset_ids) if (dataset_ids is not None) else None
    project_fs = Project(dest_dir, OpenMode.CREATE)
    meta = ProjectMeta.from_json(api.project.get_meta(project_id))
    project_fs.set_meta(meta)

    for dataset_info in api.dataset.get_list(project_id):
        dataset_id = dataset_info.id
        if dataset_ids is not None and dataset_id not in dataset_ids:
            continue

        dataset_fs = project_fs.create_dataset(dataset_info.name)
        images = api.image.get_list(dataset_id)

        ds_progress = None
        if log_progress:
            ds_progress = Progress(
                'Downloading dataset: {!r}'.format(dataset_info.name), total_cnt=len(images))

        for batch in batched(images, batch_size):
            image_ids = [image_info.id for image_info in batch]
            image_names = [image_info.name for image_info in batch]

            # download images as raw bytes
            batch_imgs_bytes = api.image.download_bytes(dataset_id, image_ids)

            # download annotations in json format
            ann_infos = api.annotation.download_batch(dataset_id, image_ids)
            ann_jsons = [ann_info.annotation for ann_info in ann_infos]

            for name, img_bytes, ann in zip(image_names, batch_imgs_bytes, ann_jsons):
                dataset_fs.add_item_raw_bytes(name, img_bytes, ann)

            if log_progress:
                ds_progress.iters_done_report(len(batch))
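
The batched() helper used above groups a list into fixed-size chunks so downloads and progress reports happen per batch. A minimal re-implementation for illustration only (not the SDK's actual code) would look like this:

def batched_sketch(seq, batch_size=10):
    # Yield successive slices of at most batch_size items; the last slice may be shorter.
    for start in range(0, len(seq), batch_size):
        yield seq[start:start + batch_size]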
Example #6
def download_video_project(api, project_id, dest_dir, dataset_ids=None, download_videos=True, log_progress=False):
    '''
    Download the video project with the given id into the destination directory.
    :param api: Api class object
    :param project_id: int
    :param dest_dir: str
    :param dataset_ids: list of integers
    :param download_videos: bool
    :param log_progress: bool
    '''
    LOG_BATCH_SIZE = 1

    key_id_map = KeyIdMap()

    project_fs = VideoProject(dest_dir, OpenMode.CREATE)

    meta = ProjectMeta.from_json(api.project.get_meta(project_id))
    project_fs.set_meta(meta)

    datasets_infos = []
    if dataset_ids is not None:
        for ds_id in dataset_ids:
            datasets_infos.append(api.dataset.get_info_by_id(ds_id))
    else:
        datasets_infos = api.dataset.get_list(project_id)

    for dataset in datasets_infos:
        dataset_fs = project_fs.create_dataset(dataset.name)
        videos = api.video.get_list(dataset.id)

        ds_progress = None
        if log_progress:
            ds_progress = Progress('Downloading dataset: {!r}'.format(dataset.name), total_cnt=len(videos))
        for batch in batched(videos, batch_size=LOG_BATCH_SIZE):
            video_ids = [video_info.id for video_info in batch]
            video_names = [video_info.name for video_info in batch]

            ann_jsons = api.video.annotation.download_bulk(dataset.id, video_ids)

            for video_id, video_name, ann_json in zip(video_ids, video_names, ann_jsons):
                if video_name != ann_json[ApiField.VIDEO_NAME]:
                    raise RuntimeError("Error in api.video.annotation.download_batch: broken order")

                video_file_path = dataset_fs.generate_item_path(video_name)
                if download_videos is True:
                    api.video.download_path(video_id, video_file_path)
                else:
                    touch(video_file_path)

                dataset_fs.add_item_file(video_name,
                                         video_file_path,
                                         ann=VideoAnnotation.from_json(ann_json, project_fs.meta, key_id_map),
                                         _validate_item=False)

            if log_progress:
                ds_progress.iters_done_report(len(batch))

    project_fs.set_key_id_map(key_id_map)
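
A hedged usage sketch for the function above; the server address, token and project id are placeholders, and sly.Api is assumed to be the SDK's HTTP client:

import supervisely_lib as sly

api = sly.Api('https://app.supervise.ly', 'YOUR_API_TOKEN')  # placeholder credentials
download_video_project(api, project_id=123, dest_dir='/tmp/video_project',
                       download_videos=True, log_progress=True)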
Example #7
def progress_report_thread_fn(in_project, progress_queue):
    """Reports overall progress for inference results taken from the queue.

    A None item signals the thread to finish.
    """

    progress_bar = Progress('Model applying: ', in_project.total_items)
    while True:
        resp = progress_queue.get()
        if resp is not None:
            progress_bar.iter_done_report()
        else:
            break
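
A sketch of how such a reporter thread might be wired up; the queue, the worker payloads and the in_project object are illustrative assumptions:

import threading
from queue import Queue

progress_queue = Queue()
reporter = threading.Thread(target=progress_report_thread_fn,
                            args=(in_project, progress_queue))
reporter.start()

# Workers put one item per finished inference; None tells the reporter to exit.
progress_queue.put(object())
progress_queue.put(None)
reporter.join()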
Example #8
def _docker_pull(docker_api, docker_image_name, logger, raise_exception=True):
    logger.info('Docker image will be pulled', extra={'image_name': docker_image_name})
    progress_dummy = Progress('Pulling image...', 1, ext_logger=logger)
    progress_dummy.iter_done_report()
    try:
        pulled_img = docker_api.images.pull(docker_image_name)
        logger.info('Docker image has been pulled', extra={'pulled': {'tags': pulled_img.tags, 'id': pulled_img.id}})
    except DockerException as e:
        if raise_exception is True:
            raise DockerException('Unable to pull image: see actual error above. '
                                  'Please, run the task again or contact support team.')
        else:
            logger.warning("Pulling step is skipped. Unable to pull image: {!r}.".format(str(e)))
Example #9
    def run_evaluation(self):
        progress = Progress('metric evaluation', self._project_gt.total_items)
        for ds_name in self._project_gt.datasets.keys():
            ds_gt = self._project_gt.datasets.get(ds_name)
            ds_pred = self._project_pred.datasets.get(ds_name)

            for sample_name in ds_gt:
                try:
                    ann_gt = Annotation.load_json_file(ds_gt.get_ann_path(sample_name), self._project_gt.meta)
                    ann_pred = Annotation.load_json_file(ds_pred.get_ann_path(sample_name), self._project_pred.meta)
                    self._metric.add_pair(ann_gt, ann_pred)
                except ValueError as e:
                    logger.warning('An error has occurred ({}). Sample "{}" in dataset "{}" will be skipped'
                                   .format(str(e), sample_name, ds_gt.name))
                progress.iter_done_report()
Example #10
def download_tar(github_url,
                 tar_path,
                 github_token=None,
                 version="master",
                 log_progress=True):
    headers = {}
    if github_token is not None:
        headers = {"Authorization": "token {}".format(github_token)}

    ensure_base_path(tar_path)

    if ".git" not in github_url:
        github_url += ".git"
    tar_url = github_url.replace(".git", "/archive/{}.tar.gz".format(version))
    r = requests.get(tar_url, headers=headers, stream=True)
    if r.status_code != requests.codes.ok:
        Api._raise_for_status(r)

    progress = Progress("Downloading (KB)", len(r.content) / 1024)
    with open(tar_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=8192):
            f.write(chunk)
            progress.iters_done_report(len(chunk) / 1024)
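
A hedged usage sketch for download_tar; the repository URL, version tag and output path are placeholders:

download_tar('https://github.com/supervisely/supervisely',
             tar_path='/tmp/supervisely.tar.gz',
             version='master')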
Example #11
def result_writer_thread_fn(in_project, inference_result_queue):
    """Gets inference result annotations from the queue and writes them to the output dataset.

    None result signals the thread to finish.
    """

    out_project = None
    progress_bar = Progress('Model applying: ', in_project.total_items)
    resp = ''
    while resp is not None:
        resp = inference_result_queue.get()
        if resp is not None:
            if out_project is None:
                out_dir = os.path.join(TaskPaths.RESULTS_DIR, in_project.name)
                out_project = Project(out_dir, OpenMode.CREATE)
                out_project.set_meta(ProjectMeta.from_json(resp.meta_json))
            out_dataset = out_project.datasets.get(resp.ds_name)
            if out_dataset is None:
                out_dataset = out_project.create_dataset(resp.ds_name)
            out_dataset.add_item_file(resp.item_name,
                                      resp.item_paths.img_path,
                                      ann=resp.ann_json)
            progress_bar.iter_done_report()
        inference_result_queue.task_done()
Example #12
    def _construct_and_fill_model(self):
        # Progress reporting to show a progress bar in the UI.
        model_build_progress = Progress('Building model:', 1)

        # Check the class name --> index mapping to infer the number of model output dimensions.
        num_classes = max(self.class_title_to_idx.values()) + 1

        # Initialize the model.
        model = self._model_factory_fn(num_classes=num_classes,
                                       input_size=self._input_size,
                                       custom_model_config=self.config.get(
                                           CUSTOM_MODEL_CONFIG, {}))
        logger.info('Model has been instantiated.')

        # Load model weights appropriate for the given training mode.
        weights_rw = WeightsRW(TaskPaths.MODEL_DIR)
        weights_init_type = self.config[WEIGHTS_INIT_TYPE]
        if weights_init_type == TRANSFER_LEARNING:
            # For transfer learning, do not attempt to load the weights for the model head. The existing snapshot may
            # have been trained on a different dataset, even on a different set of classes, and is in general not
            # compatible with the current model even in terms of dimensions. The head of the model will be initialized
            # randomly.
            self._model = weights_rw.load_for_transfer_learning(
                model, ignore_matching_layers=[HEAD_LAYER], logger=logger)
        elif weights_init_type == CONTINUE_TRAINING:
            # Continuing training from an older snapshot requires full compatibility between the two models, including
            # class index mapping. Hence the snapshot weights must exactly match the structure of our model instance.
            self._model = weights_rw.load_strictly(model)

        # Model weights have been loaded, move them over to the GPU.
        self._model.cuda()

        # Advance the progress bar and log a progress message.
        logger.info('Weights have been loaded.',
                    extra={WEIGHTS_INIT_TYPE: weights_init_type})
        model_build_progress.iter_done_report()
Example #13
    def _construct_and_fill_model(self):
        progress_dummy = Progress('Building model:', 1)
        progress_dummy.iter_done_report()
Example #14
    def train(self):
        # Initialize the progress bar in the UI.
        training_progress = Progress('Model training: ',
                                     self._epochs * self._train_iters)

        # Initialize the optimizer.
        optimizer = torch.optim.Adam(self._model.parameters(),
                                     lr=self.config[LR])
        # Running best loss value to determine which snapshot is the best so far.
        best_val_loss = float('inf')

        for epoch in range(self._epochs):
            logger.info("Starting new epoch", extra={'epoch': self.epoch_flt})
            for train_it, (inputs_cpu, targets_cpu) in enumerate(
                    self._data_loaders[TRAIN]):
                _check_all_pixels_have_segmentation_class(targets_cpu)

                # Switch the model into training mode to enable gradient backpropagation and batch norm running average
                # updates.
                self._model.train()

                # Copy input batch to the GPU, run inference and compute optimization loss.
                inputs_cuda, targets_cuda = Variable(
                    inputs_cpu).cuda(), Variable(targets_cpu).cuda()
                outputs_cuda = self._model(inputs_cuda)
                loss = self._optimization_loss_fn(outputs_cuda, targets_cuda)

                # Make a gradient descent step.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                metric_values = {
                    name: metric_fn(outputs_cuda, targets_cuda).item()
                    for name, metric_fn in self._training_metrics_dict.items()
                }
                metric_values[LOSS] = loss.item()

                # Advance the UI progress bar.
                training_progress.iter_done_report()
                # Compute fractional epoch value for more precise metrics reporting.
                self.epoch_flt = epoch_float(epoch, train_it + 1,
                                             self._train_iters)
                # Report metrics to be plotted in the training chart.
                report_metrics_training(self.epoch_flt, metric_values)

                # If needed, do validation and snapshotting.
                if self._eval_planner.need_validation(self.epoch_flt):
                    # Compute metrics on the validation dataset.
                    metrics_values_val = self._validation()

                    # Report progress.
                    self._eval_planner.validation_performed()

                    # Check whether the new weights are the best so far on the validation dataset.
                    val_loss = metrics_values_val[LOSS]
                    model_is_best = val_loss < best_val_loss
                    if model_is_best:
                        best_val_loss = val_loss

                    # Save a snapshot with the current weights. Mark whether the snapshot is the best so far in terms of
                    # validation loss.
                    self._save_model_snapshot(model_is_best,
                                              opt_data={
                                                  'epoch':
                                                  self.epoch_flt,
                                                  'val_metrics':
                                                  metrics_values_val,
                                              })

            # Log that the epoch has finished.
            logger.info("Epoch has finished", extra={'epoch': self.epoch_flt})
Example #15
def download_pointcloud_project(api, project_id, dest_dir, dataset_ids=None, download_items=True, log_progress=False):
    LOG_BATCH_SIZE = 1

    key_id_map = KeyIdMap()

    project_fs = PointcloudProject(dest_dir, OpenMode.CREATE)

    meta = ProjectMeta.from_json(api.project.get_meta(project_id))
    project_fs.set_meta(meta)

    datasets_infos = []
    if dataset_ids is not None:
        for ds_id in dataset_ids:
            datasets_infos.append(api.dataset.get_info_by_id(ds_id))
    else:
        datasets_infos = api.dataset.get_list(project_id)

    for dataset in datasets_infos:
        dataset_fs = project_fs.create_dataset(dataset.name)
        pointclouds = api.pointcloud.get_list(dataset.id)

        ds_progress = None
        if log_progress:
            ds_progress = Progress('Downloading dataset: {!r}'.format(dataset.name), total_cnt=len(pointclouds))
        for batch in batched(pointclouds, batch_size=LOG_BATCH_SIZE):
            pointcloud_ids = [pointcloud_info.id for pointcloud_info in batch]
            pointcloud_names = [pointcloud_info.name for pointcloud_info in batch]

            ann_jsons = api.pointcloud.annotation.download_bulk(dataset.id, pointcloud_ids)

            for pointcloud_id, pointcloud_name, ann_json in zip(pointcloud_ids, pointcloud_names, ann_jsons):
                if pointcloud_name != ann_json[ApiField.NAME]:
                    raise RuntimeError("Error in api.video.annotation.download_batch: broken order")

                pointcloud_file_path = dataset_fs.generate_item_path(pointcloud_name)
                if download_items is True:
                    api.pointcloud.download_path(pointcloud_id, pointcloud_file_path)

                    related_images_path = dataset_fs.get_related_images_path(pointcloud_name)
                    related_images = api.pointcloud.get_list_related_images(pointcloud_id)
                    for rimage_info in related_images:
                        name = rimage_info[ApiField.NAME]
                        rimage_id = rimage_info[ApiField.ID]

                        path_img = os.path.join(related_images_path, name)
                        path_json = os.path.join(related_images_path, name + ".json")

                        api.pointcloud.download_related_image(rimage_id, path_img)
                        dump_json_file(rimage_info, path_json)

                else:
                    touch(pointcloud_file_path)

                dataset_fs.add_item_file(pointcloud_name,
                                         pointcloud_file_path,
                                         ann=PointcloudAnnotation.from_json(ann_json, project_fs.meta, key_id_map),
                                         _validate_item=False)

            if log_progress:
                ds_progress.iters_done_report(len(batch))

    project_fs.set_key_id_map(key_id_map)