def _docker_pull_progress(docker_api, docker_image_name, logger, raise_exception=True):
    """Pull a docker image and report the aggregated download progress of its layers.

    :param docker_api: docker client object; its low-level ``api.pull`` is called in streaming, decoded mode
    :param docker_image_name: str, name of the image to pull
    :param logger: logger used for status messages and handed to the progress reporter
    :param raise_exception: bool, if True a pull failure is re-raised as DockerException,
        otherwise a warning is logged and the pulling step is skipped
    :raises DockerException: when the pull fails and ``raise_exception`` is True
    """
    logger.info('Docker image will be pulled', extra={'image_name': docker_image_name})
    try:
        # Per-layer byte counters; their sums give the overall progress of the pull.
        layers_total = {}
        layers_current = {}
        progress = Progress('Pulling dockerimage', 1, is_size=True, ext_logger=logger)
        for line in docker_api.api.pull(docker_image_name, stream=True, decode=True):
            layer_id = line.get("id", None)
            progress_details = line.get("progressDetail") or {}
            if "total" in progress_details and "current" in progress_details:
                layers_total[layer_id] = progress_details["total"]
                layers_current[layer_id] = progress_details["current"]
                total = sum(layers_total.values())
                current = sum(layers_current.values())
                if total > progress.total:
                    # A new layer became known: the overall size grew, so report right away.
                    progress.set(current, total)
                    progress.report_progress()
                elif total > 0 and (current - progress.current) / total > 0.01:
                    # Otherwise report only when more than 1% of the total was added.
                    # The ``total > 0`` guard avoids a ZeroDivisionError on zero-sized layers.
                    progress.set(current, total)
                    progress.report_progress()
        logger.info('Docker image has been pulled', extra={'image_name': docker_image_name})
    except DockerException as e:
        if raise_exception is True:
            # Chain the original error so that the "actual error above" is really shown.
            raise DockerException('Unable to pull image: see actual error above. '
                                  'Please, run the task again or contact support team.') from e
        else:
            logger.warning("Pulling step is skipped. Unable to pull image: {!r}.".format(repr(e)))
def upload_video_project(dir, api, workspace_id, project_name=None, log_progress=True):
    """Upload a video project stored on disk to the server.

    :param dir: str, directory read with ``VideoProject.read_single``
    :param api: Api class object
    :param workspace_id: int, workspace to create the project in
    :param project_name: str, name for the new project; defaults to the name of the local project.
        If the name is already taken in the workspace, a free name is requested from the server.
    :param log_progress: bool, report upload progress for videos and annotations
    :return: tuple (id, name) of the created project
    """
    local_project = VideoProject.read_single(dir)

    if project_name is None:
        project_name = local_project.name
    if api.project.exists(workspace_id, project_name):
        project_name = api.project.get_free_name(workspace_id, project_name)

    project = api.project.create(workspace_id, project_name, ProjectType.VIDEOS)
    api.project.update_meta(project.id, local_project.meta.to_json())

    for local_dataset in local_project.datasets:
        dataset = api.dataset.create(project.id, local_dataset.name)

        # Collect item names together with their video / annotation file paths.
        names = []
        item_paths = []
        ann_paths = []
        for item_name in local_dataset:
            video_path, annotation_path = local_dataset.get_item_paths(item_name)
            names.append(item_name)
            item_paths.append(video_path)
            ann_paths.append(annotation_path)

        items_count = len(item_paths)

        progress_cb = None
        if log_progress:
            videos_progress = Progress('Uploading videos to dataset {!r}'.format(dataset.name),
                                       total_cnt=items_count)
            progress_cb = videos_progress.iters_done_report
        uploaded_infos = api.video.upload_paths(dataset.id, names, item_paths, progress_cb)
        item_ids = [uploaded.id for uploaded in uploaded_infos]

        if log_progress:
            anns_progress = Progress('Uploading annotations to dataset {!r}'.format(dataset.name),
                                     total_cnt=items_count)
            progress_cb = anns_progress.iters_done_report
        api.video.annotation.upload_paths(item_ids, ann_paths, local_project.meta, progress_cb)

    return project.id, project.name
def run_inference(self):
    """Apply the model to every item of the input project and write the results to a new project.

    The output project is created under ``TaskPaths.RESULTS_DIR`` with the name of the input project
    and gets the output meta of the inference mode. Progress is reported once per processed item.
    """
    inference_mode = InferenceModeFactory.create(
        self._inference_mode_config, self._in_project.meta, self._single_image_inference)

    results_dir = os.path.join(TaskPaths.RESULTS_DIR, self._in_project.name)
    out_project = Project(results_dir, OpenMode.CREATE)
    out_project.set_meta(inference_mode.out_meta)

    progress_bar = Progress('Model applying: ', self._in_project.total_items)
    for src_dataset in self._in_project:
        dst_dataset = out_project.create_dataset(src_dataset.name)
        for item_name in src_dataset:
            item_paths = src_dataset.get_item_paths(item_name)
            image = sly_image.read(item_paths.img_path)
            # Load the annotation against the OUTPUT meta, so that it already lives in the context
            # of the output project (with added object classes etc).
            source_ann = Annotation.load_json_file(item_paths.ann_path, inference_mode.out_meta)

            trace_extra = {
                'dataset_name': src_dataset.name,
                'image_name': item_name
            }
            logger.trace('Will process image', extra=trace_extra)

            result_ann = inference_mode.infer_annotate(image, source_ann)
            dst_dataset.add_item_file(item_name, item_paths.img_path, ann=result_ann)
            progress_bar.iter_done_report()

    report_inference_finished()
def download_git_archive(self, ecosystem_item_id, app_id, version, save_path, log_progress=True, ext_logger=None):
    """Download an ecosystem item archive and stream it into a local file.

    :param ecosystem_item_id: id of the ecosystem item, sent as ``ApiField.ECOSYSTEM_ITEM_ID``
    :param app_id: optional application id; added to the request payload only when not None
    :param version: version of the item, sent as ``ApiField.VERSION``
    :param save_path: str, path of the file the archive is written to
    :param log_progress: bool, report download progress
    :param ext_logger: logger for progress messages; the module-level ``logger`` is used when None
    """
    payload = {
        ApiField.ECOSYSTEM_ITEM_ID: ecosystem_item_id,
        ApiField.VERSION: version,
        "isArchive": True
    }
    if app_id is not None:
        payload[ApiField.APP_ID] = app_id

    response = self._api.post('ecosystem.file.download', payload, stream=True)

    progress = None
    if log_progress:
        if ext_logger is None:
            ext_logger = logger

        # -1 is passed as the total when the server does not announce Content-Length.
        length = -1
        if "Content-Length" in response.headers:
            length = int(response.headers['Content-Length'])
        progress = Progress("Downloading: ", length, ext_logger=ext_logger, is_size=True)

    mb1 = 1024 * 1024
    ensure_base_path(save_path)
    with open(save_path, 'wb') as fd:
        # Bytes written since the last progress report.
        log_size = 0
        for chunk in response.iter_content(chunk_size=mb1):
            fd.write(chunk)
            log_size += len(chunk)
            if progress is not None and log_size > mb1:
                progress.iters_done_report(log_size)
                log_size = 0
        # Report the tail that did not cross the threshold; without this the reported
        # size never reaches the total.
        if progress is not None and log_size > 0:
            progress.iters_done_report(log_size)
def download_project(api, project_id, dest_dir, dataset_ids=None, log_progress=False, batch_size=10):
    """Download an images project (images and annotations) into a local directory.

    :param api: Api class object
    :param project_id: int
    :param dest_dir: str, directory the local project is created in
    :param dataset_ids: iterable of dataset ids to download; all datasets of the project when None
    :param log_progress: bool, report per-dataset download progress
    :param batch_size: int, number of images requested from the server per call
    """
    wanted_ids = None if dataset_ids is None else set(dataset_ids)

    local_project = Project(dest_dir, OpenMode.CREATE)
    local_project.set_meta(ProjectMeta.from_json(api.project.get_meta(project_id)))

    for dataset_info in api.dataset.get_list(project_id):
        dataset_id = dataset_info.id
        if wanted_ids is not None and dataset_id not in wanted_ids:
            continue

        local_dataset = local_project.create_dataset(dataset_info.name)
        images = api.image.get_list(dataset_id)

        ds_progress = None
        if log_progress:
            ds_progress = Progress(
                'Downloading dataset: {!r}'.format(dataset_info.name), total_cnt=len(images))

        for batch in batched(images, batch_size):
            image_ids = []
            image_names = []
            for image_info in batch:
                image_ids.append(image_info.id)
                image_names.append(image_info.name)

            # Raw image bytes and json annotations are requested for the same ids, in the same order.
            images_bytes = api.image.download_bytes(dataset_id, image_ids)
            ann_infos = api.annotation.download_batch(dataset_id, image_ids)
            ann_jsons = [ann_info.annotation for ann_info in ann_infos]

            for item in zip(image_names, images_bytes, ann_jsons):
                local_dataset.add_item_raw_bytes(*item)

            if ds_progress is not None:
                ds_progress.iters_done_report(len(batch))
def download_video_project(api, project_id, dest_dir, dataset_ids=None, download_videos=True, log_progress=False):
    '''
    Download project with given id in destination directory

    :param api: Api class object
    :param project_id: int
    :param dest_dir: str
    :param dataset_ids: list of integers; all datasets of the project are downloaded when None
    :param download_videos: bool, when False an empty placeholder file is created for each video
    :param log_progress: bool
    :raises RuntimeError: if annotations are returned in a different order than the requested videos
    '''
    LOG_BATCH_SIZE = 1

    key_id_map = KeyIdMap()

    project_fs = VideoProject(dest_dir, OpenMode.CREATE)

    meta = ProjectMeta.from_json(api.project.get_meta(project_id))
    project_fs.set_meta(meta)

    if dataset_ids is not None:
        datasets_infos = [api.dataset.get_info_by_id(ds_id) for ds_id in dataset_ids]
    else:
        datasets_infos = api.dataset.get_list(project_id)

    for dataset in datasets_infos:
        dataset_fs = project_fs.create_dataset(dataset.name)
        videos = api.video.get_list(dataset.id)

        ds_progress = None
        if log_progress:
            ds_progress = Progress('Downloading dataset: {!r}'.format(dataset.name), total_cnt=len(videos))

        for batch in batched(videos, batch_size=LOG_BATCH_SIZE):
            video_ids = [video_info.id for video_info in batch]
            video_names = [video_info.name for video_info in batch]

            ann_jsons = api.video.annotation.download_bulk(dataset.id, video_ids)

            for video_id, video_name, ann_json in zip(video_ids, video_names, ann_jsons):
                # Annotations are matched to videos by position, so verify the order.
                if video_name != ann_json[ApiField.VIDEO_NAME]:
                    raise RuntimeError("Error in api.video.annotation.download_bulk: broken order")

                video_file_path = dataset_fs.generate_item_path(video_name)
                if download_videos is True:
                    api.video.download_path(video_id, video_file_path)
                else:
                    touch(video_file_path)

                dataset_fs.add_item_file(video_name,
                                         video_file_path,
                                         ann=VideoAnnotation.from_json(ann_json, project_fs.meta, key_id_map),
                                         _validate_item=False)

            # ds_progress exists only when progress logging was requested; calling it
            # unconditionally crashed with the default log_progress=False.
            if ds_progress is not None:
                ds_progress.iters_done_report(len(batch))

    project_fs.set_key_id_map(key_id_map)
def progress_report_thread_fn(in_project, progress_queue):
    """Advances the 'Model applying' progress bar by one step for every item taken from the queue.

    A None item signals the thread to finish.
    """
    progress_bar = Progress('Model applying: ', in_project.total_items)
    while True:
        message = progress_queue.get()
        if message is None:
            return
        progress_bar.iter_done_report()
def _docker_pull(docker_api, docker_image_name, logger, raise_exception=True):
    """Pull a docker image in one blocking call, without per-layer progress.

    A single-step progress is reported before the pull starts.

    :param docker_api: docker client object; its ``images.pull`` is called
    :param docker_image_name: str, name of the image to pull
    :param logger: logger used for status messages and handed to the progress reporter
    :param raise_exception: bool, if True a pull failure is re-raised as DockerException,
        otherwise a warning is logged and the pulling step is skipped
    :raises DockerException: when the pull fails and ``raise_exception`` is True
    """
    logger.info('Docker image will be pulled', extra={'image_name': docker_image_name})
    progress_dummy = Progress('Pulling image...', 1, ext_logger=logger)
    progress_dummy.iter_done_report()
    try:
        pulled_img = docker_api.images.pull(docker_image_name)
        logger.info('Docker image has been pulled',
                    extra={'pulled': {'tags': pulled_img.tags, 'id': pulled_img.id}})
    except DockerException as e:
        if raise_exception is True:
            # Chain the original error so that the "actual error above" is really shown.
            raise DockerException('Unable to pull image: see actual error above. '
                                  'Please, run the task again or contact support team.') from e
        else:
            logger.warning("Pulling step is skipped. Unable to pull image: {!r}.".format(str(e)))
def run_evaluation(self):
    """Feed every ground-truth / prediction annotation pair into the metric.

    Datasets are taken from the ground-truth project and looked up by name in the prediction project.
    A sample that raises ValueError is logged and skipped; progress advances for every sample either way.
    """
    progress = Progress('metric evaluation', self._project_gt.total_items)
    for dataset_name in self._project_gt.datasets.keys():
        gt_dataset = self._project_gt.datasets.get(dataset_name)
        pred_dataset = self._project_pred.datasets.get(dataset_name)

        for sample_name in gt_dataset:
            try:
                gt_ann_path = gt_dataset.get_ann_path(sample_name)
                gt_annotation = Annotation.load_json_file(gt_ann_path, self._project_gt.meta)
                pred_ann_path = pred_dataset.get_ann_path(sample_name)
                pred_annotation = Annotation.load_json_file(pred_ann_path, self._project_pred.meta)
                self._metric.add_pair(gt_annotation, pred_annotation)
            except ValueError as e:
                message = 'An error has occured ({}). Sample "{}" in dataset "{}" will be skipped'
                logger.warning(message.format(str(e), sample_name, gt_dataset.name))

            progress.iter_done_report()
def download_tar(github_url, tar_path, github_token=None, version="master", log_progress=True):
    """Download the ``.tar.gz`` archive of a GitHub repository and stream it to a local file.

    :param github_url: str, repository url, with or without the trailing ``.git``
    :param tar_path: str, path of the file the archive is written to
    :param github_token: str, optional token sent in the ``Authorization`` header
    :param version: str, branch / tag / commit to download
    :param log_progress: bool, report download progress (in KB)
    """
    headers = {}
    if github_token is not None:
        headers = {"Authorization": "token {}".format(github_token)}

    ensure_base_path(tar_path)

    # Strip only a trailing ".git"; replacing every occurrence would corrupt urls
    # that contain ".git" elsewhere (e.g. "<user>.github.io" repositories).
    repo_url = github_url[:-len(".git")] if github_url.endswith(".git") else github_url
    tar_url = "{}/archive/{}.tar.gz".format(repo_url, version)

    r = requests.get(tar_url, headers=headers, stream=True)
    if r.status_code != requests.codes.ok:
        Api._raise_for_status(r)

    progress = None
    if log_progress:
        # Take the size from the header instead of r.content: reading r.content loads the
        # whole archive into memory and defeats stream=True.
        # NOTE(review): -1 is used for an unknown total, as in download_git_archive — confirm
        # that Progress handles it for a non-size counter as well.
        content_length = r.headers.get("Content-Length")
        total_kb = int(content_length) / 1024 if content_length is not None else -1
        progress = Progress("Downloading (KB)", total_kb)

    with open(tar_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=8192):
            f.write(chunk)
            if progress is not None:
                progress.iters_done_report(len(chunk) / 1024)
def result_writer_thread_fn(in_project, inference_result_queue):
    """Takes inference results from the queue and writes them to the output project.

    The output project is created lazily, on the first result, under ``TaskPaths.RESULTS_DIR`` with
    the name of the input project; output datasets are created on demand. Every item taken from the
    queue is acknowledged with ``task_done()``. A None result signals the thread to finish.
    """
    out_project = None
    progress_bar = Progress('Model applying: ', in_project.total_items)

    while True:
        result = inference_result_queue.get()

        if result is not None:
            if out_project is None:
                results_dir = os.path.join(TaskPaths.RESULTS_DIR, in_project.name)
                out_project = Project(results_dir, OpenMode.CREATE)
                out_project.set_meta(ProjectMeta.from_json(result.meta_json))

            target_dataset = out_project.datasets.get(result.ds_name)
            if target_dataset is None:
                target_dataset = out_project.create_dataset(result.ds_name)

            target_dataset.add_item_file(result.item_name, result.item_paths.img_path, ann=result.ann_json)
            progress_bar.iter_done_report()

        # The None sentinel is acknowledged as well before leaving the loop.
        inference_result_queue.task_done()
        if result is None:
            break
def _construct_and_fill_model(self):
    """Instantiate the model, load weights according to the configured init type and move it to the GPU."""
    # Single-step progress, shown as a progress bar in the UI.
    build_progress = Progress('Building model:', 1)

    # The number of model outputs follows from the largest index in the class name --> index mapping.
    class_count = 1 + max(self.class_title_to_idx.values())

    model = self._model_factory_fn(
        num_classes=class_count,
        input_size=self._input_size,
        custom_model_config=self.config.get(CUSTOM_MODEL_CONFIG, {}))
    logger.info('Model has been instantiated.')

    # Pick the weights loading strategy appropriate for the training mode.
    weights_rw = WeightsRW(TaskPaths.MODEL_DIR)
    init_type = self.config[WEIGHTS_INIT_TYPE]
    if init_type == TRANSFER_LEARNING:
        # Transfer learning: the head weights are not loaded. The snapshot may come from a different
        # dataset or set of classes and is in general not compatible with the current model even in
        # terms of dimensions, so the head is initialized randomly.
        self._model = weights_rw.load_for_transfer_learning(
            model, ignore_matching_layers=[HEAD_LAYER], logger=logger)
    elif init_type == CONTINUE_TRAINING:
        # Continuing from an older snapshot needs full compatibility, including the class index
        # mapping, so the snapshot weights must match the model structure exactly.
        self._model = weights_rw.load_strictly(model)
    # Any other init type assigns nothing here; self._model is used as it is.

    # Weights are in place, move the model over to the GPU.
    self._model.cuda()

    # Log the result and advance the progress bar.
    logger.info('Weights have been loaded.', extra={WEIGHTS_INIT_TYPE: init_type})
    build_progress.iter_done_report()
def _construct_and_fill_model(self):
    """Report a completed single-step 'Building model:' progress; no model is actually built here."""
    Progress('Building model:', 1).iter_done_report()
def train(self):
    """Run the training loop for ``self._epochs`` epochs over the TRAIN data loader.

    For every batch: does a forward pass, computes the optimization loss, makes an Adam step, computes
    the training metrics and reports them together with the fractional epoch. When the evaluation
    planner asks for it, runs validation and saves a snapshot, marked as best if its validation loss
    is the lowest seen so far.

    NOTE(review): the nesting below was reconstructed from whitespace-collapsed source (validation
    check per iteration, "Epoch has finished" once per epoch) — confirm against the original file.
    """
    # Initialize the progress bar in the UI: one step per training iteration.
    training_progress = Progress('Model training: ', self._epochs * self._train_iters)

    # Initialize the optimizer.
    optimizer = torch.optim.Adam(self._model.parameters(), lr=self.config[LR])

    # Running best loss value to determine which snapshot is the best so far.
    best_val_loss = float('inf')

    for epoch in range(self._epochs):
        # self.epoch_flt is read here before this method assigns it, so it must already be set
        # on the instance when train() is called.
        logger.info("Starting new epoch", extra={'epoch': self.epoch_flt})
        for train_it, (inputs_cpu, targets_cpu) in enumerate(
                self._data_loaders[TRAIN]):
            _check_all_pixels_have_segmentation_class(targets_cpu)

            # Switch the model into training mode to enable gradient backpropagation and batch norm
            # running average updates.
            self._model.train()

            # Copy input batch to the GPU, run inference and compute optimization loss.
            inputs_cuda, targets_cuda = Variable(
                inputs_cpu).cuda(), Variable(targets_cpu).cuda()
            outputs_cuda = self._model(inputs_cuda)
            loss = self._optimization_loss_fn(outputs_cuda, targets_cuda)

            # Make a gradient descent step.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Evaluate every configured training metric on this batch; the loss is reported too.
            metric_values = {
                name: metric_fn(outputs_cuda, targets_cuda).item()
                for name, metric_fn in self._training_metrics_dict.items()
            }
            metric_values[LOSS] = loss.item()

            # Advance UI progress bar.
            training_progress.iter_done_report()

            # Compute fractional epoch value for more precise metrics reporting.
            self.epoch_flt = epoch_float(epoch, train_it + 1, self._train_iters)

            # Report metrics to be plotted in the training chart.
            report_metrics_training(self.epoch_flt, metric_values)

            # If needed, do validation and snapshotting.
            if self._eval_planner.need_validation(self.epoch_flt):
                # Compute metrics on the validation dataset.
                metrics_values_val = self._validation()

                # Tell the planner that validation has been done.
                self._eval_planner.validation_performed()

                # Check whether the new weights are the best so far on the validation dataset.
                val_loss = metrics_values_val[LOSS]
                model_is_best = val_loss < best_val_loss
                if model_is_best:
                    best_val_loss = val_loss

                # Save a snapshot with the current weights. Mark whether the snapshot is the best
                # so far in terms of validation loss.
                self._save_model_snapshot(model_is_best, opt_data={
                    'epoch': self.epoch_flt,
                    'val_metrics': metrics_values_val,
                })

        # Report progress
        logger.info("Epoch has finished", extra={'epoch': self.epoch_flt})
def download_pointcloud_project(api, project_id, dest_dir, dataset_ids=None, download_items=True, log_progress=False):
    '''
    Download pointcloud project with given id in destination directory

    :param api: Api class object
    :param project_id: int
    :param dest_dir: str
    :param dataset_ids: list of integers; all datasets of the project are downloaded when None
    :param download_items: bool, when True pointclouds and their related images are downloaded,
        otherwise an empty placeholder file is created for each pointcloud
    :param log_progress: bool
    :raises RuntimeError: if annotations are returned in a different order than the requested pointclouds
    '''
    LOG_BATCH_SIZE = 1

    key_id_map = KeyIdMap()

    project_fs = PointcloudProject(dest_dir, OpenMode.CREATE)

    meta = ProjectMeta.from_json(api.project.get_meta(project_id))
    project_fs.set_meta(meta)

    if dataset_ids is not None:
        datasets_infos = [api.dataset.get_info_by_id(ds_id) for ds_id in dataset_ids]
    else:
        datasets_infos = api.dataset.get_list(project_id)

    for dataset in datasets_infos:
        dataset_fs = project_fs.create_dataset(dataset.name)
        pointclouds = api.pointcloud.get_list(dataset.id)

        ds_progress = None
        if log_progress:
            ds_progress = Progress('Downloading dataset: {!r}'.format(dataset.name), total_cnt=len(pointclouds))

        for batch in batched(pointclouds, batch_size=LOG_BATCH_SIZE):
            pointcloud_ids = [pointcloud_info.id for pointcloud_info in batch]
            pointcloud_names = [pointcloud_info.name for pointcloud_info in batch]

            ann_jsons = api.pointcloud.annotation.download_bulk(dataset.id, pointcloud_ids)

            for pointcloud_id, pointcloud_name, ann_json in zip(pointcloud_ids, pointcloud_names, ann_jsons):
                # Annotations are matched to pointclouds by position, so verify the order.
                if pointcloud_name != ann_json[ApiField.NAME]:
                    raise RuntimeError("Error in api.pointcloud.annotation.download_bulk: broken order")

                pointcloud_file_path = dataset_fs.generate_item_path(pointcloud_name)
                if download_items is True:
                    api.pointcloud.download_path(pointcloud_id, pointcloud_file_path)

                    # Each related image is stored next to a json dump of its info.
                    related_images_path = dataset_fs.get_related_images_path(pointcloud_name)
                    related_images = api.pointcloud.get_list_related_images(pointcloud_id)
                    for rimage_info in related_images:
                        name = rimage_info[ApiField.NAME]
                        rimage_id = rimage_info[ApiField.ID]

                        path_img = os.path.join(related_images_path, name)
                        path_json = os.path.join(related_images_path, name + ".json")

                        api.pointcloud.download_related_image(rimage_id, path_img)
                        dump_json_file(rimage_info, path_json)
                else:
                    touch(pointcloud_file_path)

                dataset_fs.add_item_file(pointcloud_name,
                                         pointcloud_file_path,
                                         ann=PointcloudAnnotation.from_json(ann_json, project_fs.meta, key_id_map),
                                         _validate_item=False)

            # ds_progress exists only when progress logging was requested; calling it
            # unconditionally crashed with the default log_progress=False.
            if ds_progress is not None:
                ds_progress.iters_done_report(len(batch))

    project_fs.set_key_id_map(key_id_map)