Example #1
    def upload_files(self, task_id, abs_paths, names, progress_cb=None):
        if len(abs_paths) != len(names):
            raise RuntimeError("Inconsistency: len(abs_paths) != len(names)")

        hashes = []
        if len(abs_paths) == 0:
            return

        hash_to_items = defaultdict(list)
        hash_to_name = defaultdict(list)
        for idx, item in enumerate(zip(abs_paths, names)):
            path, name = item
            item_hash = get_file_hash(path)
            hashes.append(item_hash)
            hash_to_items[item_hash].append(path)
            hash_to_name[item_hash].append(name)

        unique_hashes = set(hashes)
        remote_hashes = self._api.image.check_existing_hashes(
            list(unique_hashes))
        new_hashes = unique_hashes - set(remote_hashes)

        # @TODO: upload remote hashes
        if len(remote_hashes) != 0:
            files = []
            for hash in remote_hashes:
                for name in hash_to_name[hash]:
                    files.append({ApiField.NAME: name, ApiField.HASH: hash})
            for batch in batched(files):
                resp = self._api.post('tasks.files.bulk.add-by-hash', {
                    ApiField.TASK_ID: task_id,
                    ApiField.FILES: batch
                })
        if progress_cb is not None:
            progress_cb(len(remote_hashes))

        for batch in batched(list(zip(abs_paths, names, hashes))):
            content_dict = OrderedDict()
            for idx, item in enumerate(batch):
                path, name, hash = item
                if hash in remote_hashes:
                    continue
                content_dict["{}".format(idx)] = json.dumps({
                    "fullpath": name,
                    "hash": hash
                })
                content_dict["{}-file".format(idx)] = (name, open(path,
                                                                  'rb'), '')

            if len(content_dict) > 0:
                encoder = MultipartEncoder(fields=content_dict)
                resp = self._api.post('tasks.files.bulk.upload', encoder)
                if progress_cb is not None:
                    progress_cb(len(content_dict))
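
These examples lean on a batched helper to split work into fixed-size chunks before hitting the bulk endpoints. Below is a minimal sketch of such a helper; the default chunk size of 50 is an assumption, not necessarily what the SDK uses.

def batched(seq, batch_size=50):
    # Yield consecutive slices of `seq`, each with at most `batch_size` items.
    # Hypothetical stand-in for the helper used throughout these examples.
    for i in range(0, len(seq), batch_size):
        yield seq[i:i + batch_size]

# list(batched([1, 2, 3, 4, 5], batch_size=2)) -> [[1, 2], [3, 4], [5]]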
    def _upload_bulk_add(self, func_item_to_kv, dataset_id, names, items, metas=None, progress_cb=None):
        if metas is None:
            metas = [{}] * len(items)

        results = []
        if len(names) == 0:
            return results
        if len(names) != len(items):
            raise RuntimeError("Can not match \"names\" and \"items\" lists, len(names) != len(items)")

        for batch in batched(list(zip(names, items, metas))):
            images = []
            for name, item, meta in batch:
                item_tuple = func_item_to_kv(item)
                images.append({ApiField.NAME: name,
                               item_tuple[0]: item_tuple[1],
                               ApiField.META: meta if meta is not None else {}})
            response = self._api.post('point-clouds.bulk.add', {ApiField.DATASET_ID: dataset_id,
                                                                ApiField.POINTCLOUDS: images})
            if progress_cb is not None:
                progress_cb(len(images))

            results.extend([self._convert_json_info(item) for item in response.json()])
        name_to_res = {img_info.name: img_info for img_info in results}
        ordered_results = [name_to_res[name] for name in names]

        return ordered_results
    def _upload_data_bulk(self, func_item_to_byte_stream, func_item_hash, items, progress_cb):
        hashes = []
        if len(items) == 0:
            return hashes

        hash_to_items = defaultdict(list)

        for idx, item in enumerate(items):
            item_hash = func_item_hash(item)
            hashes.append(item_hash)
            hash_to_items[item_hash].append(item)

        unique_hashes = set(hashes)
        remote_hashes = self.check_existing_hashes(list(unique_hashes))
        new_hashes = unique_hashes - set(remote_hashes)

        if progress_cb is not None:
            progress_cb(len(remote_hashes))

        # upload only new point clouds to the Supervisely server
        items_to_upload = []
        for hash in new_hashes:
            items_to_upload.extend(hash_to_items[hash])

        for batch in batched(items_to_upload):
            content_dict = {}
            for idx, item in enumerate(batch):
                content_dict["{}-file".format(idx)] = (str(idx), func_item_to_byte_stream(item), 'pcd/*')
            encoder = MultipartEncoder(fields=content_dict)
            self._api.post('point-clouds.bulk.upload', encoder)
            if progress_cb is not None:
                progress_cb(len(batch))

        return hashes
    def check_existing_hashes(self, hashes):
        results = []
        if len(hashes) == 0:
            return results
        for hashes_batch in batched(hashes, batch_size=900):
            response = self._api.post('images.internal.hashes.list', hashes_batch)
            results.extend(response.json())
        return results
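
upload_files above deduplicates uploads by content hash via get_file_hash before asking the server which hashes already exist. A minimal sketch of such a helper, assuming the hash is a base64-encoded SHA-256 digest of the file contents (the exact encoding is an assumption), could look like this:

import base64
import hashlib

def get_file_hash(path):
    # Hash the raw file bytes so identical files map to the same key,
    # regardless of their names or locations. Encoding scheme is assumed.
    with open(path, 'rb') as f:
        digest = hashlib.sha256(f.read()).digest()
    return base64.b64encode(digest).decode('utf-8')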
Example #5
    def _append_bulk(self,
                     entity_id,
                     figures_json,
                     figures_keys,
                     key_id_map: KeyIdMap,
                     field_name=ApiField.ENTITY_ID):
        if len(figures_json) == 0:
            return

        for (batch_keys,
             batch_jsons) in zip(batched(figures_keys, batch_size=100),
                                 batched(figures_json, batch_size=100)):
            resp = self._api.post('figures.bulk.add', {
                field_name: entity_id,
                ApiField.FIGURES: batch_jsons
            })
            for key, resp_obj in zip(batch_keys, resp.json()):
                figure_id = resp_obj[ApiField.ID]
                key_id_map.add_figure(key, figure_id)
Example #6
    def _download_batch_by_hashes(self, hashes):
        for batch_hashes in batched(hashes):
            response = self._api.post(
                'images.bulk.download-by-hash', {ApiField.HASHES: batch_hashes})
            decoder = MultipartDecoder.from_response(response)
            for part in decoder.parts:
                content_utf8 = part.headers[b'Content-Disposition'].decode('utf-8')
                # Content-Disposition looks like 'form-data; name="<hash>"':
                # strip the prefix and the trailing quote to recover the hash.
                h = content_utf8.replace("form-data; name=\"", "")[:-1]
                yield h, part
Example #7
    def check_existing_hashes(self, hashes):
        '''
        :param hashes: list of str
        :return: list of JSON objects (None if an image with the given hash does not exist)
        '''
        results = []
        if len(hashes) == 0:
            return results
        for hashes_batch in batched(hashes, batch_size=900):
            response = self._api.post('images.internal.hashes.list', hashes_batch)
            results.extend(response.json())
        return results
    def _upload_batch(self, func_ann_to_json, img_ids, anns, progress_cb=None):
        # img_ids from the same dataset
        if len(img_ids) == 0:
            return
        if len(img_ids) != len(anns):
            raise RuntimeError('Can not match "img_ids" and "anns" lists, len(img_ids) != len(anns)')

        dataset_id = self._api.image.get_info_by_id(img_ids[0]).dataset_id
        for batch in batched(list(zip(img_ids, anns))):
            data = [{ApiField.IMAGE_ID: img_id, ApiField.ANNOTATION: func_ann_to_json(ann)} for img_id, ann in batch]
            self._api.post('annotations.bulk.add', data={ApiField.DATASET_ID: dataset_id, ApiField.ANNOTATIONS: data})
            if progress_cb is not None:
                progress_cb(len(batch))
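
Most of these methods accept an optional progress_cb that is invoked once per batch with the number of items just processed. Assuming the tqdm package is available, a drop-in callback can be built like this (the commented call is illustrative, not a confirmed API):

from tqdm import tqdm

def make_progress_cb(total, desc):
    # tqdm's update(n) takes an increment, which matches how progress_cb
    # is called in the snippets above (once per batch, with the batch size).
    bar = tqdm(total=total, desc=desc)
    return bar.update

# Hypothetical usage:
# progress_cb = make_progress_cb(len(img_ids), "Uploading annotations")
# self._upload_batch(ann_to_json, img_ids, anns, progress_cb=progress_cb)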
Example #9
    def _upload_data_bulk(self,
                          func_item_to_byte_stream,
                          items_hashes,
                          retry_cnt=3,
                          progress_cb=None,
                          item_progress=None):
        hash_to_items = {i_hash: item for item, i_hash in items_hashes}

        unique_hashes = set(hash_to_items.keys())
        remote_hashes = set(self.check_existing_hashes(
            list(unique_hashes)))  # existing -- from server
        if progress_cb:
            progress_cb(len(remote_hashes))
        # pending_hashes = unique_hashes  # - remote_hashes  # @TODO: only for debugging!
        pending_hashes = unique_hashes - remote_hashes

        for retry_idx in range(retry_cnt):
            # single attempt to upload all data which is not uploaded yet
            for hashes in batched(list(pending_hashes)):
                pending_hashes_items = [(h, hash_to_items[h]) for h in hashes]
                hashes_rcv = self._upload_uniq_videos_single_req(
                    func_item_to_byte_stream, pending_hashes_items,
                    item_progress)
                pending_hashes -= set(hashes_rcv)
                if set(hashes_rcv) - set(hashes):
                    logger.warn('Hash inconsistency in videos bulk upload.',
                                extra={
                                    'sent': hashes,
                                    'received': hashes_rcv
                                })
                if progress_cb:
                    progress_cb(len(hashes_rcv))

            if not pending_hashes:
                return

            logger.warn('Unable to upload videos (data).',
                        extra={
                            'retry_idx':
                            retry_idx,
                            'items':
                            [(h, hash_to_items[h]) for h in pending_hashes]
                        })
            # now retry it for the case if it is a shadow server/connection error

        raise RuntimeError(
            "Unable to upload videos (data). "
            "Please check that the videos are in a supported format and are not corrupted."
        )
Example #10
    def copy_batch(self, src_image_ids, dst_image_ids, progress_cb=None):
        if len(src_image_ids) != len(dst_image_ids):
            raise RuntimeError('Can not match "src_image_ids" and "dst_image_ids" lists, '
                               'len(src_image_ids) != len(dst_image_ids)')
        if len(src_image_ids) == 0:
            return

        src_dataset_id = self._api.image.get_info_by_id(src_image_ids[0]).dataset_id
        for cur_batch in batched(list(zip(src_image_ids, dst_image_ids))):
            src_ids_batch, dst_ids_batch = zip(*cur_batch)
            ann_infos = self.download_batch(src_dataset_id, src_ids_batch)
            ann_jsons = [ann_info.annotation for ann_info in ann_infos]
            self.upload_jsons(dst_ids_batch, ann_jsons)
            if progress_cb is not None:
                progress_cb(len(src_ids_batch))
Example #11
    def _download_batch(self, dataset_id, ids):
        '''
        Yield image id and image content for the given dataset and list of image ids
        :param dataset_id: int
        :param ids: list of integers
        '''
        for batch_ids in batched(ids):
            response = self._api.post(
                'images.bulk.download', {ApiField.DATASET_ID: dataset_id, ApiField.IMAGE_IDS: batch_ids})
            decoder = MultipartDecoder.from_response(response)
            for part in decoder.parts:
                content_utf8 = part.headers[b'Content-Disposition'].decode('utf-8')
                # Find name="1245" preceded by a whitespace, semicolon or beginning of line.
                # The regex has 2 capture groups: one for the prefix and one for the actual name value.
                img_id = int(re.findall(r'(^|[\s;])name="(\d*)"', content_utf8)[0][1])
                yield img_id, part
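
The regex in _download_batch is easy to sanity-check in isolation. The header value below is a made-up sample, but the pattern and the indexing match the code above:

import re

sample = 'form-data; name="1245"'
# Two capture groups: group 1 is the prefix (start of line, whitespace or ';'),
# group 2 is the numeric name value we actually want.
img_id = int(re.findall(r'(^|[\s;])name="(\d*)"', sample)[0][1])
assert img_id == 1245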
Example #12
    def get_info_by_id_batch(self, ids):
        '''
        :param ids: list of integers
        :return: list of image metadata infos
        '''
        results = []
        if len(ids) == 0:
            return results
        dataset_id = self.get_info_by_id(ids[0]).dataset_id
        for batch in batched(ids):
            filters = [{"field": ApiField.ID, "operator": "in", "value": batch}]
            results.extend(self.get_list_all_pages('images.list',
                                                   {ApiField.DATASET_ID: dataset_id,
                                                    ApiField.FILTER: filters}))
        temp_map = {info.id: info for info in results}
        ordered_results = [temp_map[id] for id in ids]
        return ordered_results
Example #13
    def append_labels(self, image_id, labels):
        if len(labels) == 0:
            return

        payload = []
        for label in labels:
            _label_json = label.to_json()
            _label_json["geometry"] = label.geometry.to_json()
            if "classId" not in _label_json:
                raise KeyError("Update project meta from server to get class id")
            payload.append(_label_json)

        added_ids = []
        for batch_jsons in batched(payload, batch_size=100):
            resp = self._api.post('figures.bulk.add', {ApiField.ENTITY_ID: image_id, ApiField.FIGURES: batch_jsons})
            for resp_obj in resp.json():
                figure_id = resp_obj[ApiField.ID]
                added_ids.append(figure_id)
Example #14
    def _upload_data_bulk(self, func_item_to_byte_stream, items_hashes, retry_cnt=3, progress_cb=None):
        """
        Upload images (binary data) to the server. Works with already existing or duplicate images.
        :param func_item_to_byte_stream: converter for "item" to a byte stream
        :param items_hashes: iterable of pairs (item, hash) where "item" is some descriptor of the image data
         (e.g. an image file path) and "hash" is a hash of the image binary data
        :param retry_cnt: int, number of retries to send the whole set of items
        :param progress_cb: callback to account progress (in number of items)
        """
        hash_to_items = {i_hash: item for item, i_hash in items_hashes}

        unique_hashes = set(hash_to_items.keys())
        remote_hashes = set(self.check_existing_hashes(list(unique_hashes)))  # existing -- from server
        if progress_cb:
            progress_cb(len(remote_hashes))
        pending_hashes = unique_hashes - remote_hashes

        # @TODO: some correlation with sly.io.network_exceptions. Should we perform retries here?
        for retry_idx in range(retry_cnt):
            # single attempt to upload all data which is not uploaded yet

            for hashes in batched(list(pending_hashes)):
                pending_hashes_items = [(h, hash_to_items[h]) for h in hashes]
                hashes_rcv = self._upload_uniq_images_single_req(func_item_to_byte_stream, pending_hashes_items)
                pending_hashes -= set(hashes_rcv)
                if set(hashes_rcv) - set(hashes):
                    logger.warn('Hash inconsistency in images bulk upload.',
                                extra={'sent': hashes, 'received': hashes_rcv})
                if progress_cb:
                    progress_cb(len(hashes_rcv))

            if not pending_hashes:
                return

            logger.warn('Unable to upload images (data).', extra={
                'retry_idx': retry_idx,
                'items': [(h, hash_to_items[h]) for h in pending_hashes]
            })
            # now retry it for the case if it is a shadow server/connection error

        raise RuntimeError("Unable to upload images (data). "
                           "Please check if images are in supported format and if ones aren't corrupted.")
Example #15
    def download_batch(self, dataset_id, image_ids, progress_cb=None, with_custom_data=False):
        '''
        :param dataset_id: int
        :param image_ids: list of integers
        :param progress_cb: callback to report progress (in number of items)
        :param with_custom_data: bool
        :return: list of serialized JSON annotations for the given dataset id and image ids
        '''
        id_to_ann = {}
        for batch in batched(image_ids):
            post_data = {
                ApiField.DATASET_ID: dataset_id,
                ApiField.IMAGE_IDS: batch,
                ApiField.WITH_CUSTOM_DATA: with_custom_data
            }
            results = self._api.post('annotations.bulk.info', data=post_data).json()
            for ann_dict in results:
                ann_info = self._convert_json_info(ann_dict)
                id_to_ann[ann_info.image_id] = ann_info
            if progress_cb is not None:
                progress_cb(len(batch))
        ordered_results = [id_to_ann[image_id] for image_id in image_ids]
        return ordered_results
Example #16
    def _upload_bulk_add(self, func_item_to_kv, dataset_id, names, items, progress_cb=None, metas=None):
        results = []

        if len(names) == 0:
            return results
        if len(names) != len(items):
            raise RuntimeError("Can not match \"names\" and \"items\" lists, len(names) != len(items)")

        if metas is None:
            metas = [{}] * len(names)
        else:
            if len(names) != len(metas):
                raise RuntimeError("Can not match \"names\" and \"metas\" len(names) != len(metas)")

        for batch in batched(list(zip(names, items, metas))):
            images = []
            for name, item, meta in batch:
                item_tuple = func_item_to_kv(item)
                #@TODO: 'title' -> ApiField.NAME
                image_data = {'title': name, item_tuple[0]: item_tuple[1]}
                if isinstance(meta, dict) and len(meta) != 0:
                    image_data[ApiField.META] = meta
                images.append(image_data)

            response = self._api.post('images.bulk.add', {ApiField.DATASET_ID: dataset_id, ApiField.IMAGES: images})
            if progress_cb is not None:
                progress_cb(len(images))

            for info_json in response.json():
                info_json_copy = info_json.copy()
                info_json_copy[ApiField.EXT] = info_json[ApiField.MIME].split('/')[1]
                #results.append(self.InfoType(*[info_json_copy[field_name] for field_name in self.info_sequence()]))
                results.append(self._convert_json_info(info_json_copy))

        #name_to_res = {img_info.name: img_info for img_info in results}
        #ordered_results = [name_to_res[name] for name in names]

        return results #ordered_results
Example #17
    def remove_batch(self, ids, progress_cb=None):
        for ids_batch in batched(ids):
            self._api.post(self._remove_batch_api_method_name(),
                           {self._remove_batch_field_name(): ids_batch})
            if progress_cb is not None:
                progress_cb(len(ids_batch))
Example #18
def download_pointcloud_episode_project(api,
                                        project_id,
                                        dest_dir,
                                        dataset_ids=None,
                                        download_pcd=True,
                                        download_related_images=True,
                                        download_annotations=True,
                                        log_progress=False,
                                        batch_size=10):
    key_id_map = KeyIdMap()
    project_fs = PointcloudEpisodeProject(dest_dir, OpenMode.CREATE)
    meta = ProjectMeta.from_json(api.project.get_meta(project_id))
    project_fs.set_meta(meta)

    datasets_infos = []
    if dataset_ids is not None:
        for ds_id in dataset_ids:
            datasets_infos.append(api.dataset.get_info_by_id(ds_id))
    else:
        datasets_infos = api.dataset.get_list(project_id)

    for dataset in datasets_infos:
        dataset_fs = project_fs.create_dataset(dataset.name)
        pointclouds = api.pointcloud_episode.get_list(dataset.id)

        if download_annotations:
            # Download annotation to project_path/dataset_path/annotation.json
            ann_json = api.pointcloud_episode.annotation.download(dataset.id)
            annotation = dataset_fs.annotation_class.from_json(
                ann_json, meta, key_id_map)
            dataset_fs.set_ann(annotation)

            # frames --> pointcloud mapping to project_path/dataset_path/frame_pointcloud_map.json
            frame_name_map = api.pointcloud_episode.get_frame_name_map(
                dataset.id)
            frame_pointcloud_map_path = dataset_fs.get_frame_pointcloud_map_path(
            )
            dump_json_file(frame_name_map, frame_pointcloud_map_path)

        # Download data
        if log_progress:
            ds_progress = Progress('Downloading dataset: {!r}'.format(
                dataset.name),
                                   total_cnt=len(pointclouds))

        for batch in batched(pointclouds, batch_size=batch_size):
            pointcloud_ids = [pointcloud_info.id for pointcloud_info in batch]
            pointcloud_names = [
                pointcloud_info.name for pointcloud_info in batch
            ]

            for pointcloud_id, pointcloud_name in zip(pointcloud_ids,
                                                      pointcloud_names):
                pointcloud_file_path = dataset_fs.generate_item_path(
                    pointcloud_name)
                if download_pcd is True:
                    api.pointcloud_episode.download_path(
                        pointcloud_id, pointcloud_file_path)
                else:
                    touch(pointcloud_file_path)

                if download_related_images:
                    related_images_path = dataset_fs.get_related_images_path(
                        pointcloud_name)
                    related_images = api.pointcloud_episode.get_list_related_images(
                        pointcloud_id)
                    for rimage_info in related_images:
                        name = rimage_info[ApiField.NAME]
                        rimage_id = rimage_info[ApiField.ID]

                        path_img = os.path.join(related_images_path, name)
                        path_json = os.path.join(related_images_path,
                                                 name + ".json")

                        api.pointcloud_episode.download_related_image(
                            rimage_id, path_img)
                        dump_json_file(rimage_info, path_json)

                dataset_fs.add_item_file(pointcloud_name,
                                         pointcloud_file_path,
                                         _validate_item=False)
            if log_progress:
                ds_progress.iters_done_report(len(batch))

    project_fs.set_key_id_map(key_id_map)
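
A typical call to download_pointcloud_episode_project might look like the sketch below. The server address, token and project id are placeholders, and constructing the client as sly.Api(server_address, token) is an assumption about the surrounding SDK.

import supervisely_lib as sly

# Placeholders: substitute your own instance address, API token and project id.
api = sly.Api(server_address="https://app.supervise.ly", token="<API_TOKEN>")
download_pointcloud_episode_project(api,
                                    project_id=123,
                                    dest_dir="/tmp/pcd_episodes",
                                    download_pcd=True,
                                    download_related_images=True,
                                    download_annotations=True,
                                    log_progress=True)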
Example #19
def download_video_project(api,
                           project_id,
                           dest_dir,
                           dataset_ids=None,
                           download_videos=True,
                           log_progress=False):
    '''
    Download the video project with the given id to the destination directory
    :param api: Api class object
    :param project_id: int
    :param dest_dir: str
    :param dataset_ids: list of integers
    :param download_videos: bool
    :param log_progress: bool
    '''
    LOG_BATCH_SIZE = 1

    key_id_map = KeyIdMap()

    project_fs = VideoProject(dest_dir, OpenMode.CREATE)

    meta = ProjectMeta.from_json(api.project.get_meta(project_id))
    project_fs.set_meta(meta)

    datasets_infos = []
    if dataset_ids is not None:
        for ds_id in dataset_ids:
            datasets_infos.append(api.dataset.get_info_by_id(ds_id))
    else:
        datasets_infos = api.dataset.get_list(project_id)

    for dataset in datasets_infos:
        dataset_fs = project_fs.create_dataset(dataset.name)
        videos = api.video.get_list(dataset.id)

        ds_progress = None
        if log_progress:
            ds_progress = Progress('Downloading dataset: {!r}'.format(
                dataset.name),
                                   total_cnt=len(videos))
        for batch in batched(videos, batch_size=LOG_BATCH_SIZE):
            video_ids = [video_info.id for video_info in batch]
            video_names = [video_info.name for video_info in batch]

            ann_jsons = api.video.annotation.download_bulk(
                dataset.id, video_ids)

            for video_id, video_name, ann_json in zip(video_ids, video_names,
                                                      ann_jsons):
                if video_name != ann_json[ApiField.VIDEO_NAME]:
                    raise RuntimeError(
                        "Error in api.video.annotation.download_batch: broken order"
                    )

                video_file_path = dataset_fs.generate_item_path(video_name)
                if download_videos is True:
                    api.video.download_path(video_id, video_file_path)
                else:
                    touch(video_file_path)

                dataset_fs.add_item_file(video_name,
                                         video_file_path,
                                         ann=VideoAnnotation.from_json(
                                             ann_json, project_fs.meta,
                                             key_id_map),
                                         _validate_item=False)

            if log_progress:
                ds_progress.iters_done_report(len(batch))

    project_fs.set_key_id_map(key_id_map)
def download_pointcloud_project(api,
                                project_id,
                                dest_dir,
                                dataset_ids=None,
                                download_items=True,
                                log_progress=False):
    LOG_BATCH_SIZE = 1

    key_id_map = KeyIdMap()

    project_fs = PointcloudProject(dest_dir, OpenMode.CREATE)

    meta = ProjectMeta.from_json(api.project.get_meta(project_id))
    project_fs.set_meta(meta)

    datasets_infos = []
    if dataset_ids is not None:
        for ds_id in dataset_ids:
            datasets_infos.append(api.dataset.get_info_by_id(ds_id))
    else:
        datasets_infos = api.dataset.get_list(project_id)

    for dataset in datasets_infos:
        dataset_fs = project_fs.create_dataset(dataset.name)
        pointclouds = api.pointcloud.get_list(dataset.id)

        ds_progress = None
        if log_progress:
            ds_progress = Progress('Downloading dataset: {!r}'.format(
                dataset.name),
                                   total_cnt=len(pointclouds))
        for batch in batched(pointclouds, batch_size=LOG_BATCH_SIZE):
            pointcloud_ids = [pointcloud_info.id for pointcloud_info in batch]
            pointcloud_names = [
                pointcloud_info.name for pointcloud_info in batch
            ]

            ann_jsons = api.pointcloud.annotation.download_bulk(
                dataset.id, pointcloud_ids)

            for pointcloud_id, pointcloud_name, ann_json in zip(
                    pointcloud_ids, pointcloud_names, ann_jsons):
                if pointcloud_name != ann_json[ApiField.NAME]:
                    raise RuntimeError(
                        "Error in api.video.annotation.download_batch: broken order"
                    )

                pointcloud_file_path = dataset_fs.generate_item_path(
                    pointcloud_name)
                if download_items is True:
                    api.pointcloud.download_path(pointcloud_id,
                                                 pointcloud_file_path)

                    related_images_path = dataset_fs.get_related_images_path(
                        pointcloud_name)
                    related_images = api.pointcloud.get_list_related_images(
                        pointcloud_id)
                    for rimage_info in related_images:
                        name = rimage_info[ApiField.NAME]

                        if not has_valid_ext(name):
                            new_name = get_file_name(
                                name)  # to fix cases like .png.json
                            if has_valid_ext(new_name):
                                name = new_name
                                rimage_info[ApiField.NAME] = name
                            else:
                                raise RuntimeError(
                                    'Something is wrong with photo context filenames. '
                                    'Please contact support.')

                        rimage_id = rimage_info[ApiField.ID]

                        path_img = os.path.join(related_images_path, name)
                        path_json = os.path.join(related_images_path,
                                                 name + ".json")

                        api.pointcloud.download_related_image(
                            rimage_id, path_img)
                        dump_json_file(rimage_info, path_json)

                else:
                    touch(pointcloud_file_path)

                dataset_fs.add_item_file(pointcloud_name,
                                         pointcloud_file_path,
                                         ann=PointcloudAnnotation.from_json(
                                             ann_json, project_fs.meta,
                                             key_id_map),
                                         _validate_item=False)

            if log_progress:
                ds_progress.iters_done_report(len(batch))

    project_fs.set_key_id_map(key_id_map)
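
Both download helpers report progress through a Progress object constructed with a message and total_cnt and advanced with iters_done_report(n). A minimal stand-in with that interface, useful for tracing the control flow without the real SDK, might look like this:

class Progress:
    # Minimal stand-in mirroring the interface used above; the real class
    # in the SDK does more (logging, rate reporting), this one just prints counters.
    def __init__(self, message, total_cnt):
        self.message = message
        self.total_cnt = total_cnt
        self.done = 0

    def iters_done_report(self, n):
        self.done += n
        print("{}: {}/{}".format(self.message, self.done, self.total_cnt))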