Example #1
    def upload_paths(self, dataset_id, names, paths, progress_cb=None, metas=None):
        def path_to_bytes_stream(path):
            # Open each local file as a binary stream for the bulk uploader.
            return open(path, 'rb')

        video_info_results = []

        # Hash every local file; the hashes drive both the deduplicated upload
        # and the metadata lookup below.
        hashes = [get_file_hash(x) for x in paths]

        self._upload_data_bulk(path_to_bytes_stream, zip(paths, hashes), progress_cb=progress_cb)
        # Note: the incoming metas argument is not used here; it is overwritten
        # with the stream metadata the server extracted for the uploaded hashes.
        metas = self._api.import_storage.get_meta_by_hashes(hashes)
        metas2 = [meta["meta"] for meta in metas]

        for name, hash, meta in zip(names, hashes, metas2):
            try:
                all_streams = meta["streams"]
                video_streams = get_video_streams(all_streams)
                for stream_info in video_streams:
                    stream_index = stream_info["index"]

                    #TODO: check is community
                    # if instance_type == sly.COMMUNITY:
                    #     if _check_video_requires_processing(file_info, stream_info) is True:
                    #         warn_video_requires_processing(file_name)
                    #         continue

                    # If an item with this name already exists in the dataset,
                    # derive a unique per-stream name instead of colliding with it.
                    item_name = name
                    info = self._api.video.get_info_by_name(dataset_id, item_name)
                    if info is not None:
                        item_name = gen_video_stream_name(name, stream_index)
                    res = self.upload_hash(dataset_id, item_name, hash, stream_index)
                    video_info_results.append(res)
            except Exception as e:
                logger.warning("File skipped {!r}: error occurred during processing {!r}".format(name, str(e)))

        return video_info_results
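
A minimal usage sketch for this method, assuming it is exposed as api.video.upload_paths on a Supervisely Api client; the server address, token, dataset id, and file paths below are placeholders:

import os
import supervisely_lib as sly

# Placeholders: point these at your own instance, token, and dataset.
api = sly.Api(os.environ["SERVER_ADDRESS"], os.environ["API_TOKEN"])
dataset_id = 12345

paths = ["/data/videos/cam_a.mp4", "/data/videos/cam_b.mp4"]
names = [os.path.basename(p) for p in paths]

# One result is expected per uploaded video stream; if a name is already taken,
# the method derives a per-stream name automatically.
video_infos = api.video.upload_paths(dataset_id, names, paths)
for info in video_infos:
    print(info.id, info.name)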
Example #2
    def upload_files(self, task_id, abs_paths, names, progress_cb=None):
        if len(abs_paths) != len(names):
            raise RuntimeError("Inconsistency: len(abs_paths) != len(names)")

        hashes = []
        if len(abs_paths) == 0:
            return

        # Group the inputs by content hash so duplicate files are uploaded only once.
        hash_to_items = defaultdict(list)
        hash_to_name = defaultdict(list)
        for path, name in zip(abs_paths, names):
            item_hash = get_file_hash(path)
            hashes.append(item_hash)
            hash_to_items[item_hash].append(path)
            hash_to_name[item_hash].append(name)

        # Ask the server which hashes it already stores; only the remainder
        # needs its bytes uploaded.
        unique_hashes = set(hashes)
        remote_hashes = self._api.image.check_existing_hashes(list(unique_hashes))
        new_hashes = unique_hashes - set(remote_hashes)

        # Files whose content already exists on the server are attached to the
        # task by hash only, without re-uploading the bytes.
        # @TODO: upload remote hashes
        if len(remote_hashes) != 0:
            files = []
            for hash in remote_hashes:
                for name in hash_to_name[hash]:
                    files.append({ApiField.NAME: name, ApiField.HASH: hash})
            for batch in batched(files):
                resp = self._api.post('tasks.files.bulk.add-by-hash', {
                    ApiField.TASK_ID: task_id,
                    ApiField.FILES: batch
                })
        if progress_cb is not None:
            progress_cb(len(remote_hashes))

        # Files not yet known to the server are streamed as multipart uploads,
        # batch by batch; entries whose hash is already remote are skipped.
        for batch in batched(list(zip(abs_paths, names, hashes))):
            content_dict = OrderedDict()
            for idx, item in enumerate(batch):
                path, name, hash = item
                if hash in remote_hashes:
                    continue
                content_dict["{}".format(idx)] = json.dumps({
                    "fullpath": name,
                    "hash": hash
                })
                content_dict["{}-file".format(idx)] = (name, open(path, 'rb'), '')

            if len(content_dict) > 0:
                encoder = MultipartEncoder(fields=content_dict)
                resp = self._api.post('tasks.files.bulk.upload', encoder)
                if progress_cb is not None:
                    progress_cb(len(content_dict))
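
A sketch of how this might be called, assuming the method is reachable as api.task.upload_files on the task API of a Supervisely Api client; the task id and file paths are placeholders:

import os
import supervisely_lib as sly

api = sly.Api(os.environ["SERVER_ADDRESS"], os.environ["API_TOKEN"])
task_id = 777  # placeholder task id

abs_paths = ["/tmp/results/report.json", "/tmp/results/preview.png"]
names = ["report.json", "preview.png"]

# Content that already exists server-side is attached by hash only;
# everything else is uploaded in multipart batches.
api.task.upload_files(task_id, abs_paths, names)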
Example #3
    def upload_paths(self, dataset_id, names, paths, progress_cb=None, metas=None):
        '''
        Upload images with the given names from the given paths to the dataset.
        :param dataset_id: int
        :param names: list of str (an error is raised if the length of names differs from the length of paths)
        :param paths: list of str
        :param progress_cb: progress callback
        :param metas: list of dicts
        :return: list of images
        '''
        def path_to_bytes_stream(path):
            return open(path, 'rb')

        # Upload the raw image bytes first (deduplicated by content hash),
        # then register the items in the dataset by hash.
        hashes = [get_file_hash(x) for x in paths]

        self._upload_data_bulk(path_to_bytes_stream, zip(paths, hashes), progress_cb=progress_cb)
        return self.upload_hashes(dataset_id, names, hashes, metas=metas)
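
A minimal usage sketch, assuming the method is exposed as api.image.upload_paths and returns one info object per uploaded image; the dataset id and paths are placeholders:

import os
import supervisely_lib as sly

api = sly.Api(os.environ["SERVER_ADDRESS"], os.environ["API_TOKEN"])
dataset_id = 12345  # placeholder

paths = ["/data/images/img_001.jpg", "/data/images/img_002.jpg"]
names = [os.path.basename(p) for p in paths]

image_infos = api.image.upload_paths(dataset_id, names, paths)
for info in image_infos:
    print(info.id, info.name)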
Example #4
    def download(self, team_id, remote_path, local_save_path, cache: FileCache = None, progress_cb=None):
        if cache is None:
            self._download(team_id, remote_path, local_save_path, progress_cb)
        else:
            file_info = self.get_info_by_path(team_id, remote_path)
            if file_info.hash is None:
                # Without a hash the file cannot be cached, so download it directly.
                self._download(team_id, remote_path, local_save_path, progress_cb)
            else:
                cache_path = cache.check_storage_object(file_info.hash, get_file_ext(remote_path))
                if cache_path is None:
                    # File is not in the cache: download it, verify the hash, then cache it.
                    self._download(team_id, remote_path, local_save_path, progress_cb)
                    if file_info.hash != get_file_hash(local_save_path):
                        raise KeyError(f"Remote and local hashes are different (team id: {team_id}, file: {remote_path})")
                    cache.write_object(local_save_path, file_info.hash)
                else:
                    # Cache hit: copy the object from the cache instead of downloading.
                    cache.read_object(file_info.hash, local_save_path)
                    if progress_cb is not None:
                        progress_cb(get_file_size(local_save_path))
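
A usage sketch, assuming the method is exposed as api.file.download on the team-files API; the team id and remote path are placeholders. Passing a cache is optional: without one the file is always downloaded, with one a hash-verified local copy is reused on later calls.

import os
import supervisely_lib as sly

api = sly.Api(os.environ["SERVER_ADDRESS"], os.environ["API_TOKEN"])
team_id = 8  # placeholder

# Download a single file from Team Files to a local path (no cache).
api.file.download(team_id, "/reports/summary.json", "/tmp/summary.json")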