def upload_paths(self, dataset_id, names, paths, progress_cb=None, metas=None):
    """Upload videos from local paths and register every video stream found in them."""
    def path_to_bytes_stream(path):
        return open(path, 'rb')

    video_info_results = []
    hashes = [get_file_hash(x) for x in paths]

    # Upload the raw file data first, then fetch the metadata the server
    # extracted for each hash (note: this overwrites the `metas` argument).
    self._upload_data_bulk(path_to_bytes_stream, zip(paths, hashes), progress_cb=progress_cb)
    metas = self._api.import_storage.get_meta_by_hashes(hashes)
    metas2 = [meta["meta"] for meta in metas]

    for name, hash, meta in zip(names, hashes, metas2):
        try:
            all_streams = meta["streams"]
            video_streams = get_video_streams(all_streams)
            for stream_info in video_streams:
                stream_index = stream_info["index"]

                # TODO: check if this is a Community instance
                # if instance_type == sly.COMMUNITY:
                #     if _check_video_requires_processing(file_info, stream_info) is True:
                #         warn_video_requires_processing(file_name)
                #         continue

                item_name = name
                info = self._api.video.get_info_by_name(dataset_id, item_name)
                if info is not None:
                    # The name is already taken in the dataset: derive a unique per-stream name.
                    item_name = gen_video_stream_name(name, stream_index)
                res = self.upload_hash(dataset_id, item_name, hash, stream_index)
                video_info_results.append(res)
        except Exception as e:
            logger.warning("File skipped {!r}: error occurred during processing {!r}".format(name, str(e)))

    return video_info_results
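# Usage sketch (illustrative, not part of the SDK source): uploading two local
# videos. Assumes an authenticated `sly.Api` and an existing dataset; the
# server address, token, ids, and paths below are placeholders.
#
#   import supervisely_lib as sly
#
#   api = sly.Api(server_address="https://app.supervise.ly", token="<api token>")
#   video_infos = api.video.upload_paths(
#       dataset_id=123,  # hypothetical dataset id
#       names=["street.mp4", "drone.mp4"],
#       paths=["/data/street.mp4", "/data/drone.mp4"],
#   )
#   # A multi-stream container yields one info entry per video stream; clashing
#   # names are disambiguated via gen_video_stream_name().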
def upload_files(self, task_id, abs_paths, names, progress_cb=None):
    if len(abs_paths) != len(names):
        raise RuntimeError("Inconsistency: len(abs_paths) != len(names)")
    if len(abs_paths) == 0:
        return

    # Hash every file and group paths/names by hash so duplicates are handled once.
    hashes = []
    hash_to_items = defaultdict(list)
    hash_to_name = defaultdict(list)
    for path, name in zip(abs_paths, names):
        item_hash = get_file_hash(path)
        hashes.append(item_hash)
        hash_to_items[item_hash].append(path)
        hash_to_name[item_hash].append(name)

    unique_hashes = set(hashes)
    remote_hashes = self._api.image.check_existing_hashes(list(unique_hashes))
    new_hashes = unique_hashes - set(remote_hashes)

    # @TODO: upload remote hashes
    # Files already known to the server are attached by hash, without re-uploading the data.
    if len(remote_hashes) != 0:
        files = []
        for hash in remote_hashes:
            for name in hash_to_name[hash]:
                files.append({ApiField.NAME: name, ApiField.HASH: hash})
        for batch in batched(files):
            self._api.post('tasks.files.bulk.add-by-hash', {
                ApiField.TASK_ID: task_id,
                ApiField.FILES: batch
            })
        if progress_cb is not None:
            progress_cb(len(remote_hashes))

    # The remaining files are uploaded as multipart form data: each file
    # contributes two fields, a JSON descriptor and the binary payload.
    for batch in batched(list(zip(abs_paths, names, hashes))):
        content_dict = OrderedDict()
        for idx, (path, name, hash) in enumerate(batch):
            if hash in remote_hashes:
                continue
            content_dict["{}".format(idx)] = json.dumps({
                "fullpath": name,
                "hash": hash
            })
            content_dict["{}-file".format(idx)] = (name, open(path, 'rb'), '')
        if len(content_dict) > 0:
            encoder = MultipartEncoder(fields=content_dict)
            self._api.post('tasks.files.bulk.upload', encoder)
            if progress_cb is not None:
                # Two form fields per file, so halve the count to report files uploaded.
                progress_cb(len(content_dict) // 2)
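# Usage sketch (illustrative): attaching local files to a task. Assumes this
# method is exposed as `api.task.upload_files`; the task id and paths are
# placeholders. Files whose hashes already exist on the server are linked by
# hash instead of being re-uploaded.
#
#   import supervisely_lib as sly
#
#   api = sly.Api(server_address="https://app.supervise.ly", token="<api token>")
#   api.task.upload_files(
#       task_id=456,  # hypothetical task id
#       abs_paths=["/tmp/model.pt", "/tmp/config.json"],
#       names=["model.pt", "config.json"],
#   )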
def upload_paths(self, dataset_id, names, paths, progress_cb=None, metas=None):
    '''
    Upload images with the given names from the given local paths to a dataset.
    :param dataset_id: int
    :param names: list of str (an error is raised if len(names) != len(paths))
    :param paths: list of str
    :param progress_cb: optional callback to report upload progress
    :param metas: list of dicts
    :return: list of uploaded image infos
    '''
    def path_to_bytes_stream(path):
        return open(path, 'rb')

    hashes = [get_file_hash(x) for x in paths]
    self._upload_data_bulk(path_to_bytes_stream, zip(paths, hashes), progress_cb=progress_cb)
    return self.upload_hashes(dataset_id, names, hashes, metas=metas)
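# Usage sketch (illustrative): uploading images with a progress callback.
# Assumes this method is exposed as `api.image.upload_paths`; ids and paths
# are placeholders.
#
#   import supervisely_lib as sly
#
#   api = sly.Api(server_address="https://app.supervise.ly", token="<api token>")
#   progress = sly.Progress("Uploading images", 2)
#   image_infos = api.image.upload_paths(
#       dataset_id=123,  # hypothetical dataset id
#       names=["cat.jpg", "dog.jpg"],
#       paths=["/data/cat.jpg", "/data/dog.jpg"],
#       progress_cb=progress.iters_done_report,
#   )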
def download(self, team_id, remote_path, local_save_path, cache: FileCache = None, progress_cb=None):
    if cache is None:
        self._download(team_id, remote_path, local_save_path, progress_cb)
    else:
        file_info = self.get_info_by_path(team_id, remote_path)
        if file_info.hash is None:
            # Without a hash the cache cannot be used, so download directly.
            self._download(team_id, remote_path, local_save_path, progress_cb)
        else:
            cache_path = cache.check_storage_object(file_info.hash, get_file_ext(remote_path))
            if cache_path is None:
                # The file is not in the cache: download it, verify its hash, then cache it.
                self._download(team_id, remote_path, local_save_path, progress_cb)
                if file_info.hash != get_file_hash(local_save_path):
                    raise KeyError(f"Remote and local hashes are different (team id: {team_id}, file: {remote_path})")
                cache.write_object(local_save_path, file_info.hash)
            else:
                # Cache hit: copy the cached object instead of downloading.
                cache.read_object(file_info.hash, local_save_path)
                if progress_cb is not None:
                    progress_cb(get_file_size(local_save_path))
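# Usage sketch (illustrative): downloading a Team Files item. Assumes this
# method is exposed as `api.file.download`; the team id and paths are
# placeholders. Passing a `FileCache` instance (as typed above) reuses a
# previously downloaded copy when the stored hash matches.
#
#   import supervisely_lib as sly
#
#   api = sly.Api(server_address="https://app.supervise.ly", token="<api token>")
#   api.file.download(
#       team_id=7,  # hypothetical team id
#       remote_path="/checkpoints/model.pt",
#       local_save_path="/tmp/model.pt",
#   )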