def __ror__(self, other):
    """Support ``other | self`` when the left operand is not a Source.

    A ``list`` is wrapped in a ``Source`` built from ``ic_from_array`` and
    ``ra_from_array`` with its length set to ``len(other)``. Any other object
    exposing ``__iter__`` is wrapped through ``ic_from_iterable`` with no
    random accessor. The wrapped Source is then piped into ``self``.

    :param other: left-hand operand of the ``|`` operator.
    :return: the result of ``Source(...) | self``.
    :raises TypeError: if ``other`` is neither a list nor iterable.
    """
    if isinstance(other, list):
        lhs = Source(ic_from_array(other), ra_from_array(other), length=len(other))
    elif hasattr(other, "__iter__"):
        lhs = Source(ic_from_iterable(other), random_accessor=None)
    else:
        raise TypeError(f"unsupported pipe-operation with {type(other)}")
    return lhs | self
def __getitem__(self, item):
    """Return a Source for a slice, or a single value for an integer index.

    Slices on a source with a known length are normalized first: ``None``
    bounds get defaults, negative bounds are offset by the length, and both
    bounds are clamped into ``[0, length]`` with ``start <= stop``. The
    resulting Source records this source as its parent and the normalized
    slice as a dependency. Slices on a source without a length are passed to
    ``ic_slice`` unchanged and must not use negative bounds.

    :param item: a ``slice`` or an integer index.
    :return: a new ``Source`` for a slice; otherwise the value at ``item``.
    :raises IndexError: for a negative bound/index on a source without
        length, an out-of-range index on a source with length, or an integer
        index on a source that is not random accessible.
    :raises AssertionError: if a slice on a source with length has a
        non-positive step.
    """
    if isinstance(item, slice):
        if self.has_length:
            stop = item.stop if item.stop is not None else self.length
            start = item.start if item.start is not None else 0
            step = item.step if item.step is not None else 1
            assert step > 0

            # Resolve negative bounds relative to the end, clamping at 0.
            if stop < 0:
                stop += self.length
                if stop < 0:
                    stop = 0
            if start < 0:
                start += self.length
                if start < 0:
                    start = 0
            # Clamp to the available range and keep start <= stop.
            stop = min(stop, self.length)
            start = min(start, stop)
            sl = slice(start, stop, step)

            if self.data is not None:
                # Data already held in memory: slice through array-backed accessors.
                ic = ic_slice(ic_from_array(self.data), sl)
                ra = ra_slice(ra_from_array(self.data), s=sl)
            else:
                ic = ic_slice(self.iterable_creator, sl)
                ra = ra_slice(self.random_accessor, s=sl) if self.random_accessible else None

            return Source(
                ic,
                random_accessor=ra,
                # Count of indices start, start+step, ... that are < stop.
                # Floor division ``(stop - start) // step`` under-counts
                # whenever step does not divide the span (e.g. 0:5:2 -> 3 items).
                length=len(range(start, stop, step)),
                parents=[self],
                dependencies=[{"slice": (start, stop, step)}],
            )
        else:
            # Without a length, negative bounds cannot be resolved.
            if item.start is not None and item.start < 0 or \
                    item.stop is not None and item.stop < 0:
                raise IndexError(
                    "negative index does not supported on the source that has no length"
                )
            return Source(ic_slice(self.iterable_creator, item), parents=[self])
    else:
        if self.data is not None:
            # In-memory data handles indexing (including negatives) itself.
            return self.data[item]
        if not self.has_length:
            if item < 0:
                raise IndexError(
                    "negative index does not supported on the source that has no length"
                )
        else:
            if item < 0:
                item += self.length
            if not (0 <= item < self.length):
                raise IndexError("index out of range")
        if not self.random_accessible:
            raise IndexError("this source is not able to be random accessed")
        return self.random_accessor(item)
def from_array(array, *, hash_func=None):
    """Create a Source from a list or a tuple.

    :param array: the ``list`` or ``tuple`` to wrap (exact types only).
    :param hash_func: optional callable invoked with ``array``; its result is
        passed to the Source as its single dependency. When ``None`` no hash
        is computed and no dependencies are passed.
    :return: a ``Source`` built from ``ic_from_array`` / ``ra_from_array``
        with ``length=len(array)``.
    """
    assert type(array) in (tuple, list)
    optional = {}
    if hash_func is not None:
        optional["dependencies"] = [hash_func(array)]
    return Source(
        ic_from_array(array),
        ra_from_array(array),
        length=len(array),
        **optional,
    )
def directory(path):
    """Create a Source over the entries of a directory.

    The entries come from ``pathlib.Path(path).iterdir()`` and are listed
    once, eagerly. The Source's dependency is ``default_hash_func`` applied to
    the entries' string forms.

    :param path: directory to list (anything ``pathlib.Path`` accepts).
    :return: a ``Source`` of ``pathlib.Path`` entries with a known length.
    """
    entries = [*pathlib.Path(path).iterdir()]
    return Source(
        ic_from_array(entries),
        random_accessor=ra_from_array(entries),
        length=len(entries),
        dependencies=[default_hash_func(list(map(str, entries)))],
    )
def mem_cache(self):
    """Materialize the source into memory, in place.

    Does nothing when ``self.data`` is already set. Otherwise iterates
    ``self.iterable_creator(0)`` under a ``tqdm`` progress bar (with a total
    when the length is known), stores the resulting list in ``self.data`` and
    replaces the length, iterable creator and random accessor with
    list-backed ones.

    :return: ``None``.
    """
    if self.data is not None:
        return

    label = "[flowder.mem_cache(?)]iterating source..."
    if self.has_length:
        progress = tqdm(self.iterable_creator(0), total=len(self), desc=label)
    else:
        progress = tqdm(self.iterable_creator(0), desc=label)

    loaded = list(progress)
    self.data = loaded
    self.length = len(loaded)
    self.iterable_creator = ic_from_array(loaded)
    self.random_accessor = ra_from_array(loaded)
def glob(glob_path: str):
    """Create a Source over the paths matching a glob pattern.

    Example::

        files = glob("./*.jpg")

    The matches come from ``glob_.glob(glob_path)``, evaluated once, eagerly,
    and converted to ``pathlib.Path``. The Source's dependency is
    ``default_hash_func`` applied to the paths' string forms.

    :param glob_path: pattern passed to ``glob_.glob``.
    :return: a ``Source`` of ``pathlib.Path`` objects with a known length.
    """
    matches = list(map(pathlib.Path, glob_.glob(glob_path)))
    return Source(
        ic_from_array(matches),
        random_accessor=ra_from_array(matches),
        length=len(matches),
        dependencies=[default_hash_func(list(map(str, matches)))],
    )
def from_items(*items):
    """Create a Source whose elements are the given positional arguments.

    :param items: the values to expose, in call order (received as a tuple).
    :return: a ``Source`` built from ``ic_from_array`` / ``ra_from_array``
        with ``length=len(items)``.
    """
    creator = ic_from_array(items)
    accessor = ra_from_array(items)
    return Source(creator, accessor, length=len(items))
def cache(self, name, cache_dir=".tmp", clear_cache="no", check_only=False, caller_file_name=None, length_only=False):
    """Load the source into memory and persist it as a pickle cache file.

    If a matching cache file already exists it is loaded instead of iterating
    the source. Cache files are named
    ``flowder.<caller_file_name>.<name>.<hex hash>`` (data) and the same name
    with a ``.len`` suffix (length only).

    :param name: cache name.
    :param cache_dir: directory holding the cache files.
    :param clear_cache: what to delete before loading.
        "no": delete nothing (default).
        "yes": delete the cache files whose names match exactly.
        "all": delete every file of this cache group
        (``flowder.<caller_file_name>.<name>*``).
        "clear": delete every file of this cache group and do not load.
    :param check_only: if True, only report whether the cache exists; nothing
        is created or deleted (this is evaluated before ``clear_cache``).
    :param caller_file_name: name used in the cache file name; defaults to the
        calling frame's file name with its last suffix removed.
    :param length_only: if True, load only the length from the length cache.
        Combined with ``check_only`` it checks for the length cache instead.
        Ignored when no length cache exists.
    :return: ``bool`` when ``check_only`` is set, ``None`` when
        ``clear_cache == "clear"``, otherwise ``self``.
    """
    assert clear_cache in ["no", "yes", "all", "clear"]
    cache_dir = pathlib.Path(cache_dir)
    if caller_file_name is None:
        p = pathlib.Path(inspect.currentframe().f_back.f_code.co_filename)
        # NOTE: for a file name without a suffix this slice is ``name[:0]``,
        # i.e. the empty string. Kept as-is so existing cache names stay valid.
        caller_file_name = p.name[:-len(p.suffix)]

    cache_base_name = f"flowder.{caller_file_name}.{name}.{hex(self.hash)[2:]}"
    cache_file_path = cache_dir / cache_base_name
    length_cache_file_path = cache_dir / f"{cache_base_name}.len"

    if check_only:
        if length_only:
            return length_cache_file_path.exists()
        return cache_file_path.exists()

    if clear_cache in ("all", "clear"):
        # Delete every cache file belonging to the same cache group.
        for p in cache_dir.glob(f"flowder.{caller_file_name}.{name}*"):
            p.unlink()
        if clear_cache == "clear":
            return
    elif clear_cache == "yes":
        # Delete only the files whose cache file name matches exactly.
        if cache_file_path.exists():
            cache_file_path.unlink()
        if length_cache_file_path.exists():
            length_cache_file_path.unlink()

    # SECURITY NOTE: pickle.load can execute arbitrary code; cache_dir must
    # only contain files written by trusted code.
    if length_only and length_cache_file_path.exists():
        # Load only the length; validate before touching self so that a bad
        # cache file cannot leave a non-int length behind.
        with length_cache_file_path.open("rb") as f:
            cached_length = pickle.load(f)
        assert type(cached_length) == int
        self.length = cached_length
        return self

    if cache_file_path.exists():
        print(f"[flowder.cache({name})]loading cache...<< {cache_file_path}")
        with cache_file_path.open("rb") as f:
            data = pickle.load(f)
        self.data = data
        self.length = len(data)
        self.iterable_creator = ic_from_array(data)
        self.random_accessor = ra_from_array(data)
        return self

    if self.data is None:
        desc = f"[flowder.cache({name})]iterating source..."
        if self.has_length:
            it = tqdm(self.iterable_creator(0), total=len(self), desc=desc)
        else:
            it = tqdm(self.iterable_creator(0), desc=desc)
        data = list(it)
        self.data = data
        self.length = len(data)
        self.iterable_creator = ic_from_array(data)
        self.random_accessor = ra_from_array(data)

    print(f"[flowder.cache({name})]create cache file...>> {cache_file_path}")
    # exist_ok avoids failing when the directory appears between check and create.
    cache_dir.mkdir(parents=True, exist_ok=True)
    for target, payload in (
        (cache_file_path, self.data),
        (length_cache_file_path, len(self.data)),
    ):
        # Write to a sibling temp file and rename it into place, so a failed
        # dump never leaves a truncated file that later looks like a valid cache.
        tmp_path = target.with_name(target.name + ".tmp")
        try:
            with tmp_path.open("wb") as f:
                pickle.dump(payload, f)
            tmp_path.replace(target)
        finally:
            # Only still present when the dump or the rename failed.
            if tmp_path.exists():
                tmp_path.unlink()
    return self
def __call__(self, source):
    """Apply this object's transform to ``source`` via ``flat_map``.

    A plain ``list`` or ``tuple`` is first wrapped in a ``Source`` built from
    ``ic_from_array`` / ``ra_from_array`` with its length set.

    :param source: a ``Source``, ``list`` or ``tuple``.
    :return: ``source.flat_map(self.transform, dependencies=self.d)``.
    :raises AssertionError: if ``source`` is none of the accepted types.
    """
    target = source
    if isinstance(target, (list, tuple)):
        target = Source(ic_from_array(target), ra_from_array(target), length=len(target))
    assert isinstance(target, Source), \
        f"Argument for FlatMapped called as function must be Source, but {type(target)} found"
    return target.flat_map(self.transform, dependencies=self.d)