예제 #1
0
 def __ror__(self, other):
     """
     Reflected ``|``: evaluate ``other | self`` for a plain Python operand.

     A ``list`` is wrapped in a Source built from the array helpers, with
     its length recorded. Any other object exposing ``__iter__`` is wrapped
     in a Source built from ``ic_from_iterable`` with no random accessor.
     The wrapped Source is then piped into ``self``.

     :param other: left-hand operand of ``|``
     :return: the result of ``Source(...) | self``
     :raises TypeError: if ``other`` is neither a list nor has ``__iter__``
     """
     if isinstance(other, list):
         wrapped = Source(ic_from_array(other), ra_from_array(other), length=len(other))
         return wrapped | self
     if hasattr(other, "__iter__"):
         wrapped = Source(ic_from_iterable(other), random_accessor=None)
         return wrapped | self
     raise TypeError(f"unsupported pipe-operation with {type(other)}")
예제 #2
0
    def __getitem__(self, item):
        """
        Return a new Source for a slice, or a single value for an index.

        Slice on a source with a known length:
            missing bounds default to ``0`` / ``self.length`` / step ``1``;
            negative bounds are offset by the length and then clamped so that
            ``0 <= start <= stop <= self.length``. The step must be positive.
            The returned Source records ``self`` as its parent and the
            normalized ``(start, stop, step)`` as a dependency.

        Slice on a source without a length:
            negative ``start`` / ``stop`` are rejected; the raw slice is
            handed to ``ic_slice`` and the result has ``self`` as parent.

        Index:
            if the source holds in-memory ``data`` the value is read from it
            directly; otherwise the index is validated and passed to the
            random accessor.

        :param item: a ``slice`` or an index
        :return: a Source for a slice, the stored value for an index
        :raises IndexError: on a negative index/bound when the length is
            unknown, on an out-of-range index, or when the source cannot be
            randomly accessed
        """

        if isinstance(item, slice):
            if self.has_length:
                stop = item.stop if item.stop is not None else self.length
                start = item.start if item.start is not None else 0
                step = item.step if item.step is not None else 1
                assert step > 0
                # Negative bounds count from the end; anything still negative
                # after the offset is clamped to the beginning.
                if stop < 0:
                    stop += self.length
                if stop < 0:
                    stop = 0
                if start < 0:
                    start += self.length
                if start < 0:
                    start = 0
                # Clamp to the available range and guarantee start <= stop.
                stop = min(stop, self.length)
                start = min(start, stop)
                sl = slice(start, stop, step)
                if self.data is not None:
                    ic = ic_slice(ic_from_array(self.data), sl)
                    ra = ra_slice(ra_from_array(self.data), s=sl)
                else:
                    ic = ic_slice(self.iterable_creator, sl)
                    ra = ra_slice(self.random_accessor, s=sl) if self.random_accessible else None
                return Source(
                    ic,
                    random_accessor=ra,
                    # Number of selected items is ceil((stop - start) / step);
                    # floor division under-counts whenever step does not
                    # divide the span (e.g. [0:5:2] selects 3 items, not 2).
                    length=len(range(start, stop, step)),
                    parents=[self],
                    dependencies=[{"slice": (start, stop, step)}],
                )
            else:
                # Without a length a negative bound cannot be resolved.
                if item.start is not None and item.start < 0 or \
                        item.stop is not None and item.stop < 0:
                    raise IndexError(
                        "negative index does not supported on the source that has no length"
                    )
                return Source(ic_slice(self.iterable_creator, item), parents=[self])
        else:
            if self.data is not None:
                # In-memory data handles its own index semantics.
                return self.data[item]
            if not self.has_length:
                if item < 0:
                    raise IndexError(
                        "negative index does not supported on the source that has no length"
                    )
            else:
                if item < 0:
                    item += self.length
                if not (0 <= item < self.length):
                    raise IndexError("index out of range")
            if not self.random_accessible:
                raise IndexError("this source is not able to be random accessed")
            return self.random_accessor(item)
예제 #3
0
def from_array(array, *, hash_func=None):
    """
    Create a Source from a list or tuple.

    :param array: the ``list`` or ``tuple`` to wrap; the exact type is
        asserted, so subclasses are rejected
    :param hash_func: optional callable invoked as ``hash_func(array)``; its
        result becomes the single entry of the Source's ``dependencies``.
        The original note says it yields an integer value -- TODO confirm.
        When ``None`` no hash is computed and ``dependencies`` is not passed.
    :return: a Source whose iterable creator, random accessor and length are
        all derived from ``array``
    """
    assert type(array) in (tuple, list)
    optional = {}
    if hash_func is not None:
        optional["dependencies"] = [hash_func(array)]
    return Source(
        ic_from_array(array),
        ra_from_array(array),
        length=len(array),
        **optional,
    )
예제 #4
0
def directory(path):
    """
    Create a Source over the entries found directly inside a directory.

    The entries are collected once, in the order ``Path.iterdir`` yields
    them, and their string forms are hashed with ``default_hash_func`` to
    form the Source's single dependency.

    :param path: directory location, anything accepted by ``pathlib.Path``
    :return: a Source of ``pathlib.Path`` entries with a known length
    """
    entries = list(pathlib.Path(path).iterdir())
    entry_names = [str(entry) for entry in entries]
    return Source(
        ic_from_array(entries),
        random_accessor=ra_from_array(entries),
        length=len(entries),
        dependencies=[default_hash_func(entry_names)],
    )
예제 #5
0
 def mem_cache(self):
     """
     Materialize this source in memory; a no-op if it already holds data.

     The source is iterated once under a ``tqdm`` progress bar (given a
     total when the length is known). Afterwards ``data``, ``length``,
     ``iterable_creator`` and ``random_accessor`` all refer to the
     in-memory list.

     :return: None
     """
     if self.data is not None:
         return
     label = "[flowder.mem_cache(?)]iterating source..."
     # NOTE(review): the argument 0 is presumably a start position -- confirm.
     stream = self.iterable_creator(0)
     bar_options = {"desc": label}
     if self.has_length:
         bar_options["total"] = len(self)
     loaded = list(tqdm(stream, **bar_options))
     self.data = loaded
     self.length = len(loaded)
     self.iterable_creator = ic_from_array(loaded)
     self.random_accessor = ra_from_array(loaded)
예제 #6
0
def glob(glob_path: str):
    """
    Create a Source of ``pathlib.Path`` objects for every match of a pattern.

    Example::

        files = glob("./*.jpg")

    The string forms of the matched paths are hashed with
    ``default_hash_func`` to form the Source's single dependency.

    :param glob_path: pattern handed to ``glob_.glob``
    :return: a Source of the matched paths with a known length
    """
    matches = list(map(pathlib.Path, glob_.glob(glob_path)))
    fingerprint_input = list(map(str, matches))
    return Source(
        ic_from_array(matches),
        random_accessor=ra_from_array(matches),
        length=len(matches),
        dependencies=[default_hash_func(fingerprint_input)],
    )
예제 #7
0
def from_items(*items):
    """
    Create a Source from the given positional arguments.

    :param items: the values the Source will contain, in call order
    :return: a Source with iterable creator, random accessor and length
        derived from the argument tuple
    """
    creator = ic_from_array(items)
    accessor = ra_from_array(items)
    return Source(creator, accessor, length=len(items))
예제 #8
0
    def cache(self, name,
              cache_dir=".tmp",
              clear_cache="no",
              check_only=False,
              caller_file_name=None,
              length_only=False):
        """
        Load this source into memory right away and persist it to a cache file.

        If a matching cache file already exists it is loaded instead of
        iterating the source.

        Cache files are named
        ``flowder.<caller_file_name>.<name>.<hex of self.hash>`` with an
        additional ``.len`` file that stores only the length.

        :param name: cache name
        :param cache_dir: directory that holds the cache files
        :param clear_cache: cache removal performed before loading.
            "no": do nothing (default)
            "yes": delete the cache files whose names match exactly
            "all": delete every file of the cache group
            "clear": delete every file of the cache group and do not load
        :param check_only: if True, only report whether the cache exists;
            no cache is created or deleted.
        :param caller_file_name: name embedded in the cache file names;
            defaults to the file name (without its suffix) of the code that
            called this method
        :param length_only: if True, load only the length from the length
            cache. Combined with ``check_only`` it reports whether the length
            cache exists. Ignored when there is no length cache.
        :return: a bool when ``check_only`` is True, ``None`` when
            ``clear_cache == "clear"``, otherwise ``self``
        """
        assert clear_cache in ["no", "yes", "all", "clear"]
        cache_dir = pathlib.Path(cache_dir)
        if caller_file_name is None:
            p = pathlib.Path(inspect.currentframe().f_back.f_code.co_filename)
            # Use ``stem``: slicing with ``[:-len(p.suffix)]`` collapses to
            # ``[:0]`` (an empty name) when the caller's file has no suffix,
            # e.g. "<stdin>" or an extension-less script.
            caller_file_name = p.stem

        cache_base_name = f"flowder.{caller_file_name}.{name}.{hex(self.hash)[2:]}"
        length_cache_base_name = f"{cache_base_name}.len"
        cache_file_path = cache_dir / cache_base_name
        length_cache_file_path = cache_dir / length_cache_base_name

        if check_only:
            if length_only:
                return length_cache_file_path.exists()
            return cache_file_path.exists()

        if clear_cache in ("all", "clear"):
            # Remove every cache that shares this caller/name prefix.
            for p in cache_dir.glob(f"flowder.{caller_file_name}.{name}*"):
                p.unlink()
            if clear_cache == "clear":
                return
        elif clear_cache == "yes":
            # Remove only the files whose names match exactly.
            if cache_file_path.exists():
                cache_file_path.unlink()
            if length_cache_file_path.exists():
                length_cache_file_path.unlink()

        # NOTE(security): pickle.load executes arbitrary code from the file;
        # cache_dir must only contain files written by trusted code.
        if length_only and length_cache_file_path.exists():
            # loading length from cache
            with length_cache_file_path.open("rb") as f:
                self.length = pickle.load(f)
            assert type(self.length) is int
            return self

        if cache_file_path.exists():
            print(f"[flowder.cache({name})]loading cache...<< {cache_file_path}")
            with cache_file_path.open("rb") as f:
                data = pickle.load(f)
            self.data = data
            self.length = len(data)
            self.iterable_creator = ic_from_array(data)
            self.random_accessor = ra_from_array(data)
            return self

        if self.data is None:
            # Nothing in memory yet: iterate the source once to fill it.
            desc = f"[flowder.cache({name})]iterating source..."
            if self.has_length:
                it = tqdm(self.iterable_creator(0), total=len(self), desc=desc)
            else:
                it = tqdm(self.iterable_creator(0), desc=desc)
            data = list(it)
            self.data = data
            self.length = len(data)
            self.iterable_creator = ic_from_array(data)
            self.random_accessor = ra_from_array(data)

        print(f"[flowder.cache({name})]create cache file...>> {cache_file_path}")
        # exist_ok avoids the race between an exists() check and mkdir().
        cache_dir.mkdir(parents=True, exist_ok=True)
        with cache_file_path.open("wb") as f:
            pickle.dump(self.data, f)
        with length_cache_file_path.open("wb") as f:
            pickle.dump(len(self.data), f)
        return self
예제 #9
0
 def __call__(self, source):
     """
     Apply this object's transform to ``source`` via ``flat_map``.

     A plain ``list`` or ``tuple`` is first wrapped in a Source built from
     the array helpers; anything else must already be a Source.

     :param source: a Source, list or tuple
     :return: ``source.flat_map(self.transform, dependencies=self.d)``
     """
     if isinstance(source, (list, tuple)):
         creator = ic_from_array(source)
         accessor = ra_from_array(source)
         source = Source(creator, accessor, length=len(source))
     assert isinstance(source, Source), (
         f"Argument for FlatMapped called as function must be Source, but {type(source)} found"
     )
     return source.flat_map(self.transform, dependencies=self.d)