Esempio n. 1
0
 def __init__(self, parent, items, xIsListOrTuple, yIsListOrTuple):
     """Store the cached items and remember how they are structured.

     Args:
         parent: object this instance is derived from; its parameters are
             copied over through ``inherit_dataset_params``.
         items: the cached data, stored as-is on ``self.items``.
         xIsListOrTuple: flag stored as-is on ``self.xIsListOrTuple``.
         yIsListOrTuple: flag stored as-is on ``self.yIsListOrTuple``.
     """
     self.parent = parent
     self.items = items

     # Structure flags are kept verbatim for later use by this object.
     self.xIsListOrTuple = xIsListOrTuple
     self.yIsListOrTuple = yIsListOrTuple

     inherit_dataset_params(parent, self)

     # Keep the parent's name, when it has one, as the original name.
     if hasattr(parent, "name"):
         self.origName = parent.name
 def __init__(self, parent):
     """Wrap ``parent`` and record whether it is a known dataset type.

     Args:
         parent: the wrapped object. ``_parent_supports_target`` is set to
             ``True`` when it is a ``PreprocessedDataSet`` or a
             ``DataSet`` and to ``False`` otherwise.
     """
     super().__init__()
     self.expectsItem = False
     self.parent = parent
     self._parent_supports_target = isinstance(
         parent, (PreprocessedDataSet, DataSet))
     inherit_dataset_params(parent, self)
 def wrapper(input, **kwargs):
     """Apply the enclosing ``func`` to ``input`` as a preprocessing step.

     When ``func`` is a class it is instantiated with ``kwargs``; the
     instance gets the class name as ``__name__`` and no keyword arguments
     are forwarded any further. A ``CompositeDataSet`` is processed
     component by component and reassembled into a new composite.

     Args:
         input: a dataset, or a ``CompositeDataSet`` of datasets.
         **kwargs: arguments for the preprocessor (or for its constructor
             when ``func`` is a class).

     Returns:
         A ``PreprocessedDataSet`` or, for composite input, a
         ``CompositeDataSet`` of ``PreprocessedDataSet`` components.
     """
     transform = func
     if inspect.isclass(transform):
         class_name = transform.__name__
         transform = transform(**kwargs)
         transform.__name__ = class_name
         # The arguments were consumed by the constructor.
         kwargs = {}

     if not isinstance(input, CompositeDataSet):
         return PreprocessedDataSet(input, transform, expectsItem, **kwargs)

     wrapped_parts = [
         PreprocessedDataSet(part, transform, expectsItem, **kwargs)
         for part in input.components
     ]
     composite = CompositeDataSet(wrapped_parts)
     inherit_dataset_params(input, composite)
     if hasattr(input, "name"):
         suffix = str(_sorted_args(kwargs))
         composite.name = input.name + transform.__name__ + suffix
         composite.origName = composite.name
     return composite
Esempio n. 4
0
 def __init__(self, parent, items):
     """Keep a reference to ``parent`` and to the cached ``items``.

     Args:
         parent: object this instance is derived from; its parameters are
             copied over through ``inherit_dataset_params``.
         items: the cached data, stored as-is on ``self.items``.
     """
     self.parent, self.items = parent, items
     inherit_dataset_params(parent, self)
Esempio n. 5
0
    def ccc1(input):
        """Copy ``input`` into NumPy buffers backed by an on-disk cache.

        ``input`` is returned unchanged when ``context.context`` has a
        ``no_cache`` attribute. A ``CompositeDataSet`` is handled by caching
        each component recursively. Otherwise the ``x``/``y`` of every item is
        copied into preallocated arrays, which are read from or written to a
        location derived from ``get_cache_dir()`` and the dataset name. The
        resulting ``DiskCache`` is memoised in ``storage`` under that path.

        Args:
            input: dataset supporting ``len()`` and integer indexing whose
                items expose ``x`` and ``y``. ``x`` may be a ``list`` of
                arrays.

        Returns:
            ``input`` itself, a ``CompositeDataSet`` of cached components, or
            a ``DiskCache`` wrapping ``input`` and the ``(x, y)`` buffers.

        Raises:
            ValueError: re-raised with the cache path when ``readArray``
                raises ``ValueError`` while loading the stored X or Y data.
        """
        if hasattr(context.context, "no_cache"):
            return input

        name = "data"
        dataset_id = "dataset"
        count = len(input)

        if hasattr(input, "name"):
            dataset_id = getattr(input, "name")
            # Strip, in a single pass, the characters that must not end up
            # in the cache file name.
            name = dataset_id.translate(str.maketrans("", "", "{[/\\]} ,':"))

        name = get_cache_dir() + name
        if name in storage:
            cached = storage[name]
            inherit_dataset_params(input, cached)
            return cached

        if isinstance(input, CompositeDataSet):
            components = [ccc1(part) for part in input.components]
            compositeDS = CompositeDataSet(components)
            inherit_dataset_params(input, compositeDS)
            if hasattr(input, "name"):
                compositeDS.origName = input.name
            return compositeDS

        def batched_shape(sample):
            # A plain tuple is used on purpose: np.concatenate(([n], ()))
            # promotes to float for 0-d samples and np.zeros then rejects it.
            return (count,) + tuple(sample.shape)

        first = input[0]
        first_x = first.x
        first_y = first.y

        x_is_list = isinstance(first_x, list)
        if x_is_list:
            shape_x = [batched_shape(part) for part in first_x]
        else:
            shape_x = batched_shape(first_x)

        if isinstance(first_y, list):
            shape_y = [batched_shape(part) for part in first_y]
        else:
            shape_y = batched_shape(first_y)

        def allocate():
            """Create zero-filled (x, y) buffers sized for the whole dataset."""
            if x_is_list:
                # NOTE(review): every x component gets the dtype of the first
                # component; mixed-dtype inputs are cast. Confirm intended.
                x_buffers = [np.zeros(shape, first_x[0].dtype)
                             for shape in shape_x]
            else:
                x_buffers = np.zeros(shape_x, first_x.dtype)
            # NOTE(review): a list-valued y has no ``dtype`` and fails here;
            # only the old single-file cache branch below avoids this call.
            return (x_buffers, np.zeros(shape_y, first_y.dtype))

        data = None
        ext = "dscache"
        if os.path.exists(name):
            if not os.path.isdir(name):
                # Old style: the whole cache lives in one file.
                data = load(name)
            elif os.path.exists(f"{name}/x_0.{ext}"):
                data = allocate()
                try:
                    readArray(data[0], f"{name}/x_", ext,
                              "Loading X cache...", count)
                except ValueError as err:
                    raise ValueError(
                        f"Stored X has unexpected size for dataset '{name}'. Path: "
                        + name) from err

                try:
                    readArray(data[1], f"{name}/y_", ext,
                              "Loading Y cache...", count)
                except ValueError as err:
                    raise ValueError(
                        f"Stored Y has unexpected size for dataset '{name}'. Path: "
                        + name) from err

        if data is None:
            data = allocate()

            for i in tqdm.tqdm(range(count),
                               "building disk cache for:" + dataset_id):
                # Fetch each item once so x and y come from the same
                # evaluation and the dataset is not indexed repeatedly.
                item = input[i]
                item_x = item.x
                if x_is_list:
                    for j in range(len(shape_x)):
                        data[0][j][i] = item_x[j]
                else:
                    data[0][i] = item_x
                data[1][i] = item.y

            os.makedirs(name, exist_ok=True)
            dumpArray(data[0], f"{name}/x_", ext, "Saving X cache...")
            dumpArray(data[1], f"{name}/y_", ext, "Saving Y cache...")

        result = DiskCache(input, data)

        inherit_dataset_params(input, result)
        if hasattr(input, "name"):
            result.origName = input.name
        storage[name] = result
        return result
Esempio n. 6
0
    def ccc1(input):
        """Return a block-based disk cache for ``input``.

        The cache directory is ``get_cache_dir() + cache_name(input)``. A
        ``CompositeDataSet`` is handled by caching each component
        recursively. If the directory does not yet hold both structure files
        and the block counter, the dataset is iterated once and written out
        in blocks; the blocks are then loaded back and wrapped in a
        ``DiskCache1``, which is memoised in ``storage``.

        Args:
            input: dataset supporting ``len()`` and integer indexing whose
                items expose ``x`` and ``y``.

        Returns:
            The memoised result for this cache path, a ``CompositeDataSet``
            of cached components, or a new ``DiskCache1``.

        Raises:
            Exception: when a block file expected by the block counter is
                missing from the cache directory.
        """
        name = get_cache_dir() + cache_name(input)
        if name in storage:
            return storage[name]

        if isinstance(input, CompositeDataSet):
            components = [ccc1(part) for part in input.components]
            compositeDS = CompositeDataSet(components)
            inherit_dataset_params(input, compositeDS)
            if hasattr(input, "name"):
                compositeDS.origName = input.name
            return compositeDS

        xStructPath = f"{name}/x.struct"
        yStructPath = f"{name}/y.struct"
        blocksCountPath = f"{name}/blocks_count.int"

        def is_list_or_tuple(struct):
            """Tell whether a structure descriptor marks a list or a tuple."""
            return struct[2] in ["list", "tuple"]

        def block_path(prefix, block_ind, component=None):
            """Build the file path of one block (optionally one component)."""
            if component is None:
                return f"{name}/{prefix}_{block_ind}.dscache"
            return f"{name}/{prefix}_{block_ind}_{component}.dscache"

        def read_block(path):
            """Load one block file, failing loudly when it is absent."""
            if not os.path.exists(path):
                raise Exception(f"Cache block is missing: {name}")
            return load(path)

        def append_block(prefix, struct, buffers, block_ind):
            """Append the content of one stored block to the buffers."""
            if not is_list_or_tuple(struct):
                for value in read_block(block_path(prefix, block_ind)):
                    buffers.append(value)
                return
            for c in range(len(struct[0])):
                for value in read_block(block_path(prefix, block_ind, c)):
                    buffers[c].append(value)

        def storable(values, type_name):
            """Pack int/float typed values into an array before saving."""
            if type_name.startswith(("int", "float")):
                return np.array(values)
            return values

        def flush_block(prefix, struct, buffers, block_ind):
            """Write the buffered values of one block to disk."""
            if not is_list_or_tuple(struct):
                save(block_path(prefix, block_ind),
                     storable(buffers, struct[0][0]))
                return
            for c in range(len(struct[0])):
                save(block_path(prefix, block_ind, c),
                     storable(buffers[c], struct[0][c]))

        def build_cache():
            """Iterate ``input`` once and write it to disk block by block."""
            os.makedirs(name, exist_ok=True)

            first = input[0]
            xStruct = inspect_structure(first.x)
            yStruct = inspect_structure(first.y)
            total = len(input)

            x_multi = is_list_or_tuple(xStruct)
            y_multi = is_list_or_tuple(yStruct)

            xData, yData = init_buffers(xStruct, yStruct)

            # A block is flushed once the accumulated get_size() total
            # exceeds this threshold (presumably bytes, i.e. 64 MiB;
            # confirm against get_size).
            barrier = 64 * 1024 * 1024
            buffSize = 0
            blockInd = 0

            # The label uses ``name`` like the loading loop does; the
            # previous ``id`` is not defined in this function.
            for i in tqdm.tqdm(range(total),
                               "building disk cache for:" + name):
                item = input[i]
                if not x_multi:
                    xData.append(item.x)
                else:
                    for c in range(len(xStruct[0])):
                        xData[c].append(item.x[c])
                if not y_multi:
                    yData.append(item.y)
                else:
                    for c in range(len(yStruct[0])):
                        yData[c].append(item.y[c])

                buffSize += get_size(item.x)
                buffSize += get_size(item.y)

                # The last item always flushes so no tail data is lost.
                if buffSize > barrier or i == total - 1:
                    flush_block("x", xStruct, xData, blockInd)
                    flush_block("y", yStruct, yData, blockInd)

                    buffSize = 0
                    blockInd += 1
                    xData, yData = init_buffers(xStruct, yStruct)

            # Metadata is written last, so its presence marks a finished
            # cache.
            save(xStructPath, xStruct)
            save(yStructPath, yStruct)
            save(blocksCountPath, blockInd)

        def load_cache():
            """Read every stored block back into fresh buffers."""
            blocksCount = load(blocksCountPath)
            xStruct = load(xStructPath)
            yStruct = load(yStructPath)

            xData, yData = init_buffers(xStruct, yStruct)

            for blockInd in tqdm.tqdm(range(blocksCount),
                                      "loading disk cache for:" + name):
                append_block("x", xStruct, xData, blockInd)
                append_block("y", yStruct, yData, blockInd)

            return ((xData, yData), is_list_or_tuple(xStruct),
                    is_list_or_tuple(yStruct))

        cache_is_complete = all(
            os.path.exists(path)
            for path in (xStructPath, yStructPath, blocksCountPath))
        if not cache_is_complete:
            build_cache()

        # Data is always served from what is on disk, including right after
        # a fresh build.
        data, xIsListOrTuple, yIsListOrTuple = load_cache()
        result = DiskCache1(input, data, xIsListOrTuple, yIsListOrTuple)
        storage[name] = result
        return result