def visual_history(self, id_, folder=None):
    """List all steps/Data objects that precede the current one.

    Avatar images are generated for each step (and for the current
    Data object) when *folder* is given.

    Parameters
    ----------
    id_
        UUID string identifying the last (current) Data object.
    folder
        Optional output directory for the generated ``.jpg`` avatars.

    Returns
    -------
    List of dicts with keys 'label', 'name', 'help', 'stored' and,
    when *folder* is given, 'avatar'.
    """
    uuid = UUID()
    data = None
    lastuuid = UUID(id_)
    firstdata = self.fetch(UUIDData(lastuuid))
    # TODO: solve this check in pjdata
    if firstdata.history is None:
        firstdata.history = []
    # Use the parsed history when nonempty, otherwise fall back to the
    # serialized form. (The previous `(list(...) == 0) or ...` was always
    # False on the left, so the parsed history was silently ignored.)
    history = list(firstdata.history) or firstdata.historystr
    if folder:
        lastuuid.generate_avatar(f"{folder}/{id_}.jpg")
    lst = []
    for transformer in history:
        # History entries can be Transformer objects or plain dicts.
        if isinstance(transformer, Transformer):
            name = transformer.name
            transformeruuid = transformer.uuid
        else:
            name = transformer["name"]
            transformeruuid = transformer["uuid"]
        dic = {
            "label": uuid.id,
            "name": name,
            "help": str(transformer),
            # Whether the Data object of the previous step was found in storage.
            "stored": data is not None
        }
        if folder:
            filename = f"{uuid}.jpg"
            dic["avatar"] = filename
            # Fixed garbled path: previously every avatar was written to
            # the literal path "<folder>/(unknown)".
            uuid.generate_avatar(f"{folder}/{filename}")
        lst.append(dic)
        uuid = uuid * transformeruuid
        data = self.fetch(UUIDData(uuid))

    return lst
Exemple #2
0
    def _fetch_impl(self, data: Data, lock: bool = False) -> Data:
        """Fetch a Data object from storage by its UUID.

        Parameters
        ----------
        data
            Data object carrying the UUID (and any already-present
            matrices) to look up.
        lock
            If True and the entry is absent, lock the entry for this
            process before returning.

        Returns
        -------
        The reconstructed Data object, or None when nothing is stored
        for this UUID yet (callers should treat this as Optional[Data]).

        Raises
        ------
        LockedEntryException
            When the entry exists but was locked by another process.
        """
        # Fetch data info.
        uuid = data.uuid
        # Plain string: the query has no interpolated parts, only a placeholder.
        self.query("select * from data where id=?", [uuid.id])
        result = self.get_one()

        if result is None:
            if lock:
                self.lock(data)
            return None

        # An empty 'names' field marks an entry locked by another process.
        if result["names"] == "":
            print("W: Previously locked by other process.", data)
            raise LockedEntryException(data)

        names = result["names"].split(",")
        mids = result["matrices"].split(",")
        hids = result["history"].split(",")

        name_by_mid = dict(zip(mids, names))

        # Fetch matrices (lazily, if storage_info is provided).
        new_mids = [mid for mid in mids if mid not in data.ids_lst]
        matrices = data.matrices
        if self.storage_info is None:
            matrices_by_mid = self.fetch_dumps(new_mids)
            for mid in new_mids:
                matrices[name_by_mid[mid]] = matrices_by_mid[mid]
        else:
            # Lazy mode: keep only the UUID; content is fetched on demand.
            for mid in new_mids:
                matrices[name_by_mid[mid]] = UUID(mid)

        # Fetch history.
        serialized_transformers = self.fetch_dumps(hids, aslist=True)
        # TODO: deserialize before putting into the history
        history = History(serialized_transformers)

        # TODO: failure and frozen should be stored/fetched!
        # TODO: would it be worth to update uuid/uuids here, instead of recalculating it from the start at Data.init?
        uuids = data.uuids
        uuids.update(dict(zip(names, map(UUID, mids))))
        return Data(
            uuid=uuid,
            uuids=uuids,
            history=history,
            failure=None,
            frozen=False,
            hollow=False,
            stream=None,
            storage_info=self.storage_info,
            **matrices,
        )
    def uuid(self) -> UUID:
        """Unique identifier for this dataset, computed on first access.

        Should be accessed directly as a class member: 'uuid'.

        Returns
        -------
            A unique identifier UUID object.
        """
        if self._uuid is None:
            raw = self._uuid_impl()
            # _uuid_impl may return a ready UUID or a string to be encoded.
            if isinstance(raw, UUID):
                self._uuid = raw
            else:
                self._uuid = UUID(raw.encode())
        return self._uuid
Exemple #4
0
 def _uuid_impl(self):
     """Build this object's UUID from its serialized representation."""
     encoded = self.serialized.encode()
     return UUID(encoded)
Exemple #5
0
 def _cfuuid_impl(self, data=None):
     """UUID derived from the serialized 'hashes' entry of the config."""
     hashes = self.config["hashes"]
     return UUID(serialize(hashes).encode())
Exemple #6
0
import numpy as np

from pjdata.aux.compression import pack
from pjdata.aux.uuid import UUID
from pjdata.content.data import Data
# Testes            ############################
from pjdata.history import History

# Tiny classification fixture: two samples, four attributes, one target.
matrices = {
    "X": np.array([[1, 2, 3, 4], [5, 6, 7, 8]]),
    "Y": np.array([[1, 2, 3, 4]]),
    "Xd": ['length', 'width'],
    "Yd": ['class'],
    "Xt": ["real", "real", "real", "real"],
    "Yt": [1, 2, 3, 4]
}
# One packed-content UUID per matrix.
uuids = dict((key, UUID(pack(value))) for key, value in matrices.items())
data = Data(
    uuid=UUID(),
    uuids={"X": UUID(), "Y": UUID()},
    failure=None,
    frozen=False,
    history=History([]),
    hollow=False,
    stream=None,
    **matrices,
)

print('OK', data)
Exemple #7
0
 def _cfuuid_impl(self, data=None):
     """UUID excluding 'model' and 'enhance' flags. Identifies the transformer."""
     serialized = self._cfserialized()
     return UUID(serialized.encode())
Exemple #8
0
 def _uuid_impl(self):
     """Complete UUID; including 'model' and 'enhance' flags. Identifies the component."""
     # Encode the two flags as a zero-padded 14-character string.
     flags = str(self.hasenhancer + self.hasmodel).rjust(14, "0")
     return self._cfuuid_impl() * UUID(flags)
Exemple #9
0
def f():
    """Exercise UUID multiplication against a transposed operand."""
    left = UUID(int2pmat(2 ** 128 - 1))
    right = UUID('12345678901234')
    # Result is intentionally discarded; only the operations matter here.
    (left * right) * right.t
Exemple #10
0
from random import random
from timeit import timeit

from pjdata.aux.linalg import int2pmat, print_binmatrix, pmat2int, \
    int2fac, pmat_mult
from pjdata.aux.uuid import UUID

# Show the result of each UUID operation.
a = UUID(int2pmat(2 ** 128 - 1))
b = UUID('12345678901234')
c = UUID(1)
print(a, b, c)
print()
for result in (a * b, (a * b) * b, (a * b) * b.t, (a * b) * c):
    print(result)

fac = int2fac(2 ** 128 + 3214134)

# Check for collisions: draw pairs of fresh 128-bit values.
s = set()
r = set()
aa = bb = 0
for _ in range(100000):
    while aa in r:
        aa = round(random() * 2 ** 128)
    while bb in r:
        bb = round(random() * 2 ** 128)
    r.update((aa, bb))
Exemple #11
0
def read_arff(filename):
    """
    Create Data from ARFF file.

    Assume X,y classification task and last attribute as target.
    And that there were no transformations (history) on this Data.

    A short hash will be added to the name, to ensure unique names.
    Actually, the first collision is expected after 1M different datasets
    with the same name ( n = 2**(log(107**6, 2)/2) ).
    Since we already expect unique names like 'iris', and any transformed
    dataset is expected to enter the system through a transformer,
    1M should be safe enough. Ideally, a single 'iris' be will stored.
    In practice, no more than a dozen are expected.

    Parameters
    ----------
    filename
        path of the dataset

    Returns
    -------
    (dict of original matrix hashes, Data object, dataset name,
    dataset description)
    """
    # Load file. `with` guarantees the handle is closed even if the ARFF
    # parser raises (the previous open/close pair leaked it on errors).
    with open(filename, "r") as file:
        dic = arff.load(file, encode_nominal=False)  # ['description', 'relation', 'attributes', 'data']
    name = dic["relation"]
    description = dic["description"]

    # Extract attributes and targets.
    Arr = np.array(dic["data"])
    Att = dic["attributes"][0:-1]
    TgtAtt = dic["attributes"][-1]

    # Extract X values (numeric when possible), descriptions and types.
    X = Arr[:, 0:-1]
    Xd = [tup[0] for tup in Att]
    Xt = [translate_type(tup[1]) for tup in Att]
    # Only convert to float when there are no nominal attributes.
    if len(nominal_idxs(Xt)) == 0:
        X = X.astype(float)

    # Extract Y values (assumes categorical), descriptions and types.
    Y = np.ascontiguousarray(Arr[:, -1].reshape((Arr.shape[0], 1)))
    Yd = [TgtAtt[0]]
    Yt = [translate_type(TgtAtt[1])]

    # Calculate pseudo-unique hash for X and Y, and a pseudo-unique name.
    matrices = {"X": X, "Y": Y, "Xd": Xd, "Yd": Yd, "Xt": Xt, "Yt": Yt}
    uuids = {k: UUID(pack(v)) for k, v in matrices.items()}
    original_hashes = {k: v.id for k, v in uuids.items()}

    # Generate the first transformation of a Data object: being born.
    faketransformer = FakeStep(FakeFile(filename, original_hashes))
    uuid, uuids = li.evolve_id(UUID(), {}, [faketransformer], matrices)

    # Create a temporary Data object (i.e. with a fake history).
    data = Data(
        history=History([faketransformer]),
        failure=None,
        frozen=False,
        hollow=False,
        stream=None,
        storage_info=None,
        uuid=uuid,
        uuids=uuids,
        X=X,
        Y=Y,
        Xt=Xt,
        Yt=Yt,
        Xd=Xd,
        Yd=Yd,
    )

    # Patch the Data object with the real transformer and history.
    transformer = Step(FakeFile(filename, original_hashes))
    data.history = History([transformer])

    return original_hashes, data, name, description
Exemple #12
0
 def _uuid_impl(self):
     """UUID taken from the 'component_uuid' entry of the jsonable form."""
     # `jsonable` was referenced as a bare global and is undefined here
     # (NameError at call time); it presumably comes from the instance —
     # TODO(review): confirm the attribute name against the class.
     return UUID(self.jsonable["component_uuid"])