Ejemplo n.º 1
0
    def get(self, key):
        """Return the cached state stored under ``key``, or ``None``.

        ``None`` is returned when no row exists for the key, when the stored
        metadata cannot be parsed or its ``status`` is not ``"ready"``, or
        when the state data cannot be reconstructed (the failure is logged).
        """
        c = self.connection.cursor()
        c.execute(
            f"""
        SELECT
          metadata,
          state_data
        FROM {self.table}
        WHERE query=?
        """,
            [key],
        )

        try:
            row = c.fetchone()
            if row is None:
                # Cache miss: nothing is stored under this key.
                return None
            metadata, data = row
            metadata = json.loads(metadata)
            if metadata.get("status") != "ready":
                return None
        except Exception:
            # Unreadable metadata is treated as a cache miss. ``Exception``
            # (rather than a bare ``except``) lets KeyboardInterrupt and
            # SystemExit propagate.
            return None
        try:
            state = State().from_dict(metadata)

            t = state_types_registry().get(state.type_identifier)
            state.data = t.from_bytes(self.decode(data))
            return state
        except Exception:
            logging.exception(f"Cache failed to recover {key}")
            return None
Ejemplo n.º 2
0
def evaluate_and_save(query,
                      target_directory=None,
                      target_file=None,
                      cache=None):
    """Evaluate query and save result.

    Output is saved either to
    - a target directory (current working directory by default) to a file
      deduced from the query, or
    - to target_file (if specified); target_directory is not applied then.

    The data is encoded with the state's extension when it has one.

    Returns a state.
    """

    state = evaluate(query, cache=cache)
    data = state.get()
    reg = state_types_registry()
    t = reg.get(type(data))

    # Encode unconditionally. Previously the bytes were produced only when
    # target_file was None, so passing target_file raised NameError on write.
    if state.extension is None:
        b, mime, typeid = encode_state_data(data)
    else:
        b, mime, typeid = encode_state_data(data,
                                            extension=state.extension)

    path = target_file
    if path is None:
        # The state's own filename is used only when it also has an extension.
        if state.extension is None or state.filename is None:
            path = t.default_filename()
        else:
            path = state.filename
        if target_directory is not None:
            path = os.path.join(target_directory, path)

    with open(path, "wb") as f:
        f.write(b)

    return state
Ejemplo n.º 3
0
def response(state):
    """Encode a State into the parts of a response.

    Returns a ``(bytes, mimetype, filename)`` tuple. The filename comes from
    the state metadata; when it is missing, the default filename of the
    encoded state type is used.
    """
    name = state.metadata.get("filename")
    payload, mime, type_id = encode_state_data(state.get(),
                                               extension=state.extension)
    if name is not None:
        return payload, mime, name
    state_type = state_types_registry().get(type_id)
    return payload, mime, state_type.default_filename()
Ejemplo n.º 4
0
 def contains(self, key):
     """Return True when both the metadata file and the data file exist for ``key``.

     The metadata file is read to find the state type, whose default
     extension determines the name of the data file.
     """
     state_path = self.to_path(key)
     if not os.path.exists(state_path):
         return False
     # The context manager closes the file; the handle used to be leaked.
     with open(state_path) as f:
         state = State().from_dict(json.loads(f.read()))
     t = state_types_registry().get(state.type_identifier)
     path = self.to_path(key,
                         prefix="data_",
                         extension=t.default_extension())
     return os.path.exists(path)
Ejemplo n.º 5
0
    def remove(self, key):
        """Delete the files cached for ``key`` and return True.

        When the metadata names a type identifier, the matching data file is
        deleted first (if present), then the metadata file (if present).
        Nothing is deleted when no metadata is available.
        """
        meta = self.get_metadata(key)
        if meta is not None:
            if "type_identifier" in meta:
                state_type = state_types_registry().get(meta["type_identifier"])
                data_file = self.to_path(
                    key, prefix="data_", extension=state_type.default_extension()
                )
                if os.path.exists(data_file):
                    os.remove(data_file)

            metadata_file = self.to_path(key)
            if os.path.exists(metadata_file):
                os.remove(metadata_file)
        return True
Ejemplo n.º 6
0
    def store(self, state):
        """Store ``state`` in the underlying storage.

        Returns None for error states (they are never cached), True when the
        state was stored, and False when the path is not supported by the
        storage or when encoding/storing fails.
        """
        if state.is_error:
            return None
        state.metadata["status"] = "ready"

        t = state_types_registry().get(state.type_identifier)
        path = self.to_path(state.query)
        if not self.storage.is_supported(path):
            return False
        try:
            b, mime = t.as_bytes(state.data)
            metadata = dict(**state.metadata)
            metadata["mimetype"] = mime
            self.storage.store(path, b, metadata)
        except Exception:
            # Storing is best-effort. ``Exception`` (rather than a bare
            # ``except``) lets KeyboardInterrupt and SystemExit propagate.
            return False
        return True
Ejemplo n.º 7
0
 def get(self, key):
     """Return the cached state for ``key``, or None.

     None is returned when the metadata file or the data file is missing,
     or when the data cannot be decoded (the failure is logged).
     """
     state_path = self.to_path(key)
     if not os.path.exists(state_path):
         return None
     # Context managers close both files; the handles used to be leaked.
     with open(state_path) as f:
         state = State().from_dict(json.loads(f.read()))
     t = state_types_registry().get(state.type_identifier)
     path = self.to_path(key,
                         prefix="data_",
                         extension=t.default_extension())
     if not os.path.exists(path):
         return None
     try:
         with open(path, "rb") as f:
             state.data = t.from_bytes(f.read())
         return state
     except Exception:
         # ``Exception`` lets KeyboardInterrupt and SystemExit propagate.
         logging.exception(f"Cache failed to recover {key}")
         return None
Ejemplo n.º 8
0
    def store(self, state):
        """Write the metadata and the encoded data of ``state`` to the cache.

        Returns None for error states, False when the metadata cannot be
        stored or the state type does not implement serialization, and True
        on success.
        """
        if state.is_error:
            return None
        state.metadata["status"] = "ready"

        metadata_stored = self.store_metadata(state.metadata)
        if not metadata_stored:
            return False

        state_type = state_types_registry().get(state.type_identifier)
        data_path = self.to_path(
            state.query, prefix="data_", extension=state_type.default_extension()
        )
        try:
            # The file is opened before the data is serialized, so a failed
            # serialization leaves an empty data file behind.
            with open(data_path, "wb") as out:
                payload, _mime = state_type.as_bytes(state.data)
                out.write(self.encode(payload))
        except NotImplementedError:
            return False
        return True
Ejemplo n.º 9
0
    def store(self, state):
        """Write ``state`` to the cache as a JSON metadata file plus a data file.

        Returns False when the metadata file cannot be written (the failure
        is logged) or when the state type does not implement serialization;
        returns True on success.
        """
        try:
            with open(self.to_path(state.query), "w") as f:
                f.write(json.dumps(state.as_dict()))
        except Exception:
            # ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt and SystemExit propagate.
            logging.exception(f"Cache writing error: {state.query}")
            return False

        t = state_types_registry().get(state.type_identifier)
        path = self.to_path(state.query,
                            prefix="data_",
                            extension=t.default_extension())
        with open(path, "wb") as f:
            try:
                b, mime = t.as_bytes(state.data)
                f.write(b)
            except NotImplementedError:
                return False
        return True
Ejemplo n.º 10
0
    def __init__(
        self,
        key,
        item_keys=None,
        extension="parquet",
        number_format="%04d",
        batch_number=0,
        store=None,
    ):
        """Record the configuration on the instance.

        ``item_keys`` falls back to a new empty list when it is falsy, and
        ``store`` falls back to ``get_store()`` when it is None. The state
        type registered as ``"dataframe"`` is looked up and kept in
        ``self.state_type``.
        """
        self.key = key
        self.item_keys = item_keys if item_keys else []
        self.extension = extension
        self.number_format = number_format
        self.batch_number = batch_number
        self.state_type = state_types_registry().get("dataframe")
        self.store = get_store() if store is None else store
Ejemplo n.º 11
0
    def get(self, query):
        """Evaluate ``query`` and write the encoded result as the response.

        The encoding extension is the part of the state's filename after its
        last dot (None when there is no filename or no dot). The mimetype
        reported by the encoder is sent as the Content-Type header.
        """
        state = evaluate(query)
        filename = state.filename
        has_extension = filename is not None and "." in filename
        extension = filename.split(".")[-1] if has_extension else None

        b, mimetype, type_identifier = encode_state_data(state.get(),
                                                         extension=extension)
        if filename is None:
            # The resolved name is not used below; the lookup is kept so the
            # same registry calls are made.
            state_type = state_types_registry().get(type_identifier)
            filename = state_type.default_filename()

        self.set_header("Content-Type", mimetype)
        self.write(b)
Ejemplo n.º 12
0
    def get(self, key):
        """Return the cached state for ``key``, or None.

        None is returned when the metadata is missing or not marked
        ``"ready"``, when the data file does not exist, or when the data
        cannot be decoded (the failure is printed and logged).
        """
        metadata = self.get_metadata(key)
        if metadata is None:
            print(f"(FileCache) Metadata missing: {key}")
            return None
        if metadata.get("status") != "ready":
            print(f"(FileCache) Not ready {key}; ", metadata.get("status"))
            return None
        state = State()
        state.metadata = metadata

        t = state_types_registry().get(metadata["type_identifier"])
        path = self.to_path(key, prefix="data_", extension=t.default_extension())
        if not os.path.exists(path):
            return None
        try:
            # The context manager closes the file; the handle used to be leaked.
            with open(path, "rb") as f:
                state.data = t.from_bytes(self.decode(f.read()))
            return state
        except Exception:
            # ``Exception`` lets KeyboardInterrupt and SystemExit propagate.
            traceback.print_exc()
            logging.exception(f"Cache failed to recover {key}")
            return None
Ejemplo n.º 13
0
def response(state):
    """Build a flask response carrying the encoded data of a State.

    The encoding extension is the part of the state's filename after its
    last dot. Mimetypes outside the listed set are sent with a
    Content-Disposition attachment header carrying the filename (the state
    type's default filename when the state has none).
    """
    filename = state.filename
    if filename is not None and "." in filename:
        extension = filename.split(".")[-1]
    else:
        extension = None
    b, mimetype, type_identifier = encode_state_data(state.get(),
                                                     extension=extension)
    if filename is None:
        state_type = state_types_registry().get(type_identifier)
        filename = state_type.default_filename()

    inline_mimetypes = (
        "application/json",
        "text/plain",
        "text/html",
        "text/csv",
        "image/png",
        "image/svg+xml",
    )
    r = make_response(b)
    r.headers.set("Content-Type", mimetype)
    if mimetype in inline_mimetypes:
        return r
    r.headers.set("Content-Disposition", "attachment", filename=filename)
    return r
Ejemplo n.º 14
0
def response(state):
    """Build a flask response carrying the encoded data of a State.

    The data is encoded using ``state.extension``. Mimetypes outside the
    listed set are sent with a Content-Disposition attachment header
    carrying the filename from the metadata (or the state type's default
    filename when the metadata has none).
    """
    payload, mimetype, type_identifier = encode_state_data(
        state.get(), extension=state.extension)
    name = state.metadata.get("filename")
    if name is None:
        state_type = state_types_registry().get(type_identifier)
        name = state_type.default_filename()

    inline_mimetypes = (
        "application/json",
        "text/plain",
        "text/html",
        "text/csv",
        "image/png",
        "image/svg+xml",
    )
    r = make_response(payload)
    r.headers.set("Content-Type", mimetype)
    if mimetype in inline_mimetypes:
        return r
    r.headers.set("Content-Disposition", "attachment", filename=name)
    return r
Ejemplo n.º 15
0
    def store(self, state):
        """Insert ``state`` into the cache table and commit.

        Returns None for error states, False when the state type does not
        implement serialization, and True once the row is committed. Any
        existing row for the query is deleted first when
        ``delete_before_insert`` is set.
        """
        if state.is_error:
            return None
        state.metadata["status"] = "ready"

        query = state.query
        serialized_metadata = json.dumps(state.as_dict())

        state_type = state_types_registry().get(state.type_identifier)
        try:
            payload, _mime = state_type.as_bytes(state.data)
        except NotImplementedError:
            return False

        # Reset the remembered key listing before the table changes.
        self._available_keys = None
        if self.delete_before_insert:
            self.connection.execute(
                f"DELETE FROM {self.table} WHERE query=?", [query]
            )
        self.connection.execute(
            f"INSERT INTO {self.table} (query, metadata, state_data) VALUES (?, ?, ?)",
            [query, serialized_metadata, self.encode(payload)],
        )
        self.connection.commit()
        return True
Ejemplo n.º 16
0
    def get(self, key):
        """Return the cached state for ``key``, or None.

        None is returned when the metadata is missing or not marked
        ``"ready"``, when the storage does not contain the data, or when the
        data cannot be decoded (the failure is printed and logged).
        """
        print(f"GET {key}")
        metadata = self.get_metadata(key)
        print(f"  METADATA {metadata}")
        if metadata is None:
            print(f"(StoreCache) Metadata missing: {key}")
            return None
        if metadata.get("status") != "ready":
            print(f"(StoreCache) Not ready {key}; ", metadata.get("status"))
            return None
        state = State()
        state.metadata = metadata

        t = state_types_registry().get(metadata["type_identifier"])
        path = self.to_path(key)
        if not self.storage.contains(path):
            return None
        try:
            state.data = t.from_bytes(self.decode(self.storage.get_bytes(path)))
            return state
        except Exception:
            # ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt and SystemExit propagate.
            traceback.print_exc()
            logging.exception(f"Cache failed to recover {key}")
            return None
Ejemplo n.º 17
0
 def state_types_registry(self):
     """Return the result of the module-level ``state_types_registry()`` call."""
     # Inside a method body the bare name resolves to the module-level
     # function, not to this method.
     registry = state_types_registry()
     return registry
Ejemplo n.º 18
0
def dr(state, type_identifier=None, extension=None, context=None):
    """Decode resource

    Decodes the bytes into a data structure. This is meant to be used in connection to a resource query.
    Resource part of the query will typically fetch the data from a store and thus return bytes (together with metadata).
    Command dr will convert the bytes (assuming proper metadata are provided) into a data structure.
    The metadata must contain type_identifier in metadata or metadata['resource_metadata'], a filename with extension
    or extension with known decoding.

    The type identifier is resolved from the argument, then from the state
    metadata, then from the resource metadata, and finally from the
    extension. The extension is resolved from the argument, then from the
    metadata, then from the filename of the query, and otherwise from the
    name of the resource key.

    Returns the state with decoded data, or None (after reporting an error
    to the context) when the state holds no data. Raises Exception when no
    type identifier can be resolved.
    """
    from liquer.state_types import state_types_registry
    from liquer.parser import parse

    if state.data is None:
        context.error(
            f"Bytes expected, None received in dr from {state.query}")
        return

    if type_identifier is None:
        type_identifier = state.metadata.get(
            "type_identifier",
            state.metadata.get("resource_metadata", {}).get("type_identifier"),
        )

    if type_identifier in (None, "bytes"):
        type_identifier = state.metadata.get("resource_metadata",
                                             {}).get("type_identifier")

    if extension is None:
        extension = state.metadata.get("extension")
    if extension is None:
        query = state.metadata.get("query")
        # Bind filename up front: it used to be unbound (UnboundLocalError)
        # whenever the metadata carried no query.
        filename = None
        if query is not None:
            filename = parse(query).filename()
        if filename is not None:
            v = filename.split(".")
            if len(v) > 1:
                extension = v[-1]
                context.info(f"Extension: {extension} - from query '{query}'")
        else:
            key = state.metadata.get("resource_metadata", {}).get("key")
            if key is not None:
                filename = context.store().key_name(key)
            # Without a key there is no name to split; calling split on None
            # used to raise AttributeError here.
            if filename is not None:
                v = filename.split(".")
                if len(v) > 1:
                    extension = v[-1]
                    context.info(f"Extension: {extension} - from key '{key}'")

    if type_identifier in (None, "bytes"):
        type_identifier = type_identifier_from_extension(extension)
        context.info(
            f"Type identifier: {type_identifier} - from extension '{extension}'"
        )

    if type_identifier is None:
        context.error("Decode resource (dr) command failed")
        raise Exception(
            f"Failed to resolve type for query {state.metadata.get('query')}")

    if extension in ("parquet", "xlsx", "csv",
                     "tsv") and type_identifier in ("generic",
                                                    "dictionary",
                                                    "pickle"):
        # A tabular extension paired with a generic type identifier is
        # treated as inconsistent; the extension wins.
        context.warning(
            f"Type identifier '{type_identifier}' seems to be inconsistent with the extension '{extension}'"
        )
        context.warning(
            f"This might indicate a problem with executing the partent query '{context.parent_query}'"
        )
        # NOTE(review): this uses the type_identifier_by_extension mapping
        # while the lookup above calls type_identifier_from_extension();
        # neither is defined in this block, so confirm both exist.
        type_identifier = type_identifier_by_extension.get(extension)
        context.warning(
            f"To fix the inconsistency, type identifier: {type_identifier} is used from extension '{extension}'"
        )

    context.info(
        f"Type identifier: {type_identifier},  Extension: {extension}")
    t = state_types_registry().get(type_identifier)
    data = t.from_bytes(state.data, extension=extension)
    return state.with_data(data)