예제 #1
0
파일: cube.py 프로젝트: mars-f/ActiveData
    def window(self, window):
        """
        ANNOTATE THIS CUBE WITH A NEW COLUMN COMPUTED FROM A WINDOW EXPRESSION

        A Matrix WITH THE SAME dims AS THE EXISTING COLUMNS IS STORED UNDER
        window.name; EACH CELL IS FILLED BY CALLING THE ACCESSOR BUILT FROM
        window.value ON A ROW MADE OF THE EXISTING COLUMN VALUES AND THE EDGE
        PARTITIONS AT THAT COORDINATE.  THE CUBE MUST ALREADY HOLD AT LEAST
        ONE COLUMN.

        :param window: WINDOW CLAUSE; name, value, edges AND sort ARE READ
        :return: self, AFTER window HAS BEEN APPENDED TO self.select
        :raises NotImplementedError: WHEN window HAS edges OR sort
        """
        if window.edges or window.sort:
            raise NotImplementedError()

        from jx_python import jx

        # SET OP
        # values()[0] FAILS ON PYTHON 3 DICT VIEWS; next(iter()) WORKS ON VIEWS AND LISTS ALIKE
        canonical = next(iter(self.data.values()))
        accessor = jx.get(window.value)
        # SNAPSHOT THE NAMES NOW: A LIVE keys() VIEW WOULD ALSO YIELD THE COLUMN ADDED BELOW
        cnames = list(self.data.keys())

        # ANNOTATE EXISTING CUBE WITH NEW COLUMN
        m = self.data[window.name] = Matrix(dims=canonical.dims)
        for coord in canonical._all_combos():
            # IT IS SAD WE MUST HAVE A Data(), THERE ARE {"script": expression} USING THE DOT NOTATION
            row = Data()
            for k in cnames:
                row[k] = self.data[k][coord]
            for c, e in zip(coord, self.edges):
                row[e.name] = e.domain.partitions[c]
            # DUMMY Null VALUES BECAUSE I DO NOT KNOW WHAT TO DO
            m[coord] = accessor(row, Null, Null)

        self.select.append(window)
        return self
예제 #2
0
    def __init__(
        self,
        rollover_field,
        rollover_interval,
        rollover_max,
        schema,
        queue_size=10000,
        batch_size=5000,
        typed=None,
        kwargs=None
    ):
        """
        :param rollover_field: the FIELD with a timestamp to use for determining which index to push to
        :param rollover_interval: duration between roll-over to new index
        :param rollover_max: remove old indexes, do not add old records
        :param schema: es schema
        :param queue_size: number of documents to queue in memory (not read in this body)
        :param batch_size: number of documents to push at once (not read in this body)
        :param typed: indicate if we are expected typed json; must be provided
        :param kwargs: plus additional ES settings; kept as self.settings
        """
        # NOTE(review): "!= None" / "== None" (NOT "is") LOOK DELIBERATE FOR
        # NULL-OBJECT COMPARISON - CONFIRM BEFORE CHANGING
        if kwargs.tjson != None:
            Log.error("not expected")
        if typed == None:
            Log.error("not expected")

        # REQUIRED FOR ACTIVEDATA NESTED QUERIES
        schema.settings.index.max_result_window = 100000
        schema.settings.index.max_inner_result_window = 100000

        self.settings = kwargs
        self.locker = Lock("lock for rollover_index")
        self.rollover_field = jx.get(rollover_field)

        # NORMALIZE TO Duration, AND KEEP THE SETTINGS IN STEP WITH THE ATTRIBUTES
        interval = Duration(rollover_interval)
        self.rollover_interval = interval
        self.settings.rollover_interval = interval

        oldest = Duration(rollover_max)
        self.rollover_max = oldest
        self.settings.rollover_max = oldest

        self.known_queues = {}  # MAP DATE TO INDEX
        self.cluster = elasticsearch.Cluster(self.settings)
예제 #3
0
    def __init__(self, name, db=None, uid=UID, kwargs=None):
        """
        BUILD A HANDLE ON THE FACT TABLE name; A NEW Sqlite() IS MADE WHEN NO
        db IS GIVEN

        :param name: NAME FOR THIS TABLE
        :param db: THE DB TO USE
        :param uid: THE UNIQUE INDEX FOR THIS TABLE
        :param kwargs: NOT READ IN THIS BODY
        :return: HANDLE FOR TABLE IN db
        """
        global _config

        if not db:
            db = Sqlite()
        self.db = db

        if not _config:
            # REGISTER sqlite AS THE DEFAULT CONTAINER TYPE
            from jx_base.container import config as _config

            if not _config.default:
                _config.default = {"type": "sqlite", "settings": {"db": db}}

        self.sf = Snowflake(fact=name, uid=uid, db=db)

        self._next_guid = generateGuid()
        self._next_uid = 1
        self._make_digits_table()

        # ACCESSOR IS BUILT FROM THE SNOWFLAKE'S uid, NOT DIRECTLY FROM THE uid ARGUMENT
        self.uid_accessor = jx.get(self.sf.uid)
예제 #4
0
 def __init__(
     self,
     rollover_field,
     rollover_interval,
     rollover_max,
     queue_size=10000,
     batch_size=5000,
     kwargs=None
 ):
     """
     NOTE: THE INTERVAL AND MAX ARE READ FROM kwargs, NOT FROM THE
     rollover_interval / rollover_max PARAMETERS
     (NOTE(review): PRESUMABLY kwargs CARRIES ALL PARAMETERS - CONFIRM)

     :param rollover_field: the FIELD with a timestamp to use for determining which index to push to
     :param rollover_interval: duration between roll-over to new index
     :param rollover_max: remove old indexes, do not add old records
     :param queue_size: number of documents to queue in memory
     :param batch_size: number of documents to push at once
     :param kwargs: plus additional ES settings
     :return:
     """
     self.settings = kwargs
     self.locker = Lock("lock for rollover_index")
     self.rollover_field = jx.get(rollover_field)

     # NORMALIZE TO Duration, AND WRITE THE RESULT BACK INTO THE SETTINGS
     interval = Duration(kwargs.rollover_interval)
     self.rollover_interval = interval
     self.settings.rollover_interval = interval

     oldest = Duration(kwargs.rollover_max)
     self.rollover_max = oldest
     self.settings.rollover_max = oldest

     self.known_queues = {}  # MAP DATE TO INDEX
     self.cluster = elasticsearch.Cluster(self.settings)
예제 #5
0
 def search(self, query):
     """
     APPLY query.query.filtered.filter TO EVERY DOCUMENT IN self.data AND
     RETURN THE MATCHES AS {"hits": {"total": count, "hits": [...]}}

     :param query: QUERY; query.query.filtered.filter AND query.fields ARE READ
     :return: WHEN query.fields IS SET, EACH HIT IS {"_id", "fields"} WITH THE
              SELECTED FIELDS; OTHERWISE EACH HIT IS {"_id", "_source"}
     """
     query = to_data(query)
     is_match = jx.get(query.query.filtered.filter)

     found = []
     for doc_id, doc in self.data.items():
         if is_match(doc):
             found.append({"_id": doc_id, "_source": doc})
     found = list_to_data(found)

     if not query.fields:
         # NO FIELD SELECTION: RETURN THE WHOLE SOURCE DOCUMENTS
         return dict_to_data({"hits": {"total": len(found), "hits": found}})

     hits = []
     for hit in found:
         # SELECT ON A ONE-ROW LIST, THEN TAKE THAT SINGLE ROW BACK OUT
         selected = jx.select([unwrap(hit._source)], query.fields)[0]
         hits.append({"_id": hit._id, "fields": unwrap(selected)})

     return dict_to_data({"hits": {"total": len(found), "hits": hits}})
예제 #6
0
    def __init__(self, name, db=None, uid=UID, kwargs=None):
        """
        BUILD A HANDLE ON THE FACT TABLE name; ANYTHING THAT IS NOT ALREADY A
        Sqlite INSTANCE IS PASSED TO Sqlite() TO MAKE ONE

        :param name: NAME FOR THIS TABLE
        :param db: THE DB TO USE
        :param uid: THE UNIQUE INDEX FOR THIS TABLE
        :param kwargs: NOT READ IN THIS BODY
        :return: HANDLE FOR TABLE IN db
        """
        global _config

        if not isinstance(db, Sqlite):
            db = Sqlite(db)
        self.db = db

        if not _config:
            # REGISTER sqlite AS THE DEFAULT CONTAINER TYPE
            from jx_base.container import config as _config

            if not _config.default:
                _config.default = {"type": "sqlite", "settings": {"db": db}}

        self.facts = Namespace(db=db).create_or_replace_facts(fact_name=name)

        self._next_guid = generateGuid()
        self._next_uid = 1
        self._make_digits_table()
        self.uid_accessor = jx.get(uid)
예제 #7
0
    def __init__(self, es=None, id_info=None):
        """
        KEEP THE es HANDLE AND ID ACCESSORS, AND BUILD self.schema FROM THE
        PROPERTIES es REPORTS (AN EMPTY dict WHEN NO es IS GIVEN)

        :param es: OPTIONAL; es.settings.alias AND es.get_properties() ARE READ
        :param id_info: field AND version ARE READ TO BUILD get_id / get_version
                        NOTE(review): DEREFERENCED EVEN THOUGH THE DEFAULT IS
                        None - PRESUMABLY CALLERS ALWAYS SUPPLY IT; CONFIRM
        """
        self.es = es
        self.id_info = id_info
        self.get_id = jx.get(id_info.field)
        self.get_version = jx.get(id_info.version)

        if not es:
            self.schema = {}
            return

        _schema = Data()
        columns = parse_properties(es.settings.alias, ".", ROOT_PATH, es.get_properties())
        for column in columns:
            # OBJECT AND NESTED COLUMNS BECOME EMPTY CONTAINERS, ALL OTHERS KEEP THE COLUMN
            is_container = column.es_type in (OBJECT, NESTED)
            _schema[column.name] = {} if is_container else column
        self.schema = unwrap(_schema)
예제 #8
0
    def __init__(
        self,
        rollover_field,
        rollover_interval,
        rollover_max,
        schema,
        queue_size=10000,
        batch_size=5000,
        typed=None,
        kwargs=None
    ):
        """
        :param rollover_field: the FIELD with a timestamp to use for determining which index to push to
        :param rollover_interval: duration between roll-over to new index
        :param rollover_max: remove old indexes, do not add old records
        :param schema: es schema
        :param queue_size: number of documents to queue in memory (not read in this body)
        :param batch_size: number of documents to push at once (not read in this body)
        :param typed: indicate if we are expected typed json; must be provided
        :param kwargs: plus additional ES settings; kept as self.settings
        """
        # NOTE(review): "!= None" / "== None" (NOT "is") LOOK DELIBERATE FOR
        # NULL-OBJECT COMPARISON - CONFIRM BEFORE CHANGING
        if kwargs.tjson != None:
            Log.error("not expected")
        if typed == None:
            Log.error("not expected")

        # REQUIRED FOR ACTIVEDATA NESTED QUERIES
        schema.settings.index.max_inner_result_window = 100000

        self.settings = kwargs
        self.locker = Lock("lock for rollover_index")
        self.rollover_field = jx.get(rollover_field)

        # NORMALIZE TO Duration, AND KEEP THE SETTINGS IN STEP WITH THE ATTRIBUTES
        interval = Duration(rollover_interval)
        self.rollover_interval = interval
        self.settings.rollover_interval = interval

        oldest = Duration(rollover_max)
        self.rollover_max = oldest
        self.settings.rollover_max = oldest

        self.known_queues = {}  # MAP DATE TO INDEX
        self.cluster = elasticsearch.Cluster(self.settings)
예제 #9
0
 def search(self, query):
     """
     APPLY query.query.filtered.filter TO EVERY DOCUMENT IN self.data AND
     RETURN THE MATCHES AS {"hits": {"total": count, "hits": [...]}}

     :param query: QUERY; query.query.filtered.filter AND query.fields ARE READ
     :return: WHEN query.fields IS SET, EACH HIT IS {"_id", "fields"} WITH THE
              SELECTED FIELDS; OTHERWISE EACH HIT IS {"_id", "_source"}
     """
     query = wrap(query)
     is_match = jx.get(query.query.filtered.filter)

     found = []
     for doc_id, doc in self.data.items():
         if is_match(doc):
             found.append({"_id": doc_id, "_source": doc})
     found = wrap(found)

     if not query.fields:
         # NO FIELD SELECTION: RETURN THE WHOLE SOURCE DOCUMENTS
         return wrap({"hits": {"total": len(found), "hits": found}})

     hits = []
     for hit in found:
         # SELECT ON A ONE-ROW LIST, THEN TAKE THAT SINGLE ROW BACK OUT
         selected = jx.select([unwrap(hit._source)], query.fields)[0]
         hits.append({"_id": hit._id, "fields": unwrap(selected)})

     return wrap({"hits": {"total": len(found), "hits": hits}})
예제 #10
0
    def update(self, command):
        """
        MODIFY, IN PLACE, EVERY ITEM OF self.data THAT MATCHES THE where CLAUSE

        EXPECTING command == {"set":term, "clear":term, "where":where}
        THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
        THE where CLAUSE IS A JSON EXPRESSION FILTER
        THE clear NAMES ARE SET TO None FIRST, THEN THE set VALUES ARE ASSIGNED

        :param command: THE UPDATE COMMAND DESCRIBED ABOVE
        """
        command = to_data(command)
        names_to_clear = listwrap(command["clear"])
        assignments = command.set.items()
        is_match = jx.get(command.where)

        for doc in self.data:
            if not is_match(doc):
                continue
            for name in names_to_clear:
                doc[name] = None
            for name, value in assignments:
                doc[name] = value
예제 #11
0
    def window(self, window):
        """
        ANNOTATE THIS CUBE WITH A NEW COLUMN COMPUTED FROM A WINDOW EXPRESSION

        A Matrix WITH THE SAME dims AS THE EXISTING COLUMNS IS STORED UNDER
        window.name; EACH CELL IS FILLED BY CALLING THE ACCESSOR BUILT FROM
        window.value ON A ROW MADE OF THE EXISTING COLUMN VALUES AND THE EDGE
        PARTITIONS AT THAT COORDINATE.  THE CUBE MUST ALREADY HOLD AT LEAST
        ONE COLUMN.

        :param window: WINDOW CLAUSE; name, value, edges AND sort ARE READ
        :return: self, AFTER window HAS BEEN APPENDED TO self.select
        :raises NotImplementedError: WHEN window HAS edges OR sort
        """
        if window.edges or window.sort:
            raise NotImplementedError()

        from jx_python import jx

        # SET OP
        # values()[0] FAILS ON PYTHON 3 DICT VIEWS; next(iter()) WORKS ON VIEWS AND LISTS ALIKE
        canonical = next(iter(self.data.values()))
        accessor = jx.get(window.value)
        # SNAPSHOT THE NAMES NOW: A LIVE keys() VIEW WOULD ALSO YIELD THE COLUMN ADDED BELOW
        cnames = list(self.data.keys())

        # ANNOTATE EXISTING CUBE WITH NEW COLUMN
        m = self.data[window.name] = Matrix(dims=canonical.dims)
        for coord in canonical._all_combos():
            # IT IS SAD WE MUST HAVE A Data(), THERE ARE {"script": expression} USING THE DOT NOTATION
            row = Data()
            for k in cnames:
                row[k] = self.data[k][coord]
            for c, e in zip(coord, self.edges):
                row[e.name] = e.domain.partitions[c]
            # DUMMY Null VALUES BECAUSE I DO NOT KNOW WHAT TO DO
            m[coord] = accessor(row, Null, Null)

        self.select.append(window)
        return self
예제 #12
0
파일: doc_store.py 프로젝트: rv404674/TUID
    def _index_columns(self, columns):
        # INDEX ALL COLUMNS, ESPECIALLY THOSE FUNCTION RESULTS
        indexed_values = [None]*len(columns)
        for i, s in enumerate(columns):
            index = self._index.get(s.value, None)
            if index is not None:
                indexed_values[i]=index
                continue

            function_name = value2json(s.value.__data__(), sort_keys=True)
            index = self._index.get(function_name, None)
            indexed_values[i]=index
            if index is not None:
                continue

            indexed_values[i] = index = self._index[function_name] = {}
            accessor = jx.get(s.value)
            for k, ii in self._unique_index.items():
                v = accessor(self._source[ii])
                j = index.get(v)
                if j is None:
                    j = index[v] = set()
                j |= {ii}
        return indexed_values
예제 #13
0
    def _index_columns(self, columns):
        # INDEX ALL COLUMNS, ESPECIALLY THOSE FUNCTION RESULTS
        indexed_values = [None] * len(columns)
        for i, s in enumerate(columns):
            index = self._index.get(s.value, None)
            if index is not None:
                indexed_values[i] = index
                continue

            function_name = value2json(s.value.__data__(), sort_keys=True)
            index = self._index.get(function_name, None)
            indexed_values[i] = index
            if index is not None:
                continue

            indexed_values[i] = index = self._index[function_name] = {}
            accessor = jx.get(s.value)
            for k, ii in self._unique_index.items():
                v = accessor(self._source[ii])
                j = index.get(v)
                if j is None:
                    j = index[v] = set()
                j |= {ii}
        return indexed_values