def _index_columns(self, columns):
    """
    Return one value->row-id index per column, building and caching any
    index that does not exist yet (especially for function-valued columns).
    """
    result = [None] * len(columns)
    for pos, col in enumerate(columns):
        # FAST PATH: COLUMN VALUE ITSELF IS ALREADY AN INDEX KEY
        existing = self._index.get(col.value, None)
        if existing is not None:
            result[pos] = existing
            continue

        # FUNCTION-VALUED COLUMNS ARE KEYED BY THEIR CANONICAL JSON FORM
        expr_key = convert.value2json(col.value.to_dict(), sort_keys=True)
        existing = self._index.get(expr_key, None)
        result[pos] = existing
        if existing is not None:
            continue

        # NOT INDEXED YET: EVALUATE THE EXPRESSION OVER EVERY KNOWN ROW
        fresh = self._index[expr_key] = {}
        result[pos] = fresh
        getter = jx.get(col.value)
        for _, row_id in self._unique_index.items():
            val = getter(self._source[row_id])
            fresh.setdefault(val, set()).add(row_id)
    return result
def window(self, window):
    """
    ANNOTATE THIS CUBE WITH A NEW COLUMN COMPUTED BY window.value

    :param window: window clause; edges and sort are not supported here
    :return: self, with self.data[window.name] filled and window appended to self.select
    """
    if window.edges or window.sort:
        Log.error("not implemented")

    from pyLibrary.queries import jx

    # SET OP
    # list() REQUIRED: ON PYTHON 3, dict.values() IS A VIEW AND values()[0] RAISES TypeError
    canonical = list(self.data.values())[0]
    accessor = jx.get(window.value)
    cnames = self.data.keys()

    # ANNOTATE EXISTING CUBE WITH NEW COLUMN
    m = self.data[window.name] = Matrix(dims=canonical.dims)
    for coord in canonical._all_combos():
        row = Dict()  # IT IS SAD WE MUST HAVE A Dict(), THERE ARE {"script": expression} USING THE DOT NOTATION
        for k in cnames:
            row[k] = self.data[k][coord]
        for c, e in zip(coord, self.edges):
            row[e.name] = e.domain.partitions[c]

        m[coord] = accessor(row, Null, Null)  # DUMMY Null VALUES BECAUSE I DO NOT KNOW WHAT TO DO

    self.select.append(window)
    return self
def __init__(self, rollover_field, rollover_interval, rollover_max, queue_size=10000, batch_size=5000, kwargs=None):
    """
    :param rollover_field: the FIELD with a timestamp to use for determining which index to push to
    :param rollover_interval: duration between roll-over to new index
    :param rollover_max: remove old indexes, do not add old records
    :param queue_size: number of documents to queue in memory
    :param batch_size: number of documents to push at once
    :param kwargs: plus additional ES settings
    :return:
    """
    self.settings = kwargs
    self.locker = Lock("lock for rollover_index")
    # COMPILE THE TIMESTAMP ACCESSOR ONCE
    self.rollover_field = jx.get(rollover_field)
    # NORMALIZE DURATIONS, AND WRITE THEM BACK ONTO THE SETTINGS
    self.rollover_interval = self.settings.rollover_interval = Duration(kwargs.rollover_interval)
    self.rollover_max = self.settings.rollover_max = Duration(kwargs.rollover_max)
    self.known_queues = {}  # MAP DATE TO INDEX
    self.cluster = elasticsearch.Cluster(self.settings)
def search(self, query):
    """
    Emulate an ES filtered search over the in-memory documents;
    returns wrapped hits, optionally projected to query.fields.
    """
    query = wrap(query)
    predicate = jx.get(query.query.filtered.filter)
    hits = wrap([
        {"_id": doc_id, "_source": doc}
        for doc_id, doc in self.data.items()
        if predicate(doc)
    ])
    if not query.fields:
        return wrap({"hits": {"total": len(hits), "hits": hits}})

    # PROJECT EACH HIT DOWN TO THE REQUESTED FIELDS
    projected = [
        {"_id": h._id, "fields": unwrap(jx.select([unwrap(h._source)], query.fields)[0])}
        for h in hits
    ]
    return wrap({"hits": {"total": len(hits), "hits": projected}})
def __init__(self, name, db=None, uid=UID_PREFIX+"id", exists=False):
    """
    :param name: NAME FOR THIS TABLE
    :param db: THE DB TO USE
    :param uid: THE UNIQUE INDEX FOR THIS TABLE
    :return: HANDLE FOR TABLE IN db
    """
    Container.__init__(self, frum=None)
    if db:
        self.db = db
    else:
        self.db = db = Sqlite()
    self.name = name
    self.uid = listwrap(uid)
    self.columns = {}
    # ITERATE THE NORMALIZED self.uid, NOT THE RAW uid PARAMETER: THE DEFAULT
    # uid IS A STRING, AND ITERATING A STRING YIELDS SINGLE CHARACTERS
    for u in self.uid:
        if not self.columns.get(u, None):
            cs = self.columns[u] = set()
        if u.startswith(UID_PREFIX):
            cs.add(Column(name=u, table=name, type="integer", es_column=typed_column(u, "integer"), es_index=name))
        else:
            cs.add(Column(name=u, table=name, type="text", es_column=typed_column(u, "text"), es_index=name))
    self.uid_accessor = jx.get(self.uid)
    self.nested_tables = {}  # MAP FROM TABLE NAME TO Table OBJECT

    if exists:
        # LOAD THE COLUMNS
        command = "PRAGMA table_info(" + quote_table(name) + ")"
        details = self.db.query(command)
        self.columns = {}
        for r in details:
            cname = untyped_column(r[1])
            ctype = r[2].lower()
            column = Column(
                name=cname,
                table=name,
                type=ctype,
                es_column=typed_column(cname, ctype),
                es_index=name
            )
            # KEY BY COLUMN NAME (cname), NOT THE TABLE NAME: KEYING BY TABLE
            # NAME COLLAPSED EVERY COLUMN INTO A SINGLE SET
            cs = self.columns.get(cname, Null)
            if not cs:
                cs = self.columns[cname] = set()
            cs.add(column)
    else:
        command = (
            "CREATE TABLE " + quote_table(name) + "(" +
            (",".join(_quote_column(c) + " " + c.type for u, cs in self.columns.items() for c in cs)) +
            ", PRIMARY KEY (" +
            (", ".join(_quote_column(c) for u in self.uid for c in self.columns[u])) +
            "))"
        )
        self.db.execute(command)
def __init__(self, rollover_field, rollover_interval, rollover_max, queue_size=10000, batch_size=5000, settings=None):
    """
    :param rollover_field: the FIELD with a timestamp to use for determining which index to push to
    :param rollover_interval: duration between roll-over to new index
    :param rollover_max: remove old indexes, do not add old records
    :param queue_size: number of documents to queue in memory
    :param batch_size: number of documents to push at once
    :param settings: plus additional ES settings
    :return:
    """
    self.settings = settings
    # COMPILE THE TIMESTAMP ACCESSOR ONCE
    self.rollover_field = jx.get(rollover_field)
    # NORMALIZE DURATIONS, AND WRITE THEM BACK ONTO THE SETTINGS
    self.rollover_interval = self.settings.rollover_interval = Duration(settings.rollover_interval)
    self.rollover_max = self.settings.rollover_max = Duration(settings.rollover_max)
    self.known_queues = {}  # MAP DATE TO INDEX
    self.cluster = elasticsearch.Cluster(self.settings)
def update(self, command):
    """
    EXPECTING command == {"set":term, "clear":term, "where":where}
    THE set CLAUSE IS A DICT MAPPING NAMES TO VALUES
    THE where CLAUSE IS A JSON EXPRESSION FILTER
    """
    command = wrap(command)
    to_clear = listwrap(command["clear"])
    to_set = command.set.items()
    matches = jx.get(command.where)

    for row in self.data:
        if not matches(row):
            continue
        # CLEAR FIRST, THEN ASSIGN, MATCHING THE COMMAND SEMANTICS
        for field in to_clear:
            row[field] = None
        for field, value in to_set:
            row[field] = value
def window(self, window):
    """
    ANNOTATE THIS CUBE WITH A NEW COLUMN COMPUTED BY window.value

    :param window: window clause; edges and sort are not supported here
    :return: self, with self.data[window.name] filled and window appended to self.select
    """
    if window.edges or window.sort:
        Log.error("not implemented")

    from pyLibrary.queries import jx

    # SET OP
    # list() REQUIRED: ON PYTHON 3, dict.values() IS A VIEW AND values()[0] RAISES TypeError
    canonical = list(self.data.values())[0]
    accessor = jx.get(window.value)
    cnames = self.data.keys()

    # ANNOTATE EXISTING CUBE WITH NEW COLUMN
    m = self.data[window.name] = Matrix(dims=canonical.dims)
    for coord in canonical._all_combos():
        row = Data()  # IT IS SAD WE MUST HAVE A Data(), THERE ARE {"script": expression} USING THE DOT NOTATION
        for k in cnames:
            row[k] = self.data[k][coord]
        for c, e in zip(coord, self.edges):
            row[e.name] = e.domain.partitions[c]

        m[coord] = accessor(row, Null, Null)  # DUMMY Null VALUES BECAUSE I DO NOT KNOW WHAT TO DO

    self.select.append(window)
    return self
def search(self, query):
    """
    Emulate an ES filtered search over the in-memory documents;
    returns wrapped hits, optionally projected to query.fields.
    """
    query = wrap(query)
    predicate = jx.get(query.query.filtered.filter)
    matched = [
        {"_id": doc_id, "_source": doc}
        for doc_id, doc in self.data.items()
        if predicate(doc)
    ]
    hits = wrap(matched)

    if not query.fields:
        return wrap({"hits": {"total": len(hits), "hits": hits}})

    # PROJECT EACH HIT DOWN TO THE REQUESTED FIELDS
    projected = []
    for h in hits:
        fields = unwrap(jx.select([unwrap(h._source)], query.fields)[0])
        projected.append({"_id": h._id, "fields": fields})
    return wrap({"hits": {"total": len(hits), "hits": projected}})
def _index_values(self, columns):
    """
    Return one value->row-id index per column, building and caching any
    index that does not exist yet (especially for function-valued columns).
    """
    out = [None] * len(columns)
    for i, col in enumerate(columns):
        # FAST PATH: COLUMN VALUE ITSELF IS ALREADY AN INDEX KEY
        cached = self._index.get(col.value, None)
        if cached is not None:
            out[i] = cached
            continue

        # FUNCTION-VALUED COLUMNS ARE KEYED BY THEIR CANONICAL JSON FORM
        key = convert.value2json(col.value.to_dict(), sort_keys=True)
        cached = self._index.get(key, None)
        out[i] = cached
        if cached is not None:
            continue

        # NOT INDEXED YET: EVALUATE THE EXPRESSION OVER EVERY KNOWN ROW
        built = self._index[key] = {}
        out[i] = built
        getter = jx.get(col.value)
        for _, row_id in self._unique_index.items():
            built.setdefault(getter(self._source[row_id]), set()).add(row_id)
    return out
def __init__(self, name, db=None, uid=GUID, exists=False, kwargs=None):
    """
    :param name: NAME FOR THIS TABLE
    :param db: THE DB TO USE
    :param uid: THE UNIQUE INDEX FOR THIS TABLE
    :return: HANDLE FOR TABLE IN db
    """
    global _config
    Container.__init__(self, frum=None)
    if db:
        self.db = db
    else:
        self.db = db = Sqlite()

    if not _config:
        # REGISTER sqlite AS THE DEFAULT CONTAINER TYPE, FIRST TIME ONLY
        from pyLibrary.queries.containers import config as _config
        if not _config.default:
            _config.default = {"type": "sqlite", "settings": {"db": db}}

    self.name = name
    self.uid = listwrap(uid)
    self._next_uid = 1
    self._make_digits_table()

    self.uid_accessor = jx.get(self.uid)
    self.nested_tables = OrderedDict()  # MAP FROM NESTED PATH TO Table OBJECT, PARENTS PROCEED CHILDREN
    self.nested_tables["."] = self
    self.columns = Index(keys=[join_field(["names", self.name])])  # MAP FROM DOCUMENT ABS PROPERTY NAME TO THE SET OF SQL COLUMNS IT REPRESENTS (ONE FOR EACH REALIZED DATATYPE)

    if not exists:
        for u in self.uid:
            if u == GUID:
                pass  # GUID COLUMN IS IMPLICIT (quoted_UID BELOW)
            else:
                c = Column(names={name: u}, type="string", es_column=typed_column(u, "string"), es_index=name)
                # NOTE(review): this branch registers into nested_tables while the
                # exists-branch registers into self.columns — confirm intended
                self.add_column_to_schema(self.nested_tables, c)

        command = (
            "CREATE TABLE " + quote_table(name) + "(" +
            (",".join(
                [quoted_UID + " INTEGER"] +
                [_quote_column(c) + " " + sql_types[c.type] for u, cs in self.columns.items() for c in cs]
            )) +
            ", PRIMARY KEY (" +
            (", ".join(
                [quoted_UID] +
                [_quote_column(c) for u in self.uid for c in self.columns[u]]
            )) +
            "))"
        )
        self.db.execute(command)
    else:
        # LOAD THE COLUMNS
        command = "PRAGMA table_info(" + quote_table(name) + ")"
        details = self.db.query(command)
        for r in details:
            cname = untyped_column(r[1])
            ctype = r[2].lower()
            column = Column(
                names={name: cname},
                type=ctype,
                nested_path=['.'],
                es_column=typed_column(cname, ctype),
                es_index=name
            )
            self.add_column_to_schema(self.columns, column)
def __init__(self, name, db=None, uid=UID_PREFIX + "id", exists=False):
    """
    :param name: NAME FOR THIS TABLE
    :param db: THE DB TO USE
    :param uid: THE UNIQUE INDEX FOR THIS TABLE
    :return: HANDLE FOR TABLE IN db
    """
    Container.__init__(self, frum=None)
    if db:
        self.db = db
    else:
        self.db = db = Sqlite()
    self.name = name
    self.uid = listwrap(uid)
    self.columns = {}
    # ITERATE THE NORMALIZED self.uid, NOT THE RAW uid PARAMETER: THE DEFAULT
    # uid IS A STRING, AND ITERATING A STRING YIELDS SINGLE CHARACTERS
    for u in self.uid:
        if not self.columns.get(u, None):
            cs = self.columns[u] = set()
        if u.startswith(UID_PREFIX):
            cs.add(
                Column(name=u, table=name, type="integer", es_column=typed_column(u, "integer"), es_index=name))
        else:
            cs.add(
                Column(name=u, table=name, type="text", es_column=typed_column(u, "text"), es_index=name))
    self.uid_accessor = jx.get(self.uid)
    self.nested_tables = {}  # MAP FROM TABLE NAME TO Table OBJECT

    if exists:
        # LOAD THE COLUMNS
        command = "PRAGMA table_info(" + quote_table(name) + ")"
        details = self.db.query(command)
        self.columns = {}
        for r in details:
            cname = untyped_column(r[1])
            ctype = r[2].lower()
            column = Column(name=cname, table=name, type=ctype, es_column=typed_column(cname, ctype), es_index=name)
            # KEY BY COLUMN NAME (cname), NOT THE TABLE NAME: KEYING BY TABLE
            # NAME COLLAPSED EVERY COLUMN INTO A SINGLE SET
            cs = self.columns.get(cname, Null)
            if not cs:
                cs = self.columns[cname] = set()
            cs.add(column)
    else:
        command = (
            "CREATE TABLE " + quote_table(name) + "(" +
            (",".join(_quote_column(c) + " " + c.type for u, cs in self.columns.items() for c in cs)) +
            ", PRIMARY KEY (" +
            (", ".join(_quote_column(c) for u in self.uid for c in self.columns[u])) +
            "))"
        )
        self.db.execute(command)