def get_query(model, info, filters={}, order_by=[], page=None,
              paginate_by=None, total_query=None):
    query = None
    orig_query = copy(model)
    if isinstance(model, Query):
        query = model
        model = query.objects().model
    if isinstance(model, (Model, ModelBase)):
        alias_map = {}
        selections = next(
            field for field in info.field_asts
            if field.name.value == info.field_name).selection_set.selections
        requested_model, requested_joins, requested_fields = get_requested_models(
            model, selections, alias_map)
        if query is None:
            query = requested_model.select(*requested_fields)
        if not requested_fields:
            query._returning = ()
        query = join(query, requested_joins)
        query = filter(query, filters, alias_map)
        query = order(requested_model, query, order_by, alias_map)
        query = paginate(query, page, paginate_by)
        if (page and paginate_by) or get_field_from_selections(selections, 'total'):  # TODO: refactor 'total'
            if total_query:
                total = NodeList([total_query]).alias(TOTAL_FIELD)
            else:
                count = orig_query.select().count()
                count_node = [SQL(f'{count}')]
                total = NodeList(count_node).alias(TOTAL_FIELD)
            query._returning = tuple(query._returning) + (total,)
        if not query._returning:
            query = query.select(SQL('1'))  # bottleneck
        return query
    return model
def Match(columns, expr, modifier=None):
    if isinstance(columns, (list, tuple)):
        match = fn.MATCH(*columns)  # Tuple of one or more columns / fields.
    else:
        match = fn.MATCH(columns)  # Single column / field.
    args = expr if modifier is None else NodeList((expr, SQL(modifier)))
    return NodeList((match, fn.AGAINST(args)))
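# A minimal usage sketch for Match() above. `Post` is a hypothetical
# MySQL-backed peewee model with a FULLTEXT index on (title, body); the
# expression renders roughly as:
#   MATCH("title", "body") AGAINST(? IN BOOLEAN MODE)
query = Post.select().where(
    Match((Post.title, Post.body), '+peewee +fulltext', 'IN BOOLEAN MODE'))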
def match_like(field, search_query):
    """
    Generates a full-text match query using an ILIKE operation, which is
    needed for SQLite and Postgres.
    """
    escaped_query = _escape_wildcard(search_query)
    clause = NodeList(("%" + escaped_query + "%", SQL("ESCAPE '!'")))
    return Field.__pow__(field, clause)
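# Usage sketch for match_like() above. `User` is an assumed model and
# Field.__pow__ is peewee's case-insensitive LIKE operator (`**`), so this
# renders roughly as:
#   "username" ILIKE ? ESCAPE '!'
matches = User.select().where(match_like(User.username, "ali"))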
def get_last1h_devices(self):
    if scanner.last_scan is None:
        self.chat.reply("⚠️ Scanner is not started yet")
    now = datetime.now()
    results = (ScanResult
               .select()
               .where(ScanResult.time > now - timedelta(hours=1))
               .join(Device, JOIN.LEFT_OUTER)
               .join(Person, JOIN.LEFT_OUTER)
               .group_by(ScanResult.mac_addr)
               .having(fn.Max(ScanResult.time) == ScanResult.time)
               .order_by(-ScanResult.time,
                         NodeList((Person.name, SQL('IS NULL'))),
                         Person.name))
    msg_text = "Devices active in the last hour\nAs of %s\n" % now.strftime(
        "%Y.%m.%d %X")
    for k, g in groupby(results, lambda x: x.time):
        age = int((now - k).seconds / 60)
        msg_text += ("\n<b>%s min ago</b>\n" % str(age)) if age > 0 else "\n<b>Now</b>\n"
        for r in g:
            if r.device:
                d = r.device
                msg_text += "• %s (%s) \n" % (d.owner.name if d.owner else "N/A",
                                              d.name or "N/A")
            else:
                msg_text += "• <code>%s</code>\n" % r.mac_addr
    self.chat.reply(
        msg_text,
        parse_mode='HTML',
    )
def prefix_search(field, prefix_query):
    """
    Returns the wildcard match for searching for the given prefix query.
    """
    # Escape the known wildcard characters.
    prefix_query = _escape_wildcard(prefix_query)
    return Field.__pow__(field, NodeList((prefix_query + "%", SQL("ESCAPE '!'"))))
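# _escape_wildcard is not shown in this collection; a plausible sketch,
# consistent with the ESCAPE '!' clauses used above, escapes the LIKE
# metacharacters with '!':
def _escape_wildcard(query):
    # Escape the escape character first, then the '%' and '_' wildcards.
    return query.replace("!", "!!").replace("%", "!%").replace("_", "!_")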
def test_query():
    # '阿努納奇' ("Anunnaki") is a sample tag value matched against the
    # ->>'tag' JSON text extraction.
    exp = NodeList(
        [DailyTrend.metrics, SQL("->>'tag' in ('{}') ".format('阿努納奇'))],
        glue='')
    daily_trends = DailyTrend.select().where(exp)
    print(daily_trends.sql())
    print(len(daily_trends))
def _fts_cmd_sql(cls, cmd, **extra_params):
    tbl = cls._meta.entity
    columns = [tbl]
    values = [cmd]
    for key, value in extra_params.items():
        columns.append(Entity(key))
        values.append(value)
    return NodeList((
        SQL('INSERT INTO'),
        cls._meta.entity,
        EnclosedNodeList(columns),
        SQL('VALUES'),
        EnclosedNodeList(values)))
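# For reference, SQLite's FTS extensions accept commands via "self-INSERTs",
# which is what the NodeList above assembles. For an FTS5 table named
# "docs", _fts_cmd_sql('optimize') would render roughly as:
#   INSERT INTO "docs" ("docs") VALUES ('optimize')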
def _date_operator_compare(date, kwargs, dt_converts=datetime_converts):
    if '{0}_operator'.format(date) in kwargs:
        date_oper = getattr(OP, kwargs['{0}_operator'.format(date)].upper())
    else:
        date_oper = OP.EQ
    if date_oper == OP.BETWEEN:
        date_obj_min = dt_converts(kwargs['{}_0'.format(date)])
        date_obj_max = dt_converts(kwargs['{}_1'.format(date)])
        date_obj = NodeList((date_obj_min, SQL('AND'), date_obj_max))
    else:
        date_obj = dt_converts(kwargs[date])
    return (date_obj, date_oper)
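# Hypothetical call, with kwargs as parsed from a querystring. With the
# 'between' operator the helper returns a "<low> AND <high>" NodeList paired
# with OP.BETWEEN, ready to drop into a peewee Expression (`MyModel` is an
# assumed model):
date_obj, date_oper = _date_operator_compare('created', {
    'created_operator': 'between',
    'created_0': '2020-01-01',
    'created_1': '2020-12-31',
})
expr = Expression(MyModel.created, date_oper, date_obj)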
def parse_time(self, string):
    if string.endswith("h"):
        op = "BETWEEN"
        hour = string[:-1]
        low, high = convert_to_time(hour + ":00"), convert_to_time(hour + ":59")
        rhs = NodeList((low, SQL("AND"), high))
        self.time = low
    else:
        op, rhs = "=", convert_to_time(string)
        self.time = rhs
    self.where_clause_exprs.append(Expression(self.Food.time, op, rhs))
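# The two input forms parse_time() accepts (convert_to_time and self.Food
# are assumed from the surrounding module):
#   parse_time("12h")    -> time BETWEEN 12:00 AND 12:59
#   parse_time("12:30")  -> time = 12:30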
def ddl(self, ctx):
    node_list = super().ddl(ctx)
    ag_auto_gen = SQL("GENERATED ALWAYS" if self.auto_gen_always else "")
    ag_col_type = SQL(self.auto_gen_column_type.upper())
    return NodeList((
        node_list,
        ag_auto_gen,
        SQL("AS"),
        EnclosedNodeList([
            self.auto_gen_expression,
        ]),
        ag_col_type,
    ))
def suggestion(search: str, ratio=0.5, top=20):
    edit = int(len(search) * ratio)
    exp = NodeList([
        SQL("levenshtein("),
        DataPoint.value,
        SQL(", '{}') <= {}".format(search, edit)),
        SQL(" order by levenshtein("),
        DataPoint.value,
        SQL(", '{}')".format(search))
    ], glue='')
    datapoints = DataPoint.select().where(exp)
    tags = []
    if datapoints.exists():
        for datapoint in datapoints[:top]:
            tags.append(datapoint.value)
    return {'tags': tags}
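# A parameterized sketch of the same query. fn.levenshtein builds the same
# Postgres function call, lets peewee bind `search` as a parameter instead of
# interpolating it into the SQL string, and keeps the ORDER BY out of the
# WHERE clause:
lev = fn.levenshtein(DataPoint.value, search)
datapoints = (DataPoint
              .select()
              .where(lev <= edit)
              .order_by(lev))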
def sql(self, column_name=None, is_null=None):
    if is_null is None:
        is_null = self.is_null
    if column_name is None:
        column_name = self.name
    parts = [Entity(column_name), SQL(self.definition)]
    if self.is_unique:
        parts.append(SQL('UNIQUE'))
    if is_null:
        parts.append(SQL('NULL'))
    else:
        parts.append(SQL('NOT NULL'))
    if self.is_pk:
        parts.append(SQL('PRIMARY KEY'))
    if self.extra:
        parts.append(SQL(self.extra))
    return NodeList(parts)
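# For a column with name='email', definition='VARCHAR(255)', is_unique=True
# and is_null=False, the NodeList above renders roughly as:
#   "email" VARCHAR(255) UNIQUE NOT NULL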
def test_model_indexes_complex_columns(self):
    class Taxonomy(TestModel):
        name = CharField()
        name_class = CharField()

        class Meta:
            database = self.database

    name = NodeList((fn.LOWER(Taxonomy.name), SQL('varchar_pattern_ops')))
    index = (Taxonomy
             .index(name, Taxonomy.name_class)
             .where(Taxonomy.name_class == 'scientific name'))
    Taxonomy.add_index(index)

    self.assertIndexes(Taxonomy, [
        ('CREATE INDEX "taxonomy_name_class" ON "taxonomy" ('
         'LOWER("name") varchar_pattern_ops, "name_class") '
         'WHERE ("name_class" = ?)', ['scientific name']),
    ])
def _create_virtual_table(self, safe=True, **options):
    options = self.model.clean_options(
        merge_dict(self.model._meta.options, options))

    # Structure:
    # CREATE VIRTUAL TABLE <model>
    # USING <extension_module>
    # ([prefix_arguments, ...] fields, ... [arguments, ...], [options...])
    ctx = self._create_context()
    ctx.literal('CREATE VIRTUAL TABLE ')
    if safe:
        ctx.literal('IF NOT EXISTS ')
    (ctx
     .sql(self.model)
     .literal(' USING '))

    ext_module = self.model._meta.extension_module
    if isinstance(ext_module, Node):
        return ctx.sql(ext_module)

    ctx.sql(SQL(ext_module)).literal(' ')
    arguments = []
    meta = self.model._meta

    if meta.prefix_arguments:
        arguments.extend([SQL(a) for a in meta.prefix_arguments])

    # Constraints, data-types, foreign and primary keys are all omitted.
    for field in meta.sorted_fields:
        if isinstance(field, RowIDField) or field._hidden:
            continue
        field_def = [Entity(field.column_name)]
        if field.unindexed:
            field_def.append(SQL('UNINDEXED'))
        arguments.append(NodeList(field_def))

    if meta.arguments:
        arguments.extend([SQL(a) for a in meta.arguments])

    if options:
        arguments.extend(self._create_table_option_sql(options))
    return ctx.sql(EnclosedNodeList(arguments))
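# For reference, for a hypothetical FTS5 virtual-table model this method
# emits a statement roughly like:
#   CREATE VIRTUAL TABLE IF NOT EXISTS "document_index"
#   USING fts5 ("title", "content" UNINDEXED, content="document")
# where the trailing content= option comes from the model's Meta options.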
def match_mysql(field, search_query):
    """
    Generates a full-text match query using a Match operation, which is
    needed for MySQL.
    """
    if field.name.find("`") >= 0:  # Just to be safe.
        raise Exception(
            "How did field name '%s' end up containing a backtick?" % field.name)

    # Note: There is a known bug in MySQL (https://bugs.mysql.com/bug.php?id=78485)
    # that causes queries of the form `*` to raise a parsing error. If found,
    # simply filter out.
    search_query = search_query.replace("*", "")

    # Just to be absolutely sure.
    search_query = search_query.replace("'", "")
    search_query = search_query.replace('"', "")
    search_query = search_query.replace("`", "")

    return NodeList((fn.MATCH(SQL("`%s`" % field.name)),
                     fn.AGAINST(SQL("%s", [search_query]))),
                    parens=True)
def conflict_update(self, oc, query):
    action = oc._action.lower() if oc._action else ''
    if action in ('ignore', 'nothing'):
        parts = [SQL('ON CONFLICT')]
        if oc._conflict_target:
            parts.append(EnclosedNodeList([
                Entity(col) if isinstance(col, basestring) else col
                for col in oc._conflict_target]))
        parts.append(SQL('DO NOTHING'))
        return NodeList(parts)
    elif action in ('replace', 'upsert'):
        # No special stuff is necessary, this is just indicated by starting
        # the statement with UPSERT instead of INSERT.
        return
    elif oc._conflict_constraint:
        raise ValueError('CockroachDB does not support the usage of a '
                         'constraint name. Use the column(s) instead.')

    return super(CockroachDatabase, self).conflict_update(oc, query)
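# For reference, with on_conflict(action='ignore', conflict_target=[Model.key])
# the NodeList above renders roughly as:
#   ON CONFLICT ("key") DO NOTHING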
def filter_query_with_subqueries(query, filters):
    """ For queries that do not support joining """
    plain_filters = {}
    subquery_filters = {}
    model = query.model
    filters = prepare_filters(query, filters)
    for key, val in filters.items():
        if is_filter_deep(model, key):
            join_field, join_filter_key = key.split(DELIM, 1)
            subquery_filters.setdefault(join_field, {}).update({join_filter_key: val})
        else:
            plain_filters[key] = val
    query = filter_query(query, plain_filters)
    for key, val in subquery_filters.items():
        field = getattr(model, key)
        rel_model = field.rel_model
        query = query.where(NodeList([
            SQL('EXISTS'),
            rel_model.select(SQL('1')).filter(**val).where(field == rel_model._meta.primary_key)
        ]))
    return query
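# Usage sketch (prepare_filters, is_filter_deep, filter_query, DELIM and the
# Book/Author models are assumed). If DELIM is '__', a deep filter such as
# 'author__name' becomes an EXISTS subquery against the related model,
# roughly:
#   ... WHERE EXISTS (SELECT 1 FROM "author" WHERE ("name" = ?) AND
#                     ("t1"."author_id" = "author"."id"))
filtered = filter_query_with_subqueries(Book.select(), {'author__name': 'Ann'})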
def _update_column(self, table, column_to_update, fn):
    columns = set(column.name.lower()
                  for column in self.database.get_columns(table))
    if column_to_update.lower() not in columns:
        raise ValueError('Column "%s" does not exist on "%s"' %
                         (column_to_update, table))

    # Get the SQL used to create the given table.
    table, create_table = self._get_create_table(table)

    # Get the indexes and SQL to re-create indexes.
    indexes = self.database.get_indexes(table)

    # Find any foreign keys we may need to remove.
    self.database.get_foreign_keys(table)

    # Make sure the create_table does not contain any newlines or tabs,
    # allowing the regex to work correctly.
    create_table = re.sub(r'\s+', ' ', create_table)

    # Parse out the `CREATE TABLE` and column list portions of the query.
    raw_create, raw_columns = self.column_re.search(create_table).groups()

    # Clean up the individual column definitions.
    split_columns = self.column_split_re.findall(raw_columns)
    column_defs = [col.strip() for col in split_columns]

    new_column_defs = []
    new_column_names = []
    original_column_names = []

    for column_def in column_defs:
        column_name, = self.column_name_re.match(column_def).groups()

        if column_name == column_to_update:
            new_column_def = fn(column_name, column_def)
            if new_column_def:
                new_column_defs.append(new_column_def)
                original_column_names.append(column_name)
                column_name, = self.column_name_re.match(
                    new_column_def).groups()
                new_column_names.append(column_name)
        else:
            new_column_defs.append(column_def)
            if not column_name.lower().startswith(('foreign', 'primary')):
                new_column_names.append(column_name)
                original_column_names.append(column_name)

    # Create a mapping of original columns to new columns.
    original_to_new = dict(zip(original_column_names, new_column_names))
    new_column = original_to_new.get(column_to_update)
    fk_filter_fn = lambda column_def: column_def
    if not new_column:
        # Remove any foreign keys associated with this column.
        fk_filter_fn = lambda column_def: None
    elif new_column != column_to_update:
        # Update any foreign keys for this column.
        fk_filter_fn = lambda column_def: self.fk_re.sub(
            'FOREIGN KEY ("%s") ' % new_column,
            column_def)

    cleaned_columns = []
    for column_def in new_column_defs:
        match = self.fk_re.match(column_def)
        if match is not None and match.groups()[0] == column_to_update:
            column_def = fk_filter_fn(column_def)
        if column_def:
            cleaned_columns.append(column_def)

    # Update the name of the new CREATE TABLE query.
    temp_table = table + '__tmp__'
    rgx = re.compile('("?)%s("?)' % table, re.I)
    create = rgx.sub('\\1%s\\2' % temp_table, raw_create)

    # Create the new table.
    columns = ', '.join(cleaned_columns)
    queries = [
        NodeList([SQL('DROP TABLE IF EXISTS'), Entity(temp_table)]),
        SQL('%s (%s)' % (create.strip(), columns))]

    # Populate new table.
    populate_table = NodeList((
        SQL('INSERT INTO'),
        Entity(temp_table),
        EnclosedNodeList([Entity(col) for col in new_column_names]),
        SQL('SELECT'),
        CommaNodeList([Entity(col) for col in original_column_names]),
        SQL('FROM'),
        Entity(table)))
    drop_original = NodeList([SQL('DROP TABLE'), Entity(table)])

    # Drop existing table and rename temp table.
    queries += [
        populate_table,
        drop_original,
        self.rename_table(temp_table, table)]

    # Re-create user-defined indexes. User-defined indexes will have a
    # non-empty SQL attribute.
    for index in filter(lambda idx: idx.sql, indexes):
        if column_to_update not in index.columns:
            queries.append(SQL(index.sql))
        elif new_column:
            sql = self._fix_index(index.sql, column_to_update, new_column)
            if sql is not None:
                queries.append(SQL(sql))

    return queries
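# For reference, the statement sequence this migration helper returns follows
# SQLite's documented table-rewrite recipe:
#   DROP TABLE IF EXISTS "tbl__tmp__"
#   CREATE TABLE "tbl__tmp__" (...rewritten column definitions...)
#   INSERT INTO "tbl__tmp__" (...) SELECT ... FROM "tbl"
#   DROP TABLE "tbl"
#   ALTER TABLE "tbl__tmp__" RENAME TO "tbl"
# followed by CREATE INDEX statements for surviving user-defined indexes.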
def topic_filter(region_id: str,
                 unit: str,
                 search: str = None,
                 start: datetime = None,
                 end: datetime = None,
                 topic_limit=100,
                 sum: bool = False,
                 lw: float = 0,
                 vw: float = 0,
                 cw: float = 0,
                 rw: float = 1,
                 dw: float = 0):
    if unit not in ['week', 'day', 'month', 'year']:
        raise ValueError("Invalid unit value")
    today = datetime.now()
    today = datetime(year=today.year, month=today.month, day=today.day)
    if end is None:
        end = today
    else:
        end = datetime(year=end.year, month=end.month, day=end.day)
    if start is None:
        start = end - relativedelta(days=unit_value[unit] + 2)
    region = Region.get(Region.region_id == region_id)
    result = {
        'id': region.region_id,
        'name': region.name,
        'topic': [],
        'geo': {
            'lat': region.lat,
            'lon': region.lon
        }
    }
    daily_trends = DailyTrend.select().where((DailyTrend.time >= start)
                                             & (DailyTrend.time <= end)
                                             & (DailyTrend.region == region))
    if search is not None and len(search) > 0:
        exp = NodeList([
            SQL("jsonb_message_to_tsvector("),
            DailyTrend.metrics,
            SQL(") @@ '{}'".format(search))
        ], glue='')
        daily_trends = daily_trends.where(exp)
    daily_metrics = []
    for trend in daily_trends:
        stats = []
        for metric in trend.metrics:
            m_ = metric['stats']
            m_['tag'] = metric['tag'].replace('#', '')
            m_['date'] = trend.time
            m_['category'] = metric['category']
            stats.append(m_)
        df = pd.DataFrame(stats)
        # df['date'] = pd.to_datetime(df['date'])
        daily_metrics.append(df)
    if end >= today:
        from cache import LatestTrend
        try:
            trend = LatestTrend.get(LatestTrend.region_id == region_id)
            today_stats = trend.metrics
        except Exception:
            today_stats = []
        stats = []
        for metric in today_stats:
            m_ = metric['stats']
            m_['date'] = today
            m_['tag'] = metric['tag'].replace('#', '')
            if 'category' not in metric:
                m_['category'] = [-1]
            else:
                m_['category'] = metric['category']
            stats.append(m_)
        if len(stats):
            df = pd.DataFrame(stats)
            daily_metrics.append(df)
    if len(daily_metrics) > 0:
        df = pd.concat(daily_metrics, axis=0)
        if search is not None and len(search) > 0:
            df = df.loc[df['tag'].str.contains(search, regex=False)]
        df.set_index('tag')  # note: return value discarded; df is unchanged
        has_col = False
        if 'category' in df.columns:
            df['category'] = [','.join(map(str, l)) for l in df['category']]
            has_col = True
            df = df.groupby(['tag', 'date', 'category']).mean()
        else:
            df = df.groupby(['tag', 'date']).mean()
        df['weight'] = (101 - df['rank']) * rw + (
            (df['view']) * vw + (df['comment']) * cw + (df['like']) * lw -
            (df['dislike'] * dw)) / df['view']
        df['tag'] = list([r[0] for r in df.index])
        # fixed: ISO-8601 directives (was "%Y-%m-%dT%HH:%MM:%SS")
        df['date'] = list(
            [r[1].strftime("%Y-%m-%dT%H:%M:%S") for r in df.index])
        if has_col:
            df['category'] = list([[int(float(l)) for l in r[2].split(',')]
                                   for r in df.index])
        topics = df.to_dict(orient='records')
        result['topic'] = topics
    return result
def trending_topic(region_id,
                   unit: str,
                   search: str = None,
                   start: datetime = None,
                   end: datetime = None,
                   sum: bool = False,
                   topic_limit=100,
                   lw: float = 1,
                   vw: float = 1,
                   cw: float = 1,
                   rw: float = 1,
                   dw: float = 1):
    today = datetime.now()
    today = datetime(year=today.year, month=today.month, day=today.day)
    if end is None:
        end = today
    if start is None:
        start = end - relativedelta(days=unit_value[unit] + 2)
    print(start, end)
    region = Region.get(Region.region_id == region_id)
    result = {
        'id': region.region_id,
        'name': region.name,
        'topic': [],
        'geo': {
            'lat': region.lat,
            'lon': region.lon
        }
    }
    daily_trends = DailyTrend.select().where((DailyTrend.time >= start)
                                             & (DailyTrend.time <= end)
                                             & (DailyTrend.region == region))
    if search is not None and len(search) > 0:
        exp = NodeList([
            SQL("jsonb_message_to_tsvector("),
            DailyTrend.metrics,
            SQL(") @@ '{}'".format(search))
        ], glue='')
        daily_trends = daily_trends.where(exp)
    print('size', len(daily_trends))
    daily_metrics = []
    for trend in daily_trends:
        stats = []
        for metric in trend.metrics:
            m_ = metric['stats']
            m_['tag'] = metric['tag'].replace('#', '')
            m_['date'] = trend.time
            if 'category' not in metric:
                m_['category'] = [-1]
            else:
                m_['category'] = metric['category']
            stats.append(m_)
        df = pd.DataFrame(stats)
        if len(df) > 0:
            daily_metrics.append(df)
    if end >= today:
        from cache import LatestTrend
        try:
            trend = LatestTrend.get(LatestTrend.region_id == region_id)
            today_stats = trend.metrics
        except Exception:
            today_stats = []
        stats = []
        for metric in today_stats:
            m_ = metric['stats']
            m_['tag'] = metric['tag'].replace('#', '')
            m_['date'] = today
            if 'category' not in metric:
                m_['category'] = [-1]
            else:
                m_['category'] = metric['category']
            stats.append(m_)
        if len(stats):
            df = pd.DataFrame(stats)
            if len(df) > 0:
                daily_metrics.append(df)
    print('m size', len(daily_metrics))
    if len(daily_metrics) > 0:
        df = pd.concat(daily_metrics, axis=0)
        if search is not None and len(search) > 0:
            df = df.loc[df['tag'].str.contains(search, regex=False)]
        df.set_index('tag')  # note: return value discarded; df is unchanged
        df = df.drop(columns=["date"])
        if 'category' in df.columns:
            # df['category'] = [','.join(map(str, l)) for l in df['category']]
            # df = df.groupby(['tag', 'category'], as_index=False).mean()
            f2 = lambda x: [z for y in x for z in y]
            f1 = lambda x: ', '.join(x.dropna())
            d = dict.fromkeys(
                df[['tag', 'category']].columns.difference(['tag', 'category']),
                f1)
            d['category'] = f2
            df1 = df.groupby('tag', as_index=False).agg(d)
            df2 = df[['tag', 'rank', 'view', 'comment', 'like',
                      'dislike']].groupby(['tag'], as_index=False).mean()
            df = pd.concat([df1.set_index('tag'), df2.set_index('tag')],
                           axis=1, join='inner').reset_index()
        else:
            df = df.groupby(['tag'], as_index=False).mean()
        df['weight'] = (101 - df['rank']) * rw + (
            (df['view']) * vw + (df['comment']) * cw + (df['like']) * lw -
            (df['dislike'] * dw)) / df['view']
        # df['tag'] = [r[0] for r in df.index]
        # df['category'] = [r[1] for r in df.index]
        topics = df.to_dict(orient='records')
        topics.sort(key=lambda x: x['weight'], reverse=True)
        result['topic'] = []
        for t in topics[:topic_limit]:
            e = {
                'tag': t['tag'],
                'weight': t['weight'],
                'rank': t['rank'],
                'view': t['view'],
                'like': t['like'],
                'dislike': t['dislike'],  # fixed: was t['like']
                'comment': t['comment']
            }
            if 'category' in t:
                e['category'] = list(set(t['category']))
            result['topic'].append(e)
    return result
def searchMetadata(
        id=None,
        extension=None,
        mainDescription=None,
        sourceType=None,
        created=None,
        lastHit=None,
        source=None,
        tags=None,
        tagOr=False,  # default: link tags with AND
        random=False,
        limit=1,
        updateHit=False):
    query = Media.select(Media.id, Media.extension, Media.mainDescription,
                         Media.additionalData, Media.created, Media.lastHit,
                         Media.sourceType,
                         Media.source).where(Media.disabled == False)
    if id is not None:
        query = query.where(Media.id == id)
    if lastHit is not None:
        query = query.where(Media.lastHit > lastHit)
    if created is not None:
        query = query.where(Media.created > created)
    if mainDescription is not None:
        query = query.where(Media.mainDescription == mainDescription)
    if sourceType is not None:
        query = query.where(Media.sourceType == sourceType)
    if source is not None:
        query = query.where(Media.source == source)
    if extension is not None:
        query = query.where(Media.extension == extension)
    if not query.exists():
        return None
    if updateHit:
        media_ids = [media.id for media in query.limit(limit)]
        Media.update({
            Media.lastHit: datetime.datetime.now()
        }).where(Media.id << media_ids).execute()
        # return list(query.limit(limit).dicts())
    queryBackup = query
    if tags is not None:
        idListPerTag = []
        if tagOr:
            query = query.join(
                Description, on=(Description.id == Media.id)).where(
                    NodeList((fn.Match(Description.text),
                              fn.Against(
                                  NodeList((" ".join(tags),
                                            SQL("IN BOOLEAN MODE")))))))
        else:
            query = query.join(
                Description, on=(Description.id == Media.id)).where(
                    NodeList((fn.Match(Description.text),
                              fn.Against(
                                  NodeList((" ".join(["+" + tag for tag in tags]),
                                            SQL("IN BOOLEAN MODE")))))))
        if query.limit(limit).count() < limit:
            query = queryBackup
            query = query.join(
                Description, on=(Description.id == Media.id)).where(
                    NodeList((fn.Match(Description.text),
                              fn.Against(
                                  NodeList((" ".join(tags),
                                            SQL("IN NATURAL LANGUAGE MODE")))))))
    if random:
        _uuid = uuid4()
        queryBackup = query
        query = query.order_by(Media.id).where(Media.id > _uuid)
        if query.limit(limit).count() < limit:
            query = queryBackup.order_by(
                Media.id.desc()).where(Media.id < _uuid)
    return list(query.limit(limit).dicts())
def get_tags(tag: str,
             start: str = None,
             end: str = None,
             unit: str = "day",
             ratio: float = 1,
             top: int = 5):
    if unit not in ['week', 'day', 'month', 'year']:
        return {
            'status': 'error',
            'msg': "unit should be: week, day, month, year"
        }
    if start is not None:
        start = dateparser.parse(str(start))
    # Fixed: only default `end` to now when the caller did not supply one,
    # instead of unconditionally overwriting it.
    if end is None:
        end = datetime.now()
    else:
        end = dateparser.parse(str(end))
    if start is None:
        start = end - relativedelta(days=1000)
    if end is not None and start is not None:
        if not validate_daterange(start, end):
            return {
                'status': 'error',
                'msg': "Invalid daterange, start date must be earlier than end date"
            }
    daily_metrics = []
    edit = int(len(tag) * ratio)
    exp = NodeList([
        SQL("levenshtein("),
        DataPoint.value,
        SQL(", '{}') <= {}".format(tag, edit)),
        SQL(" order by levenshtein("),
        DataPoint.value,
        SQL(", '{}')".format(tag))
    ], glue='')
    datapoints = DataPoint.select().where(exp)
    if datapoints.exists():
        for datapoint in datapoints[:top]:
            datapoint_metrics = []
            for point in datapoint.metrics:
                m = point
                m.pop('tag')
                m['region'] = datapoint.region.region_id
                time = datetime.strptime(m['time'].split(' ')[0], "%Y-%m-%d")
                if start <= time <= end:
                    datapoint_metrics.append(m)
            daily_metrics.append({
                'tag': datapoint.value,
                'data': datapoint_metrics
            })
    return {
        'status': 'ok',
        'date': {
            'start': start.strftime('%Y-%m-%d'),
            'end': end.strftime('%Y-%m-%d')
        },
        'results': daily_metrics
    }
def cast_jsonb(node):
    return NodeList((node, SQL('::jsonb')), glue='')
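# Usage sketch: with glue='' the cast is rendered with no space, so
# cast_jsonb(Value('{"a": 1}')) becomes ?::jsonb with the JSON string bound
# as a parameter (Value is peewee's parameter node).
data = cast_jsonb(Value('{"a": 1}'))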
def ddl_datatype(self, ctx):
    data_type = self.__field.ddl_datatype(ctx)
    return NodeList((data_type, SQL('[]' * self.dimensions)), glue='')
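# For reference, this is how a Postgres array column type is built: an
# ArrayField wrapping an IntegerField with dimensions=2 yields the datatype
# INTEGER[][].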
def add_unique(self, table, *column_names):
    constraint_name = 'uniq_%s' % '_'.join(column_names)
    constraint = NodeList((
        SQL('UNIQUE'),
        EnclosedNodeList([Entity(column) for column in column_names])))
    return self.add_constraint(table, constraint_name, constraint)
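# Usage sketch with playhouse.migrate (migrator construction assumed):
#   migrate(migrator.add_unique('person', 'first_name', 'last_name'))
# which renders roughly as:
#   ALTER TABLE "person" ADD CONSTRAINT "uniq_first_name_last_name"
#       UNIQUE ("first_name", "last_name")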
def test_date_time_math_mysql(self):
    nl = NodeList((SQL('INTERVAL'), Schedule.interval, SQL('SECOND')))
    next_occurrence = fn.date_add(Task.last_run, nl)
    self._do_test_date_time_math(next_occurrence)
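# For reference, the NodeList above supplies MySQL's interval syntax, so the
# expression renders roughly as:
#   date_add(`task`.`last_run`, INTERVAL `schedule`.`interval` SECOND)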
def ddl(self, ctx):
    node_list = super(AutoIncrementField, self).ddl(ctx)
    return NodeList((node_list, SQL('AUTOINCREMENT')))