def testParse(self):
    """DbQuery should decompose a SELECT into parts/fields, round-trip via str(),
    and accept extra WHERE conditions appended through .wheres."""
    raw_sql = """
        SELECT
         'comment' AS type,
         date_added, post.title AS title,
         keyvalue.value || ': ' || comment.body AS body,
         '?Post:' || comment.post_id || '#Comments' AS url
        FROM
         comment
         LEFT JOIN json USING (json_id)
         LEFT JOIN json AS json_content ON (json_content.directory = json.directory AND json_content.file_name='content.json')
         LEFT JOIN keyvalue ON (keyvalue.json_id = json_content.json_id AND key = 'cert_user_id')
         LEFT JOIN post ON (comment.post_id = post.post_id)
        WHERE
         post.date_added > 123
        ORDER BY
         date_added DESC
        LIMIT 20
    """
    parsed = DbQuery(raw_sql)

    # Individual clauses and select fields are extracted
    assert parsed.parts["LIMIT"] == "20"
    assert parsed.fields["body"] == "keyvalue.value || ': ' || comment.body"

    # Re-serializing yields the same query modulo whitespace
    strip_ws = lambda text: re.sub("[ \r\n]", "", text)
    assert strip_ws(str(parsed)) == strip_ws(raw_sql)

    # Conditions added to .wheres appear in the rebuilt query
    parsed.wheres.append("body LIKE '%hello%'")
    assert "body LIKE '%hello%'" in str(parsed)
def actionFeedQuery(self, to, limit=10, day_limit=3):
    """Collect newsfeed rows from every site the user follows.

    Each followed site contributes one or more feed queries; every query is
    optionally restricted to the last `day_limit` days, run against the site's
    database, and the resulting rows are merged and returned.

    :param to: WebSocket message id to respond to
    :param limit: maximum rows returned per feed query
    :param day_limit: only include items newer than this many days (falsy disables)
    :return: response dict with rows, per-feed stats, row/site counts and timing
    """
    from Site import SiteManager
    rows = []
    stats = []
    total_s = time.time()
    num_sites = 0
    for address, site_data in list(self.user.sites.items()):
        feeds = site_data.get("follow")
        if not feeds:
            continue
        if type(feeds) is not dict:
            self.log.debug("Invalid feed for site %s" % address)
            continue
        num_sites += 1
        for name, query_set in feeds.items():
            site = SiteManager.site_manager.get(address)
            if not site or not site.storage.has_db:
                continue
            s = time.time()
            try:
                query_raw, params = query_set
                # Apply the day limit to every UNION member separately
                query_parts = re.split(r"UNION(?:\s+ALL|)", query_raw)
                for i, query_part in enumerate(query_parts):
                    db_query = DbQuery(query_part)
                    if day_limit:
                        where = " WHERE %s > strftime('%%s', 'now', '-%s day')" % (
                            db_query.fields.get("date_added", "date_added"), day_limit
                        )
                        if "WHERE" in query_part:
                            # Wrap the existing condition so the date limit ANDs with it
                            query_part = re.sub(
                                "WHERE (.*?)(?=$| GROUP BY)",
                                where + " AND (\\1)", query_part
                            )
                        else:
                            query_part += where
                    query_parts[i] = query_part
                query = " UNION ".join(query_parts)
                if ":params" in query:
                    # Inline user-supplied parameters, SQL-quoted
                    query_params = map(helper.sqlquote, params)
                    query = query.replace(":params", ",".join(query_params))
                res = site.storage.query(
                    query + " ORDER BY date_added DESC LIMIT %s" % limit
                )
            except Exception as err:
                # Log error
                self.log.error("%s feed query %s error: %s" % (
                    address, name, Debug.formatException(err)
                ))
                stats.append({"site": site.address, "feed_name": name, "error": str(err)})
                continue
            for row in res:
                row = dict(row)
                # Check membership BEFORE indexing: the previous version accessed
                # row["date_added"] first, raising KeyError for rows without the column.
                if "date_added" not in row or not isinstance(row["date_added"], (int, float, complex)):
                    self.log.debug("Invalid date_added from site %s: %r" % (
                        address, row.get("date_added")
                    ))
                    continue
                if row["date_added"] > 1000000000000:  # Formatted as milliseconds
                    row["date_added"] = row["date_added"] / 1000
                if row["date_added"] > time.time() + 120:
                    # (fixed doubled "from from" in the original log message)
                    self.log.debug("Newsfeed item from the future from site %s" % address)
                    continue  # Feed item is in the future, skip it
                row["site"] = address
                row["feed_name"] = name
                rows.append(row)
            stats.append({
                "site": site.address,
                "feed_name": name,
                "taken": round(time.time() - s, 3)
            })
            time.sleep(0.001)  # Yield to other greenlets between feed queries
    return self.response(to, {
        "rows": rows,
        "stats": stats,
        "num": len(rows),
        "sites": num_sites,
        "taken": round(time.time() - total_s, 3)
    })
def actionFeedSearch(self, to, search, limit=30, day_limit=30):
    """Full-text search across the feeds of every known site (ADMIN only).

    :param to: WebSocket message id to respond to
    :param search: search string; may contain filters parsed by self.parseSearch
    :param limit: maximum rows per feed query
    :param day_limit: only include items newer than this many days (falsy disables)
    :return: response dict with matching rows, per-feed stats and timing
    """
    if "ADMIN" not in self.site.settings["permissions"]:
        return self.response(to, "FeedSearch not allowed")

    from Site import SiteManager
    rows = []
    stats = []
    num_sites = 0
    total_s = time.time()
    search_text, filters = self.parseSearch(search)
    for address, site in SiteManager.site_manager.list().items():
        if not site.storage.has_db:
            continue
        if "site" in filters:
            # Default missing title to "" — .get("title").lower() used to raise
            # AttributeError for sites without a title in content.json.
            site_title = site.content_manager.contents["content.json"].get("title") or ""
            if filters["site"].lower() not in [site.address, site_title.lower()]:
                continue
        if site.storage.db:  # Database already loaded
            feeds = site.storage.db.schema.get("feeds")
        else:
            try:
                feeds = site.storage.loadJson("dbschema.json").get("feeds")
            except Exception:  # Narrowed from bare except: schema missing or invalid
                continue
        if not feeds:
            continue
        num_sites += 1
        for name, query in feeds.items():
            s = time.time()
            try:
                db_query = DbQuery(query)
                params = []
                # Filters
                if search_text:
                    db_query.wheres.append("(%s LIKE ? OR %s LIKE ?)" % (
                        db_query.fields["body"], db_query.fields["title"]
                    ))
                    search_like = "%" + search_text.replace(" ", "%") + "%"
                    params.append(search_like)
                    params.append(search_like)
                if filters.get("type") and filters["type"] not in query:
                    continue
                if day_limit:
                    db_query.wheres.append(
                        "%s > strftime('%%s', 'now', '-%s day')" % (
                            db_query.fields.get("date_added", "date_added"), day_limit
                        )
                    )
                # Order
                db_query.parts["ORDER BY"] = "date_added DESC"
                db_query.parts["LIMIT"] = str(limit)
                res = site.storage.query(str(db_query), params)
            except Exception as err:
                self.log.error("%s feed query %s error: %s" % (
                    address, name, Debug.formatException(err)
                ))
                stats.append({
                    "site": site.address,
                    "feed_name": name,
                    "error": str(err),
                    "query": query
                })
                continue
            for row in res:
                row = dict(row)
                # row.get avoids KeyError when the feed query has no date_added column
                if not row.get("date_added") or row["date_added"] > time.time() + 120:
                    continue  # Feed item is in the future, skip it
                row["site"] = address
                row["feed_name"] = name
                rows.append(row)
            stats.append({
                "site": site.address,
                "feed_name": name,
                "taken": round(time.time() - s, 3)
            })
    return self.response(to, {
        "rows": rows,
        "num": len(rows),
        "sites": num_sites,
        "taken": round(time.time() - total_s, 3),
        "stats": stats
    })