def _get_new_entries(self, db_feed: RSS, rss_parsed) -> list:
    """
    Collect the entries that appear before the last recorded one.

    Entries are read in feed order and collection stops at the first
    entry whose link equals ``db_feed.last_entry_link``. When anything
    new is found, the link of the feed's first entry is stored on
    ``db_feed`` and the object is saved.

    :param db_feed: MongoEngine RSS feed object
    :param rss_parsed: parsed & checked RSS feed
    :return: list of new entries (empty if the feed has no entries or
        its first entry is the one already recorded)
    """
    feed_entries = rss_parsed.entries
    if not feed_entries:
        return []

    newest_link = feed_entries[0].link
    known_link = db_feed.last_entry_link
    if newest_link == known_link:
        return []

    # Find where the already-seen entry sits; everything before it is new.
    # If it is not in the feed at all, every entry counts as new.
    cutoff = len(feed_entries)
    for position, candidate in enumerate(feed_entries):
        if candidate.link == known_link:
            cutoff = position
            break
    fresh = list(feed_entries[:cutoff])

    db_feed.last_entry_link = newest_link
    db_feed.save()
    return fresh
# Handle a finished RPC for `url`: read its result and, when the HTTP status
# is 200, reconcile the stored RSS entities with the response body.
#
# The body is split on "\r\n"; each non-empty line is split on "$$" and, when
# that yields at least two parts, the first part is taken as the RSS code and
# the second as the RSS URL. When no stored RSS matches both code and URL, a
# stored RSS with the same code (if one is found) has its Contents set to
# status "1" and is then deleted. The collected RSS objects are written with
# db.put, and stored RSS whose code did not appear in the response are
# removed with db.delete.
#
# Any exception raised along the way is caught and logged through
# logging.error together with `url`; nothing is re-raised or returned.
#
# NOTE(review): the indentation of this block has been lost, so the exact
# nesting (for example whether rsslist.append runs for every parsed line or
# only for unmatched ones) must be confirmed against the original file.
# NOTE(review): `except Exception, e` is Python 2-only syntax.
def handle_result(self, rpc, url): try: result = rpc.get_result() if result.status_code == 200: rsslist = [] realrss = set() for rssText in result.content.split("\r\n"): if rssText: rss = RSS() r = rssText.split("$$") if len(r) >= 2: rss.code = r[0] realrss.add(rss.code) rss.rssUrl = r[1] if RSS().all().filter("code =", rss.code).filter("rssUrl =", rss.rssUrl).count() == 0: logging.error(rss.code + "||" + rss.rssUrl) r = RSS().all().filter("code =", rss.code).fetch(1) if r: logging.error(rss.code + "||" + rss.rssUrl) clist = Contents().all().filter("rss =", r[0]) for c in clist: c.status = "1" c.put() db.delete(r) rsslist.append(rss) db.put(rsslist) deleteRss = [] for dbrss in RSS().all(): if dbrss.code not in realrss: deleteRss.append(dbrss) db.delete(deleteRss) except Exception, e: logging.error("0000" + str(e) + url)
def addItem(self, c, item_key):
    """
    Store one feed entry as an RSS entity whose parent is ``item_key``.

    :param c: feed entry providing title, description, published,
        published_parsed, link and content
    :param item_key: key passed as ``parent`` of the new entity
    """
    fields = dict(
        title=c.title,
        description=c.description,
        published=c.published,
        # published_parsed is passed through mktime before being stored
        published_parsed=mktime(c.published_parsed),
        link=c.link,
        # only the 'value' of the first content element is kept
        content=c.content[0]['value'],
        parent=item_key,
    )
    RSS(**fields).put()
def get(self):
    """
    Delete every RSS item whose parent key id equals a fixed id, then
    render 'test.html'.
    """
    target_parent_id = 5704755951173632
    for entity in RSS.query().order(-RSS.published_parsed):
        if entity.key.parent().id() != target_parent_id:
            continue
        entity.key.delete()
    self.render('test.html')
def get(self, feed_id):
    """
    Render 'reader.html' with the RSS items that belong to one feed.

    All RSS items are read in descending ``published_parsed`` order and
    kept when the id of their parent key equals ``int(feed_id)``.

    :param feed_id: id of the parent feed; converted with ``int``
    """
    newest_first = RSS.query().order(-RSS.published_parsed)
    items = [
        entry
        for entry in newest_first
        if entry.key.parent().id() == int(feed_id)
    ]
    self.render('reader.html', items=items)
def get(self):
    """
    Render the detail template for the RSS given by the ``rss`` request
    parameter.

    The template receives, as ``content``, a Contents query filtered to
    that RSS with ``realContentResult`` greater than 0 and not None.
    """
    rss_param = self.request.get("rss")

    # Build the query one filter at a time, in the same filter order.
    query = Contents.all()
    query = query.filter("rss =", RSS.get_by_id(int(rss_param)))
    query = query.filter("realContentResult >", 0)
    query = query.filter("realContentResult !=", None)

    self.render("templates/detailLook.html", {"content": query})
def get(self):
    """
    Render '/index.html' with all feeds and the number of RSS items
    found under each one.

    ``item_count[i]`` is the count for ``feeds[i]``; ``iter_list`` is the
    matching range of indexes handed to the template.
    """
    feeds = RSSinfo.query().fetch()
    item_count = [
        RSS.query(ancestor=ndb.Key(feed.link, feed.url)).count()
        for feed in feeds
    ]
    iter_list = range(len(feeds))
    self.render(
        '/index.html',
        feeds=feeds,
        item_count=item_count,
        iter_list=iter_list,
    )
def get(self):
    """
    Render the analysis template with per-RSS content counts.

    Each RSS gets attributes ``r0`` .. ``r5`` holding the number of its
    Contents whose ``realContentResult`` equals 0 .. 5, and ``r6`` for
    those whose ``realContentResult`` is None.
    """
    # Position in this tuple is the suffix of the attribute (r0 .. r6).
    outcomes = (0, 1, 2, 3, 4, 5, None)

    rsslist = []
    for feed in RSS.all():
        for slot, outcome in enumerate(outcomes):
            tally = (
                Contents.all()
                .filter("rss =", feed)
                .filter("realContentResult =", outcome)
                .count()
            )
            setattr(feed, "r%d" % slot, tally)
        rsslist.append(feed)

    self.render("templates/analysis.html", {"RSSs": rsslist})
def post(self):
    """
    Register a new RSS URL and re-render the index page.

    The ``rssUrl`` request parameter is stripped of surrounding
    whitespace. A new RSS entity is stored only when the value is
    non-empty and no RSS with that URL exists yet. The index template is
    rendered in every case.
    """
    rssUrl = self.request.get("rssUrl").strip()

    # Test the cheap emptiness check first so an empty submission does not
    # trigger a datastore count query.
    if rssUrl and RSS.all().filter("rssUrl =", rssUrl).count() == 0:
        rss = RSS()
        rss.rssUrl = rssUrl
        rss.put()

    self.render("templates/index.html", {"RSSs": RSS.all()})
def get(self):
    """
    Queue every RSS that has no code yet, then start the search.

    The RSS list is taken from memcache under "rsslist"; when that is
    empty it is loaded from the datastore ordered by ``updateTime`` and
    cached for 36000 seconds. Each RSS without a ``code`` is added to
    ``self.urls`` as ``(rss, rss.rssUrl)`` before ``self.searchRSS()``
    is called.
    """
    self.saveRssList = []
    self.saveRssMap = {}
    self.urls = []

    cached = memcache.get("rsslist")
    if not cached:
        cached = [row for row in RSS.all().order("updateTime")]
        memcache.set("rsslist", cached, 36000)

    self.urls.extend(
        (feed, feed.rssUrl) for feed in cached if not feed.code
    )
    self.searchRSS()
def get(self):
    """
    Show one Contents record in the look template.

    With a ``content`` request parameter, that record is rendered with
    ``view`` set to True. Otherwise one record with
    ``realContentResult`` equal to 0 is picked, first from the RSS named
    by the ``rss`` parameter (if given) and then from any RSS. When
    nothing is found the request is redirected to "/".
    """
    request = self.request

    content_id = request.get("content")
    if content_id:
        chosen = Contents.get_by_id(int(content_id))
        self.render("templates/look.html", {"content": chosen, "view": True})
        return

    rss_id = request.get("rss")
    matches = []
    if rss_id:
        matches = (
            Contents.all()
            .filter("realContentResult =", 0)
            .filter("rss =", RSS.get_by_id(int(rss_id)))
            .fetch(1)
        )
    if not matches:
        # No RSS requested, or that RSS has nothing pending: take any record.
        matches = Contents.all().filter("realContentResult =", 0).fetch(1)

    if not matches:
        self.redirect("/")
        return
    self.render("templates/look.html", {"content": matches[0]})
def get(self):
    """
    Fill in article bodies for items still marked 'no content'.

    For every feed whose ``get_full_article`` flag is set, each RSS item
    under that feed with content equal to 'no content' is sent to the
    parser client by link. The returned JSON's 'content' value replaces
    the placeholder and the item is saved. One second is slept after
    each parser request.
    """
    # NOTE(review): the API token is hard-coded in source; consider loading
    # it from configuration instead.
    client = ParserClient(token='64c0f2ae58811bc3d09104e8d22abb3e3b328971')

    for feed in RSSinfo.query():
        if not feed.get_full_article:
            continue
        for item in RSS.query(ancestor=feed.key):
            if item.content != 'no content':
                continue
            parser_response = client.get_article(url=item.link)
            sleep(1)
            article = parser_response.json()
            if 'content' not in article:
                # A response without 'content' used to raise KeyError and
                # abort the whole run; keep the placeholder so the item is
                # picked up again next time.
                continue
            item.content = article['content']
            item.put()
def get(self):
    """
    Poll every registered feed and store entries newer than the latest
    stored item.

    For each feed, the stored RSS item with the highest
    ``published_parsed`` is looked up and the feed URL is parsed with
    feedparser. Every entry is passed to ``self.sanitize_item``; it is
    then added with ``self.addItem`` when the feed has no stored item
    yet or when the entry's ``mktime(published_parsed)`` is greater than
    the stored item's ``published_parsed``. The response status is set
    to 204.
    """
    for feed in RSSinfo.query():
        item_key = feed.key
        last_item = (
            RSS.query(ancestor=item_key)
            .order(-RSS.published_parsed)
            .get()
        )
        parsed = feedparser.parse(feed.url)

        for entry in parsed.entries:
            # Sanitising happens for every entry, whether or not it is stored.
            self.sanitize_item(entry, item_key)
            if (last_item is None
                    or mktime(entry.published_parsed) > last_item.published_parsed):
                self.addItem(entry, item_key)

    self.response.set_status(204)
def get(self):
    """
    Queue the next batch of up to 50 RSS that have a code, then start
    the search.

    The RSS list is taken from memcache under "rsslist"; when that is
    empty it is loaded from the datastore ordered by ``updateTime`` and
    cached for 36000 seconds. The first 50 entries form the current
    batch and are moved to the end of the cached list. Each RSS in the
    batch with a ``code`` is added to ``self.urls`` as
    ``(rss, rss.rssUrl)`` before ``self.searchRSS()`` is called.
    """
    self.saveRssList = []
    self.saveRssMap = {}
    self.urls = []

    queue = memcache.get("rsslist")
    if not queue:
        queue = [row for row in RSS.all().order("updateTime")]
        memcache.set("rsslist", queue, 36000)

    # Rotate: take the head of the list and re-append it at the tail.
    batch = queue[:50]
    rotated = queue[50:] + batch
    memcache.set("rsslist", rotated, 36000)

    self.urls.extend(
        (feed, feed.rssUrl) for feed in batch if feed.code
    )
    self.searchRSS()
# Validate an RSS link supplied by a user and subscribe the user to that feed.
#
# Flow, as far as this block shows:
#   1. Parse `link` with utility.parse_url and return a localized error text
#      when utility.check_source rejects the source, or utility.check_parsed
#      rejects the feed metadata or any single entry.
#   2. Create and save an RSS document with the user in `subscribed`. On
#      errors.NotUniqueError the existing document is loaded with RSS.get_rss,
#      and the 'repeated_rss' text is returned if the user is already listed.
#   3. Append the feed's pk and title to the user's rss_list / session_list
#      and save the user.
#   4. Return the 'empty_feed' text when the feed has no entries; otherwise
#      store the first entry's link as last_entry_link, save, and return the
#      f'{state}b' FSM text formatted with an HTML link to the feed.
#
# `update` and `context` are not referenced in the body.
# NOTE(review): indentation has been lost here, so where the NotUniqueError
# handler ends (in particular whether `meta_info.fetched = True` and the
# save that follows belong to it) needs confirming against the original file.
def rss_compile(update, context, user, link) -> str: """ Handler: fsm:2.1 -> 3 :param user: mongoengine User object :param link: the extracted entity from the message """ news = utility.parse_url(link) language = user.settings.language state = user.settings.fsm_state # check the source for possible errors, such as bozo and format if not utility.check_source(news): return txt['CALLBACK']['error_link'][language] # check the actual feed, i.e.: # stuff like title, subtitle, link and such. checked_feed = utility.check_parsed( news.feed, config['SCRAPE']['RSS']['req_feed_keys']) # implement the checking described above if not checked_feed: return txt['CALLBACK']['error_feed'][language] # all entries must be checked for certain required elements # this must strike a fine balance between universality and enough # information for a good display of the RSS feed checked_all_entries = all([ utility.check_parsed(x, config['SCRAPE']['RSS']['req_entry_keys']) for x in news.entries ]) # implement the checking above if not checked_all_entries: return txt['CALLBACK']['error_entries'][language] # if all the checks have so far been passed, then we create the RSS # feed in our database and register it - unless it already exists for the # user. 
try: db_news = RSS( rss_link=link, link=news.feed.link, title=news.feed.title, subtitle=news.feed.get('subtitle', ''), summary=news.feed.get('summary', ''), ) db_news.subscribed.append(user.user_id) db_news.save() # if an identical RSS feed exists instead of saving, we fetch the existing except errors.NotUniqueError: db_news = RSS.get_rss(rss_link=link) if user.user_id in db_news.subscribed: return txt['CALLBACK']['repeated_rss'][language] db_news.subscribed.append(user.user_id) db_news.meta_info.fetched = True db_news.save() user.subscribed.rss_list.append(db_news.pk) user.subscribed.session_list.append(news.feed.title) user.save() feed_formatted = f"<a href=\"{db_news.link}\">" + \ f"{utility.escape(db_news.title)}</a>" if not len(news.entries) > 0: return txt['CALLBACK']['empty_feed'][language] db_news.last_entry_link = news.entries[0].link db_news.save() # because this function is used both in the setup and post-setup, we assign # a special 'b' sub-state that the user never takes on, but that contains # the buttons required to move on to the next FSM state. return txt['FSM'][f'{state}b']['text'][language].format(feed_formatted)
def get(self):
    """Render the index template with the RSS query passed as ``RSSs``."""
    template_values = {"RSSs": RSS.all()}
    self.render("templates/index.html", template_values)
def count(self, feed):
    """
    Return the number of RSS items found under a feed.

    :param feed: object whose ``link`` and ``url`` are used to build the
        ancestor key
    :return: result of ``count()`` on the ancestor query
    """
    feed_key = ndb.Key(feed.link, feed.url)
    return RSS.query(ancestor=feed_key).count()