Ejemplo n.º 1
0
def _get_new_entries(self, db_feed: RSS, rss_parsed) -> list:
    """
    Collect the feed entries that come before the last recorded entry.

    Entries are walked in feed order and gathered until the entry whose link
    equals ``db_feed.last_entry_link`` is reached. When the first entry of
    the feed differs from the recorded one, its link is stored on ``db_feed``
    and the document is saved.

    :param db_feed: MongoEngine RSS feed object
    :param rss_parsed: parsed & checked RSS feed
    :return: list of new entries; empty when the feed has no entries or its
        first entry is the one already recorded
    """
    feed_entries = rss_parsed.entries
    if not feed_entries:
        return []

    newest_link = feed_entries[0].link
    known_link = db_feed.last_entry_link
    if newest_link == known_link:
        return []

    fresh = []
    for candidate in feed_entries:
        # Everything from the recorded entry onwards has been seen before.
        if candidate.link == known_link:
            break
        fresh.append(candidate)

    # Remember the first entry of the feed as the new reference point.
    db_feed.last_entry_link = newest_link
    db_feed.save()

    return fresh
Ejemplo n.º 2
0
 def handle_result(self, rpc, url):
     """Synchronise the stored RSS entities with the list returned by an RPC.

     The response body is read as CRLF-separated lines, each split on "$$"
     into a code and an RSS URL. Lines that are empty or have fewer than two
     fields are skipped. For every code/URL pair not yet stored:

     * if an RSS entity with the same code exists, the Contents entities
       referencing it get ``status = "1"`` and that entity is deleted;
     * a new RSS entity for the pair is queued and stored in one batch.

     Afterwards every stored RSS entity whose code did not appear in the
     response is deleted. Nothing happens unless the status code is 200.
     Any exception is logged together with ``url`` and not re-raised.

     :param rpc: RPC object whose ``get_result()`` yields the response
     :param url: URL the RPC was issued for; only used in the error log
     """
     try:
         result = rpc.get_result()
         if result.status_code == 200:
             rsslist = []
             realrss = set()
             for rssText in result.content.split("\r\n"):
                 if not rssText:
                     continue
                 fields = rssText.split("$$")
                 if len(fields) < 2:
                     continue
                 rss = RSS()
                 rss.code = fields[0]
                 realrss.add(rss.code)
                 rss.rssUrl = fields[1]
                 # Query through the class; no throwaway entity is needed.
                 if RSS.all().filter("code =", rss.code).filter("rssUrl =", rss.rssUrl).count() == 0:
                     logging.error(rss.code + "||" + rss.rssUrl)
                     stale = RSS.all().filter("code =", rss.code).fetch(1)
                     if stale:
                         logging.error(rss.code + "||" + rss.rssUrl)
                         # Flag the contents of the entity being replaced
                         # before removing the entity itself.
                         for c in Contents.all().filter("rss =", stale[0]):
                             c.status = "1"
                             c.put()
                         db.delete(stale)
                     rsslist.append(rss)
             db.put(rsslist)
             # Drop every stored feed whose code is missing from the response.
             deleteRss = [dbrss for dbrss in RSS.all() if dbrss.code not in realrss]
             db.delete(deleteRss)
     except Exception as e:
         # "as" form is valid on Python 2.6+ and Python 3 alike.
         logging.error("0000" + str(e) + url)
Ejemplo n.º 3
0
 def addItem(self, c, item_key):
     """Store one feed entry as an RSS entity whose parent is ``item_key``.

     :param c: feed entry providing ``title``, ``description``, ``published``,
         ``published_parsed``, ``link`` and ``content``
     :param item_key: key passed as the parent of the new entity
     """
     # published_parsed is converted with mktime; only the 'value' of the
     # first element of the entry's content list is stored.
     RSS(
         title=c.title,
         description=c.description,
         published=c.published,
         published_parsed=mktime(c.published_parsed),
         link=c.link,
         content=c.content[0]['value'],
         parent=item_key,
     ).put()
Ejemplo n.º 4
0
 def get(self):
     """Delete every RSS item stored under one fixed parent id, then render.

     All RSS items are iterated (newest ``published_parsed`` first) and those
     whose parent key id equals the hard-coded id below are deleted. Items
     without a parent key are skipped. Finally 'test.html' is rendered.
     """
     # NOTE(review): hard-coded parent id; presumably one specific feed
     # being cleaned up by hand - confirm before reusing this handler.
     target_parent_id = 5704755951173632
     q = RSS.query().order(-RSS.published_parsed)
     for item in q:
         parent = item.key.parent()
         # key.parent() is None for a root entity; skip instead of crashing
         # part-way through the sweep.
         if parent is not None and parent.id() == target_parent_id:
             item.key.delete()
     self.render('test.html')
Ejemplo n.º 5
0
 def get(self, feed_id):
     """Render the reader page with the items that belong to one feed.

     :param feed_id: id of the parent feed; converted with ``int`` before it
         is compared with each item's parent key id
     """
     newest_first = RSS.query().order(-RSS.published_parsed)
     # Keep only the items whose parent key id matches the requested feed.
     items = [
         entry for entry in newest_first
         if entry.key.parent().id() == int(feed_id)
     ]
     self.render('reader.html', items=items)
Ejemplo n.º 6
0
 def get(self):
     """Render the detail page for the feed named by the 'rss' parameter.

     The template receives a Contents query restricted to that feed and to
     rows whose ``realContentResult`` is greater than 0 and not None.
     """
     rssid = self.request.get("rss")
     matching = Contents.all()
     matching = matching.filter("rss =", RSS.get_by_id(int(rssid)))
     matching = matching.filter("realContentResult >", 0)
     matching = matching.filter("realContentResult !=", None)
     self.render("templates/detailLook.html", {"content": matching})
Ejemplo n.º 7
0
 def get(self):
     """Render the index page with every feed and its stored item count."""
     feeds = RSSinfo.query().fetch()
     # One count per feed, in the same order as ``feeds``.
     item_count = [
         RSS.query(ancestor=ndb.Key(feed.link, feed.url)).count()
         for feed in feeds
     ]
     # Index sequence handed to the template alongside the two lists.
     iter_list = range(len(feeds))
     self.render('/index.html', feeds=feeds, item_count=item_count, iter_list=iter_list)
Ejemplo n.º 8
0
 def get(self):
     """Render the analysis page with per-feed counts of content results.

     Each RSS entity gets attributes ``r0`` .. ``r5`` holding the number of
     its Contents rows whose ``realContentResult`` equals 0 .. 5, and ``r6``
     holding the number whose ``realContentResult`` is None.
     """
     # Position N in this tuple is the value counted into attribute rN.
     result_values = (0, 1, 2, 3, 4, 5, None)
     rsslist = []
     for feed in RSS.all():
         for suffix, value in enumerate(result_values):
             total = Contents.all().filter("rss =", feed).filter("realContentResult =", value).count()
             setattr(feed, "r%d" % suffix, total)
         rsslist.append(feed)
     self.render("templates/analysis.html", {"RSSs": rsslist})
Ejemplo n.º 9
0
 def post(self):
     """Store the submitted RSS URL if it is new, then render the index page.

     The 'rssUrl' request parameter is stripped of surrounding whitespace.
     A new RSS entity is stored only when the value is non-empty and no
     stored entity already has that ``rssUrl``.
     """
     rssUrl = self.request.get("rssUrl").strip()
     # Test the cheap emptiness check first so that an empty submission does
     # not cost a datastore query.
     if rssUrl and RSS.all().filter("rssUrl =", rssUrl).count() == 0:
         rss = RSS()
         rss.rssUrl = rssUrl
         rss.put()
     self.render("templates/index.html", {"RSSs": RSS.all()})
Ejemplo n.º 10
0
    def get(self):
        """Queue every feed that has no code and start ``searchRSS``.

        The feed list is read from memcache under "rsslist"; when that is
        missing or empty it is loaded ordered by ``updateTime`` and cached
        with an expiry argument of 36000.
        """
        self.saveRssList = []
        self.saveRssMap = {}
        self.urls = []
        rsslist = memcache.get("rsslist")
        if not rsslist:
            rsslist = list(RSS.all().order("updateTime"))
            memcache.set("rsslist", rsslist, 36000)
        # Only feeds whose code is empty/unset are queued.
        self.urls.extend((rss, rss.rssUrl) for rss in rsslist if not rss.code)

        self.searchRSS()
Ejemplo n.º 11
0
 def get(self):
     """Render the look page for a chosen or pending Contents row.

     With a 'content' parameter that row is rendered in view mode. Otherwise
     one row with ``realContentResult == 0`` is picked, first from the feed
     named by the 'rss' parameter (if given) and then from any feed. When
     no such row exists the request is redirected to "/".
     """
     cid = self.request.get("content")
     if cid:
         chosen = Contents.get_by_id(int(cid))
         self.render("templates/look.html", {"content": chosen, "view": True})
         return

     rssid = self.request.get("rss")
     pending = None
     if rssid:
         pending = Contents.all().filter("realContentResult =", 0).filter("rss =", RSS.get_by_id(int(rssid))).fetch(1)
     if not pending:
         # No feed given, or that feed has nothing pending: take any row.
         pending = Contents.all().filter("realContentResult =", 0).fetch(1)

     if not pending:
         self.redirect("/")
         return
     self.render("templates/look.html", {"content": pending[0]})
Ejemplo n.º 12
0
 def get(self):
     """Fill in article content for stored items that hold the placeholder.

     For every feed whose ``get_full_article`` flag is set, each stored item
     whose content equals 'no content' is requested through the parser
     client and saved with the 'content' field of the JSON response.
     """
     # NOTE(review): the API token is hard-coded in source; consider loading
     # it from configuration so it is not committed with the code.
     client = ParserClient(token='64c0f2ae58811bc3d09104e8d22abb3e3b328971')
     for feed in RSSinfo.query():
         if not feed.get_full_article:
             continue
         for item in RSS.query(ancestor=feed.key):
             if item.content != 'no content':
                 continue
             parser_response = client.get_article(url=item.link)
             # One-second pause after each request; presumably throttling
             # calls to the parser service - confirm.
             sleep(1)
             article = parser_response.json()
             item.content = article['content']
             item.put()
Ejemplo n.º 13
0
 def get(self):
     """Fetch every feed and store the entries newer than the latest item.

     For each feed the stored item with the greatest ``published_parsed`` is
     looked up once, the feed URL is parsed, and every parsed entry is
     passed to ``sanitize_item``. An entry is stored with ``addItem`` when
     the feed has no stored item yet or when the entry's publication time
     (via ``mktime``) is greater than that item's ``published_parsed``.
     The response status is set to 204.
     """
     for feed in RSSinfo.query():
         item_key = feed.key
         last_item = RSS.query(ancestor=item_key).order(-RSS.published_parsed).get()
         d = feedparser.parse(feed.url)
         for c in d.entries:
             # Sanitising happens for every entry, stored or not.
             self.sanitize_item(c, item_key)
             if last_item is None or mktime(c.published_parsed) > last_item.published_parsed:
                 self.addItem(c, item_key)
     self.response.set_status(204)
Ejemplo n.º 14
0
    def get(self):
        """Queue the next batch of up to 50 coded feeds and start ``searchRSS``.

        The feed list is read from memcache under "rsslist", or loaded
        ordered by ``updateTime`` when that is missing or empty. The first
        50 feeds form the current batch; the list is rotated so the batch
        moves to the end and is written back to memcache with an expiry
        argument of 36000. Only batch feeds with a code are queued.
        """
        self.saveRssList = []
        self.saveRssMap = {}
        self.urls = []
        rsslist = memcache.get("rsslist")
        if not rsslist:
            # No separate cache write here: the rotated list is stored just
            # below, which would overwrite it immediately anyway.
            rsslist = list(RSS.all().order("updateTime"))
        ls = rsslist[:50]
        rsslist = rsslist[50:] + ls
        memcache.set("rsslist", rsslist, 36000)
        self.urls.extend((rss, rss.rssUrl) for rss in ls if rss.code)

        self.searchRSS()
Ejemplo n.º 15
0
def rss_compile(update, context, user, link) -> str:
    """
    Handler: fsm:2.1 -> 3

    Validates the feed behind ``link``, registers it for ``user`` and returns
    the text to send back.

    :param update: not used by this function
    :param context: not used by this function
    :param user: mongoengine User object
    :param link: the extracted entity from the message
    :return: localized text: one of the ``txt['CALLBACK']`` messages
        (``error_link``, ``error_feed``, ``error_entries``, ``repeated_rss``,
        ``empty_feed``) or the text of the ``{state}b`` FSM sub-state
        formatted with an HTML link to the feed
    """
    news = utility.parse_url(link)
    language = user.settings.language
    state = user.settings.fsm_state

    # check the source for possible errors, such as bozo and format
    if not utility.check_source(news):
        return txt['CALLBACK']['error_link'][language]

    # check the actual feed, i.e.:
    # stuff like title, subtitle, link and such.
    checked_feed = utility.check_parsed(
        news.feed, config['SCRAPE']['RSS']['req_feed_keys'])
    if not checked_feed:
        return txt['CALLBACK']['error_feed'][language]

    # all entries must be checked for certain required elements
    # this must strike a fine balance between universality and enough
    # information for a good display of the RSS feed
    # (a generator lets all() stop at the first entry that fails)
    req_entry_keys = config['SCRAPE']['RSS']['req_entry_keys']
    checked_all_entries = all(
        utility.check_parsed(x, req_entry_keys) for x in news.entries
    )
    if not checked_all_entries:
        return txt['CALLBACK']['error_entries'][language]

    # if all the checks have so far been passed, then we create the RSS
    # feed in our database and register it - unless it already exists for the
    # user.
    try:
        db_news = RSS(
            rss_link=link,
            link=news.feed.link,
            title=news.feed.title,
            subtitle=news.feed.get('subtitle', ''),
            summary=news.feed.get('summary', ''),
        )
        db_news.subscribed.append(user.user_id)
        db_news.save()

    # if an identical RSS feed exists instead of saving, we fetch the existing
    except errors.NotUniqueError:
        db_news = RSS.get_rss(rss_link=link)

        if user.user_id in db_news.subscribed:
            return txt['CALLBACK']['repeated_rss'][language]

        db_news.subscribed.append(user.user_id)
        db_news.meta_info.fetched = True
        db_news.save()

    user.subscribed.rss_list.append(db_news.pk)
    user.subscribed.session_list.append(news.feed.title)
    user.save()

    # NOTE(review): the subscription above is saved even when the feed turns
    # out to be empty here - confirm that this is intended.
    if not news.entries:
        return txt['CALLBACK']['empty_feed'][language]

    db_news.last_entry_link = news.entries[0].link
    db_news.save()

    # only the title is passed through utility.escape; the link is inserted
    # into the href as-is
    feed_formatted = (
        f'<a href="{db_news.link}">{utility.escape(db_news.title)}</a>'
    )

    # because this function is used both in the setup and post-setup, we assign
    # a special 'b' sub-state that the user never takes on, but that contains
    # the buttons required to move on to the next FSM state.
    return txt['FSM'][f'{state}b']['text'][language].format(feed_formatted)
Ejemplo n.º 16
0
 def get(self):
     """Render the index page with the query over all RSS entities."""
     all_feeds = RSS.all()
     self.render("templates/index.html", {"RSSs": all_feeds})
Ejemplo n.º 17
0
 def count(self, feed):
     """Return how many RSS entities are stored under the feed's key.

     :param feed: object whose ``link`` and ``url`` are passed to ``ndb.Key``
         to build the ancestor key
     :return: result of ``count()`` on the ancestor query
     """
     ancestor_key = ndb.Key(feed.link, feed.url)
     return RSS.query(ancestor=ancestor_key).count()