Beispiel #1
0
	def get(tags=None):
		"""Return the most recent articles, optionally filtered by tag.

		Args:
			tags: Optional comma-separated string of tag names. Names are
				lower-cased and stripped. A name prefixed with '^' excludes
				articles carrying that tag; any other name selects articles
				carrying it. Empty entries are ignored.

		Returns:
			A dict ``{"articles": ...}``. On success the value is the query
			result sorted on "timestamp" with direction -1 and limited to 50
			entries. If anything fails while building or issuing the query,
			the value is a one-element list holding a placeholder "Error"
			article instead.
		"""
		try:
			coll = getCollection("news", "articles")
			query = {}
			if tags:
				print("There are tags!")
				names = [t.strip() for t in tags.lower().split(',')]
				# Plain names are used as-is; only '^name' entries drop
				# their first character (the exclusion marker).
				wanted = [t for t in names if t and not t.startswith('^')]
				banned = [t[1:] for t in names if t.startswith('^') and t[1:]]
				# NOTE(review): assumes a MongoDB-style collection whose
				# documents keep their tags in a "tags" field -- confirm.
				tag_filter = {}
				if wanted:
					tag_filter["$in"] = wanted
				if banned:
					tag_filter["$nin"] = banned
				# Only add the filter when it is non-empty: an empty
				# operator list would make the query invalid.
				if tag_filter:
					query["tags"] = tag_filter
			# Sort/limit stay inside the try: they only exist on the query
			# result, not on the fallback list built below.
			articles = coll.find(query).sort("timestamp", -1).limit(50)
		except Exception as e:
			print("Could not load articles: %s" % (e,))
			articles = [news.createArticle("Error", "Error", "Error", "Error", "Error")]
		return {"articles" : articles}
Beispiel #2
0
def main(poll_interval=120):
    """Poll the news sources forever and store articles not yet in the DB.

    Each round calls every source function, collects the articles they
    return, and inserts into the "news"/"articles" collection every article
    whose "url" is not already present. A failing source is logged and
    skipped; a failure while talking to the database is logged and the
    round is abandoned. The function never returns.

    Args:
        poll_interval: Seconds to sleep between rounds (default 120,
            i.e. two minutes).
    """
    # Imported locally, like the project modules below, so the function
    # does not depend on module-level imports.
    import sys
    import time

    import news
    from db_wrapper import loadCredentials, getCollection

    try:
        creds = loadCredentials()
    except Exception as e:
        # Best effort: carry on without credentials, but leave a trace.
        print("Could not load credentials, continuing without them: %s" % (e,))
        creds = None

    while True:
        try:
            new_articles = []
            source_fns = [news.NYT_mostPopular, news.NPR_news, news.HN_frontPage, news.TNY_news, news.AP_topNews]
            for src in source_fns:
                try:
                    new_articles.extend(src())
                except Exception as e:
                    # One broken source must not stop the others.
                    print("Error with source %s" % (src,))
                    print("\t%s" % (e,))

            coll = getCollection("news", "articles", creds)
            print("number of old articles: %d" % coll.count())

            added = 0
            for a in new_articles:
                print("a.url = %s" % (a["url"],))
                # The URL is the identity of an article: insert only when
                # no stored document has the same one.
                x = coll.find_one({"url": a["url"]})
                if x:
                    print("\t already exists: %d" % x["timestamp"])
                else:
                    print("\t Added to DB")
                    coll.insert(a)
                    added += 1
            print("Added %d new articles" % added)
        except Exception as e:
            print("Failed to save to database, see stderr logs")
            sys.stderr.write("Exception: %s\n" % (e,))
        time.sleep(poll_interval)  # wait before the next polling round
Beispiel #3
0
	def viewtext():
		"""Return the readable text for the article named by the 'url' query parameter.

		Looks the URL up in the "news"/"articles" collection. If the stored
		document already has a non-empty 'html' value, that value is
		returned as-is. Otherwise the text is fetched via ``news.viewtext``,
		HTML-escaped when possible, serialised to a JSON string with the
		keys "title", "body" and "url", stored back on the document
		(best effort) and returned.

		Returns:
			The cached or freshly built JSON string, or the dict
			``{"title": "Error", "body": "Error", "url": "Error"}`` when
			the URL is unknown or anything else goes wrong.
		"""
		url = request.GET.get('url', "ERROR, please try a different link")
		try:
			DBarticles = getCollection("news", "articles")
			match = DBarticles.find_one({"url":url})
			if match:
				# .get: a document without an 'html' field is treated like
				# one with an empty cache, i.e. it gets refetched.
				out = match.get('html')
				print("Cache-hit => %s" % (url,))
				if not out:
					print("\tNope, refetching")
					_, _, body = news.viewtext(url)
					try:
						body = news.html_escape(body) # clean up the body?
					except Exception as e:
						# Keep the unescaped body rather than failing.
						print("!!! Could not HTML escape the body of text")
						print("%s" % (e,))
					out = json.dumps({"title":match['title'], "body":body, "url":url})
					match['html'] = out
					try:
						DBarticles.save(match)
					except Exception as e:
						# Caching is an optimisation only. Saving is known
						# to fail on capped collections (the document
						# grows), and that must not discard the text that
						# was just fetched.
						sys.stderr.write("Could not cache article text: %s\n" % (e,))
				return out
		except Exception as e:
			sys.stderr.write("%s\n" % (e,))

		return {"title":"Error" ,"body":"Error", "url":"Error"}