def TechCrunch(latest):
    logging.info("TECH CRUNCH CALLED")
    url = "http://feeds.feedburner.com/TechCrunch/"
    feed = feedparser.parse(url)
    for item in feed["items"]:
        dt = standardize("%a, %d %b %Y %H:%M:%S +0000", item["published"])
        ts = makeTimestamp(dt)
        if int(ts) > int(latest):  # there are new articles
            sum_article = SummarizeUrl(item["links"][0]["href"])
            full_article = grab_link(item["links"][0]["href"]).cleaned_text
            # Only store the article if the summary has at least 3 sentences.
            if len(sum_article) >= 3:
                ArticleModel(title=item["title"],
                             author=item["author"],
                             published=str(dt),
                             published_timestamp=int(ts),
                             image_url=str(item["media_content"][0]["url"]),
                             publication="TechCrunch",
                             summarized_article=sum_article,
                             full_article=full_article,
                             upvoters=[],
                             upvotes=0).put()
        else:
            # There are no new articles; the feed is newest-first, so stop here.
            return
def TheVerge(latest):
    url = "http://www.theverge.com/rss/index.xml"
    feed = feedparser.parse(url)
    dt = 0
    for item in feed["items"]:
        # The Verge's UTC offset is sometimes zero-padded ("-04:00") and
        # sometimes not ("-4:00"), so try both format variants.
        try:
            dt = standardize("%Y-%m-%d %H:%M:%S-04:00", item["published"].replace("T", " "))
        except ValueError:
            dt = standardize("%Y-%m-%d %H:%M:%S-4:00", item["published"].replace("T", " "))
        ts = makeTimestamp(dt)
        if int(ts) > int(latest):  # there are new articles
            sum_article = SummarizeUrl(item["links"][0]["href"])
            full_article = grab_link(item["links"][0]["href"]).cleaned_text
            if len(sum_article) >= 3:
                # Take the third whitespace-separated token of the embedded
                # HTML (the src="..." attribute) and strip 'src=' and quotes.
                image_url = str(item["content"][0]["value"].split()[2][4:].replace('"', ""))
                ArticleModel(title=item["title"],
                             author=item["author"],
                             published=str(dt),
                             published_timestamp=int(ts),
                             image_url=image_url,
                             publication="TheVerge",
                             summarized_article=sum_article,
                             full_article=full_article,
                             upvoters=[],
                             upvotes=0).put()
                logging.info(item["title"] + " from The Verge has been stored")
        else:
            # there are no new articles
            return
def VentureBeat(latest):
    logging.info("VENTURE BEAT CALLED")
    url = "http://feeds.venturebeat.com/VentureBeat"
    feed = feedparser.parse(url)
    for item in feed["items"]:
        dt = standardize("%a, %d %b %Y %H:%M:%S GMT", item["published"])
        ts = makeTimestamp(dt)
        if int(ts) > int(latest):  # there are new articles
            sum_article = SummarizeUrl(item["links"][0]["href"])
            full_article = grab_link(item["links"][0]["href"]).cleaned_text
            logging.info(full_article)
            if len(sum_article) >= 3:
                ArticleModel(title=item["title"],
                             author=item["author"],
                             published=str(dt),
                             published_timestamp=int(ts),
                             image_url=str(item["links"][1]["href"].replace("resize", "")),
                             publication="VentureBeat",
                             summarized_article=sum_article,
                             full_article=full_article,
                             upvoters=[],
                             upvotes=0).put()
                logging.info(item["title"] + " from VentureBeat has been stored")
        else:
            # there are no new articles
            return
def FastCompany(latest):
    logging.info("FAST COMPANY CALLED")
    url = "http://feeds.feedburner.com/fastcompany/headlines"
    feed = feedparser.parse(url)
    for item in feed["items"]:
        dt = standardize("%a, %d %b %Y %H:%M:%S GMT", item["published"])
        ts = makeTimestamp(dt)
        if int(ts) > int(latest):  # there are new articles
            sum_article = SummarizeUrl(item["links"][0]["href"])
            full_article = grab_link(item["links"][0]["href"]).cleaned_text
            logging.info(full_article)
            if len(sum_article) >= 3:
                ArticleModel(title=item["title"],
                             author=item["author"],
                             published=str(dt),
                             published_timestamp=int(ts),
                             image_url=str(item["media_content"][0]["url"]),
                             publication="FastCompany",
                             summarized_article=sum_article,
                             full_article=full_article,
                             upvoters=[],
                             upvotes=0).put()
                logging.info(item["title"] + " from FastCompany has been stored")
        else:
            # there are no new articles
            return
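# The four fetchers above lean on two date helpers, `standardize` and
# `makeTimestamp`, that are not shown in this section. A minimal sketch of what
# they might look like, assuming `standardize` wraps strptime with the caller's
# format and `makeTimestamp` returns a Unix epoch integer (the names match the
# calls above; the bodies are assumptions, not confirmed by this code):

import calendar
from datetime import datetime

def standardize(fmt, published):
    # strptime matches offset text like "-04:00" literally, which is why
    # TheVerge above has to try two format variants.
    return datetime.strptime(published, fmt)

def makeTimestamp(dt):
    # Seconds since the Unix epoch, treating dt as UTC.
    return calendar.timegm(dt.timetuple())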
def tease():
    if request.form is None or 'url' not in request.form or request.form['url'] == '':
        return jsonify({'status': 'error', 'message': 'Please enter a valid URL.'}), 200
    article = pyteaser.grab_link(request.form['url'])
    if article is None or article.cleaned_text == "":
        return jsonify({'status': 'error', 'message': "Sorry, I can't summarize that website. :("}), 200
    entry = {}
    entry['slug'] = os.urandom(3).encode('hex')  # short random slug (Python 2 hex codec)
    entry['title'] = str(article.title.encode('utf-8', 'ignore'))
    text = str(article.cleaned_text.encode('utf-8', 'ignore'))
    entry['summary'] = pyteaser.Summarize(entry['title'], text)
    db.article.insert(entry)
    return jsonify({'status': 'redirect', 'message': url_for('article', slug=entry['slug'])}), 200
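# `tease` reads request.form, so it is presumably registered as a POST form
# route on a Flask app. A hypothetical client call using requests (the route
# path and port are assumptions, not from the original):

import requests

def call_tease(article_url):
    # POST the target URL as form data, exactly as tease() expects.
    resp = requests.post('http://localhost:5000/tease', data={'url': article_url})
    return resp.json()  # {'status': ..., 'message': ...}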
def on_get(self, req, resp):
    """Handles GET requests"""
    try:
        article = grab_link(req.get_param('url'))
    except IOError:
        print 'IOError'
        return None
    if not (article and article.cleaned_text and article.title):
        return None
    summaries = Summarize(unicode(article.title), unicode(article.cleaned_text))
    body = " ".join(summaries)
    resp.status = falcon.HTTP_200  # This is the default status
    resp.body = json.dumps({'title': article.title, 'body': body}, encoding='utf-8')
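# The responder above is written for Falcon, so it presumably sits on a
# resource class routed through a falcon.API instance. A minimal wiring sketch,
# with the class name and route path assumed (not from the original):

import falcon

class SummarizeResource(object):
    pass  # on_get(self, req, resp), as defined above, goes here

api = falcon.API()
api.add_route('/summarize', SummarizeResource())
# A client would then call: GET /summarize?url=http://example.com/story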
def streaming_get_article_text(input_dict, widget, stream=None):
    import pyteaser
    # grab_link returns an Article object; expose only its cleaned text.
    article = pyteaser.grab_link(input_dict['url'])
    output_dict = {}
    output_dict['text'] = article.cleaned_text
    return output_dict
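# A quick local check of the widget function above (the URL is an example;
# `widget` is unused by the function, so None is fine):

if __name__ == '__main__':
    out = streaming_get_article_text({'url': 'http://example.com/story'}, None)
    print out['text'][:200]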