import json
import re
from datetime import date as Date
from urllib.parse import quote
from urllib.request import urlopen

import feedparser
from bs4 import BeautifulSoup


def get_feedly_feeds():
    """Search the Feedly API for each topic and store mid-velocity feeds."""
    for topic in all_feeds:
        u = urlopen('http://cloud.feedly.com/v3/search/feeds?query='
                    + quote(topic) + '&count=500&locale=en')
        h = u.read().decode('utf-8')
        j = json.loads(h)  # json.loads no longer accepts an encoding argument
        # Keep feeds that post often enough to matter but not so often that
        # they flood the database.
        results = list(filter(lambda x: 20 < x['velocity'] < 150, j['results']))
        topic_vel = 0
        for r in results:
            feed_url = r['feedId'][5:]    # strip Feedly's leading 'feed/' prefix
            feed_title = r['title'][:100]
            try:
                feed_source = r['website']
            except KeyError:
                continue                  # skip results with no website listed
            topic_vel += r['velocity']
            if topic_vel > 3000:          # cap the combined velocity per topic
                break
            fr = FeedRec(feed_url=feed_url, feed_title=feed_title,
                         feed_source=feed_source)
            fr.save()
        print(topic, len(results), topic_vel)
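# FeedRec and all_feeds are assumed to be defined elsewhere in the project;
# FeedRec behaves like a Django-style model with a .save() method. A minimal,
# runnable stand-in for experimenting with these functions might look like
# this (the class body and storage list are assumptions, but the field names
# match the keyword arguments used throughout):

from dataclasses import dataclass

_saved_feeds = []  # hypothetical in-memory stand-in for the real database


@dataclass
class FeedRec:
    """Hypothetical stand-in for the real ORM model."""
    feed_url: str
    feed_title: str
    feed_source: str = ''  # add_reuters_to_db omits this, so it needs a default

    def save(self):
        _saved_feeds.append(self)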
def add_feeds():
    """Parse each hand-picked feed and store its title, URL, and source."""
    for topic, feeds in all_feeds.items():
        for feed in feeds:
            f = feedparser.parse(feed['url'])
            print(f.feed.title)
            fr = FeedRec(feed_url=feed['url'], feed_title=f.feed.title,
                         feed_source=feed['source'])
            fr.save()
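# all_feeds is the other module-level name assumed above. get_feedly_feeds
# iterates its keys as search topics, while add_feeds reads 'url' and
# 'source' from each entry, so it is presumably a dict shaped like this
# hypothetical example (topics and URLs are placeholders):

all_feeds = {
    'technology': [
        {'url': 'http://example.com/tech.rss', 'source': 'http://example.com'},
    ],
    'science': [
        {'url': 'http://example.org/science.rss', 'source': 'http://example.org'},
    ],
}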
def add_reuters_to_db():
    """Scrape the Reuters RSS index pages and store feeds updated this year."""
    url_list = []
    reuters_urls = ['http://uk.reuters.com/tools/rss',
                    'http://www.reuters.com/tools/rss']
    for url in reuters_urls:
        doc = urlopen(url)
        soup = BeautifulSoup(doc, 'html.parser')  # explicit parser avoids a bs4 warning
        for s in soup.find_all('td', 'feedUrl'):
            # Take everything after the '//' of the displayed feed link.
            m = re.search('(?<=//)(.*)', s.text)
            url_list.append('http://' + m.group(1))
    for u in url_list:
        print(u)
        f = feedparser.parse(u)
        # updated_parsed is a time.struct_time; only keep feeds that have
        # been updated in the current year.
        if not f.feed.updated_parsed.tm_year < Date.today().year:
            fr = FeedRec(feed_url=u, feed_title=f.feed.title)
            fr.save()
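# A quick check of the regex used above: the lookbehind grabs everything
# after the first '//' in the cell text, which presumably normalises both
# 'http://' and 'feed://' style links before 'http://' is prepended
# (the sample URL below is hypothetical):

if __name__ == '__main__':
    sample = 'feed://feeds.example.com/reuters/UKTopNews'
    m = re.search('(?<=//)(.*)', sample)
    print('http://' + m.group(1))  # -> http://feeds.example.com/reuters/UKTopNews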