Example #1
from urllib.parse import quote
from urllib.request import urlopen
import json


def get_feedly_feeds():
    for topic in all_feeds:
        # Query the Feedly search API for feeds matching the topic;
        # quote() keeps multi-word topics from breaking the URL.
        u = urlopen('http://cloud.feedly.com/v3/search/feeds?query=' +
                    quote(topic) + '&count=500&locale=en')
        h = u.read().decode('utf-8')
        j = json.loads(h)  # json.loads() no longer accepts an encoding kwarg

        # Keep only feeds with a moderate posting rate.
        results = list(filter(lambda x: 20 < x['velocity'] < 150,
                              j['results']))

        topic_vel = 0

        for r in results:
            feed_url = r['feedId'][5:]  # strip the leading 'feed/' prefix
            feed_title = r['title'][:100]
            try:
                feed_source = r['website']
            except KeyError:
                continue  # skip results without a source website

            # Cap the accumulated velocity per topic; the feed that
            # crosses the cap is itself dropped.
            topic_vel += r['velocity']
            if topic_vel > 3000:
                break
            fr = FeedRec(feed_url=feed_url,
                         feed_title=feed_title,
                         feed_source=feed_source)
            fr.save()
        print(topic, len(results), topic_vel)
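
The keys this snippet reads (feedId, title, website, velocity) imply that each entry of j['results'] is shaped roughly like the sketch below; the values are invented for illustration, not taken from a real Feedly response.

# Hypothetical j['results'] entry, shaped after the keys the code accesses.
{
    'feedId': 'feed/http://example.com/rss.xml',  # 'feed/' prefix is sliced off
    'title': 'Example Tech Blog',
    'website': 'http://example.com',              # may be absent, hence the KeyError guard
    'velocity': 42.0                              # posting rate used by the filter
}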
Example #3
import feedparser


def add_feeds():
    for topic, feeds in all_feeds.items():
        for feed in feeds:
            # Fetch each curated feed once to pick up its canonical title.
            f = feedparser.parse(feed['url'])
            if 'title' not in f.feed:
                continue  # skip feeds that failed to parse
            print(f.feed.title)
            fr = FeedRec(feed_url=feed['url'],
                         feed_title=f.feed.title,
                         feed_source=feed['source'])
            fr.save()
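
Both functions iterate over an all_feeds mapping that is defined elsewhere. From the way it is used (keys doubling as Feedly search topics, values as lists of curated feed dicts), it presumably looks something like this sketch; the topics and URLs are placeholders, not values from the original project.

# Hypothetical all_feeds mapping, inferred from how the snippets use it.
all_feeds = {
    'python': [
        {'url': 'http://example.com/python.rss', 'source': 'http://example.com'},
    ],
    'data science': [
        {'url': 'http://example.org/ds.rss', 'source': 'http://example.org'},
    ],
}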
Example #5
import re
from datetime import date
from urllib import request

import feedparser
from bs4 import BeautifulSoup


def add_reuters_to_db():

    url_list = []
    reuters_urls = [
        'http://uk.reuters.com/tools/rss', 'http://www.reuters.com/tools/rss'
    ]

    # Collect the feed URLs from both Reuters RSS directory pages.
    for url in reuters_urls:

        doc = request.urlopen(url)

        soup = BeautifulSoup(doc, 'html.parser')  # explicit parser avoids a bs4 warning

        for s in soup.find_all('td', 'feedUrl'):
            # Drop the scheme so a plain http:// URL can be rebuilt.
            m = re.search('(?<=//)(.*)', s.text)
            if m:
                url_list.append('http://' + m.groups()[0])

    # Parse the collected feeds once, after both pages have been scraped,
    # so no feed is processed twice.
    for u in url_list:
        print(u)
        f = feedparser.parse(u)
        # Keep only feeds that have been updated during the current year.
        updated = f.feed.get('updated_parsed')
        if updated and updated.tm_year >= date.today().year:
            fr = FeedRec(feed_url=u, feed_title=f.feed.title)
            fr.save()
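
All three snippets save rows through a FeedRec model whose definition is not shown. Judging from the keyword arguments and the .save() calls, it is presumably a Django model along the lines of the sketch below; the field types and lengths are guesses (the [:100] slice on titles hints at a 100-character limit), not the project's actual schema.

# Hypothetical Django model matching the fields the snippets pass in.
from django.db import models


class FeedRec(models.Model):
    feed_url = models.URLField()
    feed_title = models.CharField(max_length=100)  # titles are sliced to 100 chars
    feed_source = models.URLField(blank=True)      # add_reuters_to_db omits this field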