Example #1
    def initialize(self):
        # initialize URL cache
        # use the SHA1 of channel cid + rss_url as key
        cache_key = hashlib.sha1(self.channel_community.cid)
        cache_key.update(self.rss_url)
        cache_key_str = hexlify(cache_key.digest())
        self._logger.debug(u"using key %s for channel %s, rss %s",
                           cache_key_str, hexlify(self.channel_community.cid),
                           self.rss_url)

        url_cache_name = u"rss_cache_%s.txt" % cache_key_str
        url_cache_path = os.path.join(self.session.config.get_state_dir(),
                                      url_cache_name)
        self._url_cache = SimpleCache(url_cache_path)
        self._url_cache.load()

        # schedule the scraping task
        self.register_task(u"rss_scrape",
                           reactor.callLater(2, self._task_scrape))

        # subscribe to channel torrent creation
        self.session.notifier.add_observer(
            self.on_channel_torrent_created,
            SIGNAL_CHANNEL_COMMUNITY, [SIGNAL_ON_TORRENT_UPDATED],
            self.channel_community.get_channel_id())

        # notify that an RSS feed has been created
        rss_feed_data = {
            u'channel': self.channel_community,
            u'rss_feed_url': self.rss_url
        }
        self.session.notifier.notify(SIGNAL_RSS_FEED, SIGNAL_ON_UPDATED, None,
                                     rss_feed_data)
        self.running = True
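
SimpleCache itself never appears in these examples. Judging from how it is used here (constructed with a file path, then load(), add(url), and save()), a minimal stand-in might look like the sketch below; this is inferred from usage, not Tribler's actual implementation, and the has() method is an assumption about how the parser skips already-seen URLs.

import os

class SimpleCache(object):
    # Minimal stand-in inferred from usage; the real Tribler class may differ.
    def __init__(self, file_path):
        self._file_path = file_path
        self._items = set()

    def load(self):
        # restore previously persisted URLs, one per line
        if os.path.exists(self._file_path):
            with open(self._file_path, 'r') as cache_file:
                self._items = set(line.strip() for line in cache_file)

    def add(self, url):
        self._items.add(url)

    def has(self, url):
        # assumed helper: lets callers skip URLs that were already processed
        return url in self._items

    def save(self):
        with open(self._file_path, 'w') as cache_file:
            cache_file.write('\n'.join(self._items))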
Example #2
    def test_parse_rss_feed(self):
        self.channel_rss.rss_url = os.path.join(TESTS_DATA_DIR, 'test_rss.xml')
        self.channel_rss._url_cache = SimpleCache(
            os.path.join(self.session_base_dir, 'cache.txt'))
        dl = self.channel_rss.parse_feed()
        self.assertIsInstance(dl, DeferredList)
        yield dl
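
The trailing yield dl only works when the test method runs as a Twisted generator-based test. A sketch of the presumed surrounding declaration, assuming Twisted trial and inlineCallbacks (the base class is a placeholder; Tribler's test suite uses its own base class):

from twisted.internet.defer import inlineCallbacks
from twisted.trial.unittest import TestCase

class TestChannelRss(TestCase):  # placeholder base class
    @inlineCallbacks  # makes `yield dl` wait for the DeferredList to fire
    def test_parse_rss_feed(self):
        # body as in Example #2
        dl = self.channel_rss.parse_feed()
        yield dl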
Example #3
    def test_parse(self):
        parser = RSSFeedParser()
        for rss_item in parser.parse(os.path.join(TESTS_DATA_DIR, 'test_rss.xml'),
                                     SimpleCache(os.path.join(self.session_base_dir, 'cache.txt'))):
            self.assertEqual(len(rss_item['thumbnail_list']), 1)
            self.assertEqual(rss_item['title'], "ubuntu-15.04-desktop-amd64.iso")
            self.assertEqual(rss_item['description'], '')
Example #4
    def test_parse_rss_feed(self):
        prepare_xml_rss(self.session_base_dir, 'test_rss.xml')
        self.channel_rss.rss_url = os.path.join(self.session_base_dir, 'test_rss.xml')
        self.channel_rss._url_cache = SimpleCache(os.path.join(self.session_base_dir, 'cache.txt'))
        dl = self.channel_rss.parse_feed()
        self.assertIsInstance(dl, DeferredList)
        return dl
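
prepare_xml_rss is not shown in any of the examples. Since the test then reads test_rss.xml from session_base_dir, it presumably copies the fixture into that directory; a minimal sketch under that assumption, with TESTS_DATA_DIR as in the other examples:

import os
import shutil

def prepare_xml_rss(target_dir, rss_filename):
    # Assumption: copy the RSS fixture from the shared test-data directory
    # into the per-test directory so the test works on its own copy.
    shutil.copyfile(os.path.join(TESTS_DATA_DIR, rss_filename),
                    os.path.join(target_dir, rss_filename))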
Example #5
    def test_parse(self):
        test_rss_file = os.path.join(TESTS_DATA_DIR, 'test_rss.xml')
        files_path = os.path.join(self.session_base_dir, 'files')
        os.mkdir(files_path)
        shutil.copyfile(test_rss_file, os.path.join(files_path, 'test_rss.xml'))
        file_server_port = get_random_port()
        self.setUpFileServer(file_server_port, files_path)

        parser = RSSFeedParser()
        cache = SimpleCache(os.path.join(self.session_base_dir, 'cache.txt'))
        # 'RANDOMPORT' presumably matches the literal placeholder URL inside
        # the unprocessed test_rss.xml fixture, marking that entry as seen
        cache.add('http://localhost:RANDOMPORT/ubuntu.torrent')

        def on_items(rss_items):
            self.assertEqual(len(rss_items), 2)
            self.assertEqual(len(rss_items[0]['thumbnail_list']), 1)

        return parser.parse('http://localhost:%d/test_rss.xml' % file_server_port, cache).addCallback(on_items)
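
setUpFileServer is also left undefined here. One plausible implementation under Twisted serves the prepared directory over HTTP for the duration of the test; this is a sketch, not the helper the test suite actually uses:

from twisted.internet import reactor
from twisted.web.server import Site
from twisted.web.static import File

def setUpFileServer(self, port, path):
    # serve `path` on localhost:<port> so the parser can fetch test_rss.xml
    self.file_server = reactor.listenTCP(port, Site(File(path)))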
Example #6
    def test_parse_rss_feed(self):
        """
        Test parsing an RSS feed.
        """
        self.channel_rss.rss_url = 'http://localhost:%d/test_rss.xml' % self.file_server_port
        self.channel_rss._url_cache = SimpleCache(
            os.path.join(self.session_base_dir, 'cache.txt'))

        def verify_rss(items):
            self.assertEqual(len(items), 2)

        return self.channel_rss.parse_feed().addCallback(verify_rss)
Example #7
    def test_parse_feed_stopped(self):
        """
        Test that items are no longer parsed once the feed parser has been stopped.
        """
        self.channel_rss.rss_url = 'http://localhost:%d/test_rss.xml' % self.file_server_port
        self.channel_rss._url_cache = SimpleCache(os.path.join(self.session_base_dir, 'cache.txt'))
        self.channel_rss._to_stop = True

        def verify_rss(items):
            self.assertEqual(len(items), 0)

        return self.channel_rss.parse_feed().addCallback(verify_rss)
Example #8
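For context, the class below relies on imports along these lines. The standard-library and Twisted imports follow directly from the calls used; SimpleCache, RSSFeedParser, TaskManager, TorrentDef, blocking_call_on_reactor_thread, DEFAULT_CHECK_INTERVAL, and the SIGNAL_* constants come from elsewhere in the Tribler code base, whose exact module paths are omitted here rather than guessed:

import hashlib
import json
import logging
import os
import time
from binascii import hexlify

from twisted.internet import reactor
from twisted.internet.defer import DeferredList, succeed
from twisted.web.client import getPage

# SimpleCache, RSSFeedParser, TaskManager, TorrentDef,
# blocking_call_on_reactor_thread, DEFAULT_CHECK_INTERVAL and the
# SIGNAL_* constants are Tribler-internal imports.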
class ChannelRssParser(TaskManager):
    def __init__(self,
                 session,
                 channel_community,
                 rss_url,
                 check_interval=DEFAULT_CHECK_INTERVAL):
        super(ChannelRssParser, self).__init__()
        self._logger = logging.getLogger(self.__class__.__name__)

        self.session = session
        self.channel_community = channel_community
        self.rss_url = rss_url
        self.check_interval = check_interval

        self._url_cache = None

        self._pending_metadata_requests = {}

        self._to_stop = False

        self.running = False

    @blocking_call_on_reactor_thread
    def initialize(self):
        # initialize URL cache
        # use the SHA1 of channel cid + rss_url as key
        cache_key = hashlib.sha1(self.channel_community.cid)
        cache_key.update(self.rss_url)
        cache_key_str = hexlify(cache_key.digest())
        self._logger.debug(u"using key %s for channel %s, rss %s",
                           cache_key_str, hexlify(self.channel_community.cid),
                           self.rss_url)

        url_cache_name = u"rss_cache_%s.txt" % cache_key_str
        url_cache_path = os.path.join(self.session.config.get_state_dir(),
                                      url_cache_name)
        self._url_cache = SimpleCache(url_cache_path)
        self._url_cache.load()

        # schedule the scraping task
        self.register_task(u"rss_scrape",
                           reactor.callLater(2, self._task_scrape))

        # subscribe to channel torrent creation
        self.session.notifier.add_observer(
            self.on_channel_torrent_created,
            SIGNAL_CHANNEL_COMMUNITY, [SIGNAL_ON_TORRENT_UPDATED],
            self.channel_community.get_channel_id())

        # notify that an RSS feed has been created
        rss_feed_data = {
            u'channel': self.channel_community,
            u'rss_feed_url': self.rss_url
        }
        self.session.notifier.notify(SIGNAL_RSS_FEED, SIGNAL_ON_UPDATED, None,
                                     rss_feed_data)
        self.running = True

    @blocking_call_on_reactor_thread
    def shutdown(self):
        self._to_stop = True
        self.cancel_all_pending_tasks()

        self._url_cache.save()
        self._url_cache = None

        self.channel_community = None
        self.session = None
        self.running = False

    def parse_feed(self):
        rss_parser = RSSFeedParser()

        def on_rss_items(rss_items):
            if not rss_items:
                self._logger.warning(u"No RSS items found.")
                return succeed(None)

            def_list = []
            for rss_item in rss_items:
                if self._to_stop:
                    continue

                torrent_url = rss_item[u'torrent_url'].encode('utf-8')
                if torrent_url.startswith('magnet:'):
                    self._logger.warning(
                        u"Tribler does not support adding magnet links to a channel from a RSS feed."
                    )
                    continue

                torrent_deferred = getPage(torrent_url)
                torrent_deferred.addCallbacks(
                    lambda t, r=rss_item: self.on_got_torrent(t, rss_item=r),
                    self.on_got_torrent_error)
                def_list.append(torrent_deferred)

            return DeferredList(def_list, consumeErrors=True)

        return rss_parser.parse(self.rss_url,
                                self._url_cache).addCallback(on_rss_items)

    def _task_scrape(self):
        deferred = self.parse_feed()

        if not self._to_stop:
            # schedule the next scraping task
            self._logger.info(u"Finish scraping %s, schedule task after %s",
                              self.rss_url, self.check_interval)
            self.register_task(
                u'rss_scrape',
                reactor.callLater(self.check_interval, self._task_scrape))

        return deferred

    def on_got_torrent(self, torrent_data, rss_item=None):
        if self._to_stop:
            return

        # save torrent
        tdef = TorrentDef.load_from_memory(torrent_data)
        self.session.lm.rtorrent_handler.save_torrent(tdef)

        # add metadata pending request
        info_hash = tdef.get_infohash()
        if u'thumbnail_list' in rss_item and rss_item[u'thumbnail_list']:
            # only use the first thumbnail
            rss_item[u'thumbnail_url'] = rss_item[u'thumbnail_list'][0]
            if info_hash not in self._pending_metadata_requests:
                self._pending_metadata_requests[info_hash] = rss_item

        # create channel torrent
        self.channel_community._disp_create_torrent_from_torrentdef(
            tdef, long(time.time()))

        # update URL cache
        self._url_cache.add(rss_item[u'torrent_url'])
        self._url_cache.save()

        self._logger.info(u"Channel torrent %s created",
                          tdef.get_name_as_unicode())

    def on_got_torrent_error(self, failure):
        """
        This callback is invoked when the lookup for a specific torrent failed.
        """
        self._logger.warning(u"Failed to fetch torrent info from RSS feed: %s",
                             failure)

    def on_channel_torrent_created(self, subject, events, object_id,
                                   data_list):
        if self._to_stop:
            return

        for data in data_list:
            if data[u'info_hash'] in self._pending_metadata_requests:
                rss_item = self._pending_metadata_requests.pop(
                    data[u'info_hash'])
                rss_item[u'info_hash'] = data[u'info_hash']
                rss_item[u'channel_torrent_id'] = data[u'channel_torrent_id']

                metadata_deferred = getPage(
                    rss_item[u'thumbnail_url'].encode('utf-8'))
                metadata_deferred.addCallback(
                    lambda md, r=rss_item: self.on_got_metadata(md, rss_item=r))

    def on_got_metadata(self, metadata_data, rss_item=None):
        # save metadata
        thumb_hash = hashlib.sha1(metadata_data).digest()
        self.session.lm.rtorrent_handler.save_metadata(thumb_hash,
                                                       metadata_data)

        # create modification message for channel
        modifications = {
            u'metadata-json':
            json.dumps({
                u'title': rss_item['title'][:64],
                u'description': rss_item['description'][:768],
                u'thumb_hash': thumb_hash.encode('hex')
            })
        }
        self.channel_community.modifyTorrent(rss_item[u'channel_torrent_id'],
                                             modifications)
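
Taken together, the expected lifecycle of ChannelRssParser is construct, initialize, and eventually shutdown. A hypothetical usage sketch; session and channel_community are assumed to come from a running Tribler instance, and the feed URL is a placeholder:

# hypothetical usage; `session` and `channel_community` come from a running
# Tribler instance, and the URL is a placeholder
rss_parser = ChannelRssParser(session, channel_community,
                              u'http://example.com/feed.xml')
rss_parser.initialize()  # loads the URL cache and schedules the first scrape

# ... later, during teardown:
rss_parser.shutdown()    # stops scraping and persists the URL cache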
Example #9
    def test_parse_feed_stopped(self):
        self.channel_rss.rss_url = os.path.join(TESTS_DATA_DIR, 'test_rss.xml')
        self.channel_rss._url_cache = SimpleCache(
            os.path.join(self.session_base_dir, 'cache.txt'))
        self.channel_rss._to_stop = True
        self.assertIsNone(self.channel_rss.parse_feed())