Esempio n. 1
0
    def build_postfetch_chain(self, inq):
        """Assemble the postfetch processor chain and link its members.

        The chain is rebuilt from scratch and stored on
        ``self._postfetch_chain``. As side effects, ``self.dedup_db`` and
        ``self.warc_writer_processor`` are (re)assigned from ``Factory``.

        Resulting order (optional entries are skipped when falsy):

        1. plugins declaring ``CHAIN_POSITION == 'early'`` (each one is
           inserted at index 0, so a later early plugin lands ahead of
           an earlier one)
        2. ``self.dedup_db.loader()``
        3. the WARC writer processor
        4. ``self.dedup_db.storer()``
        5. ``self.stats_processor``
        6. listener wrapping ``self.playback_proxy.playback_index_db``
        7. listener wrapping the crawl logger
        8. remaining plugins, in ``self.options.plugins`` order (those
           exposing ``notify`` are wrapped in a listener processor)
        9. listener wrapping ``self.proxy.running_stats``

        Args:
            inq: object assigned to the ``inq`` attribute of the first
                processor in the finished chain.
        """
        def as_listener(listener):
            # Wrap a listener-style object so it can sit in the chain.
            return warcprox.ListenerPostfetchProcessor(listener, self.options)

        processors = []
        self._postfetch_chain = processors

        dedup_db = Factory.dedup_db(self.options)
        self.dedup_db = dedup_db
        if dedup_db:
            processors.append(dedup_db.loader())

        writer = Factory.warc_writer_processor(self.options)
        self.warc_writer_processor = writer
        processors.append(writer)

        if dedup_db:
            processors.append(dedup_db.storer())

        if self.stats_processor:
            processors.append(self.stats_processor)

        if self.playback_proxy:
            processors.append(
                as_listener(self.playback_proxy.playback_index_db))

        crawl_logger = Factory.crawl_logger(self.options)
        if crawl_logger:
            processors.append(as_listener(crawl_logger))

        for qualname in self.options.plugins or []:
            plugin = Factory.plugin(qualname, self.options)
            if hasattr(plugin, 'notify'):
                # Having ``notify`` takes precedence over CHAIN_POSITION.
                processors.append(as_listener(plugin))
                continue
            if getattr(plugin, 'CHAIN_POSITION', None) == 'early':
                processors.insert(0, plugin)
            else:
                processors.append(plugin)

        processors.append(as_listener(self.proxy.running_stats))

        # Feed the head of the chain from ``inq``, then connect each
        # processor to its successor.
        processors[0].inq = inq
        for upstream, downstream in zip(processors, processors[1:]):
            self.chain(upstream, downstream)
Esempio n. 2
0
 def storer(self, *args, **kwargs):
     """Return a ``ListenerPostfetchProcessor`` wrapping this object.

     Any positional or keyword arguments are accepted but not used; the
     processor is always built from ``self`` and ``self.options``.
     """
     processor_cls = warcprox.ListenerPostfetchProcessor
     return processor_cls(self, self.options)