Exemple #1
0
    def init_autoindex(self, auto_interval):
        """Start auto-indexing of the managed collections directory.

        Returns immediately when ``auto_interval`` is falsy. Otherwise an
        ``AutoIndexer`` is built for the warcserver's root directory and
        started. If the indexer's root path is not an existing directory, an
        error is logged and the process exits with status 2.

        :param str|int auto_interval: polling interval in seconds; converted with ``int()``
        """
        if not auto_interval:
            return

        # Deferred import: only reached when auto-indexing was requested.
        from pywb.manager.autoindex import AutoIndexer

        indexer = AutoIndexer(colls_dir=self.warcserver.root_dir or None,
                              interval=int(auto_interval))
        managed_dir = indexer.root_path

        if not os.path.isdir(managed_dir):
            error_tmpl = 'No managed directory "{0}" for auto-indexing'
            logging.error(error_tmpl.format(managed_dir))
            import sys
            sys.exit(2)

        # The log line reports the interval as it was passed in, not the int() value.
        enabled_tmpl = 'Auto-Indexing Enabled on "{0}", checking every {1} secs'
        logging.info(enabled_tmpl.format(managed_dir, auto_interval))
        indexer.start()
Exemple #2
0
    def test_auto_index(self):
        """Check that AutoIndexer picks up WARCs copied into the ``auto`` collection.

        Two sample WARCs are copied in while the indexer is running: one into
        the archive directory and one into a ``sub`` directory beneath it. The
        index file must then exist and mention both files. Afterwards the
        index file is removed, the indexer is started again and the copy is
        repeated; the index file is expected to have a newer modification
        time than before.
        """
        # presumably creates the 'auto' collection through the manager CLI — confirm
        main(['init', 'auto'])
        auto_dir = os.path.join(self.root_dir, COLLECTIONS, 'auto')
        archive_dir = os.path.join(auto_dir, ARCHIVE_DIR)

        # Nested directory, so that indexing of files below the archive root is covered too.
        archive_sub_dir = os.path.join(archive_dir, 'sub')
        os.makedirs(archive_sub_dir)

        # `indexer` is looked up when do_copy runs, so assigning it below this def is fine.
        def do_copy():
            try:
                time.sleep(1.0)
                shutil.copy(self._get_sample_warc('example.warc.gz'),
                            archive_dir)
                shutil.copy(self._get_sample_warc('example-extra.warc'),
                            archive_sub_dir)
                time.sleep(1.0)
            finally:
                # presumably an interval of 0 tells the indexer loop to stop — confirm in AutoIndexer
                indexer.interval = 0

        indexer = AutoIndexer(interval=0.25)
        indexer.start()

        # Run the copy in a greenlet and wait for it to finish.
        ge = gevent.spawn(do_copy)
        ge.join()

        index_file = os.path.join(auto_dir, INDEX_DIR, AUTOINDEX_FILE)
        assert os.path.isfile(index_file)

        with open(index_file, 'r') as fh:
            index = fh.read()

        # The index text is passed as the assertion message to make failures easier to read.
        assert '"example.warc.gz' in index, index
        assert '"sub/example-extra.warc' in index, index

        mtime = os.path.getmtime(index_file)

        # Update: restore the interval (do_copy set it to 0) and start the indexer again.
        indexer.interval = 0.25
        indexer.start()

        os.remove(index_file)

        # NOTE(review): disabled thread-based and CLI-driven variants, kept for reference.
        #thread = threading.Thread(target=do_copy)
        #thread.daemon = True
        #thread.start()
        ge = gevent.spawn(do_copy)

        #wayback(['-p', '0', '-a', '--auto-interval', '0.25'])

        #thread.join()
        ge.join()

        # Assert the index file was written again with a newer modification time.
        assert os.path.getmtime(index_file) > mtime
Exemple #3
0
    def init_autoindex(self, auto_interval):
        """Create and start the collection auto-indexer.

        This is a no-op for any falsy ``auto_interval`` (``None``, ``0``, an
        empty string). When the indexer's root path is not an existing
        directory, the problem is logged and the process exits with status 2.

        :param str|int auto_interval: The auto-indexing interval from the configuration file or CLI argument
        """
        if not auto_interval:
            return

        # Deferred import: only reached when auto-indexing was requested.
        from pywb.manager.autoindex import AutoIndexer

        indexer = AutoIndexer(colls_dir=self.warcserver.root_dir or None,
                              interval=int(auto_interval))
        root_path = indexer.root_path

        if not os.path.isdir(root_path):
            missing_msg = 'No managed directory "{0}" for auto-indexing'
            logging.error(missing_msg.format(root_path))
            import sys
            sys.exit(2)

        # The log line reports the interval as it was passed in, not the int() value.
        started_msg = 'Auto-Indexing Enabled on "{0}", checking every {1} secs'
        logging.info(started_msg.format(root_path, auto_interval))
        indexer.start()
Exemple #4
0
    def init_autoindex(self, auto_interval):
        """Create and start the collection auto-indexer.

        This is a no-op for any falsy ``auto_interval`` (``None``, ``0``, an
        empty string). When the indexer's root path is not an existing
        directory, the problem is logged and the process exits with status 2.

        :param str|int auto_interval: The auto-indexing interval from the configuration file or CLI argument
        """
        if not auto_interval:
            return

        # Deferred import: only reached when auto-indexing was requested.
        from pywb.manager.autoindex import AutoIndexer

        indexer = AutoIndexer(colls_dir=self.warcserver.root_dir or None,
                              interval=int(auto_interval))
        root_path = indexer.root_path

        if not os.path.isdir(root_path):
            missing_msg = 'No managed directory "{0}" for auto-indexing'
            logging.error(missing_msg.format(root_path))
            import sys
            sys.exit(2)

        # The log line reports the interval as it was passed in, not the int() value.
        started_msg = 'Auto-Indexing Enabled on "{0}", checking every {1} secs'
        logging.info(started_msg.format(root_path, auto_interval))
        indexer.start()
Exemple #5
0
    def test_auto_index(self):
        """Check that AutoIndexer picks up WARCs copied into the ``auto`` collection.

        Two sample WARCs are copied in while the indexer is running: one into
        the archive directory and one into a ``sub`` directory beneath it. The
        index file must then exist and mention both files. Afterwards the
        index file is removed, the indexer is started again and the copy is
        repeated; the index file is expected to have a newer modification
        time than before.
        """
        # presumably creates the 'auto' collection through the manager CLI — confirm
        main(['init', 'auto'])
        auto_dir = os.path.join(self.root_dir, COLLECTIONS, 'auto')
        archive_dir = os.path.join(auto_dir, ARCHIVE_DIR)

        # Nested directory, so that indexing of files below the archive root is covered too.
        archive_sub_dir = os.path.join(archive_dir, 'sub')
        os.makedirs(archive_sub_dir)

        # `indexer` is looked up when do_copy runs, so assigning it below this def is fine.
        def do_copy():
            try:
                time.sleep(1.0)
                shutil.copy(self._get_sample_warc('example.warc.gz'), archive_dir)
                shutil.copy(self._get_sample_warc('example-extra.warc'), archive_sub_dir)
                time.sleep(1.0)
            finally:
                # presumably an interval of 0 tells the indexer loop to stop — confirm in AutoIndexer
                indexer.interval = 0

        indexer = AutoIndexer(interval=0.25)
        indexer.start()

        # Run the copy in a greenlet and wait for it to finish.
        ge = gevent.spawn(do_copy)
        ge.join()

        index_file = os.path.join(auto_dir, INDEX_DIR, AUTOINDEX_FILE)
        assert os.path.isfile(index_file)

        with open(index_file, 'r') as fh:
            index = fh.read()

        # The index text is passed as the assertion message to make failures easier to read.
        assert '"example.warc.gz' in index, index
        assert '"sub/example-extra.warc' in index, index

        mtime = os.path.getmtime(index_file)

        # Update: restore the interval (do_copy set it to 0) and start the indexer again.
        indexer.interval = 0.25
        indexer.start()

        os.remove(index_file)

        # NOTE(review): disabled thread-based and CLI-driven variants, kept for reference.
        #thread = threading.Thread(target=do_copy)
        #thread.daemon = True
        #thread.start()
        ge = gevent.spawn(do_copy)

        #wayback(['-p', '0', '-a', '--auto-interval', '0.25'])

        #thread.join()
        ge.join()

        # Assert the index file was written again with a newer modification time.
        assert os.path.getmtime(index_file) > mtime
Exemple #6
0
 def setup_class(cls):
     """Run the parent class setup with 'config_test_record.yaml', then start an AutoIndexer.

     The indexer is created with a 0.1 interval and stored on the class as ``cls.indexer``.
     """
     super(TestRecordReplay, cls).setup_class('config_test_record.yaml')
     auto_indexer = AutoIndexer(interval=0.1)
     cls.indexer = auto_indexer
     auto_indexer.start()