Exemple #1
0
 def __init__(self, source_patterns=None):
     """Wire up the loader, the deduplicator and the downloader workers.

     :param source_patterns: Stored on ``self.sources`` right before
         ``load_sources()`` runs; the result of that call then replaces it.
         (Presumably ``load_sources()`` reads the stored patterns - confirm.)
     """
     super().__init__()
     self.daemon = False

     # Keep the raw patterns on the instance first; load_sources() overwrites them.
     self.sources = source_patterns
     self.sources = self.load_sources()

     # The loader is built first because the deduplicator reuses its stop event.
     loader_settings = settings.to_json()
     self.loader = RedditLoader(sources=self.sources,
                                settings_json=loader_settings)

     dedup_settings = settings.to_json()
     dedup_stop_event = self.loader.get_stop_event()
     self.deduplicator = Deduplicator(settings_json=dedup_settings,
                                      stop_event=dedup_stop_event)

     self._downloaders = self._create_downloaders()

     # The deduplicator is always constructed, but only tracked as a process
     # when the 'processing.deduplicate_files' setting is truthy.
     processes = [self.loader]
     processes.extend(self._downloaders)
     self._all_processes = processes
     if settings.get('processing.deduplicate_files'):
         self._all_processes.append(self.deduplicator)
Exemple #2
0
 def _create_downloaders(self):
     """Construct one Downloader per configured concurrent download.

     The number of Downloaders is read from the
     ``threading.concurrent_downloads`` setting. Each one is handed a reader
     and an ack queue obtained from ``self.loader`` plus the serialized
     settings.

     :return: List of the newly constructed Downloader objects (they are not
         started here).
     """
     worker_count = settings.get('threading.concurrent_downloads')
     return [
         Downloader(
             reader=self.loader.get_reader(),
             ack_queue=self.loader.get_ack_queue(),
             settings_json=settings.to_json(),
         )
         for _ in range(worker_count)
     ]
	def __init__(self, source_patterns=None):
		"""Prepare the database, then wire up the loader, deduplicator and downloaders.

		:param source_patterns: Stored on ``self.sources`` right before
			``load_sources()`` runs; the result of that call then replaces it.
			(Presumably ``load_sources()`` reads the stored patterns - confirm.)
		"""
		super().__init__()

		# Make sure the database is built & migrated before starting threads.
		sql.init_from_settings()
		sql.close()

		self.daemon = False
		# Keep the raw patterns on the instance first; load_sources() overwrites them.
		self.sources = source_patterns
		self.sources = self.load_sources()

		# A single lock instance is handed to both the loader and the deduplicator.
		self.db_lock = RLock()

		# The loader is built first because the deduplicator reuses its stop event.
		loader_settings = settings.to_json()
		self.loader = RedditLoader(
			sources=self.sources,
			settings_json=loader_settings,
			db_lock=self.db_lock,
		)

		dedup_settings = settings.to_json()
		dedup_stop_event = self.loader.get_stop_event()
		self.deduplicator = Deduplicator(
			settings_json=dedup_settings,
			stop_event=dedup_stop_event,
			db_lock=self.db_lock,
		)

		self._downloaders = self._create_downloaders()

		# The deduplicator is always constructed, but only tracked as a process
		# when the 'processing.deduplicate_files' setting is truthy.
		processes = [self.loader]
		processes.extend(self._downloaders)
		self._all_processes = processes
		if settings.get('processing.deduplicate_files'):
			self._all_processes.append(self.deduplicator)
    def test_download(self):
        """Downloader should work.

        A helper thread queues the id of every ``sql.URL`` row and then waits
        (up to 30 seconds) for a matching ack per id, while the Downloader
        runs in the calling thread. The test passes when at least one URL was
        sent, every sent URL was acked, and the Downloader's progress no longer
        reports itself as running.
        """
        stop_event = multiprocessing.Event()
        in_queue = multiprocessing.Queue()
        ack_queue = multiprocessing.Queue()
        reader = QueueReader(in_queue, stop_event)
        dl = downloader.Downloader(reader, ack_queue, settings.to_json(),
                                   multiprocessing.RLock())
        stats = {'ack': 0, 'sent': 0}

        def add_test(inf):
            """Feed all URL ids to the Downloader and count the acks into *inf*."""
            try:
                sess = sql.session()
                try:
                    urls = sess.query(sql.URL).all()
                    # Monotonic clock: the timeout must not be affected by
                    # wall-clock adjustments.
                    started = time.monotonic()
                    pending = []
                    for url in urls:
                        in_queue.put_nowait(url.id)
                        inf['sent'] += 1
                        pending.append(url.id)
                    while time.monotonic() - started < 30 and pending:
                        try:
                            rd = ack_queue.get(block=True, timeout=.5)
                            inf['ack'] += 1
                            pending.remove(rd.url_id)
                        except queue.Empty:
                            pass
                finally:
                    # Release the session even if feeding/acking raised.
                    sess.close()
            finally:
                # Always signal the stop event; dl.run() in the main thread
                # presumably only returns once it is set (confirm), so skipping
                # this on an error could hang the test.
                stop_event.set()

        thread = Thread(target=add_test, args=(stats, ))
        thread.start()
        dl.run()
        thread.join()
        self.assertGreater(stats['sent'],
                           0,
                           msg='Failed to send any test URLS for download!')
        self.assertEqual(stats['sent'],
                         stats['ack'],
                         msg='Not all sent URLs were Acked!')
        self.assertFalse(dl.progress.get_running(),
                         msg='Failed to clear running status!')