def load(self):
    """Threaded loading of elements.

    Re-submits every previously-unfinished URL from the database, then scans
    the configured sources for new posts, and finally waits for all
    outstanding ACKs from the Downloaders before closing the SQL session.
    """
    settings.from_json(self.settings)
    sql.init_from_settings()
    self._session = sql.session()
    self.progress.set_scanning(True)
    retry_failed = settings.get('processing.retry_failed')
    # Query for all unhandled URLs, and submit them before scanning for new Posts.
    # Build the filter clause explicitly: mixing Python `and` with SQLAlchemy
    # column expressions short-circuits in Python instead of composing SQL.
    # (`== False` is intentional — SQLAlchemy needs the comparison, not `not`.)
    clause = (sql.URL.processed == False)
    if retry_failed:
        clause = clause | (sql.URL.failed == True)
    unfinished = self._session \
        .query(sql.URL) \
        .filter(clause) \
        .all()
    self._push_url_list(unfinished)
    self._scan_sources()
    self.progress.set_scanning(False)
    # Wait for any remaining ACKS to come in, before closing the writing pipe.
    # ...Until the Downloaders have confirmed completion of everything, more album URLS may come in.
    while len(self._open_ack) > 0 and not self._stop_event.is_set():
        self._handle_acks(timeout=0.5)
    print("Finished loading.")
    sql.close()
def load(self):
    """Threaded loading of elements.

    Re-submits unfinished URLs (optionally retrying failures that were not
    404s), scans sources for new posts, then waits for outstanding Downloader
    ACKs before closing the SQL session.
    """
    settings.from_json(self.settings)
    sql.init_from_settings()
    self._session = sql.session()
    t_start = datetime.now()  # vy
    print("Started loading.")  # vy
    self.progress.set_scanning(True)
    retry_failed = settings.get('processing.retry_failed')
    # Query for all unhandled URLs, and submit them before scanning for new Posts.
    # Build the clause with SQLAlchemy operators (`|`, `&`) — the original used
    # Python `and`, which calls bool() on a Column and does not compose into SQL.
    # Failed URLs are only retried when their failure reason was not a 404.
    clause = (sql.URL.processed == False)
    if retry_failed:
        clause = clause | (
            (sql.URL.failed == True) &
            sql.not_(sql.URL.failure_reason.contains('404'))
        )
    unfinished = self._session \
        .query(sql.URL) \
        .filter(clause) \
        .all()
    print("Loading %s unfinished urls" % len(unfinished))
    self._push_url_list(unfinished)
    self._scan_sources()
    self.progress.set_scanning(False)
    # Wait for any remaining ACKS to come in, before closing the writing pipe.
    # ...Until the Downloaders have confirmed completion of everything, more album URLS may come in.
    while len(self._open_ack) > 0 and not self._stop_event.is_set():
        self._handle_acks(timeout=1.0, clear=True)
    print("Finished loading.")  # vy
    print("Elapsed time: %s" % str(datetime.now() - t_start))  # vy
    sql.close()
def run(self):
    """Threaded loading of elements.

    Runs the deduplication loop until the stop event fires, performing one
    final pass after downloading stops, then releases the SQL session.
    """
    settings.from_json(self._settings)
    sql.init_from_settings()
    try:
        self._session = sql.session()
        self.progress.clear(status="Starting up...")
        self.progress.set_running(True)
        while not self._stop_event.is_set():
            self._dedupe()
            self.progress.set_status("Waiting for new files...")
            self._stop_event.wait(2)
        self._dedupe()  # Run one final pass after downloading stops.
        self.progress.set_running(False)
    finally:
        # Always release the session — an exception mid-loop must not leak
        # the DB connection (matches the try/finally style of the sibling run()).
        sql.close()
    self.progress.clear("Finished.")
def run(self):
    """Threaded loading of elements.

    Deduplication worker loop: repeatedly dedupes until the stop event is
    set, performs one last pass, and always closes the SQL session on exit.
    Any error is reported on the progress object and printed with traceback.
    """
    settings.from_json(self._settings)
    sql.init_from_settings()
    try:
        self._session = sql.session()
        self.progress.clear(status="Starting up...")
        self.progress.set_running(True)
        stop = self._stop_event
        while not stop.is_set():
            self._dedupe()
            self.progress.set_status("Ready for new files...")
            stop.wait(2)
        # One final sweep after downloading has stopped.
        self._dedupe()
        self.progress.clear(status="Finished.", running=False)
    except Exception as err:
        print('Deduplication Process Error:', err)
        self.progress.set_error(err)
        self.progress.set_running(False)
        traceback.print_exc()
    finally:
        sql.close()
def run(self):
    """ Threaded loading of elements. """
    # Re-hydrate settings and the DB layer inside this thread/process.
    settings.from_json(self._settings)
    sql.init_from_settings()
    # NOTE(review): `debug=True` implies a project-local print wrapper — confirm.
    print("Starting up...", debug=True)
    try:
        self._session = sql.session()
        self.progress.clear(status="Starting up...")
        self.progress.set_running(True)
        # Per-run working state: IDs to skip, a prune cadence counter, and
        # the "special" hashes (negative IDs — presumably sentinel rows; verify).
        self.dedup_ignore_ids = set()
        self.prune_counter = 0
        self.special_hashes = self._session.query(Hash).filter(
            Hash.id < 0).all()
        while not self._stop_event.is_set():
            #print("_stop_event is %s"%self._stop_event.is_set(), debug=True)
            completed = self._dedupe()
            if completed:
                # Work was done: report it and poll again quickly.
                self.progress.set_status(
                    "Completed %s files. Ready for new files..." % completed)
                self._stop_event.wait(1)
            else:
                # Idle: back off to a longer wait before re-checking.
                self._stop_event.wait(10)
        print("_stop_event is %s" % self._stop_event.is_set(), debug=True)
        self._dedupe()  # Run one final pass after downloading stops.
        self.progress.clear(status="Finished.", running=False)
    except Exception as ex:
        # Surface the failure on the progress object and keep the traceback.
        print('Deduplication Process Error:', ex)
        self.progress.set_error(ex)
        self.progress.set_running(False)
        traceback.print_exc()
    finally:
        print("Finished process, _stop_event is %s" % self._stop_event.is_set(), debug=True)
        # Always release the DB session, even on error.
        sql.close()
def run(self):
    """Threaded loading of elements.

    Downloader worker loop: consumes URL IDs from the reader pipe, dispatches
    each to a handler, records the outcome (and any discovered album URLs)
    on the DB row under the shared lock, then ACKs the ID back to the loader.
    Stops on the first unhandled error, reporting it via the progress object.
    """
    settings.from_json(self._settings)
    sql.init_from_settings()
    self._session = sql.session()
    self.progress.clear(status="Starting up...", running=True)
    failed = False
    for nxt_id in self._reader:
        try:
            url = self._session.query(
                sql.URL).filter(sql.URL.id == nxt_id).first()
            if not url:
                # Fixed message typo: was "(%s}" with a mismatched bracket.
                raise Exception("Unknown URL ID provided: (%s)" % nxt_id)
            file = url.file
            path = SanitizedRelFile(base=settings.get("output.base_dir"),
                                    file_path=str(file.path))
            self.progress.set_file(path.relative())
            self.progress.set_status("Attempting to Handle URL...")
            self.progress.set_running(True)
            task = handlers.HandlerTask(url=url.address, file_obj=path)
            resp = handlers.handle(task, self.progress)
            is_album_parent = False
            with self._db_lock:
                if resp.album_urls:
                    if url.album_id:
                        resp.album_urls = []  # Ignore nested Albums to avoid recursion.
                    else:
                        url.album_id = str(uuid.uuid4())
                        is_album_parent = True
                else:
                    resp.album_urls = []
                url.failed = not resp.success
                url.failure_reason = resp.failure_reason
                url.last_handler = resp.handler
                url.album_is_parent = is_album_parent
                if resp.rel_file:
                    file.downloaded = True
                    file.path = resp.rel_file.relative()
                    file.hash = None
                    # Touch the file so its mtime reflects download completion.
                    utime(resp.rel_file.absolute(), times=(time(), time()))
                self._session.commit()
            # Once *all* processing is completed on this URL, the Downloader needs to ACK it.
            # If any additional Album URLS were located, they should be sent before the ACK.
            self._ack_queue.put(
                AckPacket(url_id=nxt_id, extra_urls=resp.album_urls))
            self.progress.clear(status="Waiting for URL...")
        except Exception as ex:
            failed = str(ex)
            # Still ACK the ID (with no extra URLs) so the loader doesn't wait forever.
            self._ack_queue.put(AckPacket(url_id=nxt_id, extra_urls=[]))
            print(ex)
            traceback.print_exc()
            self.progress.set_error("Exited with error: {%s}" % failed)
            break
    sql.close()
    self.progress.clear(
        "Finished." if not failed else "Exited with error: %s" % failed,
        running=False)