def load(self):
        """
        Load settings, re-submit unhandled URLs, scan the sources, then wait for ACKs.

        Steps, in order:
          1. Apply `self.settings` and initialise the SQL layer.
          2. Query every `sql.URL` that is not yet processed (plus failed ones when the
             'processing.retry_failed' setting is truthy) and push them via `_push_url_list`.
          3. Run `_scan_sources()`.
          4. Poll `_handle_acks` until `_open_ack` is empty or the stop event is set.

        The SQL layer is always closed on the way out, even if one of the steps raises.
        """
        settings.from_json(self.settings)
        sql.init_from_settings()
        try:
            self._session = sql.session()

            self.progress.set_scanning(True)

            # Build the filter explicitly, so the raw settings value (False/None/...) is never
            # OR-ed into the SQL expression when retrying is disabled.
            pending = sql.URL.processed == False  # noqa: E712 - SQL expression, not a Python comparison.
            if settings.get('processing.retry_failed'):
                pending = pending | (sql.URL.failed == True)  # noqa: E712

            # Query for all unhandled URLs, and submit them before scanning for new Posts.
            unfinished = self._session.query(sql.URL).filter(pending).all()
            self._push_url_list(unfinished)

            self._scan_sources()

            self.progress.set_scanning(False)
            # Wait for any remaining ACKS to come in, before closing the writing pipe.
            # ...Until the Downloaders have confirmed completion of everything, more album URLS may come in.
            while len(self._open_ack) > 0 and not self._stop_event.is_set():
                self._handle_acks(timeout=0.5)
            print("Finished loading.")
        finally:
            # Previously skipped whenever a step above raised.
            sql.close()
Beispiel #2
0
    def load(self):
        """
        Load settings, re-submit unhandled URLs, scan the sources, then wait for ACKs.

        Steps, in order:
          1. Apply `self.settings` and initialise the SQL layer.
          2. Query every `sql.URL` that is not yet processed. When the
             'processing.retry_failed' setting is truthy, also include URLs that failed
             with a failure reason not containing '404'. Push the result via `_push_url_list`.
          3. Run `_scan_sources()`.
          4. Poll `_handle_acks` until `_open_ack` is empty or the stop event is set.

        Prints start/finish messages and the elapsed time. The SQL layer is always closed
        on the way out, even if one of the steps raises.
        """
        settings.from_json(self.settings)
        sql.init_from_settings()
        try:
            self._session = sql.session()
            t_start = datetime.now()  #vy
            print("Started loading.")  #vy
            self.progress.set_scanning(True)

            pending = sql.URL.processed == False  # noqa: E712 - SQL expression, not a Python comparison.
            if settings.get('processing.retry_failed'):
                # Python's `and` cannot be overloaded: chaining SQL expressions with it just
                # returns one of the operands, which silently dropped the `failed` condition.
                # `&` builds a real SQL AND.
                # NOTE(review): a NULL failure_reason still does not match the NOT LIKE test
                # (SQL three-valued logic), same as before - confirm that is intended.
                retryable = (sql.URL.failed == True) & \
                    sql.not_(sql.URL.failure_reason.contains('404'))  # noqa: E712
                pending = pending | retryable

            # Query for all unhandled URLs, and submit them before scanning for new Posts.
            unfinished = self._session.query(sql.URL).filter(pending).all()
            print("Loading %s unfinished urls" % len(unfinished))
            self._push_url_list(unfinished)

            self._scan_sources()

            self.progress.set_scanning(False)
            # Wait for any remaining ACKS to come in, before closing the writing pipe.
            # ...Until the Downloaders have confirmed completion of everything, more album URLS may come in.
            while len(self._open_ack) > 0 and not self._stop_event.is_set():
                self._handle_acks(timeout=1.0, clear=True)
            print("Finished loading.")  #vy
            print("Elapsed time: %s" % str(datetime.now() - t_start))  #vy
        finally:
            # Previously skipped whenever a step above raised.
            sql.close()
    def run(self):
        """
        Call `_dedupe()` repeatedly until the stop event is set, then once more.

        Applies `self._settings`, initialises the SQL layer and opens a session first.
        Between passes the status is set to "Waiting for new files..." and the loop
        waits on the stop event for up to 2 (the argument passed to `wait`).

        The running flag is cleared and the SQL layer is closed even if a pass raises;
        the exception itself still propagates to the caller.
        """
        settings.from_json(self._settings)
        sql.init_from_settings()
        try:
            self._session = sql.session()
            self.progress.clear(status="Starting up...")
            self.progress.set_running(True)

            while not self._stop_event.is_set():
                self._dedupe()
                self.progress.set_status("Waiting for new files...")
                self._stop_event.wait(2)
            self._dedupe()  # Run one final pass after downloading stops.
        finally:
            # Cleanup used to be skipped on error, leaving the progress flagged as
            # running and the SQL layer open.
            try:
                self.progress.set_running(False)
            finally:
                sql.close()
        self.progress.clear("Finished.")
Beispiel #4
0
    def run(self):
        """
        Call `_dedupe()` in a loop until the stop event is set, then once more.

        Any exception is printed, stored on the progress object via `set_error`, and
        the running flag is cleared. The SQL layer is closed in every case.
        """
        settings.from_json(self._settings)
        sql.init_from_settings()
        try:
            self._session = sql.session()
            progress = self.progress
            progress.clear(status="Starting up...")
            progress.set_running(True)

            stop_event = self._stop_event
            while True:
                if stop_event.is_set():
                    break
                self._dedupe()
                progress.set_status("Ready for new files...")
                # Wakes early if the stop event is set while waiting.
                stop_event.wait(2)

            # Downloading has stopped; sweep one last time.
            self._dedupe()
            progress.clear(status="Finished.", running=False)
        except Exception as error:
            # The local aliases may be unbound if the failure happened early,
            # so go through `self` here.
            print('Deduplication Process Error:', error)
            self.progress.set_error(error)
            self.progress.set_running(False)
            traceback.print_exc()
        finally:
            sql.close()
    def run(self):
        """
        Call `_dedupe()` in a loop until the stop event is set, then once more.

        Before looping, resets `dedup_ignore_ids` and `prune_counter`, and loads every
        `Hash` row with a negative id into `special_hashes`. After a pass that returns
        a truthy value, the status is updated and the loop waits on the stop event for
        up to 1; after a pass that returns a falsy value it waits for up to 10.

        Any exception is printed, stored on the progress object via `set_error`, and
        the running flag is cleared. The SQL layer is closed in every case.

        NOTE(review): `print` is called with `debug=True`, so it is presumably a
        project-provided wrapper rather than the builtin - confirm.
        """
        settings.from_json(self._settings)
        sql.init_from_settings()
        print("Starting up...", debug=True)
        try:
            self._session = sql.session()
            progress = self.progress
            progress.clear(status="Starting up...")
            progress.set_running(True)
            self.dedup_ignore_ids = set()
            self.prune_counter = 0
            negative_id_hashes = self._session.query(Hash).filter(Hash.id < 0)
            self.special_hashes = negative_id_hashes.all()

            stop_event = self._stop_event
            while not stop_event.is_set():
                completed = self._dedupe()
                if not completed:
                    # Nothing was handled this pass; wait longer before retrying.
                    stop_event.wait(10)
                    continue
                progress.set_status(
                    "Completed %s files. Ready for new files..." % completed)
                stop_event.wait(1)
            print("_stop_event is %s" % stop_event.is_set(), debug=True)

            # Downloading has stopped; sweep one last time.
            self._dedupe()
            progress.clear(status="Finished.", running=False)
        except Exception as error:
            # The local aliases may be unbound if the failure happened early,
            # so go through `self` here and in `finally`.
            print('Deduplication Process Error:', error)
            self.progress.set_error(error)
            self.progress.set_running(False)
            traceback.print_exc()
        finally:
            print("Finished process, _stop_event is %s" % self._stop_event.is_set(),
                  debug=True)
            sql.close()
    def run(self):
        """
        Handle every URL id read from `self._reader`, then report the final status.

        For each id:
          1. Look up the `sql.URL` row and build the target path from the
             'output.base_dir' setting and the path of the URL's file.
          2. Run `handlers.handle` on a `HandlerTask` for that URL.
          3. Under `self._db_lock`, store the outcome on the URL and, when the response
             carries `rel_file`, mark the file as downloaded, update its path, clear
             its hash and set the file's timestamps to now. Then commit.
          4. Put an `AckPacket` on `self._ack_queue`, carrying any album URLs found.

        Album handling: a URL that already has an `album_id` has its album URLs
        discarded. Otherwise a URL that yields album URLs gets a fresh `album_id`
        and is flagged as the album parent.

        The first exception stops the loop: the URL is still ACKed (with no extra
        URLs), the error is printed and stored on the progress object, and the final
        status is "Exited with error: ...". The SQL layer is always closed.
        """
        settings.from_json(self._settings)
        sql.init_from_settings()
        # Message of the error that aborted the loop; None while everything is fine.
        # A None sentinel is used because `str(ex)` can be empty, which previously
        # made a failed run report "Finished.".
        failure = None
        try:
            self._session = sql.session()
            self.progress.clear(status="Starting up...", running=True)

            for nxt_id in self._reader:
                try:
                    url = self._session.query(
                        sql.URL).filter(sql.URL.id == nxt_id).first()
                    if not url:
                        raise Exception("Unknown URL ID provided: (%s)" % nxt_id)

                    file_record = url.file
                    path = SanitizedRelFile(base=settings.get("output.base_dir"),
                                            file_path=str(file_record.path))

                    self.progress.set_file(path.relative())
                    self.progress.set_status("Attempting to Handle URL...")
                    self.progress.set_running(True)

                    task = handlers.HandlerTask(url=url.address, file_obj=path)
                    resp = handlers.handle(task, self.progress)

                    is_album_parent = False

                    with self._db_lock:
                        if resp.album_urls:
                            if url.album_id:
                                # Ignore nested Albums to avoid recursion.
                                resp.album_urls = []
                            else:
                                url.album_id = str(uuid.uuid4())
                                is_album_parent = True
                        else:
                            # Normalise any falsy value to a list for the ACK below.
                            resp.album_urls = []

                        url.failed = not resp.success
                        url.failure_reason = resp.failure_reason
                        url.last_handler = resp.handler
                        url.album_is_parent = is_album_parent

                        if resp.rel_file:
                            file_record.downloaded = True
                            file_record.path = resp.rel_file.relative()
                            file_record.hash = None
                            now = time()
                            utime(resp.rel_file.absolute(), times=(now, now))

                        self._session.commit()

                    # Once *all* processing is completed on this URL, the Downloader needs to ACK it.
                    # If any additional Album URLS were located, they should be sent before the ACK.
                    self._ack_queue.put(
                        AckPacket(url_id=nxt_id, extra_urls=resp.album_urls))
                    self.progress.clear(status="Waiting for URL...")
                except Exception as ex:
                    # Fall back to repr() so the message is never empty.
                    failure = str(ex) or repr(ex)
                    self._ack_queue.put(AckPacket(url_id=nxt_id, extra_urls=[]))
                    print(ex)
                    traceback.print_exc()
                    self.progress.set_error("Exited with error: {%s}" % failure)
                    break
        finally:
            # Also runs when reading from `self._reader` itself raises.
            sql.close()

        self.progress.clear(
            "Finished." if failure is None else "Exited with error: %s" % failure,
            running=False)