예제 #1
0
    def _update_book_count_thread(self):
        while self.should_run:
            time.sleep(1)
            try:
                for dir in list(self.book_count.keys()):
                    dirs = []
                    parentdirs = self.book_count[dir]["parentdirs"]
                    if parentdirs:
                        for parentdir in parentdirs:
                            dirs.append(os.path.join(dir, parentdirs[parentdir]))
                    else:
                        dirs.append(dir)
                    if (self.book_count[dir]["modified"] + 15 < time.time()):
                        books = []
                        for d in dirs:
                            if os.path.isdir(d):
                                books += Filesystem.list_book_dir(d)
                        self.book_count[dir]["modified"] = time.time()
                        self.book_count[dir]["count"] = len(set(books))

                    if not self.should_run:
                        break

            except Exception:
                logging.exception("An error occurred while updating book count")
예제 #2
0
def triggerDirectoryEdition(directory_id, edition_id):
    """Trigger every pipeline whose input directory is the given directory,
    for one specific edition.

    Returns a (body, status) pair: ``(None, 404)`` when the directory or
    the edition is unknown, otherwise a JSON list of the triggered
    pipeline UIDs with status 200.
    """
    if directory_id not in Directory.dirs_flat:
        return None, 404
    directory_path = os.path.normpath(Directory.dirs_flat[directory_id])

    # The edition must exist in the directory as a book name (file
    # extensions stripped via Path.stem).
    stems = {Path(entry).stem for entry in Filesystem.list_book_dir(directory_path)}
    if edition_id not in stems:
        return None, 404

    triggered_uids = []
    for candidate in Pipeline.pipelines:
        if candidate.dir_in and os.path.normpath(candidate.dir_in) == directory_path:
            candidate.trigger(edition_id, auto=False)
            triggered_uids.append(candidate.uid)

    return jsonify(triggered_uids), 200
예제 #3
0
    def _trigger_epub_catalog_thread(self):
        last_check = 0

        self.watchdog_bark()
        while self.shouldRun:
            time.sleep(5)
            self.watchdog_bark()

            if not self.dirsAvailable():
                continue

            # Check for update every 3 days
            max_update_interval = 60 * 60 * 24 * 3
            if time.time() - last_check < max_update_interval:
                continue

            last_check = time.time()
            logging.info("Updating formatklar and filesize for ebooks")
            list_books = Filesystem.list_book_dir(self.dir_out)
            Bibliofil.update_list_of_books("XHTML", list_books)
예제 #4
0
    def _trigger_newsletter_thread(self):
        last_check = 0
        # If no newsletter this month, trigger newsletter
        self.watchdog_bark()
        while self.shouldRun:
            time.sleep(5)
            self.watchdog_bark()

            if not self.dirsAvailable():
                continue

            max_update_interval = 60 * 60
            if time.time() - last_check < max_update_interval:
                continue

            last_check = time.time()
            self.newsletter_identifier = "120209"
            self.newsletter_identifier += time.strftime("%m%Y")
            self.year_month = datetime.datetime.today().strftime('%Y-%m')
            if self.newsletter_identifier not in Filesystem.list_book_dir(
                    self.dir_out):
                logging.info("Lager nyhetsbrev for: " + self.year_month)
                self.trigger(self.newsletter_identifier)
예제 #5
0
    def _monitor_book_events_thread(self):
        """Main watcher loop: detect created/modified/deleted books under
        ``self.dir_path`` by comparing shallow and deep checksums, and notify
        the registered book event handlers accordingly.

        Runs until ``self.shouldRun`` becomes False. All access to the
        ``self._md5`` checksum cache happens under ``self._md5_lock``.
        Any unexpected exception is logged and e-mailed, and the loop
        continues.
        """
        self.initialize_checksums()

        while self.shouldRun:
            try:
                # books that are recently changed (check often in case of new file changes)
                with self._md5_lock:
                    recently_changed = sorted([
                        book for book in self._md5 if time.time() -
                        self._md5[book]["modified"] < self.inactivity_timeout
                    ],
                                              key=lambda rc: self._md5[rc][
                                                  "modified"])
                    if recently_changed:
                        for book in recently_changed:
                            # deep scan: checksum includes files in subdirectories
                            deep_md5, _ = Filesystem.path_md5(
                                path=os.path.join(self.dir_path, book),
                                shallow=False)
                            self._md5[book]["deep_checked"] = int(time.time())
                            if deep_md5 != self._md5[book]["deep"]:
                                self._md5[book]["modified"] = int(time.time())
                                self._update_md5(book)
                                logging.debug(
                                    "book modified (and was recently modified, might be in the middle of a copy operation): {}"
                                    .format(book))
                                self.notify_book_event_handlers(
                                    book, "modified")

                        time.sleep(0.1)  # a small nap
                        continue

                time.sleep(
                    1
                )  # unless anything has recently changed, give the system time to breathe between each iteration

                if not self.is_available():
                    time.sleep(5)
                    continue

                dirlist = Filesystem.list_book_dir(self.dir_path)
                sorted_dirlist = []
                should_deepscan = []

                # books that have explicitly been requested for rescan should be rescanned first
                if self.suggested_for_rescan:
                    for book_id in self.suggested_for_rescan:
                        book_path = os.path.join(self.dir_path, book_id)

                        if os.path.exists(book_path):
                            sorted_dirlist.append(book_id)
                            should_deepscan.append(book_id)

                        else:
                            # if book is a file, then it can have a file extension
                            for dirname in dirlist:
                                if Path(dirname).stem == book_id:
                                    sorted_dirlist.append(dirname)
                                    should_deepscan.append(dirname)
                                    break

                    # empty list after having put the suggestions at the front of the queue
                    self.suggested_for_rescan = []

                    # add the remaining books to the list
                    for dirname in dirlist:
                        if dirname not in sorted_dirlist:
                            sorted_dirlist.append(dirname)
                    if len(dirlist) != len(sorted_dirlist):
                        logging.warning("len(dirlist) != len(sorted_dirlist)")
                        logging.warning("dirlist: {}".format(dirlist))
                        logging.warning(
                            "sorted_dirlist: {}".format(sorted_dirlist))
                    dirlist = sorted_dirlist

                # do a shallow check of files and folders (i.e. don't check file sizes, modification times etc. in subdirectories)
                for book in dirlist:
                    if not self.shouldRun:
                        break  # break loop if we're shutting down the system (iterating books may take some time)

                    with self._md5_lock:
                        if not os.path.exists(os.path.join(
                                self.dir_path, book)):
                            # iterating over all books can take a lot of time,
                            # and the book may have been deleted by the time we get to it.
                            self.notify_book_event_handlers(book, "deleted")
                            logging.debug("book deleted: {}".format(book))
                            del self._md5[book]
                            continue

                        if book not in self._md5:
                            self._update_md5(book)
                            self.notify_book_event_handlers(book, "created")
                            logging.debug("book created: {}".format(book))
                            continue

                        shallow_md5, _ = Filesystem.path_md5(
                            path=os.path.join(self.dir_path, book),
                            shallow=True,
                            expect=self._md5[book]["shallow"]
                            if book in self._md5 else None)
                        if shallow_md5 != self._md5[book]["shallow"]:
                            self._update_md5(book)
                            self.notify_book_event_handlers(book, "modified")
                            logging.debug(
                                "book modified (top-level dir/file modified): {}"
                                .format(book))
                            continue

                # cached books that are no longer in the directory listing
                # were deleted; drop them and restart the iteration.
                with self._md5_lock:
                    deleted = [
                        book for book in self._md5 if book not in dirlist
                    ]
                    for book in deleted:
                        self.notify_book_event_handlers(book, "deleted")
                        logging.debug("book deleted: {}".format(book))
                        del self._md5[book]
                    if deleted:
                        continue

                self.store_checksums(
                )  # regularly store updated version of checksums

                # do a deep check (size/time etc. of files in subdirectories) of up to 10 books that haven't been checked in a while
                with self._md5_lock:
                    long_time_since_checked = sorted(
                        [{
                            "name": book,
                            "md5": self._md5[book]
                        } for book in self._md5 if time.time() -
                         self._md5[book]["modified"] > self.inactivity_timeout
                         ],
                        key=lambda book: book["md5"]["deep_checked"])
                    long_time_since_checked = [
                        b["name"] for b in long_time_since_checked
                    ]
                    for book in should_deepscan + long_time_since_checked[:10]:
                        if not self.shouldRun:
                            break  # break loop if we're shutting down the system

                        deep_md5, _ = Filesystem.path_md5(
                            path=os.path.join(self.dir_path, book),
                            shallow=False,
                            expect=self._md5[book]["deep"]
                            if book in self._md5 else None)
                        if book not in self._md5:
                            self._update_md5(book)
                        else:
                            self._md5[book]["deep_checked"] = int(time.time())
                            if deep_md5 != self._md5[book]["deep"]:
                                self._md5[book]["modified"] = int(time.time())
                                self._update_md5(book)
                                self.notify_book_event_handlers(
                                    book, "modified")
                                logging.debug("book modified: {}".format(book))

                self.store_checksums(
                )  # regularly store updated version of checksums

            except Exception:
                # Log and e-mail any unexpected error, then keep the watcher
                # loop alive. (The runtime messages below are Norwegian for
                # "An error occurred while monitoring {dir}".)
                logging.exception(
                    "En feil oppstod ved overvåking av {}".format(
                        self.dir_path))
                try:
                    Report.emailPlainText(
                        "En feil oppstod ved overvåking av {}".format(
                            self.dir_path),
                        traceback.format_exc(),
                        recipients=[])
                except Exception:
                    logging.exception("Could not e-mail exception")
예제 #6
0
    def _initialize_checksums(self):
        """Build or restore the per-book checksum cache (``self._md5``).

        Attempts to load a pickled cache from disk (skipped when the
        directory id is generated), then reconciles the cache with the
        actual directory contents: entries for books no longer on disk are
        dropped, and books missing from the cache get fresh checksums.
        Progress is reported through ``self.status_text``. If
        ``self.shouldRun`` turns False mid-scan, the cache is cleared and
        the method returns early.
        """
        cache_dir = Config.get("cache_dir", None)
        if not cache_dir:
            # Fall back to $CACHE_DIR or a directory under the system temp
            # dir, and remember it so other components agree on the location.
            cache_dir = os.getenv(
                "CACHE_DIR",
                os.path.join(tempfile.gettempdir(), "prodsys-cache"))
            # exist_ok=True makes a separate isdir() pre-check unnecessary.
            os.makedirs(cache_dir, exist_ok=True)
            Config.set("cache_dir", cache_dir)

        self.cache_file = None
        if not self.dir_id_is_generated:
            self.cache_file = os.path.join(
                cache_dir, "dir.{}.md5.pickle".format(self.dir_id))
            if os.path.isfile(self.cache_file):
                try:
                    # NOTE(review): pickle.load on a tampered cache file can
                    # execute arbitrary code; the cache dir is presumably
                    # trusted — confirm.
                    with open(self.cache_file, 'rb') as f:
                        self._md5 = pickle.load(f)
                except Exception:
                    # Fix: the exception was previously passed as a %-format
                    # argument with no placeholder in the message, which broke
                    # log-record formatting. logging.exception() already
                    # appends the traceback.
                    logging.exception(
                        "Cache file found, but could not parse it")
            else:
                logging.debug("Can't find cache file")

        # assumes self._md5 was initialized (e.g. to a falsy value) before
        # this method runs — TODO confirm against __init__
        if self._md5:
            logging.debug(
                "Loaded directory status from cache file, doing a partial rescan"
            )

        else:
            logging.debug("Directory status not cached, doing a full rescan")
            self._md5 = {}

        dir_list = Filesystem.list_book_dir(self.dir_path)
        self.status_text = "Looking for created/deleted"

        # Drop cache entries for books that no longer exist on disk.
        for book in list(
                self._md5.keys()
        ):  # list(….keys()) to avoid "RuntimeError: dictionary changed size during iteration"
            if not self.shouldRun:
                self._md5 = {}
                return  # break loop if we're shutting down the system
            if book not in dir_list:
                logging.debug(
                    "{} is in cache but not in directory: deleting from cache".
                    format(book))
                del self._md5[book]

        # Compute checksums for books on disk that are missing from the cache.
        md5_count = 0
        self.status_text = "0 / {}".format(len(dir_list))
        for book in dir_list:
            if not self.shouldRun:
                self._md5 = {}
                return  # break loop if we're shutting down the system
            if book not in self._md5:
                logging.debug(
                    "{} is in directory but not in cache: adding to cache".
                    format(book))
                self._update_md5(book)
            md5_count += 1
            self.status_text = "{} / {}".format(md5_count, len(dir_list))
            if md5_count == 1 or md5_count % 100 == 0:
                logging.info(self.status_text)
            if md5_count % 10 == 0:
                self.store_checksums(
                    while_starting=True
                )  # if for some reason the system crashes, we don't have to start all over again

        self.store_checksums(while_starting=True)
        self.starting = False
        self.status_text = None
        return