def processExtras(myDB=None, dest_path=None, global_name=None, data=None): # given book data, handle calibre autoadd, book image, opf, # and update author and book counts authorname = data[0]['AuthorName'] bookid = data[0]['BookID'] bookname = data[0]['BookName'] bookdesc = data[0]['BookDesc'] bookisbn = data[0]['BookIsbn'] bookimg = data[0]['BookImg'] bookdate = data[0]['BookDate'] booklang = data[0]['BookLang'] bookpub = data[0]['BookPub'] # If you use auto add by Calibre you need the book in a single directory, not nested # So take the file you Copied/Moved to Dest_path and copy it to a Calibre auto add folder. if lazylibrarian.IMP_AUTOADD: processAutoAdd(dest_path) # try image processIMG(dest_path, bookimg, global_name) # try metadata processOPF(dest_path, authorname, bookname, bookisbn, bookid, bookpub, bookdate, bookdesc, booklang, global_name) # update books # dest_path is where we put the book after processing, but we don't have the full filename # we don't keep the extension, so look for any "book" in that directory dest_file = book_file(dest_path, booktype='book') if isinstance(dest_file, str): dest_file = dest_file.decode(lazylibrarian.SYS_ENCODING) controlValueDict = {"BookID": bookid} newValueDict = {"Status": "Open", "BookFile": dest_file} myDB.upsert("books", newValueDict, controlValueDict) # update authors havebooks = myDB.action( 'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")' % authorname).fetchone() controlValueDict = {"AuthorName": authorname} newValueDict = {"HaveBooks": havebooks['counter']} countauthor = len( myDB.select('SELECT AuthorID FROM authors WHERE AuthorName="%s"' % authorname)) if countauthor: myDB.upsert("authors", newValueDict, controlValueDict)
def processExtras(myDB=None, dest_path=None, global_name=None, data=None): # given book data, handle calibre autoadd, book image, opf, # and update author and book counts authorname = data['AuthorName'] bookid = data['BookID'] bookname = data['BookName'] bookdesc = data['BookDesc'] bookisbn = data['BookIsbn'] bookimg = data['BookImg'] bookdate = data['BookDate'] booklang = data['BookLang'] bookpub = data['BookPub'] # If you use auto add by Calibre you need the book in a single directory, not nested # So take the file you Copied/Moved to Dest_path and copy it to a Calibre auto add folder. if lazylibrarian.IMP_AUTOADD: processAutoAdd(dest_path) # try image processIMG(dest_path, bookimg, global_name) # try metadata processOPF(dest_path, authorname, bookname, bookisbn, bookid, bookpub, bookdate, bookdesc, booklang, global_name) # update books # dest_path is where we put the book after processing, but we don't have the full filename # we don't keep the extension, so look for any "book" in that directory dest_file = book_file(dest_path, booktype='book') if isinstance(dest_file, str): dest_file = dest_file.decode(lazylibrarian.SYS_ENCODING) controlValueDict = {"BookID": bookid} newValueDict = {"Status": "Open", "BookFile": dest_file} myDB.upsert("books", newValueDict, controlValueDict) # update authors havebooks = myDB.match( 'SELECT count("BookID") as counter FROM books WHERE AuthorName="%s" AND (Status="Have" OR Status="Open")' % authorname) controlValueDict = {"AuthorName": authorname} newValueDict = {"HaveBooks": havebooks['counter']} countauthor = len(myDB.select('SELECT AuthorID FROM authors WHERE AuthorName="%s"' % authorname)) if countauthor: myDB.upsert("authors", newValueDict, controlValueDict)
def processAlternate(source_dir=None): # import a book from an alternate directory try: if not source_dir: logger.warn("Alternate Directory not configured") return False elif not os.path.isdir(source_dir): logger.warn("Alternate Directory [%s] not found" % source_dir) return False if source_dir == lazylibrarian.DIRECTORY('Destination'): logger.warn('Alternate directory must not be the same as Destination') return False logger.debug('Processing alternate directory %s' % source_dir) # first, recursively process any books in subdirectories for fname in os.listdir(source_dir): subdir = os.path.join(source_dir, fname) if os.path.isdir(subdir): processAlternate(subdir) # only import one book from each alternate (sub)directory, this is because # the importer may delete the directory after importing a book, # depending on lazylibrarian.DESTINATION_COPY setting # also if multiple books in a folder and only a "metadata.opf" # which book is it for? new_book = book_file(source_dir, booktype='book') if new_book: metadata = {} # see if there is a metadata file in this folder with the info we need # try book_name.opf first, or fall back to any filename.opf metafile = os.path.splitext(new_book)[0] + '.opf' if not os.path.isfile(metafile): metafile = opf_file(source_dir) if metafile and os.path.isfile(metafile): try: metadata = get_book_info(metafile) except Exception as e: logger.debug('Failed to read metadata from %s, %s' % (metafile, str(e))) else: logger.debug('No metadata file found for %s' % new_book) if 'title' not in metadata or 'creator' not in metadata: # if not got both, try to get metadata from the book file try: metadata = get_book_info(new_book) except Exception as e: logger.debug('No metadata found in %s, %s' % (new_book, str(e))) if 'title' in metadata and 'creator' in metadata: authorname = metadata['creator'] bookname = metadata['title'] myDB = database.DBConnection() authmatch = myDB.match('SELECT * FROM authors where AuthorName="%s"' % (authorname)) if not authmatch: # try goodreads preferred authorname logger.debug("Checking GoodReads for [%s]" % authorname) GR = GoodReads(authorname) try: author_gr = GR.find_author_id() except Exception: logger.debug("No author id for [%s]" % authorname) if author_gr: grauthorname = author_gr['authorname'] logger.debug("GoodReads reports [%s] for [%s]" % (grauthorname, authorname)) authorname = grauthorname authmatch = myDB.match('SELECT * FROM authors where AuthorName="%s"' % (authorname)) if authmatch: logger.debug("ALT: Author %s found in database" % (authorname)) else: logger.debug("ALT: Author %s not found, adding to database" % (authorname)) addAuthorToDB(authorname) bookid = find_book_in_db(myDB, authorname, bookname) if bookid: return import_book(source_dir, bookid) else: logger.warn("Book %s by %s not found in database" % (bookname, authorname)) else: logger.warn('Book %s has no metadata, unable to import' % new_book) else: logger.warn("No book file found in %s" % source_dir) return False except Exception as e: logger.error('Unhandled exception in processAlternate: %s' % traceback.format_exc())
def processDir(reset=False): try: threadname = threading.currentThread().name if "Thread-" in threadname: threading.currentThread().name = "POSTPROCESS" processpath = lazylibrarian.DIRECTORY('Download') logger.debug('Checking [%s] for files to post process' % processpath) try: downloads = os.listdir(processpath) except OSError as why: logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror)) return myDB = database.DBConnection() snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"') if len(snatched) == 0: logger.info('Nothing marked as snatched.') scheduleJob(action='Stop', target='processDir') return if len(downloads) == 0: logger.info('No downloads are found. Nothing to process yet.') return logger.info("Checking %s download%s for %s snatched file%s" % (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched)))) ppcount = 0 for book in snatched: # if torrent, see if we can get current status from the downloader as the name # may have been changed once magnet resolved, or download started or completed # depending on torrent downloader. Usenet doesn't change the name. We like usenet. torrentname = '' try: logger.debug("%s was sent to %s" % (book['NZBtitle'], book['Source'])) if book['Source'] == 'TRANSMISSION': torrentname = transmission.getTorrentFolder(book['DownloadID']) elif book['Source'] == 'UTORRENT': torrentname = utorrent.nameTorrent(book['DownloadID']) elif book['Source'] == 'RTORRENT': torrentname = rtorrent.getName(book['DownloadID']) elif book['Source'] == 'QBITTORRENT': torrentname = qbittorrent.getName(book['DownloadID']) elif book['Source'] == 'SYNOLOGY_TOR': torrentname = synology.getName(book['DownloadID']) elif book['Source'] == 'DELUGEWEBUI': torrentname = deluge.getTorrentFolder(book['DownloadID']) elif book['Source'] == 'DELUGERPC': client = DelugeRPCClient(lazylibrarian.DELUGE_HOST, int(lazylibrarian.DELUGE_PORT), lazylibrarian.DELUGE_USER, lazylibrarian.DELUGE_PASS) try: client.connect() result = client.call('core.get_torrent_status', book['DownloadID'], {}) # for item in result: # logger.debug ('Deluge RPC result %s: %s' % (item, result[item])) if 'name' in result: torrentname = unaccented_str(result['name']) except Exception as e: logger.debug('DelugeRPC failed %s' % str(e)) except Exception as e: logger.debug("Failed to get updated torrent name from %s for %s: %s" % (book['Source'], book['DownloadID'], str(e))) matchtitle = unaccented_str(book['NZBtitle']) if torrentname and torrentname != matchtitle: logger.debug("%s Changing [%s] to [%s]" % (book['Source'], matchtitle, torrentname)) myDB.action('UPDATE wanted SET NZBtitle = "%s" WHERE NZBurl = "%s"' % (torrentname, book['NZBurl'])) matchtitle = torrentname # here we could also check percentage downloaded or eta or status? # If downloader says it hasn't completed, no need to look for it. matches = [] logger.info('Looking for %s in %s' % (matchtitle, processpath)) for fname in downloads: # skip if failed before or incomplete torrents, or incomplete btsync extn = os.path.splitext(fname)[1] if extn not in ['.fail', '.part', '.bts', '.!ut']: # This is to get round differences in torrent filenames. # Usenet is ok, but Torrents aren't always returned with the name we searched for # We ask the torrent downloader for the torrent name, but don't always get an answer # so we try to do a "best match" on the name, there might be a better way... if isinstance(fname, str): matchname = fname.decode(lazylibrarian.SYS_ENCODING) else: matchname = fname if ' LL.(' in matchname: matchname = matchname.split(' LL.(')[0] match = 0 if matchtitle: if ' LL.(' in matchtitle: matchtitle = matchtitle.split(' LL.(')[0] match = fuzz.token_set_ratio(matchtitle, matchname) if match and match >= lazylibrarian.DLOAD_RATIO: fname = matchname if os.path.isfile(os.path.join(processpath, fname)): # handle single file downloads here. Book/mag file in download root. # move the file into it's own subdirectory so we don't move/delete things that aren't ours logger.debug('filename [%s] is a file' % os.path.join(processpath, fname)) if is_valid_booktype(fname, booktype="book") \ or is_valid_booktype(fname, booktype="mag"): logger.debug('filename [%s] is a valid book/mag' % os.path.join(processpath, fname)) if bts_file(processpath): logger.debug("Skipping %s, found a .bts file" % processpath) else: fname = os.path.splitext(fname)[0] dirname = os.path.join(processpath, fname) if not os.path.exists(dirname): try: os.makedirs(dirname) setperm(dirname) except OSError as why: logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror)) if os.path.exists(dirname): # move the book and any related files too # ie other book formats, or opf, jpg with same title # can't move metadata.opf or cover.jpg or similar # as can't be sure they are ours # not sure if we need a new listdir here, or whether we can use the old one list_dir = os.listdir(processpath) for ourfile in list_dir: if ourfile.startswith(fname): if is_valid_booktype(ourfile, booktype="book") \ or is_valid_booktype(ourfile, booktype="mag") \ or os.path.splitext(ourfile)[1].lower() in ['.opf', '.jpg']: try: if lazylibrarian.DESTINATION_COPY: shutil.copyfile(os.path.join(processpath, ourfile), os.path.join(dirname, ourfile)) setperm(os.path.join(dirname, ourfile)) else: shutil.move(os.path.join(processpath, ourfile), os.path.join(dirname, ourfile)) setperm(os.path.join(dirname, ourfile)) except Exception as why: logger.debug("Failed to copy/move file %s to %s, %s" % (ourfile, dirname, str(why))) pp_path = os.path.join(processpath, fname) if os.path.isdir(pp_path): logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, matchtitle)) if not os.listdir(pp_path): logger.debug("Skipping %s, folder is empty" % pp_path) elif bts_file(pp_path): logger.debug("Skipping %s, found a .bts file" % pp_path) else: matches.append([match, pp_path, book]) else: pp_path = os.path.join(processpath, fname) matches.append([match, pp_path, book]) # so we can report closest match else: logger.debug('Skipping %s' % fname) match = 0 if matches: highest = max(matches, key=lambda x: x[0]) match = highest[0] pp_path = highest[1] book = highest[2] if match and match >= lazylibrarian.DLOAD_RATIO: logger.debug(u'Found match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle'])) data = myDB.match('SELECT * from books WHERE BookID="%s"' % book['BookID']) if data: # it's a book logger.debug(u'Processing book %s' % book['BookID']) authorname = data['AuthorName'] bookname = data['BookName'] if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER: logger.warn('Please check your EBOOK_DEST_FOLDER setting') lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\') # Default destination path, should be allowed change per config file. dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace( '$Title', bookname) global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace( '$Title', bookname) global_name = unaccented(global_name) # dest_path = authorname+'/'+bookname # global_name = bookname + ' - ' + authorname # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} dest_path = unaccented_str(replace_all(dest_path, dic)) dest_path = os.path.join(processpath, dest_path).encode(lazylibrarian.SYS_ENCODING) else: data = myDB.match('SELECT * from magazines WHERE Title="%s"' % book['BookID']) if data: # it's a magazine logger.debug(u'Processing magazine %s' % book['BookID']) # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple # files are downloading, there will be an error in post-processing, trying to go to the # same directory. mostrecentissue = data['IssueDate'] # keep for processing issues arriving out of order # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} mag_name = unaccented_str(replace_all(book['BookID'], dic)) # book auxinfo is a cleaned date, eg 2015-01-01 dest_path = lazylibrarian.MAG_DEST_FOLDER.replace( '$IssueDate', book['AuxInfo']).replace('$Title', mag_name) if lazylibrarian.MAG_RELATIVE: if dest_path[0] not in '._': dest_path = '_' + dest_path dest_path = os.path.join(processpath, dest_path).encode( lazylibrarian.SYS_ENCODING) else: dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING) authorname = None bookname = None global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace( '$Title', mag_name) global_name = unaccented(global_name) else: # not recognised logger.debug('Nothing in database matching "%s"' % book['BookID']) continue else: logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle'])) if match: logger.debug(u'Closest match (%s%%): %s' % (match, pp_path)) #for match in matches: # logger.info('Match: %s%% %s' % (match[0], match[1])) continue processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name) if processBook: logger.debug("Processing %s, %s" % (global_name, book['NZBurl'])) # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue controlValueDict = {"BookID": book['BookID'], "NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = {"Status": "Processed", "NZBDate": now()} # say when we processed it myDB.upsert("wanted", newValueDict, controlValueDict) if bookname: # it's a book, if None it's a magazine if len(lazylibrarian.IMP_CALIBREDB): logger.debug('Calibre should have created the extras for us') else: processExtras(myDB, dest_path, global_name, data) else: # update mags controlValueDict = {"Title": book['BookID']} if mostrecentissue: if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit(): older = int(mostrecentissue) > int(book['AuxInfo']) # issuenumber else: older = mostrecentissue > book['AuxInfo'] # YYYY-MM-DD else: older = False if older: # check this in case processing issues arriving out of order newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"} else: newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(), "IssueStatus": "Open"} myDB.upsert("magazines", newValueDict, controlValueDict) # dest_path is where we put the magazine after processing, but we don't have the full filename # so look for any "book" in that directory dest_file = book_file(dest_path, booktype='mag') controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']} newValueDict = {"IssueAcquired": today(), "IssueFile": dest_file, "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo'])) } myDB.upsert("issues", newValueDict, controlValueDict) # create a thumbnail cover for the new issue create_cover(dest_file) # calibre or ll copied/moved the files we want, now delete source files to_delete = True if book['NZBmode'] in ['torrent', 'magnet']: # Only delete torrents if we don't want to keep seeding if lazylibrarian.KEEP_SEEDING: logger.warn('%s is seeding %s %s' % (book['Source'], book['NZBmode'], book['NZBtitle'])) to_delete = False else: # ask downloader to delete the torrent, but not the files # we may delete them later, depending on other settings if book['DownloadID'] != "unknown": logger.debug('Removing %s from %s' % (book['NZBtitle'], book['Source'].lower())) delete_task(book['Source'], book['DownloadID'], False) else: logger.warn("Unable to remove %s from %s, no DownloadID" % (book['NZBtitle'], book['Source'].lower())) if to_delete: # only delete the files if not in download root dir and if DESTINATION_COPY not set if not lazylibrarian.DESTINATION_COPY and (pp_path != processpath): if os.path.isdir(pp_path): # calibre might have already deleted it? try: shutil.rmtree(pp_path) except Exception as why: logger.debug("Unable to remove %s, %s" % (pp_path, str(why))) logger.info('Successfully processed: %s' % global_name) ppcount = ppcount + 1 notify_download("%s from %s at %s" % (global_name, book['NZBprov'], now())) else: logger.error('Postprocessing for %s has failed.' % global_name) logger.error('Warning - Residual files remain in %s.fail' % pp_path) controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = {"Status": "Failed", "NZBDate": now()} myDB.upsert("wanted", newValueDict, controlValueDict) # if it's a book, reset status so we try for a different version # if it's a magazine, user can select a different one from pastissues table if bookname: myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID']) # at this point, as it failed we should move it or it will get postprocessed # again (and fail again) try: os.rename(pp_path, pp_path + '.fail') except Exception as e: logger.debug("Unable to rename %s, %s" % (pp_path, str(e))) downloads = os.listdir(processpath) # check in case we processed/deleted some above for directory in downloads: dname, extn = os.path.splitext(directory) if "LL.(" in dname and extn not in ['.fail', '.part', '.bts', '.!ut']: bookID = str(directory).split("LL.(")[1].split(")")[0] logger.debug("Book with id: " + str(bookID) + " found in download directory") pp_path = os.path.join(processpath, directory) if os.path.isfile(pp_path): pp_path = os.path.join(processpath) if (os.path.isdir(pp_path)): if import_book(pp_path, bookID): ppcount = ppcount + 1 if ppcount == 0: logger.info('No snatched books/mags have been found') else: logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount))) # Now check for any that are still marked snatched... if lazylibrarian.TASK_AGE: snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"') if len(snatched) > 0: for snatch in snatched: # FUTURE: we could check percentage downloaded or eta? # if percentage is increasing, it's just slow try: when_snatched = time.strptime(snatch['NZBdate'], '%Y-%m-%d %H:%M:%S') when_snatched = time.mktime(when_snatched) diff = time.time() - when_snatched # time difference in seconds except: diff = 0 hours = int(diff / 3600) if hours >= lazylibrarian.TASK_AGE: logger.warn('%s was sent to %s %s hours ago, deleting failed task' % (snatch['NZBtitle'], snatch['Source'].lower(), hours)) # change status to "Failed", and ask downloader to delete task and files if snatch['BookID'] != 'unknown': myDB.action('UPDATE wanted SET Status="Failed" WHERE BookID="%s"' % snatch['BookID']) myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % snatch['BookID']) delete_task(snatch['Source'], snatch['DownloadID'], True) if reset: scheduleJob(action='Restart', target='processDir') except Exception as e: logger.error('Unhandled exception in processDir: %s' % traceback.format_exc())
def processDir(reset=False): threadname = threading.currentThread().name if "Thread-" in threadname: threading.currentThread().name = "POSTPROCESS" if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir(lazylibrarian.DOWNLOAD_DIR): processpath = os.getcwd() else: processpath = lazylibrarian.DOWNLOAD_DIR logger.debug(' Checking [%s] for files to post process' % processpath) try: downloads = os.listdir(processpath) except OSError as why: logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror)) return myDB = database.DBConnection() snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"') if len(snatched) == 0: logger.info('Nothing marked as snatched.') scheduleJob(action='Stop', target='processDir') return if len(downloads) == 0: logger.info('No downloads are found. Nothing to process.') return logger.info("Checking %s download%s for %s snatched file%s" % (len(downloads), plural(len(downloads)), len(snatched), plural(len(snatched)))) ppcount = 0 for book in snatched: matches = [] for fname in downloads: if not fname.endswith('.fail'): # has this failed before? # this is to get round differences in torrent filenames. # Torrents aren't always returned with the name we searched for # there might be a better way... if isinstance(fname, str): matchname = fname.decode(lazylibrarian.SYS_ENCODING) else: matchname = fname if ' LL.(' in matchname: matchname = matchname.split(' LL.(')[0] matchtitle = book['NZBtitle'] match = 0 if matchtitle: if ' LL.(' in matchtitle: matchtitle = matchtitle.split(' LL.(')[0] match = fuzz.token_set_ratio(matchtitle, matchname) if match >= lazylibrarian.DLOAD_RATIO: fname = matchname if os.path.isfile(os.path.join(processpath, fname)): # handle single file downloads here... if is_valid_booktype(fname, booktype="book") \ or is_valid_booktype(fname, booktype="mag"): dirname = os.path.join(processpath, os.path.splitext(fname)[0]) if not os.path.exists(dirname): try: os.makedirs(dirname) except OSError as why: logger.debug('Failed to create directory %s, %s' % (dirname, why.strerror)) if os.path.exists(dirname): try: shutil.move(os.path.join(processpath, fname), os.path.join(dirname, fname)) fname = os.path.splitext(fname)[0] except Exception as why: logger.debug("Failed to move file %s to %s, %s" % (fname, dirname, str(why))) if os.path.isdir(os.path.join(processpath, fname)): pp_path = os.path.join(processpath, fname) logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, book['NZBtitle'])) matches.append([match, pp_path, book]) else: logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle)) else: logger.debug('Skipping %s' % fname) if matches: highest = max(matches, key=lambda x: x[0]) match = highest[0] pp_path = highest[1] book = highest[2] logger.info(u'Best match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle'])) data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID']) if data: authorname = data[0]['AuthorName'] bookname = data[0]['BookName'] if 'windows' in platform.system().lower() and '/' in lazylibrarian.EBOOK_DEST_FOLDER: logger.warn('Please check your EBOOK_DEST_FOLDER setting') lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace('/', '\\') # Default destination path, should be allowed change per config file. dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace('$Author', authorname).replace( '$Title', bookname) global_name = lazylibrarian.EBOOK_DEST_FILE.replace('$Author', authorname).replace( '$Title', bookname) global_name = unaccented(global_name) # dest_path = authorname+'/'+bookname # global_name = bookname + ' - ' + authorname # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} dest_path = unaccented_str(replace_all(dest_path, dic)) dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode( lazylibrarian.SYS_ENCODING) else: data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID']) if data: # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple # files are downloading, there will be an error in post-processing, trying to go to the # same directory. mostrecentissue = data[0]['IssueDate'] # keep for processing issues arriving out of order # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = {'<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': ''} mag_name = unaccented_str(replace_all(book['BookID'], dic)) # book auxinfo is a cleaned date, eg 2015-01-01 dest_path = lazylibrarian.MAG_DEST_FOLDER.replace( '$IssueDate', book['AuxInfo']).replace('$Title', mag_name) # dest_path = '_Magazines/'+title+'/'+book['AuxInfo'] if lazylibrarian.MAG_RELATIVE: if dest_path[0] not in '._': dest_path = '_' + dest_path dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode( lazylibrarian.SYS_ENCODING) else: dest_path = dest_path.encode(lazylibrarian.SYS_ENCODING) authorname = None bookname = None global_name = lazylibrarian.MAG_DEST_FILE.replace('$IssueDate', book['AuxInfo']).replace( '$Title', mag_name) global_name = unaccented(global_name) # global_name = book['AuxInfo']+' - '+title else: logger.debug("Snatched magazine %s is not in download directory" % (book['BookID'])) continue else: logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle'])) continue processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name) if processBook: logger.debug("Processing %s, %s" % (global_name, book['NZBurl'])) # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = {"Status": "Processed", "NZBDate": now()} # say when we processed it myDB.upsert("wanted", newValueDict, controlValueDict) if bookname is not None: # it's a book, if None it's a magazine if len(lazylibrarian.IMP_CALIBREDB): logger.debug('Calibre should have created the extras for us') else: processExtras(myDB, dest_path, global_name, data) else: # update mags controlValueDict = {"Title": book['BookID']} if mostrecentissue: if mostrecentissue.isdigit() and str(book['AuxInfo']).isdigit(): older = int(mostrecentissue) > int(book['AuxInfo']) # issuenumber else: older = mostrecentissue > book['AuxInfo'] # YYYY-MM-DD else: older = False if older: # check this in case processing issues arriving out of order newValueDict = {"LastAcquired": today(), "IssueStatus": "Open"} else: newValueDict = {"IssueDate": book['AuxInfo'], "LastAcquired": today(), "IssueStatus": "Open"} myDB.upsert("magazines", newValueDict, controlValueDict) # dest_path is where we put the magazine after processing, but we don't have the full filename # so look for any "book" in that directory dest_file = book_file(dest_path, booktype='mag') controlValueDict = {"Title": book['BookID'], "IssueDate": book['AuxInfo']} newValueDict = {"IssueAcquired": today(), "IssueFile": dest_file, "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo'])) } myDB.upsert("issues", newValueDict, controlValueDict) # create a thumbnail cover for the new issue create_cover(dest_file) logger.info('Successfully processed: %s' % global_name) ppcount = ppcount + 1 notify_download("%s at %s" % (global_name, now())) else: logger.error('Postprocessing for %s has failed.' % global_name) logger.error('Warning - Residual files remain in %s.fail' % pp_path) controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = {"Status": "Failed", "NZBDate": now()} myDB.upsert("wanted", newValueDict, controlValueDict) # if it's a book, reset status so we try for a different version # if it's a magazine, user can select a different one from pastissues table if bookname is not None: myDB.action('UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID']) # at this point, as it failed we should move it or it will get postprocessed # again (and fail again) try: os.rename(pp_path, pp_path + '.fail') except: logger.debug("Unable to rename %s" % pp_path) downloads = os.listdir(processpath) # check in case we processed/deleted some above for directory in downloads: if "LL.(" in directory and not directory.endswith('.fail'): bookID = str(directory).split("LL.(")[1].split(")")[0] logger.debug("Book with id: " + str(bookID) + " is in downloads") pp_path = os.path.join(processpath, directory) if os.path.isfile(pp_path): pp_path = os.path.join(processpath) if (os.path.isdir(pp_path)): logger.debug('Found LL folder %s.' % pp_path) if import_book(pp_path, bookID): ppcount = ppcount + 1 if ppcount == 0: logger.info('No snatched books/mags have been found') else: logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount))) if reset: scheduleJob(action='Restart', target='processDir')
def processAlternate(source_dir=None): # import a book from an alternate directory if not source_dir or os.path.isdir(source_dir) is False: logger.warn('Alternate directory not found') return if source_dir == lazylibrarian.DESTINATION_DIR: logger.warn('Alternate directory must not be the same as destination') return logger.debug('Processing alternate directory %s' % source_dir) # first, recursively process any books in subdirectories for fname in os.listdir(source_dir): subdir = os.path.join(source_dir, fname) if os.path.isdir(subdir): processAlternate(subdir) # only import one book from each alternate (sub)directory, this is because # the importer may delete the directory after importing a book, # depending on lazylibrarian.DESTINATION_COPY setting # also if multiple books in a folder and only a "metadata.opf" # which book is it for? new_book = book_file(source_dir, booktype='book') if new_book: metadata = {} # see if there is a metadata file in this folder with the info we need # try book_name.opf first, or fall back to any filename.opf metafile = os.path.splitext(new_book)[0] + '.opf' if not os.path.isfile(metafile): metafile = opf_file(source_dir) if os.path.isfile(metafile): try: metadata = get_book_info(metafile) except: logger.debug('Failed to read metadata from %s' % metafile) else: logger.debug('No metadata file found for %s' % new_book) if not 'title' in metadata and 'creator' in metadata: # try to get metadata from the book file try: metadata = get_book_info(new_book) except: logger.debug('No metadata found in %s' % new_book) if 'title' in metadata and 'creator' in metadata: authorname = metadata['creator'] bookname = metadata['title'] myDB = database.DBConnection() authmatch = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone() if not authmatch: # try goodreads preferred authorname logger.debug("Checking GoodReads for [%s]" % authorname) GR = GoodReads(authorname) try: author_gr = GR.find_author_id() except: logger.debug("No author id for [%s]" % authorname) if author_gr: grauthorname = author_gr['authorname'] logger.debug("GoodReads reports [%s] for [%s]" % (grauthorname, authorname)) authorname = grauthorname authmatch = myDB.action( 'SELECT * FROM authors where AuthorName="%s"' % (authorname)).fetchone() if authmatch: logger.debug("ALT: Author %s found in database" % (authorname)) else: logger.debug("ALT: Author %s not found, adding to database" % (authorname)) addAuthorToDB(authorname) bookid = find_book_in_db(myDB, authorname, bookname) if bookid: import_book(source_dir, bookid) else: logger.warn("Book %s by %s not found in database" % (bookname, authorname)) else: logger.warn('Book %s has no metadata, unable to import' % new_book) else: logger.warn("No book file found in %s" % source_dir)
def processDir(reset=False): threadname = threading.currentThread().name if "Thread-" in threadname: threading.currentThread().name = "POSTPROCESS" if not lazylibrarian.DOWNLOAD_DIR or not os.path.isdir( lazylibrarian.DOWNLOAD_DIR): processpath = os.getcwd() else: processpath = lazylibrarian.DOWNLOAD_DIR logger.debug(' Checking [%s] for files to post process' % processpath) try: downloads = os.listdir(processpath) except OSError as why: logger.error('Could not access [%s] directory [%s]' % (processpath, why.strerror)) return myDB = database.DBConnection() snatched = myDB.select('SELECT * from wanted WHERE Status="Snatched"') if len(snatched) == 0: logger.info('Nothing marked as snatched.') scheduleJob(action='Stop', target='processDir') return if len(downloads) == 0: logger.info('No downloads are found. Nothing to process.') return logger.info("Checking %s download%s for %s snatched file%s" % (len(downloads), plural( len(downloads)), len(snatched), plural(len(snatched)))) ppcount = 0 for book in snatched: matches = [] for fname in downloads: if not fname.endswith('.fail'): # has this failed before? # this is to get round differences in torrent filenames. # Torrents aren't always returned with the name we searched for # there might be a better way... if isinstance(fname, str): matchname = fname.decode(lazylibrarian.SYS_ENCODING) else: matchname = fname if ' LL.(' in matchname: matchname = matchname.split(' LL.(')[0] matchtitle = book['NZBtitle'] match = 0 if matchtitle: if ' LL.(' in matchtitle: matchtitle = matchtitle.split(' LL.(')[0] match = fuzz.token_set_ratio(matchtitle, matchname) if match >= lazylibrarian.DLOAD_RATIO: fname = matchname if os.path.isfile(os.path.join(processpath, fname)): # handle single file downloads here... if is_valid_booktype(fname, booktype="book") \ or is_valid_booktype(fname, booktype="mag"): dirname = os.path.join(processpath, os.path.splitext(fname)[0]) if not os.path.exists(dirname): try: os.makedirs(dirname) except OSError as why: logger.debug( 'Failed to create directory %s, %s' % (dirname, why.strerror)) if os.path.exists(dirname): try: shutil.move( os.path.join(processpath, fname), os.path.join(dirname, fname)) fname = os.path.splitext(fname)[0] except Exception as why: logger.debug( "Failed to move file %s to %s, %s" % (fname, dirname, str(why))) if os.path.isdir(os.path.join(processpath, fname)): pp_path = os.path.join(processpath, fname) logger.debug('Found folder (%s%%) %s for %s' % (match, pp_path, book['NZBtitle'])) matches.append([match, pp_path, book]) else: logger.debug('No match (%s%%) %s for %s' % (match, matchname, matchtitle)) else: logger.debug('Skipping %s' % fname) if matches: highest = max(matches, key=lambda x: x[0]) match = highest[0] pp_path = highest[1] book = highest[2] logger.info(u'Best match (%s%%): %s for %s' % (match, pp_path, book['NZBtitle'])) data = myDB.select('SELECT * from books WHERE BookID="%s"' % book['BookID']) if data: authorname = data[0]['AuthorName'] bookname = data[0]['BookName'] if 'windows' in platform.system().lower( ) and '/' in lazylibrarian.EBOOK_DEST_FOLDER: logger.warn('Please check your EBOOK_DEST_FOLDER setting') lazylibrarian.EBOOK_DEST_FOLDER = lazylibrarian.EBOOK_DEST_FOLDER.replace( '/', '\\') # Default destination path, should be allowed change per config file. dest_path = lazylibrarian.EBOOK_DEST_FOLDER.replace( '$Author', authorname).replace('$Title', bookname) global_name = lazylibrarian.EBOOK_DEST_FILE.replace( '$Author', authorname).replace('$Title', bookname) global_name = unaccented(global_name) # dest_path = authorname+'/'+bookname # global_name = bookname + ' - ' + authorname # Remove characters we don't want in the filename BEFORE adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = { '<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': '' } dest_path = unaccented_str(replace_all(dest_path, dic)) dest_path = os.path.join(lazylibrarian.DESTINATION_DIR, dest_path).encode( lazylibrarian.SYS_ENCODING) else: data = myDB.select('SELECT * from magazines WHERE Title="%s"' % book['BookID']) if data: # AuxInfo was added for magazine release date, normally housed in 'magazines' but if multiple # files are downloading, there will be an error in post-processing, trying to go to the # same directory. mostrecentissue = data[0][ 'IssueDate'] # keep for processing issues arriving out of order # Remove characters we don't want in the filename before (maybe) adding to DESTINATION_DIR # as windows drive identifiers have colon, eg c: but no colons allowed elsewhere? dic = { '<': '', '>': '', '...': '', ' & ': ' ', ' = ': ' ', '?': '', '$': 's', ' + ': ' ', '"': '', ',': '', '*': '', ':': '', ';': '', '\'': '' } mag_name = unaccented_str(replace_all(book['BookID'], dic)) # book auxinfo is a cleaned date, eg 2015-01-01 dest_path = lazylibrarian.MAG_DEST_FOLDER.replace( '$IssueDate', book['AuxInfo']).replace('$Title', mag_name) # dest_path = '_Magazines/'+title+'/'+book['AuxInfo'] if lazylibrarian.MAG_RELATIVE: if dest_path[0] not in '._': dest_path = '_' + dest_path dest_path = os.path.join( lazylibrarian.DESTINATION_DIR, dest_path).encode(lazylibrarian.SYS_ENCODING) else: dest_path = dest_path.encode( lazylibrarian.SYS_ENCODING) authorname = None bookname = None global_name = lazylibrarian.MAG_DEST_FILE.replace( '$IssueDate', book['AuxInfo']).replace('$Title', mag_name) global_name = unaccented(global_name) # global_name = book['AuxInfo']+' - '+title else: logger.debug( "Snatched magazine %s is not in download directory" % (book['BookID'])) continue else: logger.debug("Snatched %s %s is not in download directory" % (book['NZBmode'], book['NZBtitle'])) continue processBook = processDestination(pp_path, dest_path, authorname, bookname, global_name) if processBook: logger.debug("Processing %s, %s" % (global_name, book['NZBurl'])) # update nzbs, only update the snatched ones in case multiple matches for same book / magazine issue controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = { "Status": "Processed", "NZBDate": now() } # say when we processed it myDB.upsert("wanted", newValueDict, controlValueDict) if bookname is not None: # it's a book, if None it's a magazine if len(lazylibrarian.IMP_CALIBREDB): logger.debug( 'Calibre should have created the extras for us') else: processExtras(myDB, dest_path, global_name, data) else: # update mags controlValueDict = {"Title": book['BookID']} if mostrecentissue: if mostrecentissue.isdigit() and str( book['AuxInfo']).isdigit(): older = int(mostrecentissue) > int( book['AuxInfo']) # issuenumber else: older = mostrecentissue > book['AuxInfo'] # YYYY-MM-DD else: older = False if older: # check this in case processing issues arriving out of order newValueDict = { "LastAcquired": today(), "IssueStatus": "Open" } else: newValueDict = { "IssueDate": book['AuxInfo'], "LastAcquired": today(), "IssueStatus": "Open" } myDB.upsert("magazines", newValueDict, controlValueDict) # dest_path is where we put the magazine after processing, but we don't have the full filename # so look for any "book" in that directory dest_file = book_file(dest_path, booktype='mag') controlValueDict = { "Title": book['BookID'], "IssueDate": book['AuxInfo'] } newValueDict = { "IssueAcquired": today(), "IssueFile": dest_file, "IssueID": create_id("%s %s" % (book['BookID'], book['AuxInfo'])) } myDB.upsert("issues", newValueDict, controlValueDict) # create a thumbnail cover for the new issue create_cover(dest_file) logger.info('Successfully processed: %s' % global_name) ppcount = ppcount + 1 notify_download("%s at %s" % (global_name, now())) else: logger.error('Postprocessing for %s has failed.' % global_name) logger.error('Warning - Residual files remain in %s.fail' % pp_path) controlValueDict = {"NZBurl": book['NZBurl'], "Status": "Snatched"} newValueDict = {"Status": "Failed", "NZBDate": now()} myDB.upsert("wanted", newValueDict, controlValueDict) # if it's a book, reset status so we try for a different version # if it's a magazine, user can select a different one from pastissues table if bookname is not None: myDB.action( 'UPDATE books SET status = "Wanted" WHERE BookID="%s"' % book['BookID']) # at this point, as it failed we should move it or it will get postprocessed # again (and fail again) try: os.rename(pp_path, pp_path + '.fail') except: logger.debug("Unable to rename %s" % pp_path) downloads = os.listdir( processpath) # check in case we processed/deleted some above for directory in downloads: if "LL.(" in directory and not directory.endswith('.fail'): bookID = str(directory).split("LL.(")[1].split(")")[0] logger.debug("Book with id: " + str(bookID) + " is in downloads") pp_path = os.path.join(processpath, directory) if os.path.isfile(pp_path): pp_path = os.path.join(processpath) if (os.path.isdir(pp_path)): logger.debug('Found LL folder %s.' % pp_path) if import_book(pp_path, bookID): ppcount = ppcount + 1 if ppcount == 0: logger.info('No snatched books/mags have been found') else: logger.info('%s book%s/mag%s processed.' % (ppcount, plural(ppcount), plural(ppcount))) if reset: scheduleJob(action='Restart', target='processDir')