def cleanBySourceKey(self, sourceKey, includePhash=True, pathPositiveFilter=None):
    """Re-run duplicate processing on every downloaded item (dlstate=2) for one source site.

    Args:
        sourceKey: value matched against ``mangaitems.sourcesite``.
        includePhash: accepted for interface compatibility; not used in this
            method — TODO(review): confirm whether it should be forwarded.
        pathPositiveFilter: accepted for interface compatibility; not used in
            this method — TODO(review): confirm whether it should be forwarded.

    Side effects: re-tags each processed archive (appending 'dup-checked')
    via ``self.addTag``. Rows already tagged 'dup-checked' or
    'was-duplicate', and rows whose file is missing, are skipped.
    """
    self.log.info("Getting fetched items from database for source: %s", sourceKey)
    with self.context_cursor() as cur:
        cur.execute('''SELECT dbid, filename, downloadpath, tags FROM mangaitems WHERE sourcesite=%s and dlstate=2;''', (sourceKey, ))
        ret = cur.fetchall()
    self.log.info("Found %s items from source %s.", len(ret), sourceKey)
    for dbid, filename, downloadpath, tags in ret:
        # BUGFIX: validate path components *before* os.path.join() — the
        # original joined first, which raises TypeError when either column
        # is NULL. Also skip the row explicitly instead of falling through.
        if not filename or not downloadpath:
            self.log.error("Invalid path info: '%s', '%s'", downloadpath, filename)
            continue

        taglist = tags.split() if tags else []
        fpath = os.path.join(downloadpath, filename)

        if 'dup-checked' in taglist:
            self.log.info("File %s was dup-checked in the current session. Skipping.", fpath)
            continue
        if 'was-duplicate' in taglist:
            continue
        if not os.path.exists(fpath):
            continue

        proc = processDownload.MangaProcessor()
        tags = proc.processDownload(seriesName=None, archivePath=fpath, doUpload=False)
        tags += " dup-checked"
        self.log.info("Adding tags: '%s'", tags)
        self.addTag(fpath, tags)
def reprocessFailedH(self):
    """Re-run processing for hentai items previously tagged 'unprocessable'.

    NOTE(review): the locally cleaned tag list (with 'unprocessable' /
    'corrupt' stripped) is only logged — it is never written back. The tag
    string actually stored via ``self.addTag`` is whatever the processor
    returns. Confirm this is intentional.
    """
    with self.context_cursor() as cur:
        cur.execute('''SELECT dbid, filename, downloadpath, tags FROM hentaiitems WHERE tags LIKE %s;''', ('%unprocessable%', ))
        ret = cur.fetchall()
    # Failure markers to strip; hoisted out of the loop (invariant).
    badtags = {'unprocessable', 'corrupt'}
    for dbid, fname, dpath, tags in ret:
        basePath = os.path.join(dpath, fname)
        # Strip the failure markers (informational only — see NOTE in the docstring).
        tags = [tag for tag in tags.split(" ") if tag not in badtags]
        # Use the instance logger rather than bare print(), for consistency
        # with the rest of this class.
        self.log.info("Exists: %s, path: %s", os.path.exists(basePath), basePath)
        self.log.info("Remaining tags: %s", tags)
        proc = processDownload.MangaProcessor()
        tags = proc.processDownload(seriesName=None, archivePath=basePath, pathPositiveFilter=None)
        self.addTag(basePath, tags)
def __init__(self):
    """Set up the processors, the logger, and the MD5 blacklist of known-bad images.

    Every file in ``settings.badImageDir`` is hashed; the hex digests are
    collected in ``self.badHashes`` for later duplicate/garbage detection.
    """
    self.proc = [
        processDownload.MangaProcessor(),
        processDownload.HentaiProcessor(),
    ]
    self.log = logging.getLogger(self.loggerPath)
    self.badHashes = []
    for imageName in os.listdir(settings.badImageDir):
        with open(os.path.join(settings.badImageDir, imageName), "rb") as fp:
            digest = hashlib.md5(fp.read()).hexdigest()
        self.badHashes.append(digest)
        self.log.info("Bad Image = '%s', Hash = '%s'", imageName, digest)
def cleanSingleDir(self, dirPath, delDir, includePhash=True, pathFilter=None):
    """Recursively scan a directory tree, reprocessing every file in it.

    Args:
        dirPath: directory to scan; subdirectories are recursed into first.
        delDir: forwarded to recursive calls; not otherwise used here —
            TODO(review): confirm intended use.
        includePhash: forwarded to recursive calls; not otherwise used here.
        pathFilter: forwarded to ``MangaProcessor.processDownload``.
            Defaults to [''] (match everything), built per call.

    Files are processed smallest-first (sorted by size). KeyboardInterrupt
    aborts the scan; the try/except leaves room for a broader handler
    without restructuring the loop.
    """
    # BUGFIX: the original declared pathFilter=[''] — a mutable default
    # argument shared across all calls. Same effective default, per-call.
    if pathFilter is None:
        pathFilter = ['']

    self.log.info("Processing subdirectory '%s'", dirPath)
    if not dirPath.endswith("/"):
        dirPath = dirPath + '/'
    items = os.listdir(dirPath)
    items = [os.path.join(dirPath, item) for item in items]

    dirs = [i for i in items if os.path.isdir(i)]
    self.log.info("Recursing into %s subdirectories!", len(dirs))
    for subDir in dirs:
        self.cleanSingleDir(subDir, delDir, includePhash, pathFilter)

    # Sort by (size, path) so smaller archives are handled first.
    parsedItems = [(os.path.getsize(i), i) for i in items if os.path.isfile(i)]
    parsedItems.sort()

    for dummy_num, basePath in parsedItems:
        try:
            self.log.info("Scanning '%s'", basePath)
            proc = processDownload.MangaProcessor()
            tags = proc.processDownload(seriesName=None, archivePath=basePath, pathFilter=pathFilter)
            self.addTag(basePath, tags)
        except KeyboardInterrupt:
            raise
def cleanBySourceKey(self, sourceKey, delDir, includePhash=True, pathFilter=None):
    """Reprocess every on-disk item recorded for one source site.

    Args:
        sourceKey: value matched against ``mangaitems.sourcesite``.
        delDir: accepted for interface compatibility; not used here —
            TODO(review): confirm intended use.
        includePhash: accepted for interface compatibility; not used here.
        pathFilter: forwarded to ``MangaProcessor.processDownload``.
            Defaults to [''] (match everything), built per call.

    Rows with a missing filename or path, or whose file no longer exists on
    disk, are skipped. KeyboardInterrupt aborts the scan.
    """
    # BUGFIX: the original declared pathFilter=[''] — a mutable default
    # argument shared across all calls. Same effective default, per-call.
    if pathFilter is None:
        pathFilter = ['']

    with self.conn.cursor() as cur:
        cur.execute(
            '''SELECT dbid, filename, downloadpath FROM mangaitems WHERE sourcesite=%s;''',
            (sourceKey, ))
        ret = cur.fetchall()

    # Keep only rows whose file actually exists on disk.
    parsedItems = []
    for dbId, fName, fPath in ret:
        if not fName or not fPath:
            continue
        fqpath = os.path.join(fPath, fName)
        if not os.path.exists(fqpath):
            continue
        parsedItems.append((dbId, fqpath))

    for dummy_num, basePath in parsedItems:
        try:
            self.log.info("Scanning '%s'", basePath)
            proc = processDownload.MangaProcessor()
            tags = proc.processDownload(seriesName=None, archivePath=basePath, pathFilter=pathFilter)
            self.addTag(basePath, tags)
        except KeyboardInterrupt:
            raise