def renameSeriesToMatchMangaUpdates(scanpath):
    idLut = nt.MtNamesMapWrapper("fsName->buId")
    muLut = nt.MtNamesMapWrapper("buId->buName")
    db = DbInterface()
    print("Scanning")

    foundDirs = 0
    contents = os.listdir(scanpath)
    for dirName in contents:
        cName = nt.prepFilenameForMatching(dirName)
        mtId = idLut[cName]
        if mtId and len(mtId) > 1:
            print("Multiple mtId values for '%s' ('%s')" % (cName, dirName))
            print("    ", mtId)
            print("    Skipping item")
        elif mtId:
            mtId = mtId.pop()
            mtName = muLut[mtId].pop()
            cMtName = nt.prepFilenameForMatching(mtName)
            if cMtName != cName:
                print("Dir '%s' ('%s')" % (cName, dirName))
                print("    Should be '%s'" % (mtName, ))
                print("    URL: https://www.mangaupdates.com/series.html?id=%s" % (mtId, ))
                oldPath = os.path.join(scanpath, dirName)
                newPath = os.path.join(scanpath, nt.makeFilenameSafe(mtName))
                if not os.path.isdir(oldPath):
                    raise ValueError("Not a dir. Wat?")

                print("    old '%s'" % (oldPath, ))
                print("    new '%s'" % (newPath, ))

                newCl = nt.cleanUnicode(newPath)
                if newCl != newPath:
                    print("Unicode oddness. Skipping")
                    continue

                rating = nt.extractRatingToFloat(oldPath)
                if rating != 0:
                    print("    Need to add rating = ", rating)

                mv = query_response_bool("    rename?")
                if mv:
                    if os.path.exists(newPath):
                        print("Target dir exists! Moving files instead")
                        moveFiles(oldPath, newPath)
                        os.rmdir(oldPath)
                        nt.dirNameProxy.changeRatingPath(newPath, rating)
                    else:
                        os.rename(oldPath, newPath)
                        nt.dirNameProxy.changeRatingPath(newPath, rating)
                    foundDirs += 1

    print("Total directories that need renaming", foundDirs)

def consolidateSeriesToSingleDir():
    print("Looking for series directories that can be flattened to a single dir")
    idLut = nt.MtNamesMapWrapper("buId->fsName")
    db = DbInterface()
    for key, luDict in nt.dirNameProxy.iteritems():
        # print("Key = ", key)
        mId = db.getIdFromDirName(key)

        # Skip cases where we have no match
        if not mId:
            continue

        dups = set()
        for name in idLut[mId]:
            cName = nt.prepFilenameForMatching(name)
            # Skip if it's one of the manga names that falls apart under the
            # directory-name cleaning mechanism
            if not cName:
                continue
            if cName in nt.dirNameProxy:
                dups.add(cName)
                db.getIdFromDirName(cName)

        if len(dups) > 1:
            row = db.getRowByValue(buId=mId)
            targetName = nt.prepFilenameForMatching(row["buName"])
            dest = nt.dirNameProxy[targetName]
            if luDict["dirKey"] != targetName and dest["fqPath"]:
                print("baseName = ", row["buName"], ", id = ", mId, ", names = ", dups)
                print("    URL: https://www.mangaupdates.com/series.html?id=%s" % (mId, ))
                print("    Dir 1 ", luDict["fqPath"])
                print("    Dir 2 ", dest["fqPath"])
                dirName = os.path.split(luDict["fqPath"])[-1]
                dir2Name = os.path.split(dest["fqPath"])[-1]
                print("    1: ", dirName, ' ->', nt.getCanonicalMangaUpdatesName(dirName))
                print("    2: ", dir2Name, ' ->', nt.getCanonicalMangaUpdatesName(dir2Name))
                print("    1: ({num} items)".format(num=len(os.listdir(luDict["fqPath"]))))
                print("    2: ({num} items)".format(num=len(os.listdir(dest["fqPath"]))))
                doMove = query_response("move files ('f' dir 1 -> dir 2. 'r' dir 2 -> dir 1. 'n' do not move)?")
                if doMove == "forward":
                    moveFiles(luDict["fqPath"], dest["fqPath"])
                    os.rmdir(luDict["fqPath"])
                elif doMove == "reverse":
                    moveFiles(dest["fqPath"], luDict["fqPath"])
                    os.rmdir(dest["fqPath"])

def insertNames(self, buId, names):
    self.log.info("Updating name synonym table for %s with %s name(s).", buId, len(names))
    with self.transaction() as cur:
        # Delete the old names from the table first, so that if a name is
        # removed from the source, we'll match that.
        cur.execute("DELETE FROM {tableName} WHERE buId=%s;".format(tableName=self.nameMapTableName), (buId, ))

        alreadyAddedNames = []
        for name in names:
            fsSafeName = nt.prepFilenameForMatching(name)
            if not fsSafeName:
                fsSafeName = nt.makeFilenameSafe(name)

            # We have to block duplicate names. It's fairly common for
            # multiple names to collapse to the same string after passing
            # through `prepFilenameForMatching()`.
            if fsSafeName in alreadyAddedNames:
                continue
            alreadyAddedNames.append(fsSafeName)

            cur.execute("""INSERT INTO %s (buId, name, fsSafeName) VALUES (%%s, %%s, %%s);""" % self.nameMapTableName,
                        (buId, name, fsSafeName))
    self.log.info("Updated!")

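# A minimal, self-contained sketch of why insertNames() must deduplicate:
# several synonyms often collapse to the same key once cleaned for matching.
# _prep_for_matching() below is a hypothetical stand-in for
# nt.prepFilenameForMatching(); the real cleaning rules live in nameTools.
import re

def _prep_for_matching(name):
    # Hypothetical cleaner: lower-case, drop bracketed tags and punctuation.
    name = re.sub(r"[\[\(].*?[\]\)]", " ", name.lower())
    name = re.sub(r"[^a-z0-9]+", " ", name)
    return " ".join(name.split())

def _demo_name_collapse():
    # All three synonyms reduce to the key "some title".
    seen = set()
    for synonym in ["Some Title", "Some Title (Complete)", "SOME-TITLE"]:
        key = _prep_for_matching(synonym)
        print("%-25s -> %-12s%s" % (synonym, key, " (duplicate, skipped)" if key in seen else ""))
        seen.add(key)
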
def loadRemoteDirectory(self, fullPath, aggregate=False):
    ret = {}

    for dirName, stats in self.ftp.mlsd(fullPath):
        # Skip items that aren't directories
        if stats["type"] != "dir":
            continue

        canonName = nt.getCanonicalMangaUpdatesName(dirName)
        matchingName = nt.prepFilenameForMatching(canonName)
        fqPath = os.path.join(fullPath, dirName)

        # matchName = os.path.split(ret[matchingName])[-1]
        if matchingName in ret:
            # if aggregate:
            #     fqPath = self.aggregateDirs(fullPath, dirName, matchName)
            # else:
            if COMPLAIN_ABOUT_DUPS:
                self.log.warning("Duplicate directories for series '%s'!", canonName)
                self.log.warning("    '%s'", dirName)
                self.log.warning("    '%s'", matchingName)
            ret[matchingName] = fqPath
        else:
            ret[matchingName] = fqPath

    return ret

def regenerateNameMappings(self):
    cur = self.conn.cursor()
    cur.execute("BEGIN;")
    cur.execute("SELECT dbId, name, fsSafeName FROM munamelist;")
    rows = cur.fetchall()
    print("Processing %s items" % len(rows))
    cnt = 0
    for row in rows:
        dbId, name, fsSafeName = row
        prepped = nt.prepFilenameForMatching(name)
        if not prepped or (len(name) - len(prepped)) > 2:
            continue
        if prepped != fsSafeName:
            print("Bad match", row, prepped)
            cur.execute("UPDATE munamelist SET fsSafeName=%s WHERE dbId=%s", (prepped, dbId))
        cnt += 1
        if cnt % 1000 == 0:
            print("ON row ", cnt)
    cur.execute("COMMIT;")
    nt.dirNameProxy.stop()

def getUploadDirectory(self, seriesName):
    ulDir = self.getExistingDir(seriesName)

    if not ulDir:
        seriesName = nt.getCanonicalMangaUpdatesName(seriesName)
        safeFilename = nt.makeFilenameSafe(seriesName)
        matchName = nt.prepFilenameForMatching(seriesName)
        matchName = matchName.encode('utf-8', 'ignore').decode('utf-8')

        self.checkInitDirs()
        if matchName in self.mainDirs:
            ulDir = self.mainDirs[matchName][0]
        elif seriesName in self.mainDirs:
            ulDir = self.mainDirs[seriesName][0]
        else:
            self.log.info("Need to create container directory for %s", seriesName)
            ulDir = os.path.join(settings.mkSettings["uploadContainerDir"], settings.mkSettings["uploadDir"], safeFilename)
            try:
                self.sftp.mkdir(ulDir)
            except OSError as e:
                # If the error is just a "directory exists" warning, ignore it
                # silently. str(e) contains only the message text (not the
                # exception class name), so match on a substring.
                if 'File already exists' in str(e):
                    pass
                else:
                    self.log.warn("Error creating directory?")
                    self.log.warn(traceback.format_exc())

    return ulDir

def getUploadDirectory(self, seriesName):
    ulDir = self.getExistingDir(seriesName)

    if not ulDir:
        seriesName = nt.getCanonicalMangaUpdatesName(seriesName)
        safeFilename = nt.makeFilenameSafe(seriesName)
        matchName = nt.prepFilenameForMatching(seriesName)
        matchName = matchName.encode('latin-1', 'ignore').decode('latin-1')

        self.checkInitDirs()
        if matchName in self.unsortedDirs:
            ulDir = self.unsortedDirs[matchName]
        elif safeFilename in self.unsortedDirs:
            ulDir = self.unsortedDirs[safeFilename]
        else:
            self.log.info("Need to create container directory for %s", seriesName)
            ulDir = os.path.join(settings.mkSettings["uploadContainerDir"], settings.mkSettings["uploadDir"], safeFilename)
            try:
                self.ftp.mkd(ulDir)
            except ftplib.error_perm as e:
                # If the error is just a "directory exists" warning, ignore it silently
                if str(e).startswith("550") and str(e).endswith('File exists'):
                    pass
                else:
                    self.log.warn("Error creating directory?")
                    self.log.warn(traceback.format_exc())

    return ulDir

def getDoujinshiUploadDirectory(self, seriesName):
    ulDir = self.getExistingDir(seriesName)

    if not ulDir:
        seriesName = nt.getCanonicalMangaUpdatesName(seriesName)
        safeFilename = nt.makeFilenameSafe(seriesName)
        matchName = nt.prepFilenameForMatching(seriesName)
        matchName = matchName.encode('latin-1', 'ignore').decode('latin-1')

        self.checkInitDirs()
        if matchName in self.unsortedDirs:
            ulDir = self.unsortedDirs[matchName]
        elif safeFilename in self.unsortedDirs:
            ulDir = self.unsortedDirs[safeFilename]
        else:
            self.log.info("Need to create container directory for %s", seriesName)
            ulDir = os.path.join(settings.mkSettings["uploadContainerDir"], settings.mkSettings["uploadDir"], safeFilename)
            try:
                self.sftp.mkdir(ulDir)
            except OSError:
                # sftp.mkdir() signals failure with OSError/IOError, most
                # likely because the directory already exists.
                self.log.warn("Directory exists?")
                self.log.warn(traceback.format_exc())

    return ulDir

def aggregateDirs(self, pathBase_1, pathBase_2, dir1, dir2):
    canonName = nt.getCanonicalMangaUpdatesName(dir1)
    canonNameAlt = nt.getCanonicalMangaUpdatesName(dir2)
    cname1 = nt.prepFilenameForMatching(canonName)
    cname2 = nt.prepFilenameForMatching(canonNameAlt)

    if canonName.lower() != canonNameAlt.lower():
        self.log.critical("Error in uploading file. Name lookup via MangaUpdates table not commutative!")
        self.log.critical("First returned value '%s'", canonName)
        self.log.critical("For directory with path '%s'", dir1)
        self.log.critical("Second returned value '%s'", canonNameAlt)
        self.log.critical("For directory with path '%s'", dir2)
        self.log.critical("After cleaning: '%s', '%s', equal: '%s'", cname1, cname2, cname1 == cname2)
        raise CanonMismatch("Identical and yet not? '%s' - '%s'" % (canonName, canonNameAlt))

    self.log.info("Aggregating directories for canon name '%s':", canonName)
    n1 = lv.distance(dir1, canonName)
    n2 = lv.distance(dir2, canonName)
    self.log.info("    %s - '%s'", n1, dir1)
    self.log.info("    %s - '%s'", n2, dir2)

    # Using less-than-or-equal, so situations where both names are
    # equidistant get aggregated anyway.
    if n1 <= n2:
        src = os.path.join(pathBase_2, dir2)
        dst = os.path.join(pathBase_1, dir1)
    else:
        src = os.path.join(pathBase_1, dir1)
        dst = os.path.join(pathBase_2, dir2)

    self.moveItemsInDir(src, dst)
    self.log.info("Removing directory '%s'", src)

    try:
        self.sftp.mkdir("/Admin cleanup/autoclean dirs")
    except Exception:
        # The cleanup directory probably already exists.
        pass
    self.sftp.rename(src, "/Admin cleanup/autoclean dirs/garbage dir %s" % src.replace("/", ";").replace(" ", "_"))

    return dst

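# Sketch of the tie-break rule aggregateDirs() applies (assuming `lv` is the
# python-Levenshtein module): the directory whose name has the smaller edit
# distance to the canonical name is kept as the destination, and the `<=`
# means dir1 wins ties. Illustrative helper, not part of the repo.
import Levenshtein as lv

def _pick_destination(dir1, dir2, canonName):
    # Smaller distance == closer to the canonical name; ties keep dir1.
    if lv.distance(dir1, canonName) <= lv.distance(dir2, canonName):
        return dir1
    return dir2

# _pick_destination("Some Manga (v2)", "Some Manga", "Some Manga") -> "Some Manga"
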
def regenLndbCleanedNames(self):
    self.openDB()
    self.log.info("Regenerating LNDB lookup column table")
    with self.transaction() as cur:
        cur.execute("""SELECT dbid, ctitle FROM books_lndb;""")
        ret = cur.fetchall()
        for dbId, cTitle in ret:
            cleaned = nt.prepFilenameForMatching(cTitle)
            cur.execute("""UPDATE books_lndb SET cleanedTitle=%s WHERE dbid=%s;""", (cleaned, dbId))
            print(dbId, cleaned, cTitle)

def two_arg_lookup(val):
    print("Passed name = '%s'" % val)
    import nameTools as nt
    haveLookup = nt.haveCanonicalMangaUpdatesName(val)
    if not haveLookup:
        print("Item not found in MangaUpdates name synonym table")
        print("Processed item as searched = '%s'" % nt.prepFilenameForMatching(val))
    else:
        print("Item found in lookup table!")
        print("Canonical name = '%s'" % nt.getCanonicalMangaUpdatesName(val))

def insertNames(self, buId, names):
    with self.conn.cursor() as cur:
        for name in names:
            fsSafeName = nt.prepFilenameForMatching(name)
            cur.execute("""SELECT COUNT(*) FROM munamelist WHERE buId=%s AND name=%s;""", (buId, name))
            ret = cur.fetchone()
            if not ret[0]:
                cur.execute("""INSERT INTO munamelist (buId, name, fsSafeName) VALUES (%s, %s, %s);""", (buId, name, fsSafeName))
            else:
                print("wat", ret[0], bool(ret[0]))

def insertNames(self, buId, names):
    with self.get_cursor() as cur:
        for name in names:
            fsSafeName = nt.prepFilenameForMatching(name)
            cur.execute("""SELECT COUNT(*) FROM munamelist WHERE buId=%s AND name=%s;""", (buId, name))
            ret = cur.fetchone()
            if not ret[0]:
                cur.execute("""INSERT INTO munamelist (buId, name, fsSafeName) VALUES (%s, %s, %s);""", (buId, name, fsSafeName))
            else:
                print("wat", ret[0], bool(ret[0]))

def loadRemoteDirectory(self, fullPath, aggregate=False):
    ret = {}

    for dirName, stats in self.ftp.mlsd(fullPath):
        dirName = ftfy.fix_text(dirName)

        # Skip items that aren't directories
        if stats["type"] != "dir":
            continue

        canonName = nt.getCanonicalMangaUpdatesName(dirName)
        matchingName = nt.prepFilenameForMatching(canonName)
        fqPath = os.path.join(fullPath, dirName)

        if matchingName in ret:
            if aggregate:
                matchName = os.path.split(ret[matchingName])[-1]
                try:
                    fqPath = self.aggregateDirs(fullPath, dirName, matchName)
                except ValueError:
                    traceback.print_exc()
                except ftplib.error_perm:
                    traceback.print_exc()
            else:
                if COMPLAIN_ABOUT_DUPS:
                    self.log.warning("Duplicate directories for series '%s'!", canonName)
                    self.log.warning("    '%s/%s'", fullPath, dirName)
                    self.log.warning("    '%s/%s'", fullPath, matchingName)
            ret[matchingName] = fqPath
        else:
            ret[matchingName] = fqPath

    return ret

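# ftfy.fix_text() (applied to the raw FTP listing above) repairs common
# mojibake before name matching; e.g. UTF-8 text that was mis-decoded as
# latin-1 (illustrative example, not repo data):
import ftfy

print(ftfy.fix_text("fÃ¼r"))  # -> "für"
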
def consolidateMangaFolders(dirPath, smartMode=True):
    idLut = nt.MtNamesMapWrapper("fsName->buId")
    pc = PathCleaner()
    pc.openDB()

    count = 0
    print("Dir", dirPath)
    items = os.listdir(dirPath)
    items.sort()
    for item in items:
        item = os.path.join(dirPath, item)
        if os.path.isdir(item):
            fPath, dirName = os.path.split(item)
            lookup = nt.dirNameProxy[dirName]
            if lookup["fqPath"] != item:
                print()
                print()
                print("------------------------------------------------------")
                canonName = nt.getCanonicalMangaUpdatesName(dirName)
                print("Duplicate Directory '%s' - Canon = '%s'" % (dirName, canonName))
                count += 1
                mtId = idLut[nt.prepFilenameForMatching(dirName)]
                for num in mtId:
                    print("    URL: https://www.mangaupdates.com/series.html?id=%s" % (num, ))

                fPath, dir2Name = os.path.split(lookup["fqPath"])

                if not os.path.exists(item):
                    print("'%s' has been removed. Skipping" % item)
                    continue
                if not os.path.exists(lookup["fqPath"]):
                    print("'%s' has been removed. Skipping" % lookup["fqPath"])
                    continue

                n1 = lv.distance(dirName, canonName)
                n2 = lv.distance(dir2Name, canonName)

                r1 = abs(nt.extractRatingToFloat(dirName))
                r2 = abs(nt.extractRatingToFloat(dir2Name))

                if "[complete]" in dirName.lower():
                    r1 += 0.1
                if "[complete]" in dir2Name.lower():
                    r2 += 0.1
                if "[wtf]" in dirName.lower():
                    r1 += 0.2
                if "[wtf]" in dir2Name.lower():
                    r2 += 0.2

                print("    1: ", item)
                print("    2: ", lookup["fqPath"])
                print("    1: ", dirName, ' ->', nt.getCanonicalMangaUpdatesName(dirName))
                print("    2: ", dir2Name, ' ->', nt.getCanonicalMangaUpdatesName(dir2Name))
                print("    1: ({num} items)(distance {dist})(rating {rat})".format(num=len(os.listdir(item)), dist=n1, rat=r1))
                print("    2: ({num} items)(distance {dist})(rating {rat})".format(num=len(os.listdir(lookup["fqPath"])), dist=n2, rat=r2))

                mtId2 = idLut[nt.prepFilenameForMatching(dir2Name)]
                if mtId != mtId2:
                    print("DISCORDANT ID NUMBERS - {num1}, {num2}!".format(num1=mtId, num2=mtId2))
                    for num in mtId2:
                        print("    URL: https://www.mangaupdates.com/series.html?id=%s" % (num, ))
                    continue

                if r1 > r2:
                    doMove = "reverse"
                elif r2 > r1:
                    doMove = "forward"
                else:
                    doMove = ''

                if not doMove or not smartMode:
                    doMove = query_response("move files ('f' dir 1 -> dir 2. 'r' dir 1 <- dir 2. 'l' use levenshtein distance. 'n' do not move)?")

                if doMove == "forward":
                    print("Forward move")
                    fromDir = item
                    toDir = lookup["fqPath"]
                elif doMove == "reverse":
                    print("Reverse move")
                    fromDir = lookup["fqPath"]
                    toDir = item
                elif doMove == "levenshtein":
                    print("Levenshtein distance chooser")
                    # Using less-than-or-equal, so situations where both names
                    # are equidistant get aggregated anyway.
                    if n1 <= n2:
                        fromDir = lookup["fqPath"]
                        toDir = item
                    else:
                        fromDir = item
                        toDir = lookup["fqPath"]
                else:
                    print("Skipping")
                    continue

                print("moving from: '%s' " % fromDir)
                print("         to: '%s' " % toDir)
                # Use distinct names for the per-file loop so it doesn't
                # shadow the directory-level `item`/`items` above.
                fileNames = os.listdir(fromDir)
                for fileName in fileNames:
                    fromPath = os.path.join(fromDir, fileName)
                    toPath = os.path.join(toDir, fileName)

                    loop = 2
                    pathBase, ext = os.path.splitext(toPath)
                    while os.path.exists(toPath):
                        print("    Duplicate file!")
                        toPath = "{start} ({loop}){ext}".format(start=pathBase, loop=loop, ext=ext)
                        loop += 1  # advance the suffix so the loop can terminate

                    print("    Moving: ", fileName)
                    print("      From: ", fromPath)
                    print("        To: ", toPath)
                    try:
                        pc.moveFile(fromPath, toPath)
                    except psycopg2.IntegrityError:
                        print("Error moving item in dedup database")
                        # pc.deletePath(toPath)
                    shutil.move(fromPath, toPath)

                print("Deleting directory")
                os.rmdir(fromDir)

    print("total items", count)

def generate_row_meta(row):
    ret = {}
    filePath = ""
    if row.file:
        filePath = os.path.join(row.file.dirpath, row.file.filename)

    if row.series_name is None:
        sourceSeriesName = "NONE"
        seriesName = "NOT YET DETERMINED"
    else:
        sourceSeriesName = row.series_name
        seriesName = nt.getCanonicalMangaUpdatesName(row.series_name)

    cleanedName = nt.prepFilenameForMatching(sourceSeriesName)
    ret['itemInfo'] = nt.dirNameProxy[cleanedName]
    if ret['itemInfo']["rating"]:
        ret['rating'] = ret['itemInfo']["rating"]
    else:
        ret['rating'] = ""
    ret['ratingNum'] = nt.ratingStrToFloat(ret['rating'])

    if row.state == 'complete':
        ret['statusColour'] = colours["Done"]
    elif row.state == 'upload':
        ret['statusColour'] = colours["Uploaded"]
    elif row.state == 'fetching' or row.state == 'processing':
        ret['statusColour'] = colours["working"]
    elif row.state == 'new':
        ret['statusColour'] = colours["queued"]
    else:
        ret['statusColour'] = colours["error"]

    if filePath:
        if "=0=" in row.file.dirpath:
            if os.path.exists(filePath):
                ret['locationColour'] = colours["no match"]
            else:
                ret['locationColour'] = colours["moved"]
        elif settings.pickedDir in row.file.dirpath:
            ret['locationColour'] = colours["picked"]
        elif row.dirstate == 'created_dir':
            ret['locationColour'] = colours["new dir"]
        else:
            ret['locationColour'] = colours["valid cat"]
    else:
        if row.state == 'new':
            ret['locationColour'] = colours["queued"]
        elif row.state == 'upload':
            ret['locationColour'] = colours["valid cat"]
        elif row.state == 'fetching' or row.state == 'processing':
            ret['locationColour'] = colours["working"]
        else:
            ret['locationColour'] = colours["failed"]
        filePath = "N.A."

    toolTip = filePath.replace('"', "") + "<br>"
    toolTip += "Original series name: " + sourceSeriesName.replace('"', "") + "<br>"
    toolTip += "Proper MangaUpdates name: " + seriesName.replace('"', "") + "<br>"
    toolTip += "cleanedName: " + ret['itemInfo']["dirKey"] + "<br>"
    toolTip += "itemInfo: " + str(ret['itemInfo']).replace('"', "") + "<br>"
    toolTip += "rowId: " + str(row.id) + "<br>"
    toolTip += "sourceUrl: " + row.source_id + "<br>"
    toolTip += "dlState: " + str(row.state) + "<br>"
    toolTip += "Flags: " + str({
            'deleted': row.deleted,
            'was_duplicate': row.was_duplicate,
            'phash_duplicate': row.phash_duplicate,
            'uploaded': row.uploaded,
            'dirstate': row.dirstate
        }) + "<br>"
    toolTip += "item tags: " + str([tag for tag in row.tags]) + "<br>"
    if row.file:
        toolTip += "file manga tags: " + str([tag for tag in row.file.manga_tags]) + "<br>"
    toolTip += "Source: " + str(row.source_site) + "<br>"

    ret['cellId'] = None
    if os.path.exists(filePath):
        toolTip += "File found."
    else:
        toolTip += "File is missing!"
        ret['cellId'] = uuid.uuid1(0).hex

    ret['toolTip'] = toolTip

    ret['shouldBold'] = False
    if row.origin_name:
        chap = nt.extractChapterVol(row.origin_name)[0]
        if isinstance(chap, float):
            if chap < 10:
                ret['shouldBold'] = True

    ret['terseDate'] = row.downloaded_at.strftime('%y-%m-%d %H:%M')
    return ret

def loadRemoteDirectory(self, fullPath, aggregate=False):
    ret = {}

    dirs = self.wg.getpage("https://manga.madokami.al/stupidapi/fakedirs")

    requirePrefix = splitall(fullPath)

    badwords = [
        'Non-English',
        'Oneshots',
        'Raws',
        'Novels',
        '_Doujinshi',
        'AutoUploaded from Assorted Sources',
    ]

    rows = [tmp for tmp in
            [splitall(item) for item in
             [item[1:] if item.startswith("./") else item for item in dirs.split("\n")]]
            if (
                len(tmp) >= len(requirePrefix)
                and all([tmp[x] == requirePrefix[x] for x in range(len(requirePrefix))])
                and not any([badword in tmp for badword in badwords])
            )]

    print(len(rows))
    for line in rows:
        if len(line) == 6:
            dirName = line[-1]
            if not dirName:
                continue

            canonName = nt.getCanonicalMangaUpdatesName(dirName)
            matchingName = nt.prepFilenameForMatching(canonName)

            # prepFilenameForMatching can result in empty directory names in
            # some cases. Detect that, and don't bother with it if that happened.
            if not matchingName:
                continue

            fqPath = os.path.join(*line)
            fullPath = os.path.join(*line[:-1])

            if matchingName in ret:
                tmp = ret[matchingName]
                matchpath, matchName = os.path.split(tmp[-1])
                if isinstance(tmp, list):
                    tmp = tmp.pop()
                if aggregate:
                    try:
                        fqPath = self.aggregateDirs(fullPath, matchpath, dirName, matchName)
                    except CanonMismatch:
                        pass
                    except ValueError:
                        traceback.print_exc()
                    except ftplib.error_perm:
                        traceback.print_exc()
                    except PermissionError:
                        traceback.print_exc()
                else:
                    if COMPLAIN_ABOUT_DUPS:
                        self.log.warning("Duplicate directories for series '%s'!", canonName)
                        self.log.warning("    '%s/%s'", fullPath, dirName)
                        self.log.warning("    '%s/%s'", matchpath, matchName)
                ret[matchingName].append(fqPath)
            else:
                ret[matchingName] = [fqPath]

    return ret

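# A standalone sketch of the list-comprehension filter above: keep only the
# listing rows that start with the required path prefix and contain no
# blacklisted component. splitall() here is a hypothetical version of the
# helper the real module imports from elsewhere.
import os

def splitall(path):
    # Hypothetical helper: split a path into all of its components.
    parts = []
    while True:
        head, tail = os.path.split(path)
        if tail:
            parts.insert(0, tail)
        if head == path:  # hit the filesystem root
            parts.insert(0, head)
            break
        if not head:
            break
        path = head
    return parts

def keep_row(parts, requirePrefix, badwords):
    return (len(parts) >= len(requirePrefix)
            and parts[:len(requirePrefix)] == requirePrefix
            and not any(bad in parts for bad in badwords))

print(keep_row(splitall("Manga/S/Series X"), splitall("Manga"), ["Raws"]))  # -> True
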
def parseTwoArgCall(cmd, val):
    if cmd == "import":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        autoImporter.importDirectories(val)
    elif cmd == "organize":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        autOrg.organizeFolder(val)
    elif cmd == "run":
        utilities.runPlugin.runPlugin(val)
    elif cmd == "rename":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        autOrg.renameSeriesToMatchMangaUpdates(val)
    elif cmd == "lookup":
        print("Passed name = '%s'" % val)
        import nameTools as nt
        haveLookup = nt.haveCanonicalMangaUpdatesName(val)
        if not haveLookup:
            print("Item not found in MangaUpdates name synonym table")
            print("Processed item as searched = '%s'" % nt.prepFilenameForMatching(val))
        else:
            print("Item found in lookup table!")
            print("Canonical name = '%s'" % nt.getCanonicalMangaUpdatesName(val))
    elif cmd == "purge-dir":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        utilities.dedupDir.purgeDedupTemps(val)
    elif cmd == "purge-dir-phash":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        utilities.dedupDir.purgeDedupTempsPhash(val)
    elif cmd == "dirs-restore":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        utilities.dedupDir.runRestoreDeduper(val)
    elif cmd == "sort-dir-contents":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        utilities.approxFileSorter.scanDirectories(val)
    elif cmd == "clean-archives":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        utilities.cleanFiles.cleanArchives(val)
    else:
        print("Did not understand command!")
        print("Sys.argv = ", sys.argv)

def parseTwoArgCall(cmd, val):
    if cmd == "import":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        autoImporter.importDirectories(val)
        return

    if cmd == "organize":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        autOrg.organizeFolder(val)
        return
    elif cmd == "rename":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        autOrg.renameSeriesToMatchMangaUpdates(val)
        return
    elif cmd == "lookup":
        print("Passed name = '%s'" % val)
        import nameTools as nt
        haveLookup = nt.haveCanonicalMangaUpdatesName(val)
        if not haveLookup:
            print("Item not found in MangaUpdates name synonym table")
            print("Processed item as searched = '%s'" % nt.prepFilenameForMatching(val))
        else:
            print("Item found in lookup table!")
            print("Canonical name = '%s'" % nt.getCanonicalMangaUpdatesName(val))
    elif cmd == "purge-dir":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        utilities.dedupDir.purgeDedupTemps(val)
        return
    elif cmd == "purge-dir-phash":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        utilities.dedupDir.purgeDedupTempsPhash(val)
        return
    elif cmd == "dirs-restore":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        utilities.dedupDir.runRestoreDeduper(val)
        return
    elif cmd == "sort-dir-contents":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        utilities.approxFileSorter.scanDirectories(val)
        return
    elif cmd == "clean-archives":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        utilities.cleanFiles.cleanArchives(val)
        return
    elif cmd == "h-clean":
        if not os.path.exists(val):
            print("Passed path '%s' does not exist!" % val)
            return
        utilities.dedupDir.runHDeduper(val)
        return
    else:
        print("Did not understand command!")
        print("Sys.argv = ", sys.argv)

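# parseTwoArgCall() is presumably reached from a command-line entry point
# that peels the command and its value off sys.argv; a minimal sketch (the
# script name and exact argument handling are assumptions):
import sys

def _main():
    if len(sys.argv) == 3:
        parseTwoArgCall(sys.argv[1], sys.argv[2])
    else:
        print("Usage: <script> <command> <path-or-name>")

if __name__ == "__main__":
    _main()
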