def moveUnlinkableDirectories(self, dirPath, toPath):
	"""
	Partition series directories by whether their name resolves in the
	MangaUpdates name tables.

	Directories directly under ``dirPath`` whose names have no canonical
	MangaUpdates match are moved into ``toPath``; directories under
	``toPath`` that *do* now resolve are moved back into ``dirPath``.
	For unresolvable items left in ``toPath``, any ambiguous multi-match
	IDs are printed for manual review.

	Raises:
		ValueError: if either ``dirPath`` or ``toPath`` is not a directory.
	"""
	print("Moving Unlinkable from", dirPath)
	print("To:", toPath)
	# Fail loudly (with a message) on bad arguments rather than silently
	# shuffling files around the wrong tree.
	if not os.path.isdir(dirPath):
		print(dirPath, "is not a directory")
		raise ValueError("dirPath '%s' is not a directory" % dirPath)
	if not os.path.isdir(toPath):
		print(toPath, "is not a directory")
		raise ValueError("toPath '%s' is not a directory" % toPath)

	# Pass 1: move unresolvable series out of the linked dir.
	srcItems = os.listdir(dirPath)
	srcItems.sort()
	print("Len ", len(srcItems))
	for item in srcItems:
		itemPath = os.path.join(dirPath, item)
		if not os.path.isdir(itemPath):
			continue
		if not nt.haveCanonicalMangaUpdatesName(item):
			targetDir = os.path.join(toPath, item)
			print("Moving item", item, "to unlinked dir")
			shutil.move(itemPath, targetDir)

	# Pass 2: move anything that resolves now back into the linked dir,
	# and report ambiguous matches for what remains.
	srcItems = os.listdir(toPath)
	srcItems.sort()
	print("Len ", len(srcItems))
	for item in srcItems:
		itemPath = os.path.join(toPath, item)
		if not os.path.isdir(itemPath):
			continue
		if nt.haveCanonicalMangaUpdatesName(item):
			print("Moving item", item, "to linked dir")
			targetDir = os.path.join(dirPath, item)
			shutil.move(itemPath, targetDir)
		else:
			mId = nt.getAllMangaUpdatesIds(item)
			if mId:
				print("Item has multiple matches:", itemPath)
				for no in mId:
					print(" URL: https://www.mangaupdates.com/series.html?id=%s" % (no, ))
def extractFilename(self, inString):
	"""
	Parse a release string of the form ``"<title> [vol N] [ch M] ... |blurb"``
	into a canonical series title and a "vN cM" chapter string.

	Returns:
		(title, chapter) tuple of strings; ``chapter`` may be empty when
		neither volume nor chapter could be extracted.
	"""
	title, dummy_blurb = inString.rsplit("|", 1)
	# title, chapter = title.rsplit("-", 1)

	# Unescape htmlescaped items in the name/chapter.
	# NOTE: HTMLParser().unescape() was removed in Python 3.9 — use the
	# module-level html.unescape() instead.
	title = html.unescape(title)

	vol = None
	chap = None
	volChap = None
	try:
		if " vol " in title.lower():
			title, volChap = title.rsplit(" vol ", 1)
			vol, dummy = volChap.strip().split(" ", 1)
	except ValueError:
		self.log.error("Could not parse volume number from title %s", title)
		traceback.print_exc()

	try:
		# Prefer the chapter embedded after the volume token when present.
		if volChap and " ch " in volChap:
			dummy, chap = volChap.rsplit(" ch ", 1)
		elif " ch " in title:
			title, chap = title.rsplit(" ch ", 1)
	except ValueError:
		self.log.error("Could not parse chapter number from title %s", title)
		traceback.print_exc()

	# Strip trailing "Page ..." fragments from whichever field carried them.
	if chap:
		if "Page" in chap:
			chap, dummy = chap.split("Page", 1)
		elif title and "Page" in title:
			title, dummy = title.split("Page", 1)

	title = title.rstrip(" -")

	haveLookup = nt.haveCanonicalMangaUpdatesName(title)
	if not haveLookup:
		self.log.warning("Did not find title '%s' in MangaUpdates database!", title)
	title = nt.getCanonicalMangaUpdatesName(title).strip()

	volChap = []
	if vol:
		volChap.append("v{}".format(vol))
	if chap:
		volChap.append("c{}".format(chap))
	chapter = " ".join(volChap)
	return title, chapter
def two_arg_lookup(val):
	"""Print how `val` resolves through the MangaUpdates name-synonym tables."""
	print("Passed name = '%s'" % val)
	import nameTools as nt
	if nt.haveCanonicalMangaUpdatesName(val):
		print("Item found in lookup table!")
		print("Canonical name = '%s'" % nt.getCanonicalMangaUpdatesName(val) )
	else:
		print("Item not found in MangaUpdates name synonym table")
		print("Processed item as searched = '%s'" % nt.prepFilenameForMatching(val))
def getDownloadPath(self, item, fName):
	"""
	Resolve (and record) the fully-qualified archive path for a download.

	If the item has no series name, guess one from the filename and fall
	back to the configured unknown-series bucket. Collisions with existing
	files are resolved by appending " - (N)" before the extension.

	Returns:
		The fully-qualified, collision-free file path to save to.
	"""
	if not item['seriesName']:
		self.log.info("No series set for item. Guessing from filename:")
		self.log.info("Filename = '%s'", fName)
		bareName = nt.guessSeriesFromFilename(fName)
		# if not matchName or not matchName in nt.dirNameProxy:
		if not nt.haveCanonicalMangaUpdatesName(bareName):
			item["seriesName"] = settings.ircBot["unknown-series"]
		else:
			item["seriesName"] = nt.getCanonicalMangaUpdatesName(bareName)
		self.log.info("Guessed = '%s'. Updating series information", item['seriesName'])
		self.updateDbEntry(item["sourceUrl"], seriesName=item["seriesName"])

	dlPath, newDir = self.locateOrCreateDirectoryForSeries(item["seriesName"])

	if item["flags"] is None:
		item["flags"] = ""

	if newDir:
		self.updateDbEntry(item["sourceUrl"], flags=" ".join([item["flags"], "haddir"]))
		self.conn.commit()

	fqFName = os.path.join(dlPath, fName)

	# De-collide: "name.ext" -> "name - (1).ext", "name - (2).ext", ...
	# Rebuild from the constant base each time so suffixes don't compound,
	# and splice the extension back without an extra dot (splitext's ext
	# already includes it). Also keeps the extension on fName when there is
	# no collision at all.
	base, ext = os.path.splitext(fName)
	loop = 1
	while os.path.exists(fqFName):
		fName = "%s - (%d)%s" % (base, loop, ext)
		fqFName = os.path.join(dlPath, fName)
		loop += 1

	self.log.info("Saving to archive = %s", fqFName)
	self.updateDbEntry(item["sourceUrl"], downloadPath=dlPath, fileName=fName, originName=fName)
	return fqFName
def getDownloadPath(self, item, fName):
	"""
	Resolve (and record) the fully-qualified archive path for a download.

	If the item has no series name, guess one from the filename and fall
	back to the configured unknown-series bucket. Collisions with existing
	files are resolved by appending " - (N)" before the extension.

	Returns:
		The fully-qualified, collision-free file path to save to.
	"""
	if not item['seriesName']:
		self.log.info("No series set for item. Guessing from filename:")
		self.log.info("Filename = '%s'", fName)
		bareName = nt.guessSeriesFromFilename(fName)
		# if not matchName or not matchName in nt.dirNameProxy:
		if not nt.haveCanonicalMangaUpdatesName(bareName):
			item["seriesName"] = settings.ircBot["unknown-series"]
		else:
			item["seriesName"] = nt.getCanonicalMangaUpdatesName(bareName)
		self.log.info("Guessed = '%s'. Updating series information", item['seriesName'])
		self.updateDbEntry(item["sourceUrl"], seriesName=item["seriesName"])

	dlPath, newDir = self.locateOrCreateDirectoryForSeries(item["seriesName"])

	if item["flags"] is None:
		item["flags"] = ""

	if newDir:
		self.updateDbEntry(item["sourceUrl"], flags=" ".join([item["flags"], "haddir"]))
		self.conn.commit()

	fqFName = os.path.join(dlPath, fName)

	# De-collide: "name.ext" -> "name - (1).ext", "name - (2).ext", ...
	# Rebuild from the constant base each time so suffixes don't compound,
	# and splice the extension back without an extra dot (splitext's ext
	# already includes it). Also keeps the extension on fName when there is
	# no collision at all.
	base, ext = os.path.splitext(fName)
	loop = 1
	while os.path.exists(fqFName):
		fName = "%s - (%d)%s" % (base, loop, ext)
		fqFName = os.path.join(dlPath, fName)
		loop += 1

	self.log.info("Saving to archive = %s", fqFName)
	self.updateDbEntry(item["sourceUrl"], downloadPath=dlPath, fileName=fName, originName=fName)
	return fqFName
def parseTwoArgCall(cmd, val):
	"""
	Dispatch a two-argument CLI invocation.

	``cmd`` names the action; ``val`` is its single argument — a filesystem
	path for most commands, a plugin name for "run", a series name for
	"lookup". Unknown commands print a usage complaint.
	"""
	# Commands whose argument must be an existing filesystem path.
	# Mapping of command name -> zero-arg thunk applying the handler to val;
	# this replaces eight copy-pasted exists-check branches with one guard.
	pathCommands = {
		"import":            lambda: autoImporter.importDirectories(val),
		"organize":          lambda: autOrg.organizeFolder(val),
		"rename":            lambda: autOrg.renameSeriesToMatchMangaUpdates(val),
		"purge-dir":         lambda: utilities.dedupDir.purgeDedupTemps(val),
		"purge-dir-phash":   lambda: utilities.dedupDir.purgeDedupTempsPhash(val),
		"dirs-restore":      lambda: utilities.dedupDir.runRestoreDeduper(val),
		"sort-dir-contents": lambda: utilities.approxFileSorter.scanDirectories(val),
		"clean-archives":    lambda: utilities.cleanFiles.cleanArchives(val),
	}

	if cmd in pathCommands:
		if not os.path.exists(val):
			print("Passed path '%s' does not exist!" % val)
			return
		pathCommands[cmd]()
	elif cmd == "run":
		utilities.runPlugin.runPlugin(val)
	elif cmd == "lookup":
		print("Passed name = '%s'" % val)
		import nameTools as nt
		haveLookup = nt.haveCanonicalMangaUpdatesName(val)
		if not haveLookup:
			print("Item not found in MangaUpdates name synonym table")
			print("Processed item as searched = '%s'" % nt.prepFilenameForMatching(val))
		else:
			print("Item found in lookup table!")
			print("Canonical name = '%s'" % nt.getCanonicalMangaUpdatesName(val) )
	else:
		print("Did not understand command!")
		print("Sys.argv = ", sys.argv)
def parseTwoArgCall(cmd, val):
	"""
	Dispatch a two-argument CLI invocation.

	``cmd`` names the action; ``val`` is its single argument — a filesystem
	path for most commands, a series name for "lookup". Unknown commands
	print a usage complaint.
	"""
	# Commands whose argument must be an existing filesystem path.
	# Mapping of command name -> zero-arg thunk applying the handler to val;
	# this replaces nine copy-pasted exists-check branches with one guard.
	pathCommands = {
		"import":            lambda: autoImporter.importDirectories(val),
		"organize":          lambda: autOrg.organizeFolder(val),
		"rename":            lambda: autOrg.renameSeriesToMatchMangaUpdates(val),
		"purge-dir":         lambda: utilities.dedupDir.purgeDedupTemps(val),
		"purge-dir-phash":   lambda: utilities.dedupDir.purgeDedupTempsPhash(val),
		"dirs-restore":      lambda: utilities.dedupDir.runRestoreDeduper(val),
		"sort-dir-contents": lambda: utilities.approxFileSorter.scanDirectories(val),
		"clean-archives":    lambda: utilities.cleanFiles.cleanArchives(val),
		"h-clean":           lambda: utilities.dedupDir.runHDeduper(val),
	}

	if cmd in pathCommands:
		if not os.path.exists(val):
			print("Passed path '%s' does not exist!" % val)
			return
		pathCommands[cmd]()
		return
	elif cmd == "lookup":
		print("Passed name = '%s'" % val)
		import nameTools as nt
		haveLookup = nt.haveCanonicalMangaUpdatesName(val)
		if not haveLookup:
			print("Item not found in MangaUpdates name synonym table")
			print("Processed item as searched = '%s'" % nt.prepFilenameForMatching(val))
		else:
			print("Item found in lookup table!")
			print("Canonical name = '%s'" % nt.getCanonicalMangaUpdatesName(val))
	else:
		print("Did not understand command!")
		print("Sys.argv = ", sys.argv)
def consolidateSeriesNaming(self):
	"""
	Debug/maintenance probe: print how a few near-duplicate series names
	resolve through the MangaUpdates canonical-name lookup tables.

	NOTE(review): this method historically carried a large DB-wide
	series-name consolidation pass (DISTINCT seriesName scan, munamelist
	backfill, batoto URL rewriting); all of it was commented out and has
	been removed here — recover it from version control if ever needed.
	Only the ad-hoc lookup probe below is live.
	"""
	cur = self.get_cursor()

	# First and third entries render identically; presumably they differ in
	# Unicode normalization (precomposed vs. combining accent), which is
	# exactly what this probes — TODO confirm against the original bytes.
	items = ["Murciélago", "Murcielago", "Murciélago"]
	for item in items:
		print("------", item, nt.getCanonicalMangaUpdatesName(item), nt.haveCanonicalMangaUpdatesName(item))

	# Close out any transaction the cursor acquisition may have opened.
	cur.execute("COMMIT;")
def _processLinksIntoDB(self, linksDicts, isPicked=False):
	"""
	Insert or reconcile scraped (seriesName, file-path) tuples in the DB.

	For each link: unknown URLs are inserted as new rows (dlState=0),
	a single matching row with a changed URL is updated in place ("moved"),
	and multiple matches for one URL are logged as broken. Commits are
	deferred by the insert calls to speed up bulk runs.
	"""
	# item["date"] = time.time()
	# item["dlName"] = linkName
	# item["dlLink"] = itemUrl
	# item["baseName"] = dirName
	self.log.info("Inserting...",)

	newItems = 0
	oldItems = 0
	movedItems = 0
	brokeItems = 0
	for seriesName, fqFileN in linksDicts:
		dlLink = urllib.parse.urljoin(self.url_base, fqFileN)
		fileN = os.path.split(fqFileN)[-1]

		# Look up by URL, so we don't break the UNIQUE constraint.
		rows = self.getRowsByValue(sourceUrl = dlLink)
		# rows = [row for row in rows if row['dlState'] < 3]
		if len(rows) == 0:
			newItems += 1
			# Flags has to be an empty string, because the DB is annoying.
			# TL;DR, comparing with LIKE in a column that has NULLs in it is somewhat broken.
			self.insertIntoDb(retreivalTime = time.time(),
								sourceUrl   = dlLink,
								originName  = fileN,
								dlState     = 0,
								seriesName  = seriesName,
								flags       = '',
								commit      = False)  # Defer commiting changes to speed things up
			self.log.info("New item: %s", (nt.haveCanonicalMangaUpdatesName(seriesName), dlLink, seriesName, fileN))
		elif len(rows) > 1:
			brokeItems += 1
			self.log.warning("Have more then one item for filename! Wat?")
			self.log.warning("Info dict for file:")
			self.log.warning("'%s'", (dlLink, seriesName, fileN))
			self.log.warning("Found rows:")
			for row in rows:
				self.log.warning("'%s'", row)
				self.log.warning("'%s'", row['dlState'] < 3)
		else:
			# Exactly one row. (The previous trailing `else: row = row.pop()`
			# was unreachable dead code — 0 / >1 / 1 is exhaustive — and
			# referenced an unbound name; it has been removed.)
			row = rows.pop()
			if row["sourceUrl"] != dlLink:
				self.log.info("File has been moved: %s!", (seriesName, fileN))
				self.log.info("Old: %s", row["sourceUrl"])
				self.log.info("New: %s", dlLink)
				self.updateDbEntryById(row["dbId"], sourceUrl = dlLink)
				movedItems += 1
			else:
				oldItems += 1

	self.log.info("Done")
	self.log.info("%s new items, %s old items, %s moved items, %s items with broken rows.", newItems, oldItems, movedItems, brokeItems)
def _processLinksIntoDB(self, linksDicts, isPicked=False):
	"""
	Insert or reconcile scraped (seriesName, file-path) tuples in the DB.

	Rows already downloaded or beyond (dlState >= 3) are ignored when
	matching. Unknown URLs are inserted as new rows (dlState=0), a single
	matching row with a changed URL is updated in place ("moved"), and
	multiple matches for one URL are logged as broken. Commits are
	deferred by the insert calls to speed up bulk runs.
	"""
	# item["date"] = time.time()
	# item["dlName"] = linkName
	# item["dlLink"] = itemUrl
	# item["baseName"] = dirName
	self.log.info("Inserting...",)

	newItems = 0
	oldItems = 0
	movedItems = 0
	brokeItems = 0
	for seriesName, fqFileN in linksDicts:
		dlLink = urllib.parse.urljoin(self.url_base, fqFileN)
		fileN = os.path.split(fqFileN)[-1]

		# Look up by URL, so we don't break the UNIQUE constraint.
		rows = self.getRowsByValue(sourceUrl = dlLink)
		rows = [row for row in rows if row['dlState'] < 3]
		# if not rows:
		# 	# We only look at the filename/series tuple to determine uniqueness,
		# 	rows = self.getRowsByValue(originName = fileN, seriesname = seriesName)
		if len(rows) == 0:
			newItems += 1
			# Flags has to be an empty string, because the DB is annoying.
			# TL;DR, comparing with LIKE in a column that has NULLs in it is somewhat broken.
			self.insertIntoDb(retreivalTime = time.time(),
								sourceUrl   = dlLink,
								originName  = fileN,
								dlState     = 0,
								seriesName  = seriesName,
								flags       = '',
								commit      = False)  # Defer commiting changes to speed things up
			self.log.info("New item! Have canon name: %s, URL: %s, Series: %s, FileName: %s", nt.haveCanonicalMangaUpdatesName(seriesName), dlLink, seriesName, fileN)
		elif len(rows) > 1:
			brokeItems += 1
			self.log.warning("Have more then one item for filename! Wat?")
			self.log.warning("Info dict for file:")
			self.log.warning("'%s'", (dlLink, seriesName, fileN))
			self.log.warning("Found rows:")
			for row in rows:
				self.log.warning("'%s'", row)
				self.log.warning("'%s'", row['dlState'] < 3)
		else:
			# Exactly one row. (The previous trailing `else: row = row.pop()`
			# was unreachable dead code — 0 / >1 / 1 is exhaustive — and
			# referenced an unbound name; it has been removed.)
			row = rows.pop()
			if row["sourceUrl"] != dlLink:
				self.log.info("File has been moved: %s!", (seriesName, fileN))
				self.log.info("Old: %s", row["sourceUrl"])
				self.log.info("New: %s", dlLink)
				self.updateDbEntryById(row["dbId"], sourceUrl = dlLink)
				movedItems += 1
			else:
				oldItems += 1

	self.log.info("Done")
	self.log.info("%s new items, %s old items, %s moved items, %s items with broken rows.", newItems, oldItems, movedItems, brokeItems)
def consolidateSeriesNaming(self):
	"""
	Debug/maintenance probe: print how a few near-duplicate series names
	resolve through the MangaUpdates canonical-name lookup tables.

	NOTE(review): this method historically carried a large DB-wide
	series-name consolidation pass (DISTINCT seriesName scan, munamelist
	backfill, batoto URL rewriting); all of it was commented out and has
	been removed here — recover it from version control if ever needed.
	Only the ad-hoc lookup probe below is live.
	"""
	cur = self.conn.cursor()

	# First and third entries render identically; presumably they differ in
	# Unicode normalization (precomposed vs. combining accent), which is
	# exactly what this probes — TODO confirm against the original bytes.
	items = ["Murciélago", "Murcielago", "Murciélago"]
	for item in items:
		print("------", item, nt.getCanonicalMangaUpdatesName(item), nt.haveCanonicalMangaUpdatesName(item))

	# Close out any transaction the cursor may have opened.
	cur.execute("COMMIT;")