Example #1
0
	def moveUnlinkableDirectories(self, dirPath, toPath):
		"""
		Shuffle sub-directories between `dirPath` and `toPath` depending on
		whether `nt.haveCanonicalMangaUpdatesName()` accepts their name.

		First, every sub-directory of `dirPath` whose name is rejected is moved
		into `toPath`. Then every sub-directory of `toPath` whose name is
		accepted is moved back into `dirPath`; for the remaining ones, any IDs
		returned by `nt.getAllMangaUpdatesIds()` are printed as MangaUpdates URLs.

		Plain files are ignored in both passes.

		Raises:
			ValueError: if `dirPath` or `toPath` is not an existing directory.
		"""
		print("Moving Unlinkable from", dirPath)
		print("To:", toPath)

		# dirPath is validated before toPath, so a bad dirPath is reported first.
		for candidate in (dirPath, toPath):
			if not os.path.isdir(candidate):
				print(candidate, "is not a directory")
				raise ValueError

		# Pass 1: push directories without a canonical name out of dirPath.
		linkedNames = sorted(os.listdir(dirPath))
		print("Len ", len(linkedNames))
		for name in linkedNames:
			source = os.path.join(dirPath, name)
			if os.path.isdir(source) and not nt.haveCanonicalMangaUpdatesName(name):
				destination = os.path.join(toPath, name)
				print("Moving item", name, "to unlinked dir")
				shutil.move(source, destination)

		# Pass 2: toPath is listed only now, so it includes what pass 1 moved in.
		unlinkedNames = sorted(os.listdir(toPath))
		print("Len ", len(unlinkedNames))
		for name in unlinkedNames:
			source = os.path.join(toPath, name)
			if not os.path.isdir(source):
				continue

			if nt.haveCanonicalMangaUpdatesName(name):
				print("Moving item", name, "to linked dir")
				shutil.move(source, os.path.join(dirPath, name))
				continue

			matchIds = nt.getAllMangaUpdatesIds(name)
			if matchIds:
				print("Item has multiple matches:", source)
				for matchId in matchIds:
					print("	URL: https://www.mangaupdates.com/series.html?id=%s" % (matchId, ))
Example #2
0
	def moveUnlinkableDirectories(self, dirPath, toPath):
		"""
		Sort the sub-directories of two folders by whether their name is known
		to `nt.haveCanonicalMangaUpdatesName()`.

		Sub-directories of `dirPath` with an unknown name are moved to `toPath`;
		afterwards sub-directories of `toPath` with a known name are moved to
		`dirPath`. For directories left in `toPath`, any IDs returned by
		`nt.getAllMangaUpdatesIds()` are printed as MangaUpdates series URLs.

		Raises:
			ValueError: when either argument is not an existing directory.
		"""

		def subdirectories(parent):
			# Lazily yield (name, fullPath) for each directory entry of `parent`
			# in sorted order. The listing is taken when iteration starts, and
			# the isdir() test runs just before each entry is handed out.
			names = os.listdir(parent)
			names.sort()
			print("Len ", len(names))
			for name in names:
				fullPath = os.path.join(parent, name)
				if os.path.isdir(fullPath):
					yield name, fullPath

		print("Moving Unlinkable from", dirPath)
		print("To:", toPath)

		if not os.path.isdir(dirPath):
			print(dirPath, "is not a directory")
			raise ValueError
		if not os.path.isdir(toPath):
			print(toPath, "is not a directory")
			raise ValueError

		for name, fullPath in subdirectories(dirPath):
			if nt.haveCanonicalMangaUpdatesName(name):
				continue
			destination = os.path.join(toPath, name)
			print("Moving item", name, "to unlinked dir")
			shutil.move(fullPath, destination)

		for name, fullPath in subdirectories(toPath):
			if nt.haveCanonicalMangaUpdatesName(name):
				print("Moving item", name, "to linked dir")
				destination = os.path.join(dirPath, name)
				shutil.move(fullPath, destination)
			else:
				candidateIds = nt.getAllMangaUpdatesIds(name)
				if not candidateIds:
					continue
				print("Item has multiple matches:", fullPath)
				for candidateId in candidateIds:
					print("	URL: https://www.mangaupdates.com/series.html?id=%s" % (candidateId, ))
Example #3
0
	def extractFilename(self, inString):
		"""
		Split a "<title> | <blurb>" string into a series title and a chapter tag.

		The text after the last "|" is discarded. The remaining title is
		HTML-unescaped, then " vol " / " ch " markers are stripped out of it and
		turned into a "v<vol> c<chap>" style string. Anything from "Page" onwards
		is dropped. The title is finally passed through
		`nt.getCanonicalMangaUpdatesName()`.

		Args:
			inString: raw string containing at least one "|".

		Returns:
			(title, chapter) tuple. `chapter` is "" when neither a volume nor a
			chapter could be extracted.

		Raises:
			ValueError: if `inString` contains no "|".
		"""
		title, dummy_blurb = inString.rsplit("|", 1)
		# title, chapter = title.rsplit("-", 1)

		# Unescape htmlescaped items in the name/chapter.
		# HTMLParser.unescape() was removed in Python 3.9; html.unescape() is the
		# supported replacement.
		title = html.unescape(title)

		vol = None
		chap = None
		volChap = None

		try:
			# NOTE(review): the membership test is case-insensitive but the split
			# is not, so a title containing e.g. " Vol " lands in the except branch.
			if " vol " in title.lower():
				title, volChap = title.rsplit(" vol ", 1)
				vol, dummy = volChap.strip().split(" ", 1)
		except ValueError:
			self.log.error("Could not parse volume number from title %s", title)
			traceback.print_exc()


		try:
			# Prefer a chapter marker that follows the volume marker; otherwise
			# look for one in the title itself.
			if volChap and " ch " in volChap:
				dummy, chap = volChap.rsplit(" ch ", 1)

			elif " ch " in title:
				title, chap = title.rsplit(" ch ", 1)

		except ValueError:
			self.log.error("Could not parse chapter number from title %s", title)
			traceback.print_exc()

		# Drop any trailing "Page ..." text from whichever piece carries it.
		if chap:
			if "Page" in chap:
				chap, dummy = chap.split("Page", 1)

		elif title and "Page" in title:
			title, dummy = title.split("Page", 1)

		title = title.rstrip(" -")
		haveLookup = nt.haveCanonicalMangaUpdatesName(title)
		if not haveLookup:
			self.log.warning("Did not find title '%s' in MangaUpdates database!", title)
		title = nt.getCanonicalMangaUpdatesName(title).strip()


		volChap = []

		if vol:
			volChap.append("v{}".format(vol))
		if chap:
			volChap.append("c{}".format(chap))

		chapter = " ".join(volChap)

		return title, chapter
Example #4
0
def two_arg_lookup(val):
	"""
	Print whether `val` is known to the nameTools MangaUpdates lookup.

	When it is, the canonical name is printed; otherwise the result of
	`nt.prepFilenameForMatching(val)` is printed instead.
	"""
	print("Passed name = '%s'" % val)
	# Imported here rather than at module level, after the first line of output.
	import nameTools as nt

	if nt.haveCanonicalMangaUpdatesName(val):
		print("Item found in lookup table!")
		print("Canonical name = '%s'" % nt.getCanonicalMangaUpdatesName(val) )
		return

	print("Item not found in MangaUpdates name synonym table")
	print("Processed item as searched = '%s'" % nt.prepFilenameForMatching(val))
Example #5
0
    def getDownloadPath(self, item, fName):
        """
        Work out where a downloaded file should be saved and record it in the DB.

        If `item` has no series name, one is guessed from `fName` (falling back
        to `settings.ircBot["unknown-series"]`) and written to the DB. The
        series directory is then located or created, and a file name that does
        not collide with an existing file is chosen by appending " - (N)"
        before the extension.

        Args:
            item: mapping with at least "seriesName", "sourceUrl" and "flags".
                "seriesName" and "flags" may be updated in place.
            fName: desired file name, including its extension.

        Returns:
            Fully qualified path the file should be written to.
        """
        if not item['seriesName']:
            self.log.info("No series set for item. Guessing from filename:")
            self.log.info("Filename = '%s'", fName)
            bareName = nt.guessSeriesFromFilename(fName)

            # if not matchName or not matchName in nt.dirNameProxy:
            if not nt.haveCanonicalMangaUpdatesName(bareName):
                item["seriesName"] = settings.ircBot["unknown-series"]
            else:
                item["seriesName"] = nt.getCanonicalMangaUpdatesName(bareName)

            self.log.info("Guessed  = '%s'. Updating series information",
                          item['seriesName'])
            self.updateDbEntry(item["sourceUrl"],
                               seriesName=item["seriesName"])

        dlPath, newDir = self.locateOrCreateDirectoryForSeries(
            item["seriesName"])

        if item["flags"] is None:
            item["flags"] = ""

        if newDir:
            self.updateDbEntry(item["sourceUrl"],
                               flags=" ".join([item["flags"], "haddir"]))
            self.conn.commit()

        fqFName = os.path.join(dlPath, fName)

        # Keep the stem separate from fName so that (a) fName still carries its
        # extension when there is no collision, and (b) each retry is built from
        # the original stem instead of the previous attempt. `ext` already
        # includes the leading dot.
        stem, ext = os.path.splitext(fName)

        loop = 1
        while os.path.exists(fqFName):
            fName = "%s - (%d)%s" % (stem, loop, ext)
            fqFName = os.path.join(dlPath, fName)
            loop += 1
        self.log.info("Saving to archive = %s", fqFName)

        # Both name columns receive the final on-disk name, as before.
        self.updateDbEntry(item["sourceUrl"],
                           downloadPath=dlPath,
                           fileName=fName,
                           originName=fName)

        return fqFName
Example #6
0
	def getDownloadPath(self, item, fName):
		"""
		Work out where a downloaded file should be saved and record it in the DB.

		If `item` has no series name, one is guessed from `fName` (falling back
		to `settings.ircBot["unknown-series"]`) and written to the DB. The
		series directory is then located or created, and a file name that does
		not collide with an existing file is chosen by appending " - (N)"
		before the extension.

		Args:
			item: mapping with at least "seriesName", "sourceUrl" and "flags".
				"seriesName" and "flags" may be updated in place.
			fName: desired file name, including its extension.

		Returns:
			Fully qualified path the file should be written to.
		"""
		if not item['seriesName']:
			self.log.info("No series set for item. Guessing from filename:")
			self.log.info("Filename = '%s'", fName)
			bareName = nt.guessSeriesFromFilename(fName)

			# if not matchName or not matchName in nt.dirNameProxy:
			if not nt.haveCanonicalMangaUpdatesName(bareName):
				item["seriesName"] = settings.ircBot["unknown-series"]
			else:
				item["seriesName"] = nt.getCanonicalMangaUpdatesName(bareName)

			self.log.info("Guessed  = '%s'. Updating series information", item['seriesName'])
			self.updateDbEntry(item["sourceUrl"], seriesName=item["seriesName"])


		dlPath, newDir = self.locateOrCreateDirectoryForSeries(item["seriesName"])

		if item["flags"] is None:
			item["flags"] = ""

		if newDir:
			self.updateDbEntry(item["sourceUrl"], flags=" ".join([item["flags"], "haddir"]))
			self.conn.commit()

		fqFName = os.path.join(dlPath, fName)

		# Keep the stem separate from fName so that (a) fName still carries its
		# extension when there is no collision, and (b) each retry is built from
		# the original stem instead of the previous attempt. `ext` already
		# includes the leading dot.
		stem, ext = os.path.splitext(fName)

		loop = 1
		while os.path.exists(fqFName):
			fName = "%s - (%d)%s" % (stem, loop, ext)
			fqFName = os.path.join(dlPath, fName)
			loop += 1
		self.log.info("Saving to archive = %s", fqFName)


		# Both name columns receive the final on-disk name, as before.
		self.updateDbEntry(item["sourceUrl"], downloadPath=dlPath, fileName=fName, originName=fName)

		return fqFName
Example #7
0
def parseTwoArgCall(cmd, val):
	"""
	Run the command-line action named `cmd` with the single argument `val`.

	"run" and "lookup" take `val` as a name. Every other recognised command
	takes `val` as a filesystem path, which must exist; if it does not, a
	message is printed and nothing is run. Unrecognised commands print a
	short diagnostic including `sys.argv`.
	"""
	if cmd == "run":
		utilities.runPlugin.runPlugin(val)
		return

	if cmd == "lookup":
		print("Passed name = '%s'" % val)
		import nameTools as nt
		if nt.haveCanonicalMangaUpdatesName(val):
			print("Item found in lookup table!")
			print("Canonical name = '%s'" % nt.getCanonicalMangaUpdatesName(val) )
		else:
			print("Item not found in MangaUpdates name synonym table")
			print("Processed item as searched = '%s'" % nt.prepFilenameForMatching(val))
		return

	# Commands whose argument must be an existing path. The handlers are
	# wrapped in lambdas so the target module is only looked up for the command
	# that is actually run, and only after the path check has passed.
	pathCommands = (
		("import",            lambda: autoImporter.importDirectories(val)),
		("organize",          lambda: autOrg.organizeFolder(val)),
		("rename",            lambda: autOrg.renameSeriesToMatchMangaUpdates(val)),
		("purge-dir",         lambda: utilities.dedupDir.purgeDedupTemps(val)),
		("purge-dir-phash",   lambda: utilities.dedupDir.purgeDedupTempsPhash(val)),
		("dirs-restore",      lambda: utilities.dedupDir.runRestoreDeduper(val)),
		("sort-dir-contents", lambda: utilities.approxFileSorter.scanDirectories(val)),
		("clean-archives",    lambda: utilities.cleanFiles.cleanArchives(val)),
	)

	for name, handler in pathCommands:
		if cmd != name:
			continue
		if not os.path.exists(val):
			print("Passed path '%s' does not exist!" % val)
			return
		handler()
		return

	print("Did not understand command!")
	print("Sys.argv = ", sys.argv)
Example #8
0
def parseTwoArgCall(cmd, val):
    """
    Run the command-line action named `cmd` with the single argument `val`.

    "lookup" treats `val` as a series name. All other recognised commands
    treat `val` as a path that must exist; when it does not, a message is
    printed and nothing else happens. Unknown commands print a diagnostic
    including `sys.argv`.
    """
    if cmd == "lookup":
        print("Passed name = '%s'" % val)
        import nameTools as nt
        if nt.haveCanonicalMangaUpdatesName(val):
            print("Item found in lookup table!")
            print("Canonical name = '%s'" %
                  nt.getCanonicalMangaUpdatesName(val))
        else:
            print("Item not found in MangaUpdates name synonym table")
            print("Processed item as searched = '%s'" %
                  nt.prepFilenameForMatching(val))
        return

    # Every remaining command operates on an existing filesystem path.
    pathCommands = (
        "import",
        "organize",
        "rename",
        "purge-dir",
        "purge-dir-phash",
        "dirs-restore",
        "sort-dir-contents",
        "clean-archives",
        "h-clean",
    )

    if cmd not in pathCommands:
        print("Did not understand command!")
        print("Sys.argv = ", sys.argv)
        return

    if not os.path.exists(val):
        print("Passed path '%s' does not exist!" % val)
        return

    # The path is valid; hand off to the matching tool.
    if cmd == "import":
        autoImporter.importDirectories(val)
    elif cmd == "organize":
        autOrg.organizeFolder(val)
    elif cmd == "rename":
        autOrg.renameSeriesToMatchMangaUpdates(val)
    elif cmd == "purge-dir":
        utilities.dedupDir.purgeDedupTemps(val)
    elif cmd == "purge-dir-phash":
        utilities.dedupDir.purgeDedupTempsPhash(val)
    elif cmd == "dirs-restore":
        utilities.dedupDir.runRestoreDeduper(val)
    elif cmd == "sort-dir-contents":
        utilities.approxFileSorter.scanDirectories(val)
    elif cmd == "clean-archives":
        utilities.cleanFiles.cleanArchives(val)
    else:
        utilities.dedupDir.runHDeduper(val)
Example #9
0
	def consolidateSeriesNaming(self):
		"""
		Debug helper: print the nameTools lookup results for a few spellings of
		one series name, then issue a COMMIT on a fresh cursor.

		Earlier one-off maintenance passes that used to live in this method are
		disabled and have been removed from the body. They were:
		  - listing distinct seriesName values for which nt.getMangaUpdatesId()
		    returned nothing,
		  - inserting mangaseries.buname entries missing from munamelist,
		  - rewriting "http://www.batoto.net/" source URLs to "http://bato.to/".
		"""
		cursor = self.get_cursor()

		# NOTE(review): the first and last spellings presumably differ only in
		# Unicode normalisation form - confirm before editing these literals.
		probeNames = ["Murciélago", "Murcielago", "Murciélago"]

		for probe in probeNames:
			canonical = nt.getCanonicalMangaUpdatesName(probe)
			known = nt.haveCanonicalMangaUpdatesName(probe)
			print("------", probe, canonical, known)

		cursor.execute("COMMIT;")
Example #10
0
	def _processLinksIntoDB(self, linksDicts, isPicked=False):
		"""
		Insert newly discovered download links into the DB and log a summary.

		For each (seriesName, path) pair the absolute URL is built against
		`self.url_base` and looked up by `sourceUrl`:
		  - no row:        a new row is inserted (uncommitted) with dlState 0,
		  - several rows:  the conflict is logged and the item is counted as broken,
		  - one row:       if its stored URL differs, the row is updated and
		                   counted as moved; otherwise it is counted as old.

		Args:
			linksDicts: iterable of (seriesName, fqFileN) pairs.
			isPicked:   accepted for interface compatibility; not used here.
		"""

		# item["date"]     = time.time()
		# item["dlName"]   = linkName
		# item["dlLink"]   = itemUrl
		# item["baseName"] = dirName

		self.log.info("Inserting...")
		newItems   = 0
		oldItems   = 0
		movedItems = 0
		brokeItems = 0
		for seriesName, fqFileN in linksDicts:

			dlLink = urllib.parse.urljoin(self.url_base, fqFileN)
			fileN = os.path.split(fqFileN)[-1]

			# Look up by URL, so we don't break the UNIQUE constraint.
			rows = self.getRowsByValue(sourceUrl  = dlLink)

			# rows = [row for row in rows if row['dlState'] < 3]

			if not rows:
				newItems += 1

				# Flags has to be an empty string, because the DB is annoying.
				# TL;DR, comparing with LIKE in a column that has NULLs in it is somewhat broken.
				self.insertIntoDb(retreivalTime = time.time(),
									sourceUrl   = dlLink,
									originName  = fileN,
									dlState     = 0,
									seriesName  = seriesName,
									flags       = '',
									commit      = False)  # Defer commiting changes to speed things up

				self.log.info("New item: %s", (nt.haveCanonicalMangaUpdatesName(seriesName), dlLink, seriesName, fileN))

			elif len(rows) > 1:
				brokeItems += 1
				self.log.warning("Have more then one item for filename! Wat?")
				self.log.warning("Info dict for file:")
				self.log.warning("'%s'", (dlLink, seriesName, fileN))
				self.log.warning("Found rows:")
				for row in rows:

					self.log.warning("'%s'", row)
					self.log.warning("'%s'", row['dlState'] < 3)

			else:
				# Exactly one row. The former trailing `else` after the 0 / >1 / ==1
				# tests could never run and has been dropped.
				row = rows[0]
				if row["sourceUrl"] != dlLink:
					self.log.info("File has been moved: %s!", (seriesName, fileN))
					self.log.info("Old: %s", row["sourceUrl"])
					self.log.info("New: %s", dlLink)

					self.updateDbEntryById(row["dbId"], sourceUrl = dlLink)
					movedItems += 1
				else:
					oldItems += 1

		self.log.info( "Done")


		self.log.info("%s new items, %s old items, %s moved items,  %s items with broken rows.", newItems, oldItems, movedItems, brokeItems)
Example #11
0
	def _processLinksIntoDB(self, linksDicts, isPicked=False):
		"""
		Insert newly discovered download links into the DB and log a summary.

		For each (seriesName, path) pair the absolute URL is built against
		`self.url_base` and looked up by `sourceUrl`; only rows whose `dlState`
		is below 3 are considered:
		  - no row:        a new row is inserted (uncommitted) with dlState 0,
		  - several rows:  the conflict is logged and the item is counted as broken,
		  - one row:       if its stored URL differs, the row is updated and
		                   counted as moved; otherwise it is counted as old.

		Args:
			linksDicts: iterable of (seriesName, fqFileN) pairs.
			isPicked:   accepted for interface compatibility; not used here.
		"""

		# item["date"]     = time.time()
		# item["dlName"]   = linkName
		# item["dlLink"]   = itemUrl
		# item["baseName"] = dirName

		self.log.info("Inserting...")
		newItems   = 0
		oldItems   = 0
		movedItems = 0
		brokeItems = 0
		for seriesName, fqFileN in linksDicts:

			dlLink = urllib.parse.urljoin(self.url_base, fqFileN)
			fileN = os.path.split(fqFileN)[-1]

			# Look up by URL, so we don't break the UNIQUE constraint.
			rows = self.getRowsByValue(sourceUrl  = dlLink)
			rows = [row for row in rows if row['dlState'] < 3]
			# if not rows:
			# 	#We only look at the filename/series tuple to determine uniqueness,
			# 	rows = self.getRowsByValue(originName = fileN, seriesname = seriesName)

			if not rows:
				newItems += 1

				# Flags has to be an empty string, because the DB is annoying.
				# TL;DR, comparing with LIKE in a column that has NULLs in it is somewhat broken.
				self.insertIntoDb(retreivalTime = time.time(),
									sourceUrl   = dlLink,
									originName  = fileN,
									dlState     = 0,
									seriesName  = seriesName,
									flags       = '',
									commit      = False)  # Defer commiting changes to speed things up

				self.log.info("New item! Have canon name: %s, URL: %s, Series: %s, FileName: %s", nt.haveCanonicalMangaUpdatesName(seriesName), dlLink, seriesName, fileN)

			elif len(rows) > 1:
				brokeItems += 1
				self.log.warning("Have more then one item for filename! Wat?")
				self.log.warning("Info dict for file:")
				self.log.warning("'%s'", (dlLink, seriesName, fileN))
				self.log.warning("Found rows:")
				for row in rows:

					self.log.warning("'%s'", row)
					self.log.warning("'%s'", row['dlState'] < 3)

			else:
				# Exactly one row. The former trailing `else` after the 0 / >1 / ==1
				# tests could never run and has been dropped.
				row = rows[0]
				if row["sourceUrl"] != dlLink:
					self.log.info("File has been moved: %s!", (seriesName, fileN))
					self.log.info("Old: %s", row["sourceUrl"])
					self.log.info("New: %s", dlLink)

					self.updateDbEntryById(row["dbId"], sourceUrl = dlLink)
					movedItems += 1
				else:
					oldItems += 1

		self.log.info( "Done")

		self.log.info("%s new items, %s old items, %s moved items,  %s items with broken rows.", newItems, oldItems, movedItems, brokeItems)
Example #12
0
	def consolidateSeriesNaming(self):
		"""
		Debug helper: print the nameTools lookup results for a few spellings of
		one series name, then issue a COMMIT on a fresh connection cursor.

		Earlier one-off maintenance passes that used to live in this method are
		disabled and have been removed from the body. They were:
		  - listing distinct seriesName values for which nt.getMangaUpdatesId()
		    returned nothing,
		  - inserting mangaseries.buname entries missing from munamelist,
		  - rewriting "http://www.batoto.net/" source URLs to "http://bato.to/".
		"""
		cursor = self.conn.cursor()

		# NOTE(review): the first and last spellings presumably differ only in
		# Unicode normalisation form - confirm before editing these literals.
		probeNames = ["Murciélago", "Murcielago", "Murciélago"]

		for probe in probeNames:
			canonical = nt.getCanonicalMangaUpdatesName(probe)
			known = nt.haveCanonicalMangaUpdatesName(probe)
			print("------", probe, canonical, known)

		cursor.execute("COMMIT;")