Example #1
	def getLink(self, linkDict):
		try:
			linkDict = self.getDownloadInfo(linkDict)
			images = self.getImages(linkDict)
			title  = linkDict['title']
			artist = linkDict['artist']

		except webFunctions.ContentError:
			self.updateDbEntry(linkDict["sourceUrl"], dlState=-2, downloadPath="ERROR", fileName="ERROR: FAILED")
			return False

		if images and title:
			fileN = title+" "+artist+".zip"
			fileN = nt.makeFilenameSafe(fileN)


			# self.log.info("geturl with processing", fileN)
			wholePath = os.path.join(linkDict["dirPath"], fileN)
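			# insertCountIfFilenameExists presumably appends a " (n)" counter to the name until the path is free, so an existing archive is never clobbered.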
			wholePath = self.insertCountIfFilenameExists(wholePath)
			self.log.info("Complete filepath: %s", wholePath)

			# Write all downloaded files to the archive.

			try:
				arch = zipfile.ZipFile(wholePath, "w")
			except OSError:
				# Some filesystems reject names with characters they can't encode;
				# fall back to an ASCII-only title.
				title = title.encode('ascii', 'ignore').decode('ascii')
				fileN = title+".zip"
				fileN = nt.makeFilenameSafe(fileN)
				wholePath = os.path.join(linkDict["dirPath"], fileN)
				wholePath = self.insertCountIfFilenameExists(wholePath)
				arch = zipfile.ZipFile(wholePath, "w")

			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			self.log.info("Successfully Saved to path: %s", wholePath)


			self.updateDbEntry(linkDict["sourceUrl"], downloadPath=linkDict["dirPath"], fileName=fileN)

			# Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
			dedupState = processDownload.processDownload(None, wholePath, pron=True, deleteDups=True, includePHash=True, rowId=linkDict['dbId'])
			self.log.info("Done")

			if dedupState:
				self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)


			self.updateDbEntry(linkDict["sourceUrl"], dlState=2)


			return wholePath

		else:

			self.updateDbEntry(linkDict["sourceUrl"], dlState=-1, downloadPath="ERROR", fileName="ERROR: FAILED")

			return False
Example #2
    def getUploadDirectory(self, seriesName):

        ulDir = self.getExistingDir(seriesName)

        if not ulDir:
            seriesName = nt.getCanonicalMangaUpdatesName(seriesName)
            safeFilename = nt.makeFilenameSafe(seriesName)
            matchName = nt.prepFilenameForMatching(seriesName)
            matchName = matchName.encode('utf-8', 'ignore').decode('utf-8')
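            # The encode/decode round-trip drops any code points utf-8 can't represent (e.g. lone surrogates) before the dict lookup.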

            self.checkInitDirs()
            if matchName in self.mainDirs:
                ulDir = self.mainDirs[matchName][0]
            elif seriesName in self.mainDirs:
                ulDir = self.mainDirs[seriesName][0]
            else:

                self.log.info("Need to create container directory for %s",
                              seriesName)
                ulDir = os.path.join(settings.mkSettings["uploadContainerDir"],
                                     settings.mkSettings["uploadDir"],
                                     safeFilename)
                try:
                    self.sftp.mkdir(ulDir)
                except OSError as e:
                    # If the error is just a "directory exists" warning, ignore it
                    # silently. Note that str(e) contains only the message, without
                    # the "OSError: " prefix, so match on the message text itself.
                    if 'File already exists' in str(e):
                        pass
                    else:
                        self.log.warn("Error creating directory?")
                        self.log.warn(traceback.format_exc())

        return ulDir
Example #3
	def getDoujinshiUploadDirectory(self, seriesName):
		ulDir = self.getExistingDir(seriesName)

		if not ulDir:
			seriesName = nt.getCanonicalMangaUpdatesName(seriesName)
			safeFilename = nt.makeFilenameSafe(seriesName)
			matchName = nt.prepFilenameForMatching(seriesName)
			matchName = matchName.encode('latin-1', 'ignore').decode('latin-1')

			self.checkInitDirs()
			if matchName in self.unsortedDirs:
				ulDir = self.unsortedDirs[matchName]
			elif safeFilename in self.unsortedDirs:
				ulDir = self.unsortedDirs[safeFilename]
			else:

				self.log.info("Need to create container directory for %s", seriesName)
				ulDir = os.path.join(settings.mkSettings["uploadContainerDir"], settings.mkSettings["uploadDir"], safeFilename)
				try:
					self.sftp.mkdir(ulDir)
				except OSError:
					# paramiko's sftp.mkdir raises OSError (not ftplib.error_perm) on failure.
					self.log.warn("Directory exists?")
					self.log.warn(traceback.format_exc())


		return ulDir
Example #4
    def getLink(self, link):
        sourceUrl = link["sourceUrl"]
        seriesName = link["seriesName"]
        originFileName = link["originName"]

        self.updateDbEntry(sourceUrl, dlState=1)
        self.log.info("Downloading = '%s', '%s'", seriesName, originFileName)
        dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)

        if link["flags"] is None:
            link["flags"] = ""

        if newDir:
            self.updateDbEntry(sourceUrl, flags=" ".join([link["flags"], "haddir"]))
            self.conn.commit()

        try:
            content, headerName = self.getLinkFile(sourceUrl)
        except Exception:
            self.log.error("Unrecoverable error retrieving content %s", link)
            self.log.error("Traceback: %s", traceback.format_exc())

            self.updateDbEntry(sourceUrl, dlState=-1)
            return

        headerName = urllib.parse.unquote(headerName)

        fName = "%s - %s" % (originFileName, headerName)
        fName = nt.makeFilenameSafe(fName)

        fName, ext = os.path.splitext(fName)
        fName = "%s [CXC Scans]%s" % (fName, ext)

        fqFName = os.path.join(dlPath, fName)
        self.log.info("SaveName = %s", fqFName)

        loop = 1
        baseName, ext = os.path.splitext(fName)
        while os.path.exists(fqFName):
            # Rebuild from the original stem each pass, so we don't stack "(1) (2)" suffixes,
            # and join against dlPath (the directory actually used above).
            fName = "%s (%d)%s" % (baseName, loop, ext)
            fqFName = os.path.join(dlPath, fName)
            loop += 1
        self.log.info("Writing file")

        filePath, fileName = os.path.split(fqFName)

        try:
            with open(fqFName, "wb") as fp:
                fp.write(content)
        except TypeError:
            self.log.error("Failure trying to retrieve content from source %s", sourceUrl)
            self.updateDbEntry(sourceUrl, dlState=-4, downloadPath=filePath, fileName=fileName)
            return
            # self.log.info( filePath)

        dedupState = processDownload.processDownload(seriesName, fqFName, deleteDups=True)

        self.log.info("Done")
        self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, tags=dedupState)
        return
Example #5
    def locateOrCreateDirectoryForSeries(self, seriesName):

        if self.shouldCanonize and self.is_manga:
            canonSeriesName = nt.getCanonicalMangaUpdatesName(seriesName)
        else:
            canonSeriesName = seriesName

        safeBaseName = nt.makeFilenameSafe(canonSeriesName)

        targetDir = os.path.join(settings.mkSettings["dirs"]['bookDir'],
                                 safeBaseName)
        if not os.path.exists(targetDir):
            self.log.info("Don't have target dir for: %s, full name = %s",
                          canonSeriesName, seriesName)
            try:
                os.makedirs(targetDir)
                return targetDir, True

            except FileExistsError:
                # The directory was probably created concurrently by another thread in the background.
                self.log.critical("Directory doesn't exist, and yet it does?")
                self.log.critical(traceback.format_exc())
            except OSError:
                self.log.critical("Directory creation failed?")
                self.log.critical(traceback.format_exc())

        else:
            self.log.info("Directory exists.")
            self.log.info("Directory not found in dir-dict, but it exists!")
            self.log.info("Directory-Path: %s", targetDir)
            self.log.info("Base series name: %s", seriesName)
            self.log.info("Canonized series name: %s", canonSeriesName)
            self.log.info("Safe canonized name: %s", safeBaseName)
        return targetDir, False
Example #6
    def insertNames(self, buId, names):
        self.log.info("Updating name synonym table for %s with %s name(s).",
                      buId, len(names))
        with self.transaction() as cur:

            # delete the old names from the table, so if they're removed from the source, we'll match that.
            cur.execute(
                "DELETE FROM {tableName} WHERE buId=%s;".format(
                    tableName=self.nameMapTableName), (buId, ))

            alreadyAddedNames = []
            for name in names:
                fsSafeName = nt.prepFilenameForMatching(name)
                if not fsSafeName:
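                    # prepFilenameForMatching can reduce a name to an empty string; fall back to the filesystem-safe form so we still store something matchable.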
                    fsSafeName = nt.makeFilenameSafe(name)

                # We have to block duplicate names. Generally, it's pretty common
                # for multiple names to reduce down to the same string after
                # passing through `prepFilenameForMatching()`.
                if fsSafeName in alreadyAddedNames:
                    continue

                alreadyAddedNames.append(fsSafeName)

                cur.execute(
                    """INSERT INTO %s (buId, name, fsSafeName) VALUES (%%s, %%s, %%s);"""
                    % self.nameMapTableName, (buId, name, fsSafeName))

        self.log.info("Updated!")
Example #7
    def doDownload(self, image_urls, origin_name, link_row_id):

        images = self.fetchImages(image_urls)

        if not images:
            with self.row_context(dbid=link_row_id) as row:
                row.state = 'error'
            return

        fileN = origin_name + ".zip"

        with self.row_sess_context(dbid=link_row_id) as row_tup:
            row, sess = row_tup

            container_dir = os.path.join(settings.hbSettings["dlDir"],
                                         nt.makeFilenameSafe(row.series_name))

            wholePath = os.path.join(container_dir, fileN)
            fqFName = self.save_image_set(row, sess, wholePath, images)

        with self.row_context(dbid=link_row_id) as row:
            row.state = 'processing'

        # We don't want to upload the file we just downloaded, so specify doUpload as false.
        # As a result of this, the seriesName parameter also no longer matters.
        self.processDownload(seriesName=False,
                             archivePath=fqFName,
                             doUpload=False)

        self.log.info("Done")
        with self.row_context(dbid=link_row_id) as row:
            row.state = 'complete'
            row.downloaded_at = datetime.datetime.now()
            row.last_checked = datetime.datetime.now()
Example #8
    def processDownloadInfo(self, linkDict):

        self.updateDbEntry(linkDict["sourceUrl"], dlState=1)

        sourcePage = linkDict["sourceUrl"]
        category = linkDict['seriesName']

        self.log.info("Retrieving item: %s", sourcePage)

        linkDict['dirPath'] = os.path.join(settings.fkSettings["dlDir"],
                                           nt.makeFilenameSafe(category))

        if not os.path.exists(linkDict["dirPath"]):
            os.makedirs(linkDict["dirPath"])
        else:
            self.log.info("Folder Path already exists?: %s",
                          linkDict["dirPath"])

        self.log.info("Folderpath: %s", linkDict["dirPath"])

        self.log.debug("Linkdict = ")
        for key, value in list(linkDict.items()):
            self.log.debug("		%s - %s", key, value)

        return linkDict
Example #9
def prep_check_fq_filename(fqfilename):
    fqfilename = os.path.abspath(fqfilename)

    # Add a zip extension (if needed). If this is wrong,
    # magic should handle it fine anyways (and the arch processor
    # will probably regenerate the file along the way)
    if not os.path.splitext(fqfilename)[1]:
        fqfilename = fqfilename + ".zip"

    filepath, fileN = os.path.split(fqfilename)
    filepath = clean_filename(filepath)
    fileN = nt.makeFilenameSafe(fileN)

    valid_containers = [
        settings.pickedDir, settings.baseDir, settings.unlinkedDir,
        settings.bookDir, settings.h_dir, settings.c_dir,
        settings.mangaCmsHContext
    ]
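    # Refuse to write anywhere outside the sanctioned download roots; this also
    # guards against path-traversal components surviving in a generated name.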
    assert any(
        [is_in_directory(filepath, dirc) for dirc in valid_containers]
    ), "Saved files must be placed in one of the download paths! File path: %s, valid containers: %s (%s)" % (
        filepath, valid_containers,
        [is_in_directory(filepath, dirc) for dirc in valid_containers])

    # Create the target container directory (if needed)
    if not os.path.exists(filepath):
        os.makedirs(filepath, exist_ok=True)  # Hurray for race conditions!

    assert os.path.isdir(filepath)

    fqfilename = os.path.join(filepath, fileN)
    fqfilename = insertCountIfFilenameExists(fqfilename)

    return fqfilename
Example #10
	def getUploadDirectory(self, seriesName):

		ulDir = self.getExistingDir(seriesName)

		if not ulDir:
			seriesName = nt.getCanonicalMangaUpdatesName(seriesName)
			safeFilename = nt.makeFilenameSafe(seriesName)
			matchName = nt.prepFilenameForMatching(seriesName)
			matchName = matchName.encode('latin-1', 'ignore').decode('latin-1')

			self.checkInitDirs()
			if matchName in self.unsortedDirs:
				ulDir = self.unsortedDirs[matchName]
			elif safeFilename in self.unsortedDirs:
				ulDir = self.unsortedDirs[safeFilename]
			else:

				self.log.info("Need to create container directory for %s", seriesName)
				ulDir = os.path.join(settings.mkSettings["uploadContainerDir"], settings.mkSettings["uploadDir"], safeFilename)
				try:
					self.ftp.mkd(ulDir)
				except ftplib.error_perm as e:
					# If the error is just a "directory exists" warning, ignore it silently
					if str(e).startswith("550") and str(e).endswith('File exists'):
						pass
					else:
						self.log.warn("Error creating directory?")
						self.log.warn(traceback.format_exc())


		return ulDir
Example #11
    def save_archive(self, row, sess, fqfilename, file_content):

        fqfilename = prep_check_fq_filename(fqfilename)
        filepath, fileN = os.path.split(fqfilename)
        self.log.info("Complete filepath: %s", fqfilename)

        chop = len(fileN) - 4
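        # Truncate only the stem: fileN[:chop] + fileN[-4:] below always preserves the last four characters (the ".zip" extension).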

        while 1:
            try:
                with open(fqfilename, "wb") as fp:
                    fp.write(file_content)

                file_row, have_fqp = self.get_create_file_row(
                    sess, row, fqfilename)
                row.fileid = file_row.id

                return have_fqp

            except (IOError, OSError):
                chop = chop - 1
                filepath, fileN = os.path.split(fqfilename)

                fileN = fileN[:chop] + fileN[-4:]
                self.log.warn(
                    "Truncating file length to %s characters and re-encoding.",
                    chop)
                fileN = fileN.encode('utf-8', 'ignore').decode('utf-8')
                fileN = nt.makeFilenameSafe(fileN)
                fqfilename = os.path.join(filepath, fileN)
                fqfilename = insertCountIfFilenameExists(fqfilename)
Example #12
	def insertNames(self, buId, names):
		self.log.info("Updating name synonym table for %s with %s name(s).", buId, len(names))
		with self.transaction() as cur:


			# delete the old names from the table, so if they're removed from the source, we'll match that.
			cur.execute("DELETE FROM {tableName} WHERE buId=%s;".format(tableName=self.nameMapTableName), (buId, ))

			alreadyAddedNames = []
			for name in names:
				fsSafeName = nt.prepFilenameForMatching(name)
				if not fsSafeName:
					fsSafeName = nt.makeFilenameSafe(name)

				# We have to block duplicate names. Generally, it's pretty common
				# for multiple names to reduce down to the same string after
				# passing through `prepFilenameForMatching()`.
				if fsSafeName in alreadyAddedNames:
					continue

				alreadyAddedNames.append(fsSafeName)

				cur.execute("""INSERT INTO %s (buId, name, fsSafeName) VALUES (%%s, %%s, %%s);""" % self.nameMapTableName, (buId, name, fsSafeName))

		self.log.info("Updated!")
Example #13
	def getDownloadInfo(self, linkDict, soup):

		infoSection = soup.find("div", id='infobox')


		category, tags, artist = self.getCategoryTags(infoSection)
		tags = ' '.join(tags)
		linkDict['artist'] = artist
		linkDict['title'] = self.getFileName(infoSection)
		linkDict['dirPath'] = os.path.join(settings.djOnSettings["dlDir"], nt.makeFilenameSafe(category))

		if not os.path.exists(linkDict["dirPath"]):
			os.makedirs(linkDict["dirPath"])
		else:
			self.log.info("Folder Path already exists?: %s", linkDict["dirPath"])

		self.log.info("Folderpath: %s", linkDict["dirPath"])

		self.log.debug("Linkdict = ")
		for key, value in list(linkDict.items()):
			self.log.debug("		%s - %s", key, value)


		if tags:
			self.log.info("Adding tag info %s", tags)
			self.addTags(sourceUrl=linkDict["sourceUrl"], tags=tags)

		self.updateDbEntry(linkDict["sourceUrl"], seriesName=category, lastUpdate=time.time())

		return linkDict
Example #14
    def getDoujinshiUploadDirectory(self, seriesName):
        ulDir = self.getExistingDir(seriesName)

        if not ulDir:
            seriesName = nt.getCanonicalMangaUpdatesName(seriesName)
            safeFilename = nt.makeFilenameSafe(seriesName)
            matchName = nt.prepFilenameForMatching(seriesName)
            matchName = matchName.encode('latin-1', 'ignore').decode('latin-1')

            self.checkInitDirs()
            if matchName in self.unsortedDirs:
                ulDir = self.unsortedDirs[matchName]
            elif safeFilename in self.unsortedDirs:
                ulDir = self.unsortedDirs[safeFilename]
            else:

                self.log.info("Need to create container directory for %s",
                              seriesName)
                ulDir = os.path.join(settings.mkSettings["uploadContainerDir"],
                                     settings.mkSettings["uploadDir"],
                                     safeFilename)
                try:
                    self.sftp.mkdir(ulDir)
                except OSError:
                    # paramiko's sftp.mkdir raises OSError (not ftplib.error_perm) on failure.
                    self.log.warn("Directory exists?")
                    self.log.warn(traceback.format_exc())

        return ulDir
Example #15
	def getUploadDirectory(self, seriesName):

		ulDir = self.getExistingDir(seriesName)

		if not ulDir:
			seriesName   = nt.getCanonicalMangaUpdatesName(seriesName)
			safeFilename = nt.makeFilenameSafe(seriesName)
			matchName    = nt.prepFilenameForMatching(seriesName)
			matchName    = matchName.encode('utf-8', 'ignore').decode('utf-8')

			self.checkInitDirs()
			if matchName in self.mainDirs:
				ulDir = self.mainDirs[matchName][0]
			elif seriesName in self.mainDirs:
				ulDir = self.mainDirs[seriesName][0]
			else:

				self.log.info("Need to create container directory for %s", seriesName)
				ulDir = os.path.join(settings.mkSettings["uploadContainerDir"], settings.mkSettings["uploadDir"], safeFilename)
				try:
					self.sftp.mkdir(ulDir)
				except OSError as e:
					# If the error is just a "directory exists" warning, ignore it
					# silently. Note that str(e) contains only the message, without
					# the "OSError: " prefix, so match on the message text itself.
					if 'File already exists' in str(e):
						pass
					else:
						self.log.warn("Error creating directory?")
						self.log.warn(traceback.format_exc())


		return ulDir
Example #16
    def get_link(self, link_row_id):

        with self.row_context(dbid=link_row_id) as row:
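            # row_context presumably wraps the ORM row in a transaction scope,
            # committing (or rolling back) when the with-block exits.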
            row.state = 'fetching'
            source_url = row.source_id

        try:
            dl_info = self.getDownloadInfo(source_url=source_url,
                                           row_id=link_row_id)
            images = self.getImages(dl_info=dl_info)
            file_name = dl_info['file_name']

        except WebRequest.WebGetException:
            with self.row_context(dbid=link_row_id) as row:
                row.state = 'error'
            return False

        if not images:
            with self.row_context(dbid=link_row_id) as row:
                row.state = 'error'
            return False

        fileN = file_name + ".zip"
        fileN = nt.makeFilenameSafe(fileN)

        with self.row_sess_context(dbid=link_row_id) as row_tup:
            row, sess = row_tup

            container_dir = os.path.join(settings.puSettings["dlDir"],
                                         nt.makeFilenameSafe(row.series_name))
            wholePath = os.path.join(container_dir, row.origin_name)
            fqFName = self.save_image_set(row, sess, wholePath, images)

        with self.row_context(dbid=link_row_id) as row:
            row.state = 'processing'

        # We don't want to upload the file we just downloaded, so specify doUpload as false.
        # As a result of this, the seriesName parameter also no longer matters.
        self.processDownload(seriesName=False,
                             archivePath=fqFName,
                             doUpload=False)

        self.log.info("Done")
        with self.row_context(dbid=link_row_id) as row:
            row.state = 'complete'

            row.downloaded_at = datetime.datetime.now()
Example #17
def renameSeriesToMatchMangaUpdates(scanpath):
	idLut = nt.MtNamesMapWrapper("fsName->buId")
	muLut = nt.MtNamesMapWrapper("buId->buName")
	db = DbInterface()
	print("Scanning")
	foundDirs = 0
	contents = os.listdir(scanpath)
	for dirName in contents:
		cName = nt.prepFilenameForMatching(dirName)
		mtId = idLut[cName]
		if mtId and len(mtId) > 1:
			print("Multiple mtId values for '%s' ('%s')" % (cName, dirName))
			print("	", mtId)
			print("	Skipping item")

		elif mtId:
			mtId = mtId.pop()
			mtName = muLut[mtId].pop()
			cMtName = nt.prepFilenameForMatching(mtName)
			if cMtName != cName:
				print("Dir '%s' ('%s')" % (cName, dirName))
				print("	Should be '%s'" % (mtName, ))
				print("	URL: https://www.mangaupdates.com/series.html?id=%s" % (mtId, ))
				oldPath = os.path.join(scanpath, dirName)
				newPath = os.path.join(scanpath, nt.makeFilenameSafe(mtName))
				if not os.path.isdir(oldPath):
					raise ValueError("Not a dir. Wat?")



				print("	old '%s'" % (oldPath, ))
				print("	new '%s'" % (newPath, ))

				newCl = nt.cleanUnicode(newPath)
				if newCl != newPath:
					print("Unicode oddness. Skipping")
					continue

				rating = nt.extractRatingToFloat(oldPath)

				if rating != 0:
					print("	Need to add rating = ", rating)

				mv = query_response_bool("	rename?")

				if mv:

					#
					if os.path.exists(newPath):
						print("Target dir exists! Moving files instead")
						moveFiles(oldPath, newPath)
						os.rmdir(oldPath)
						nt.dirNameProxy.changeRatingPath(newPath, rating)
					else:
						os.rename(oldPath, newPath)
						nt.dirNameProxy.changeRatingPath(newPath, rating)
				foundDirs += 1

	print("Total directories that need renaming", foundDirs)
Example #18
	def getLink(self, link):
		sourceUrl, originFileName = link["sourceUrl"], link["originName"]

		self.log.info("Should retrieve: %s, url - %s", originFileName, sourceUrl)

		self.updateDbEntry(sourceUrl, dlState=1)
		self.conn.commit()

		fileUrl = self.getDownloadUrl(sourceUrl)
		if fileUrl is None:
			self.log.warning("Could not find url!")
			self.deleteRowsByValue(sourceUrl=sourceUrl)
			return


		try:
			content, hName = self.getLinkFile(fileUrl, sourceUrl)
		except Exception:
			self.log.error("Unrecoverable error retrieving content %s", link)
			self.log.error("Traceback: %s", traceback.format_exc())

			self.updateDbEntry(sourceUrl, dlState=-1)
			return

		# print("Content type = ", type(content))


		# And fix %xx crap
		hName = urllib.parse.unquote(hName)

		fName = "%s - %s" % (originFileName, hName)
		fName = nt.makeFilenameSafe(fName)

		fqFName = os.path.join(link["targetDir"], fName)
		self.log.info("SaveName = %s", fqFName)


		loop = 1
		while os.path.exists(fqFName):
			fName = "%s - (%d) - %s" % (originFileName, loop,  hName)
			fqFName = os.path.join(link["targetDir"], fName)
			loop += 1
		self.log.info("Writing file")

		filePath, fileName = os.path.split(fqFName)

		try:
			with open(fqFName, "wb") as fp:
				fp.write(content)
		except TypeError:
			self.log.error("Failure trying to retrieve content from source %s", sourceUrl)
			return
		#self.log.info( filePath)

		dedupState = processDownload.processDownload(link["seriesName"], fqFName, deleteDups=True, includePHash=True)
		self.log.info("Done")

		self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, tags=dedupState)
		return
Example #20
    def getLink(self, link):

        try:
            self.updateDbEntry(link["sourceUrl"], dlState=1)
            image_url_list = self.getDownloadInfo(link)

            images = self.getImages(image_url_list)
            title = link['seriesName']
            artist = link['artist']

        except WebRequest.WebGetException:
            self.updateDbEntry(link["sourceUrl"],
                               dlState=-2,
                               downloadPath="ERROR",
                               fileName="ERROR: FAILED")
            return False
        except UnwantedContentError:
            self.updateDbEntry(
                link["sourceUrl"],
                dlState=-3,
                downloadPath="ERROR",
                fileName="ERROR: Unwanted Tags applied to series!")
            return False
        except PageContentError:
            self.updateDbEntry(link["sourceUrl"],
                               dlState=-3,
                               downloadPath="ERROR",
                               fileName="ERROR: FAILED (PageContentError)")
            return False

        if images and title:
            fileN = title + " " + artist + ".zip"
            fileN = nt.makeFilenameSafe(fileN)
            wholePath = os.path.join(link["dirPath"], fileN)

            wholePath = self.save_image_set(wholePath, images)

            self.updateDbEntry(link["sourceUrl"],
                               downloadPath=link["dirPath"],
                               fileName=fileN)

            # Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
            dedupState = MangaCMS.cleaner.processDownload.processDownload(
                None,
                wholePath,
                pron=True,
                deleteDups=True,
                includePHash=True,
                rowId=link['dbId'])
            self.log.info("Done")

            if dedupState:
                self.addTags(sourceUrl=link["sourceUrl"], tags=dedupState)

            self.updateDbEntry(link["sourceUrl"], dlState=2)

            delay = random.randint(5, 30)
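            # The random inter-item delay presumably keeps the scraper from hammering the remote site.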
            self.log.info("Sleeping %s", delay)
            time.sleep(delay)
Example #21
    def doDownload(self, seriesName, dlurl, chapter_name):

        with self.row_context(url=dlurl) as row:
            if row and row.state != 'new':
                return

        link = {
            "series_name": seriesName,
            "source_id": dlurl,
            'posted_at': datetime.datetime.now(),
            'state': 'fetching'
        }

        self._process_links_into_db([link])

        try:

            fctnt, fname = self.wg.getFileAndName(dlurl)

        except Exception:
            self.log.error("Unrecoverable error retrieving content %s",
                           (seriesName, dlurl))
            self.log.error("Traceback: %s", traceback.format_exc())

            with self.row_context(url=dlurl) as row:
                row.state = 'error'
            return

        target_dir, new_dir = self.locateOrCreateDirectoryForSeries(seriesName)
        with self.row_context(url=dlurl) as row:
            row.dirstate = 'created_dir' if new_dir else 'had_dir'
            row.origin_name = fname

        fileN = '{series} - {chap} [YoManga].zip'.format(series=seriesName,
                                                         chap=chapter_name)
        fileN = nt.makeFilenameSafe(fileN)

        fqFName = os.path.join(target_dir, fileN)

        # This call also inserts the file parameters into the row
        with self.row_sess_context(url=dlurl) as row_tup:
            row, sess = row_tup
            fqFName = self.save_archive(row, sess, fqFName, fctnt)

        #self.log.info( filePath)

        with self.row_context(url=dlurl) as row:
            row.state = 'processing'

        self.processDownload(seriesName=seriesName, archivePath=fqFName)

        self.log.info("Done")
        with self.row_context(url=dlurl) as row:
            row.state = 'complete'
            row.downloaded_at = datetime.datetime.now()
            row.last_checked = datetime.datetime.now()

        return
Example #22
	def getLink(self, linkDict):
		try:
			linkDict = self.getDownloadInfo(linkDict)

			images = self.getImages(linkDict)
			title = linkDict['originName']
		except WebRequest.WebGetException:
			self.updateDbEntry(linkDict["sourceUrl"], dlState=-2, downloadPath="ERROR", fileName="ERROR: FAILED")
			return False

		if images and title:
			fileN = title+".zip"
			fileN = nt.makeFilenameSafe(fileN)


			# self.log.info("geturl with processing", fileN)
			wholePath = os.path.join(linkDict["dirPath"], fileN)

			# try:
			# 	arch = zipfile.ZipFile(wholePath, "w")
			# except OSError:
			# 	title = title.encode('ascii','ignore').decode('ascii')
			# 	fileN = title+".zip"
			# 	fileN = nt.makeFilenameSafe(fileN)
			# 	wholePath = os.path.join(linkDict["dirPath"], fileN)
			# 	wholePath = self.insertCountIfFilenameExists(wholePath)
			# 	arch = zipfile.ZipFile(wholePath, "w")

			# for imageName, imageContent in images:
			# 	arch.writestr(imageName, imageContent)
			# arch.close()

			#Write all downloaded files to the archive.
			wholePath = self.save_image_set(wholePath, images)

			self.log.info("Successfully Saved to path: %s", wholePath)


			self.updateDbEntry(linkDict["sourceUrl"], downloadPath=linkDict["dirPath"], fileName=fileN)

			# Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
			dedupState = MangaCMS.cleaner.processDownload.processDownload(None, wholePath, pron=True, deleteDups=True, includePHash=True, rowId=linkDict['dbId'])
			self.log.info("Done")

			if dedupState:
				self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)


			self.updateDbEntry(linkDict["sourceUrl"], dlState=2)


			return wholePath

		else:
			self.log.warning("No images found?")
			self.updateDbEntry(linkDict["sourceUrl"], dlState=-1, downloadPath="ERROR", fileName="ERROR: FAILED")

			return False
Example #23
    def doDownload(self, link_info, link_row_id):

        # linkDict['dirPath'] = os.path.join(settings.sadPanda["dlDir"], linkDict['seriesName'])

        # if not os.path.exists(linkDict["dirPath"]):
        # 	os.makedirs(linkDict["dirPath"])

        # self.log.info("Folderpath: %s", linkDict["dirPath"])

        with self.row_context(dbid=link_row_id) as row:
            source_url = row.source_id
            origin_name = row.origin_name
            series_name = row.series_name

            self.update_tags(link_info['item_tags'], row=row)

        downloadUrl = self.getDownloadUrl(link_info['dlPage'], source_url)

        if not downloadUrl:
            with self.row_context(dbid=link_row_id) as row:
                row.state = 'error'
            return False

        fCont, fName = self.wg.getFileAndName(downloadUrl)

        # self.log.info(len(content))
        if origin_name in fName:
            fileN = fName
        else:
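            # fName from getFileAndName may itself end in ".zip"; the replace below collapses the doubled extension.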
            fileN = '%s - %s.zip' % (origin_name, fName)
            fileN = fileN.replace('.zip .zip', '.zip')

        fileN = nt.makeFilenameSafe(fileN)
        fqFName = os.path.join(settings.sadPanda["dlDir"], series_name, fileN)

        # This call also inserts the file parameters into the row
        with self.row_sess_context(dbid=link_row_id) as row_tup:
            row, sess = row_tup
            fqFName = self.save_archive(row, sess, fqFName, fCont)

        #self.log.info( filePath)

        with self.row_context(dbid=link_row_id) as row:
            row.state = 'processing'

        # We don't want to upload the file we just downloaded, so specify doUpload as false.
        # As a result of this, the seriesName parameter also no longer matters.
        self.processDownload(seriesName=False,
                             archivePath=fqFName,
                             doUpload=False)

        self.log.info("Done")
        with self.row_context(dbid=link_row_id) as row:
            row.state = 'complete'
            row.downloaded_at = datetime.datetime.now()
            row.last_checked = datetime.datetime.now()

        return True
Example #24
    def doDownload(self, linkDict, retag=False):

        downloadUrl = self.getDownloadUrl(linkDict["dlPage"], linkDict["sourceUrl"])

        if downloadUrl:

            fCont, fName = self.wg.getFileAndName(downloadUrl)

            # self.log.info(len(content))
            if linkDict["originName"] in fName:
                fileN = fName
            else:
                fileN = "%s - %s.zip" % (linkDict["originName"], fName)
                fileN = fileN.replace(".zip .zip", ".zip")

            fileN = nt.makeFilenameSafe(fileN)

            chop = len(fileN) - 4

            wholePath = "ERROR"
            while 1:

                try:
                    fileN = fileN[:chop] + fileN[-4:]
                    # self.log.info("geturl with processing", fileN)
                    wholePath = os.path.join(linkDict["dirPath"], fileN)
                    self.log.info("Complete filepath: %s", wholePath)

                    # Write all downloaded files to the archive.
                    with open(wholePath, "wb") as fp:
                        fp.write(fCont)
                    self.log.info("Successfully Saved to path: %s", wholePath)
                    break
                except IOError:
                    chop = chop - 1
                    self.log.warn("Truncating file length to %s characters.", chop)

            if not linkDict["tags"]:
                linkDict["tags"] = ""

            self.updateDbEntry(linkDict["sourceUrl"], downloadPath=linkDict["dirPath"], fileName=fileN)

            # Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
            dedupState = processDownload.processDownload(linkDict["seriesName"], wholePath, pron=True)
            self.log.info("Done")

            if dedupState:
                self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)

            self.updateDbEntry(linkDict["sourceUrl"], dlState=2)
            self.conn.commit()

        else:

            self.updateDbEntry(linkDict["sourceUrl"], dlState=-1, downloadPath="ERROR", fileName="ERROR: FAILED")

            self.conn.commit()
            return False
Example #25
    def getFile(self, file_data):

        row = self.getRowsByValue(sourceUrl=file_data["baseUrl"],
                                  limitByKey=False)
        if row and row[0]['dlState'] != 0:
            return
        if not row:
            self.insertIntoDb(retreivalTime=time.time(),
                              sourceUrl=file_data["baseUrl"],
                              originName=file_data["title"],
                              dlState=1,
                              seriesName=file_data["title"])

        image_links = self.getFileInfo(file_data)

        images = []
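        # get_image presumably de-obfuscates each fetched image by XORing it with the per-file xor_key.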
        for imagen, imageurl in image_links:
            imdat = self.get_image(imageurl, file_data['xor_key'])
            images.append((imagen, imdat))

            # filen = nt.makeFilenameSafe(file_data['title'] + " - " + imagen)
            # with open(filen, "wb") as fp:
            # 	fp.write(imdat)

        fileN = '{series} - c{chapNo:03.0f} [MangaBox].zip'.format(
            series=file_data['title'], chapNo=file_data['chapter'])
        fileN = nt.makeFilenameSafe(fileN)

        dlPath, newDir = self.locateOrCreateDirectoryForSeries(
            file_data["title"])
        wholePath = os.path.join(dlPath, fileN)

        if newDir:
            self.updateDbEntry(file_data["baseUrl"], flags="haddir")
            self.conn.commit()

        arch = zipfile.ZipFile(wholePath, "w")
        for imageName, imageContent in images:
            arch.writestr(imageName, imageContent)
        arch.close()

        self.log.info("Successfully Saved to path: %s", wholePath)

        dedupState = processDownload.processDownload(file_data["title"],
                                                     wholePath,
                                                     deleteDups=True)
        if dedupState:
            self.addTags(sourceUrl=file_data["baseUrl"], tags=dedupState)

        self.updateDbEntry(file_data["baseUrl"],
                           dlState=2,
                           downloadPath=dlPath,
                           fileName=fileN,
                           originName=fileN)

        self.conn.commit()
        self.log.info("Done")
Example #26
	def getMainItems(self, rangeOverride=None, rangeOffset=None):
		# for item in items:
		# 	self.log.info( item)
		#
		urlFormat = "http://www.mangatraders.com/releases/%s/"
		urlBase = "http://www.mangatraders.com/"

		self.log.info("Loading MT Main Feed")

		ret = []
		if not rangeOverride:
			dayDelta = 3
		else:
			dayDelta = int(rangeOverride)
		if not rangeOffset:
			rangeOffset = 0

		for daysAgo in range(dayDelta):
			day = datetime.date.today() - datetime.timedelta(daysAgo+rangeOffset)
			url = urlFormat % day.strftime("%Y-%m-%d")
			page = self.wg.getpage(url)
			soup = bs4.BeautifulSoup(page, "lxml")
			dataTable = soup.find("div", id="dataTable")
			for row in dataTable.find_all("tr"):
				rowItems = row.find_all("td")
				if len(rowItems) == 5:
					server, chName, seriesName, size, view = rowItems

					if chName.find("del"):
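						# A <del> tag around the chapter name marks a file we've already downloaded.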
						self.log.info("Skipping file previously downloaded - %s", chName.a.string)
						continue

					item = {}
					if day == datetime.date.today():
						item["date"] = time.time()
					else:
						item["date"] = time.mktime(day.timetuple())
					item["dlName"] = chName.a.string
					item["dlLink"] = urllib.parse.urljoin(urlBase, chName.a["href"])
					item["baseName"] = nt.makeFilenameSafe(seriesName.a.string)
					item["sourceId"] = nt.makeFilenameSafe(seriesName.a["href"].split("/")[-1])
					item["dlServer"] = server.img["alt"]
					ret.append(item)
		return ret
Example #27
	def retreiveTodoLinksFromDB(self):

		self.log.info("Fetching items from db...")

		rows = self.getRowsByValue(dlState=0)

		self.log.info("Done")
		if not rows:
			return

		items = []
		for item in rows:

			item["retreivalTime"] = time.gmtime(item["retreivalTime"])


			baseNameLower = nt.sanitizeString(item["seriesName"])
			safeBaseName = nt.makeFilenameSafe(item["seriesName"])



			if baseNameLower in nt.dirNameProxy:
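				# nt.dirNameProxy presumably maps sanitized series names to their existing on-disk directories.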
				self.log.info("Have target dir for '%s' Dir = '%s'", baseNameLower, nt.dirNameProxy[baseNameLower]['fqPath'])
				item["targetDir"] = nt.dirNameProxy[baseNameLower]["fqPath"]
			else:
				self.log.info("Don't have target dir for: %s Using default for: %s, full name = %s", baseNameLower, item["seriesName"], item["originName"])
				if "picked" in item["flags"]:
					targetDir = os.path.join(settings.skSettings["dirs"]['mnDir'], safeBaseName)
				else:
					targetDir = os.path.join(settings.skSettings["dirs"]['mDlDir'], safeBaseName)
				if not os.path.exists(targetDir):
					try:
						os.makedirs(targetDir)
						item["targetDir"] = targetDir
						self.updateDbEntry(item["sourceUrl"], flags=" ".join([item["flags"], "newdir"]))
						self.conn.commit()
					except OSError:
						self.log.critical("Directory creation failed?")
						self.log.critical(traceback.format_exc())
				else:
					self.log.warning("Directory not found in dir-dict, but it exists!")
					self.log.warning("Directory-Path: %s", targetDir)
					item["targetDir"] = targetDir

					self.updateDbEntry(item["sourceUrl"], flags=" ".join([item["flags"], "haddir"]))
					self.conn.commit()

			items.append(item)

		self.log.info("Have %s new items to retrieve in SkDownloader", len(items))


		items = sorted(items, key=lambda k: k["retreivalTime"], reverse=True)
		return items
Example #28
    def doDownload(self, linkDict, retag=False):

        images = self.fetchImages(linkDict)
        # images = ['wat']
        # print(linkDict)
        # self.log.info(len(content))

        if images:
            linkDict["chapterNo"] = float(linkDict["chapterNo"])
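            # Chapter numbers can be fractional, hence float() here and the zero-padded {chapNo:06.1f} in the archive name.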
            fileN = '{series} - c{chapNo:06.1f} - {sourceName} [crunchyroll].zip'.format(
                series=linkDict['seriesName'],
                chapNo=linkDict["chapterNo"],
                sourceName=linkDict['originName'])
            fileN = nt.makeFilenameSafe(fileN)

            # self.log.info("geturl with processing", fileN)
            wholePath = os.path.join(linkDict["dirPath"], fileN)
            self.log.info("Complete filepath: %s", wholePath)

            #Write all downloaded files to the archive.
            arch = zipfile.ZipFile(wholePath, "w")
            for imageName, imageContent in images:
                arch.writestr(imageName, imageContent)
            arch.close()

            self.log.info("Successfully Saved to path: %s", wholePath)

            if not linkDict["tags"]:
                linkDict["tags"] = ""

            dedupState = processDownload.processDownload(
                linkDict["seriesName"], wholePath, deleteDups=True)
            self.log.info("Done")

            if dedupState:
                self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)

            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=2,
                               downloadPath=linkDict["dirPath"],
                               fileName=fileN,
                               originName=fileN)

            self.conn.commit()
            return wholePath

        else:

            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=-1,
                               downloadPath="ERROR",
                               fileName="ERROR: FAILED")

            self.conn.commit()
            return False
Example #29
	def doDownload(self, linkDict):

		contentUrl = urllib.parse.urljoin(self.urlBase, "/zip.php?token=%s" % linkDict["contentId"])
		content, handle = self.wg.getpage(contentUrl, returnMultiple=True, addlHeaders={'Referer': linkDict["sourceUrl"]})

		# self.log.info(len(content))

		if handle:
			# self.log.info("handle = ", handle)
			# self.log.info("geturl", handle.geturl())
			urlFileN = urllib.parse.unquote(urllib.parse.urlparse(handle.geturl())[2].split("/")[-1])
			urlFileN = bs4.UnicodeDammit(urlFileN).unicode_markup




			# DjMoe is apparently returning "zip.php" for ALL filenames.
			# Blargh
			if urlFileN == "zip.php":
				urlFileN = ".zip"
				fileN = "%s%s" % (linkDict["originName"], urlFileN)
			else:
				self.log.error("Unknown file extension?")
				self.log.error("Dict filename = %s", linkDict["originName"])
				self.log.error("URL filename = %s", urlFileN)
				fileN = "%s - %s" % (linkDict["originName"], urlFileN)

			fileN = nt.makeFilenameSafe(fileN)


			# self.log.info("geturl with processing", fileN)
			wholePath = os.path.join(linkDict["dirPath"], fileN)
			self.log.info("Complete filepath: %s", wholePath)

			with open(wholePath, "wb") as fp:
				fp.write(content)
			self.log.info("Successfully Saved to path: %s", wholePath)

			if not linkDict["tags"]:
				linkDict["tags"] = ""
			self.updateDbEntry(linkDict["contentId"], dlState=2, downloadPath=linkDict["dirPath"], fileName=fileN, seriesName=linkDict["seriesName"])

			self.conn.commit()

		else:

			self.updateDbEntry(linkDict["contentId"], dlState=-1, downloadPath="ERROR", fileName="ERROR: FAILED")

			# cur.execute('UPDATE djmoe SET downloaded=1 WHERE contentID=?;', (linkDict["contentId"], ))
			# cur.execute('UPDATE djmoe SET dlPath=?, dlName=?, itemTags=?  WHERE contentID=?;', ("ERROR", 'ERROR: FAILED', "N/A", linkDict["contentId"]))
			# self.log.info("fetchall = ", ret.fetchall())
			self.conn.commit()
Example #30
    def getDownloadInfo(self, linkDict, retag=False):
        sourcePage = linkDict["sourceUrl"]

        self.log.info("Retrieving item: %s", sourcePage)

        if not retag:
            self.updateDbEntry(linkDict["sourceUrl"], dlState=1)

        try:
            soup = self.wg.getSoup(
                sourcePage, addlHeaders={'Referer': 'http://hbrowse.com/'})
        except Exception:
            self.log.critical("No download at url %s! SourceUrl = %s",
                              sourcePage, linkDict["sourceUrl"])
            raise IOError("Invalid webpage")

        title, category, tags = self.getCategoryTags(soup)
        tags = ' '.join(tags)

        self.updateDbEntry(linkDict["sourceUrl"],
                           seriesName=category,
                           originName=title,
                           lastUpdate=time.time())

        # Push the fixed title back into the linkdict so its changes will be used
        # later when saving the file.
        linkDict['originName'] = title
        if tags:
            self.log.info("Adding tag info %s", tags)
            self.addTags(sourceUrl=linkDict["sourceUrl"], tags=tags)

        if retag:
            return

        linkDict['dirPath'] = os.path.join(settings.hbSettings["dlDir"],
                                           nt.makeFilenameSafe(category))

        if not os.path.exists(linkDict["dirPath"]):
            os.makedirs(linkDict["dirPath"])
        else:
            self.log.info("Folder Path already exists?: %s",
                          linkDict["dirPath"])

        self.log.info("Folderpath: %s", linkDict["dirPath"])
        #self.log.info(os.path.join())

        startPages = self.getGalleryStartPages(soup)

        linkDict["dlLink"] = startPages

        self.log.debug("Linkdict = ")
        for key, value in list(linkDict.items()):
            self.log.debug("		%s - %s", key, value)

        return linkDict
Example #31
	def doDownload(self, linkDict, retag=False):

		images = self.fetchImages(linkDict)


		# self.log.info(len(content))

		if images:
			fileN = linkDict['originName']+".zip"
			fileN = nt.makeFilenameSafe(fileN)


			# self.log.info("geturl with processing", fileN)
			wholePath = os.path.join(linkDict["dirPath"], fileN)
			self.log.info("Complete filepath: %s", wholePath)

			# Write all downloaded files to the archive.
			arch = zipfile.ZipFile(wholePath, "w")
			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			self.log.info("Successfully Saved to path: %s", wholePath)

			if not linkDict["tags"]:
				linkDict["tags"] = ""



			self.updateDbEntry(linkDict["sourceUrl"], downloadPath=linkDict["dirPath"], fileName=fileN)


			# Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
			dedupState = processDownload.processDownload(None, wholePath, pron=True, deleteDups=True, includePHash=True)
			self.log.info("Done")

			if dedupState:
				self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)


			self.updateDbEntry(linkDict["sourceUrl"], dlState=2)
			self.conn.commit()




			return wholePath

		else:

			self.updateDbEntry(linkDict["sourceUrl"], dlState=-1, downloadPath="ERROR", fileName="ERROR: FAILED")

			self.conn.commit()
			return False
Example #32
	def getDownloadInfo(self, linkDict, retag=False):
		sourcePage = linkDict["sourceUrl"]

		self.log.info("Retrieving item: %s", sourcePage)

		if not retag:
			self.updateDbEntry(linkDict["sourceUrl"], dlState=1)


		try:
			soup = self.wg.getSoup(sourcePage, addlHeaders={'Referer': 'http://hbrowse.com/'})
		except Exception:
			self.log.critical("No download at url %s! SourceUrl = %s", sourcePage, linkDict["sourceUrl"])
			raise IOError("Invalid webpage")

		title, category, tags = self.getCategoryTags(soup)
		tags = ' '.join(tags)

		self.updateDbEntry(linkDict["sourceUrl"], seriesName=category, originName=title, lastUpdate=time.time())

		# Push the fixed title back into the linkdict so its changes will be used
		# later when saving the file.
		linkDict['originName'] = title
		if tags:
			self.log.info("Adding tag info %s", tags)
			self.addTags(sourceUrl=linkDict["sourceUrl"], tags=tags)

		if retag:
			return

		linkDict['dirPath'] = os.path.join(settings.hbSettings["dlDir"], nt.makeFilenameSafe(category))

		if not os.path.exists(linkDict["dirPath"]):
			os.makedirs(linkDict["dirPath"])
		else:
			self.log.info("Folder Path already exists?: %s", linkDict["dirPath"])


		self.log.info("Folderpath: %s", linkDict["dirPath"])
		#self.log.info(os.path.join())


		startPages = self.getGalleryStartPages(soup)


		linkDict["dlLink"] = startPages



		self.log.debug("Linkdict = ")
		for key, value in list(linkDict.items()):
			self.log.debug("		%s - %s", key, value)


		return linkDict
Example #33
    def get_link(self, link_row_id):
        try:
            link_info = self.getDownloadInfo(link_row_id)
            images = self.getImages(link_info)
            title = link_info['title']
            artist = link_info['artist']

        except WebRequest.WebGetException:
            with self.row_context(dbid=link_row_id) as row:
                row.state = 'error'
            return False

        if not (images and title):
            return False

        with self.row_sess_context(dbid=link_row_id) as row_tup:
            row, sess = row_tup

            fileN = title + " - " + artist + ".zip"
            fileN = nt.makeFilenameSafe(fileN)

            container_dir = os.path.join(settings.hitSettings["dlDir"],
                                         nt.makeFilenameSafe(row.series_name))

            wholePath = os.path.join(container_dir, fileN)

            fqFName = self.save_image_set(row, sess, wholePath, images)

        with self.row_context(dbid=link_row_id) as row:
            row.state = 'processing'

        # We don't want to upload the file we just downloaded, so specify doUpload as false.
        # As a result of this, the seriesName parameter also no longer matters.
        self.processDownload(seriesName=False,
                             archivePath=fqFName,
                             doUpload=False)

        self.log.info("Done")
        with self.row_context(dbid=link_row_id) as row:
            row.state = 'complete'
            row.downloaded_at = datetime.datetime.now()
            row.last_checked = datetime.datetime.now()
Example #34
    def getDirAndFName(self, soup):
        title = soup.find("div", class_="folder-title")
        if not title:
            raise PageContentError("Could not find title. Wat?")
        titleSplit = title.get_text().split("»")
        safePath = [nt.makeFilenameSafe(item.strip()) for item in titleSplit]
        fqPath = os.path.join(settings.djSettings["dlDir"], *safePath)
        dirPath, fName = fqPath.rsplit("/", 1)
        self.log.info("dirPath = %s", dirPath)
        self.log.info("fName = %s", fName)
        return dirPath, fName, titleSplit[-1].strip()
	def getDirAndFName(self, soup):
		title = soup.find("div", class_="title")
		if not title:
			raise ValueError("Could not find title. Wat?")
		titleSplit = title.get_text().split("»")
		safePath = [nt.makeFilenameSafe(item.strip()) for item in titleSplit]
		fqPath = os.path.join(settings.djSettings["dlDir"], *safePath)
		dirPath, fName = fqPath.rsplit("/", 1)
		self.log.debug("dirPath = %s", dirPath)
		self.log.debug("fName = %s", fName)
		return dirPath, fName, title.get_text()
	def getDownloadInfo(self, linkDict, retag=False):
		sourcePage = linkDict["sourceUrl"]

		self.log.info("Retrieving item: %s", sourcePage)

		if not retag:
			self.updateDbEntry(linkDict["sourceUrl"], dlState=1)


		cont = self.wg.getpage(sourcePage, addlHeaders={'Referer': 'http://pururin.com/'})
		soup = bs4.BeautifulSoup(cont, "lxml")

		if not soup:
			self.log.critical("No download at url %s! SourceUrl = %s", sourcePage, linkDict["sourceUrl"])
			raise IOError("Invalid webpage")

		category, tags = self.getCategoryTags(soup)
		note = self.getNote(soup)
		tags = ' '.join(tags)

		linkDict['dirPath'] = os.path.join(settings.puSettings["dlDir"], nt.makeFilenameSafe(category))

		if not os.path.exists(linkDict["dirPath"]):
			os.makedirs(linkDict["dirPath"])
		else:
			self.log.info("Folder Path already exists?: %s", linkDict["dirPath"])


		self.log.info("Folderpath: %s", linkDict["dirPath"])
		#self.log.info(os.path.join())

		dlPage = soup.find("a", class_="link-next")
		linkDict["dlLink"] = urllib.parse.urljoin(self.urlBase, dlPage["href"])

		self.log.debug("Linkdict = ")
		for key, value in list(linkDict.items()):
			self.log.debug("		%s - %s", key, value)


		if tags:
			self.log.info("Adding tag info %s", tags)

			self.addTags(sourceUrl=linkDict["sourceUrl"], tags=tags)
		if note:
			self.log.info("Adding note %s", note)
			self.updateDbEntry(linkDict["sourceUrl"], note=note)


		self.updateDbEntry(linkDict["sourceUrl"], seriesName=category, lastUpdate=time.time())



		return linkDict
Example #37
	def getFile(self, file_data):


		row = self.getRowsByValue(sourceUrl=file_data["baseUrl"], limitByKey=False)
		if row and row[0]['dlState'] != 0:
			return
		if not row:
			self.insertIntoDb(retreivalTime = time.time(),
								sourceUrl   = file_data["baseUrl"],
								originName  = file_data["title"],
								dlState     = 1,
								seriesName  = file_data["title"])

		image_links = self.getFileInfo(file_data)

		images = []
		for imagen, imageurl in image_links:
			imdat = self.get_image(imageurl, file_data['xor_key'])
			images.append((imagen, imdat))

			# filen = nt.makeFilenameSafe(file_data['title'] + " - " + imagen)
			# with open(filen, "wb") as fp:
			# 	fp.write(imdat)




		fileN = '{series} - c{chapNo:03.0f} [MangaBox].zip'.format(series=file_data['title'], chapNo=file_data['chapter'])
		fileN = nt.makeFilenameSafe(fileN)

		dlPath, newDir = self.locateOrCreateDirectoryForSeries(file_data["title"])
		wholePath = os.path.join(dlPath, fileN)


		if newDir:
			self.updateDbEntry(file_data["baseUrl"], flags="haddir")
			self.conn.commit()

		arch = zipfile.ZipFile(wholePath, "w")
		for imageName, imageContent in images:
			arch.writestr(imageName, imageContent)
		arch.close()

		self.log.info("Successfully Saved to path: %s", wholePath)

		dedupState = processDownload.processDownload(file_data["title"], wholePath, deleteDups=True)
		if dedupState:
			self.addTags(sourceUrl=file_data["baseUrl"], tags=dedupState)

		self.updateDbEntry(file_data["baseUrl"], dlState=2, downloadPath=dlPath, fileName=fileN, originName=fileN)

		self.conn.commit()
		self.log.info("Done")
Example #38
    def getDownloadInfo(self, linkDict, retag=False):
        sourcePage = linkDict["sourceUrl"]

        self.log.info("Retrieving item: %s", sourcePage)

        if not retag:
            self.updateDbEntry(linkDict["sourceUrl"], dlState=1)

        cont = self.wg.getpage(sourcePage,
                               addlHeaders={'Referer': 'http://pururin.com/'})
        soup = bs4.BeautifulSoup(cont, "lxml")

        if not soup:
            self.log.critical("No download at url %s! SourceUrl = %s",
                              sourcePage, linkDict["sourceUrl"])
            raise IOError("Invalid webpage")

        category, tags = self.getCategoryTags(soup)
        note = self.getNote(soup)
        tags = ' '.join(tags)

        linkDict['dirPath'] = os.path.join(settings.puSettings["dlDir"],
                                           nt.makeFilenameSafe(category))

        if not os.path.exists(linkDict["dirPath"]):
            os.makedirs(linkDict["dirPath"])
        else:
            self.log.info("Folder Path already exists?: %s",
                          linkDict["dirPath"])

        self.log.info("Folderpath: %s", linkDict["dirPath"])
        #self.log.info(os.path.join())

        dlPage = soup.find("a", class_="link-next")
        linkDict["dlLink"] = urllib.parse.urljoin(self.urlBase, dlPage["href"])

        self.log.debug("Linkdict = ")
        for key, value in list(linkDict.items()):
            self.log.debug("		%s - %s", key, value)

        if tags:
            self.log.info("Adding tag info %s", tags)

            self.addTags(sourceUrl=linkDict["sourceUrl"], tags=tags)
        if note:
            self.log.info("Adding note %s", note)
            self.updateDbEntry(linkDict["sourceUrl"], note=note)

        self.updateDbEntry(linkDict["sourceUrl"],
                           seriesName=category,
                           lastUpdate=time.time())

        return linkDict
Example #39
0
	def save_image_set(self, fqfilename, image_list):

		filepath, fileN = os.path.split(fqfilename)
		fileN = fileN.replace('.zip .zip', '.zip')
		fileN = fileN.replace('.zip.zip', '.zip')
		fileN = fileN.replace(' .zip', '.zip')
		fileN = fileN.replace('..zip', '.zip')
		fileN = nt.makeFilenameSafe(fileN)

		fqfilename = os.path.join(filepath, fileN)
		fqfilename = self.insertCountIfFilenameExists(fqfilename)
		self.log.info("Complete filepath: %s", fqfilename)


		chop = len(fileN)-4

		while True:
			try:
				arch = zipfile.ZipFile(fqfilename, "w")

				#Write all downloaded files to the archive.
				for imageName, imageContent in image_list:
					assert isinstance(imageName, str)
					assert isinstance(imageContent, bytes)
					arch.writestr(imageName, imageContent)
				arch.close()
				return fqfilename

			except (IOError, OSError):
				chop = chop - 1
				filepath, fileN = os.path.split(fqfilename)

				fileN = fileN[:chop]+fileN[-4:]
				self.log.warn("Truncating file length to %s characters and re-encoding.", chop)
				fileN = fileN.encode('utf-8','ignore').decode('utf-8')
				fileN = nt.makeFilenameSafe(fileN)
				fqfilename = os.path.join(filepath, fileN)
				fqfilename = self.insertCountIfFilenameExists(fqfilename)
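
save_image_set retries on filesystem errors, trimming the archive name one character per pass while keeping the ".zip" suffix. The same truncate-and-retry idea as a standalone sketch, with a hypothetical min_len floor added so a pathological name cannot loop forever:

	import os
	import zipfile

	def write_zip_truncating(fqfilename, image_list, min_len=16):
		dirpath, fname = os.path.split(fqfilename)
		stem, ext = os.path.splitext(fname)
		while True:
			target = os.path.join(dirpath, stem + ext)
			try:
				with zipfile.ZipFile(target, "w") as arch:
					for image_name, image_content in image_list:
						arch.writestr(image_name, image_content)
				return target
			except (IOError, OSError):
				if len(stem) <= min_len:
					raise
				stem = stem[:-1]  # shave one character and retry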
Example #40
0
	def getDownloadInfo(self, linkDict):
		sourcePage = linkDict["sourceUrl"]

		self.log.info("Retreiving item: %s", sourcePage)

		self.updateDbEntry(linkDict["sourceUrl"], dlState=1)

		soup = self.wg.getSoup(sourcePage, addlHeaders={'Referer': 'http://pururin.us/'})

		if not soup:
			self.log.critical("No download at url %s! SourceUrl = %s", sourcePage, linkDict["sourceUrl"])
			raise IOError("Invalid webpage")

		category, tags = self.getCategoryTags(soup)
		note = self.getNote(soup)
		tags = ' '.join(tags)

		linkDict['originName'] = self.getFileName(soup)
		linkDict['dirPath'] = os.path.join(settings.puSettings["dlDir"], nt.makeFilenameSafe(category))

		if not os.path.exists(linkDict["dirPath"]):
			os.makedirs(linkDict["dirPath"])
		else:
			self.log.info("Folder Path already exists?: %s", linkDict["dirPath"])

		self.log.info("Folderpath: %s", linkDict["dirPath"])

		self.log.debug("Linkdict = ")
		for key, value in list(linkDict.items()):
			self.log.debug("		%s - %s", key, value)


		if tags:
			self.log.info("Adding tag info %s", tags)

			self.addTags(sourceUrl=linkDict["sourceUrl"], tags=tags)
		if note:
			self.log.info("Adding note %s", note)
			self.updateDbEntry(linkDict["sourceUrl"], note=note)


		read_url = soup.find("a", text=re.compile("Read Online", re.IGNORECASE))
		spage = urllib.parse.urljoin(self.urlBase, read_url['href'])

		linkDict["spage"] = spage

		self.updateDbEntry(linkDict["sourceUrl"], seriesName=category, lastUpdate=time.time())

		return linkDict
Example #41
0
	def doDownload(self, linkDict, retag=False):

		images = self.fetchImages(linkDict)
		# images = ['wat']
		# print(linkDict)
		# self.log.info(len(content))

		if images:
			linkDict["chapterNo"] = float(linkDict["chapterNo"])
			fileN = '{series} - c{chapNo:06.1f} - {sourceName} [crunchyroll].zip'.format(series=linkDict['seriesName'], chapNo=linkDict["chapterNo"], sourceName=linkDict['originName'])
			fileN = nt.makeFilenameSafe(fileN)


			# self.log.info("geturl with processing", fileN)
			wholePath = os.path.join(linkDict["dirPath"], fileN)
			self.log.info("Complete filepath: %s", wholePath)

			# Write all downloaded files to the archive.
			arch = zipfile.ZipFile(wholePath, "w")
			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			self.log.info("Successfully Saved to path: %s", wholePath)

			if not linkDict["tags"]:
				linkDict["tags"] = ""



			dedupState = processDownload.processDownload(linkDict["seriesName"], wholePath, deleteDups=True)
			self.log.info( "Done")


			if dedupState:
				self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)

			self.updateDbEntry(linkDict["sourceUrl"], dlState=2, downloadPath=linkDict["dirPath"], fileName=fileN, originName=fileN)

			self.conn.commit()
			return wholePath

		else:

			self.updateDbEntry(linkDict["sourceUrl"], dlState=-1, downloadPath="ERROR", fileName="ERROR: FAILED")

			self.conn.commit()
			return False
Example #42
0
	def getDownloadInfo(self, linkDict):
		sourcePage = linkDict["sourceUrl"]

		self.log.info("Retrieving item: %s", sourcePage)

		self.updateDbEntry(linkDict["sourceUrl"], dlState=1)

		soup = self.wg.getSoup(sourcePage, addlHeaders={'Referer': 'http://pururin.us/'})

		if not soup:
			self.log.critical("No download at url %s! SourceUrl = %s", sourcePage, linkDict["sourceUrl"])
			raise IOError("Invalid webpage")

		category, tags = self.getCategoryTags(soup)
		note = self.getNote(soup)
		tags = ' '.join(tags)

		linkDict['originName'] = self.getFileName(soup)
		linkDict['dirPath'] = os.path.join(settings.puSettings["dlDir"], nt.makeFilenameSafe(category))

		if not os.path.exists(linkDict["dirPath"]):
			os.makedirs(linkDict["dirPath"])
		else:
			self.log.info("Folder Path already exists?: %s", linkDict["dirPath"])

		self.log.info("Folderpath: %s", linkDict["dirPath"])

		self.log.debug("Linkdict = ")
		for key, value in list(linkDict.items()):
			self.log.debug("		%s - %s", key, value)


		if tags:
			self.log.info("Adding tag info %s", tags)

			self.addTags(sourceUrl=linkDict["sourceUrl"], tags=tags)
		if note:
			self.log.info("Adding note %s", note)
			self.updateDbEntry(linkDict["sourceUrl"], note=note)


		read_url = soup.find("a", text=re.compile("Read Online", re.IGNORECASE))
		spage = urllib.parse.urljoin(self.urlBase, read_url['href'])

		linkDict["spage"] = spage

		self.updateDbEntry(linkDict["sourceUrl"], seriesName=category, lastUpdate=time.time())

		return linkDict
Example #43
0
	def getFeed(self, url):
		# for item in items:
		# 	self.log.info( item)
		#
		self.log.info( "Loading MT Feeds")
		feed = self.loadFeed(url)
		ret = []
		for feedEntry in feed["entries"]:
			item = {}
			#for key, value in feedEntry.iteritems():
			#	self.log.info( key, value)
			#self.log.info( feedEntry["links"][0]["href"])

			dlName = feedEntry["title_detail"]["value"]
			dlLink = feedEntry["links"][0]["href"]
			item["dlName"] = dlName
			item["dlLink"] = dlLink
			item["date"] = time.mktime(feedEntry['published_parsed'])
			#self.log.info( "date = ", feedEntry['published_parsed'])

			nameRe = re.compile(r"<b>Series:</b> <a href=\"http://www.mangatraders.com/manga/series/(\d+)\">(.+?)</a>")

			result = nameRe.search(feedEntry["summary_detail"]["value"])
			if result:
				item["sourceId"] = nt.makeFilenameSafe(result.group(1))
				item["baseName"] = nt.makeFilenameSafe(result.group(2))
			else:
				self.log.warning("Need to manually clean filename. What's going on?")
				tempCleaned = nt.getCleanedName(dlName)

				item["baseName"] = nt.makeFilenameSafe(tempCleaned)
				item["sourceId"] = None

			ret.append(item)

		return ret
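
The nameRe pattern above pulls the series ID and title out of the feed entry's summary HTML. Run against a fabricated summary snippet shaped the way the pattern expects:

	import re

	nameRe = re.compile(r"<b>Series:</b> <a href=\"http://www.mangatraders.com/manga/series/(\d+)\">(.+?)</a>")
	summary = '<b>Series:</b> <a href="http://www.mangatraders.com/manga/series/123">Some Series</a>'
	result = nameRe.search(summary)
	print(result.group(1), result.group(2))  # prints: 123 Some Series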
Example #44
0
	def getItemFromContainer(self, segmentSoup, addDate):
		seriesName, chapter = segmentSoup.get_text().strip().split(" chapter ")

		chName = "{series} - {chapter}".format(series=seriesName, chapter=chapter)

		# chName, seriesName, size, view = segmentSoupItems


		item = {}

		item["date"] = time.mktime(addDate.timetuple())
		item["dlName"] = chName
		item["dlLink"] =  urllib.parse.urljoin(self.urlBase, segmentSoup.a["href"])
		item["baseName"] = nt.makeFilenameSafe(seriesName)

		return item
Example #45
0
    def doDownload(self, seriesName, dlurl, chapter_name):

        row = self.getRowsByValue(sourceUrl=dlurl, limitByKey=False)
        if row and row[0]['dlState'] != 0:
            return

        if not row:
            self.insertIntoDb(retreivalTime=time.time(),
                              sourceUrl=dlurl,
                              originName=seriesName,
                              dlState=1,
                              seriesName=seriesName)

        fctnt, fname = self.wg.getFileAndName(dlurl)

        fileN = '{series} - {chap} [YoManga].zip'.format(series=seriesName,
                                                         chap=chapter_name)
        fileN = nt.makeFilenameSafe(fileN)

        dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)
        wholePath = os.path.join(dlPath, fileN)

        self.log.info("Source name: %s", fname)
        self.log.info("Generated name: %s", fileN)

        if newDir:
            self.updateDbEntry(dlurl, flags="haddir")
            self.conn.commit()

        with open(wholePath, "wb") as fp:
            fp.write(fctnt)

        self.log.info("Successfully Saved to path: %s", wholePath)

        dedupState = processDownload.processDownload(seriesName,
                                                     wholePath,
                                                     deleteDups=True)
        if dedupState:
            self.addTags(sourceUrl=dlurl, tags=dedupState)

        self.updateDbEntry(dlurl,
                           dlState=2,
                           downloadPath=dlPath,
                           fileName=fileN,
                           originName=fileN)

        self.conn.commit()
Example #46
0
    def getFilenameFromIdName(self, rowid, filename):
        if not os.path.exists(settings.bookCachePath):
            self.log.warn("Cache directory for book items did not exist. Creating")
            self.log.warn("Directory at path '%s'", settings.bookCachePath)
            os.makedirs(settings.bookCachePath)

        # One new directory per 1000 items.
        dirName = "%s" % (rowid // 1000)
        dirPath = os.path.join(settings.bookCachePath, dirName)
        if not os.path.exists(dirPath):
            os.mkdir(dirPath)

        filename = "ID%s - %s" % (rowid, filename)
        filename = nameTools.makeFilenameSafe(filename)
        fqpath = os.path.join(dirPath, filename)

        return fqpath
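
The directory sharding above is plain integer division - one bucket directory per 1000 row IDs. A quick worked check:

	for rowid in (0, 999, 1000, 12345):
		print(rowid, "->", rowid // 1000)
	# 0 -> 0, 999 -> 0, 1000 -> 1, 12345 -> 12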
Example #47
0
	def getContainerPages(self, firstPageUrl):

		gid = urllib.parse.urlsplit(firstPageUrl).fragment

		# Korean webtoons are non-paginated in their default state,
		# which breaks the scraper, so we force paginated mode.
		if not firstPageUrl.endswith("_1_t"):
			firstPageUrl += "_1_t"


		pageUrl = firstPageUrl

		basepage = self.wg.getpage(pageUrl)

		seriesName = "Unknown - ERROR"
		chapterVol = "Unknown - ERROR"
		group      = "Unknown - ERROR"

		images = []
		for pgnum in range(1, 9999999):
			ajaxurl = "https://bato.to/areader?id={id}&p={pgnum}&supress_webtoon=t".format(id=gid, pgnum=pgnum)
			extra_headers = {
				"X-Requested-With" : "XMLHttpRequest",
				"Referer"          : "https://bato.to/reader",
			}
			subpage = self.wg.getSoup(ajaxurl, addlHeaders=extra_headers)
			imgtag = subpage.find("img", id='comic_page')
			if not imgtag:
				self.log.warning("No image - Breaking")
				break

			seriesName, chapterVol = self.extractFilename(imgtag['alt'])
			images.append(imgtag['src'])

			group_container = subpage.find("select", {'name' : 'group_select'})
			if group_container and group_container.find(True, {"selected" : "selected"}):
				group = group_container.find(True, {"selected" : "selected"}).get_text(strip=True)
				group = group.replace(' - English', "")
				group = nt.makeFilenameSafe(group)

			pages = subpage.find("select", id='page_select')
			if pgnum + 1 > len(pages.find_all("option")):
				break

		return seriesName, chapterVol, group, images
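
A self-contained sketch of the same AJAX pagination walk, using requests and bs4 in place of the wg wrapper. bato.to is long defunct, so the URL and parameters are historical placeholders (note that "supress_webtoon" is the site's own spelling), and the group-name extraction is omitted for brevity:

	import requests
	import bs4

	def walk_reader_pages(gid):
		image_urls = []
		for pgnum in range(1, 10000):
			resp = requests.get(
				"https://bato.to/areader",
				params={"id": gid, "p": pgnum, "supress_webtoon": "t"},
				headers={
					"X-Requested-With": "XMLHttpRequest",  # the reader endpoint only answers XHR
					"Referer": "https://bato.to/reader",
				})
			soup = bs4.BeautifulSoup(resp.text, "lxml")
			imgtag = soup.find("img", id="comic_page")
			if not imgtag:
				break  # no image tag means we ran past the last page
			image_urls.append(imgtag["src"])
			pages = soup.find("select", id="page_select")
			if not pages or pgnum + 1 > len(pages.find_all("option")):
				break  # the page selector tells us when we are done
		return image_urls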
Example #48
0
    def getFilenameFromIdName(self, rowid, filename):
        if not os.path.exists(settings.bookCachePath):
            self.log.warn(
                "Cache directory for book items did not exist. Creating")
            self.log.warn("Directory at path '%s'", settings.bookCachePath)
            os.makedirs(settings.bookCachePath)

        # one new directory per 1000 items.
        dirName = '%s' % (rowid // 1000)
        dirPath = os.path.join(settings.bookCachePath, dirName)
        if not os.path.exists(dirPath):
            os.mkdir(dirPath)

        filename = 'ID%s - %s' % (rowid, filename)
        filename = nameTools.makeFilenameSafe(filename)
        fqpath = os.path.join(dirPath, filename)

        return fqpath
Example #49
0
	def doDownload(self, seriesName, dlurl, chapter_name):


		row = self.getRowsByValue(sourceUrl=dlurl, limitByKey=False)
		if row and row[0]['dlState'] != 0:
			return

		if not row:
			self.insertIntoDb(retreivalTime = time.time(),
								sourceUrl   = dlurl,
								originName  = seriesName,
								dlState     = 1,
								seriesName  = seriesName)


		fctnt, fname = self.wg.getFileAndName(dlurl)


		fileN = '{series} - {chap} [YoManga].zip'.format(series=seriesName, chap=chapter_name)
		fileN = nt.makeFilenameSafe(fileN)

		dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)
		wholePath = os.path.join(dlPath, fileN)

		self.log.info("Source name: %s", fname)
		self.log.info("Generated name: %s", fileN)

		if newDir:
			self.updateDbEntry(dlurl, flags="haddir")
			self.conn.commit()

		with open(wholePath, "wb") as fp:
			fp.write(fctnt)

		self.log.info("Successfully Saved to path: %s", wholePath)


		dedupState = processDownload.processDownload(seriesName, wholePath, deleteDups=True)
		if dedupState:
			self.addTags(sourceUrl=dlurl, tags=dedupState)

		self.updateDbEntry(dlurl, dlState=2, downloadPath=dlPath, fileName=fileN, originName=fileN)

		self.conn.commit()
Example #50
0
    def locateOrCreateDirectoryForSeries(self, seriesName):

        if self.shouldCanonize:
            canonSeriesName = nt.getCanonicalMangaUpdatesName(seriesName)
        else:
            canonSeriesName = seriesName

        safeBaseName = nt.makeFilenameSafe(canonSeriesName)

        if canonSeriesName in nt.dirNameProxy:
            self.log.info("Have target dir for '%s' Dir = '%s'",
                          canonSeriesName,
                          nt.dirNameProxy[canonSeriesName]['fqPath'])
            return nt.dirNameProxy[canonSeriesName]["fqPath"], False
        else:
            self.log.info("Don't have target dir for: %s, full name = %s",
                          canonSeriesName, seriesName)
            targetDir = os.path.join(settings.baseDir, safeBaseName)
            if not os.path.exists(targetDir):
                try:
                    os.makedirs(targetDir)
                    return targetDir, True

                except FileExistsError:
                    # Probably means the directory was concurrently created by another thread in the background?
                    self.log.critical(
                        "Directory doesn't exist, and yet it does?")
                    self.log.critical(traceback.format_exc())
                    pass
                except OSError:
                    self.log.critical("Directory creation failed?")
                    self.log.critical(traceback.format_exc())

            else:
                self.log.warning(
                    "Directory not found in dir-dict, but it exists!")
                self.log.warning("Directory-Path: %s", targetDir)
                self.log.warning("Base series name: %s", seriesName)
                self.log.warning("Canonized series name: %s", canonSeriesName)
                self.log.warning("Safe canonized name: %s", safeBaseName)
            return targetDir, False
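
The FileExistsError branch above exists because two workers can race to create the same series directory. Since Python 3.2, os.makedirs(..., exist_ok=True) removes that race entirely; a minimal equivalent:

	import os

	def ensure_series_dir(path):
		# No error if another thread created the directory first.
		os.makedirs(path, exist_ok=True)
		return path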
Example #51
0
    def locateOrCreateDirectoryForSeries(self, seriesName):

        if self.shouldCanonize:
            canonSeriesName = nt.getCanonicalMangaUpdatesName(seriesName)
        else:
            canonSeriesName = seriesName

        safeBaseName = nt.makeFilenameSafe(canonSeriesName)

        if canonSeriesName in nt.dirNameProxy:
            self.log.info(
                "Have target dir for '%s' Dir = '%s'", canonSeriesName, nt.dirNameProxy[canonSeriesName]["fqPath"]
            )
            return nt.dirNameProxy[canonSeriesName]["fqPath"], False
        else:
            self.log.info("Don't have target dir for: %s, full name = %s", canonSeriesName, seriesName)
            targetDir = os.path.join(settings.baseDir, safeBaseName)
            if not os.path.exists(targetDir):
                try:
                    os.makedirs(targetDir)
                    return targetDir, True

                except FileExistsError:
                    # Probably means the directory was concurrently created by another thread in the background?
                    self.log.critical("Directory doesn't exist, and yet it does?")
                    self.log.critical(traceback.format_exc())
                    pass
                except OSError:
                    self.log.critical("Directory creation failed?")
                    self.log.critical(traceback.format_exc())

            else:
                self.log.warning("Directory not found in dir-dict, but it exists!")
                self.log.warning("Directory-Path: %s", targetDir)
                self.log.warning("Base series name: %s", seriesName)
                self.log.warning("Canonized series name: %s", canonSeriesName)
                self.log.warning("Safe canonized name: %s", safeBaseName)
            return targetDir, False
Example #52
0
	def getPersonalItems(self):
		page = self.wg.getpage(self.watchedItemURL)
		soup = bs4.BeautifulSoup(page, "lxml")
		ret = []
		for fileBlock in soup.find_all("file"):

			mangaName = fileBlock.cat_disp.string
			cleanedName = nt.makeFilenameSafe(mangaName)
			addDate = calendar.timegm(parser.parse(fileBlock.file_add_date.string).utctimetuple())
			fileName = fileBlock.file_disp.string
			sourceId = fileBlock.file_cat.string
			fileID = fileBlock.fileid.string

			item = {}
			item["date"] = addDate
			item["dlName"] = fileName
			item["dlLink"] =  "http://www.mangatraders.com/download/file/%s" % fileID
			item["baseName"] = cleanedName
			item["sourceId"] = sourceId
			item["dlServer"] = ""
			ret.append(item)

		return ret
Example #53
0
	def processDownloadInfo(self, linkDict):

		self.updateDbEntry(linkDict["sourceUrl"], dlState=1)

		sourcePage = linkDict["sourceUrl"]
		category   = linkDict['seriesName']

		self.log.info("Retreiving item: %s", sourcePage)

		linkDict['dirPath'] = os.path.join(settings.fkSettings["dlDir"], nt.makeFilenameSafe(category))

		if not os.path.exists(linkDict["dirPath"]):
			os.makedirs(linkDict["dirPath"])
		else:
			self.log.info("Folder Path already exists?: %s", linkDict["dirPath"])

		self.log.info("Folderpath: %s", linkDict["dirPath"])

		self.log.debug("Linkdict = ")
		for key, value in list(linkDict.items()):
			self.log.debug("		%s - %s", key, value)


		return linkDict
Example #54
0
	def getLink(self, link):
		sourceUrl = link["sourceUrl"]


		try:
			self.log.info( "Should retreive url - %s", sourceUrl)
			self.updateDbEntry(sourceUrl, dlState=1)

			seriesName, chapterVol, imageUrls = self.getContainerPages(sourceUrl)
			if not seriesName and not chapterVol and not imageUrls:
				self.log.critical("Failure on retreiving content at %s", sourceUrl)
				self.log.critical("Page not found - 404")
				self.updateDbEntry(sourceUrl, dlState=-1)
				return

			self.log.info("Downloading = '%s', '%s'", seriesName, chapterVol)
			dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)

			if link["flags"] == None:
				link["flags"] = ""

			if newDir:
				self.updateDbEntry(sourceUrl, flags=" ".join([link["flags"], "haddir"]))
				self.conn.commit()

			chapterNameRaw = " - ".join((seriesName, chapterVol))
			chapterName = nt.makeFilenameSafe(chapterNameRaw)

			fqFName = os.path.join(dlPath, chapterName+" [batoto].zip")

			loop = 1
			while os.path.exists(fqFName):
				fName = "%s - (%d).zip" % (chapterName, loop)
				fqFName = os.path.join(dlPath, fName)
				loop += 1
			self.log.info("Saving to archive = %s", fqFName)

			images = []
			for imgUrl in imageUrls:
				self.log.info("Fetching content for item: %s", imgUrl)
				imageName, imageContent = self.getImage(imgUrl, "http://bato.to/reader")

				images.append([imageName, imageContent])

				if not runStatus.run:
					self.log.info( "Breaking due to exit flag being set")
					self.updateDbEntry(sourceUrl, dlState=0)
					return

			self.log.info("Creating archive with %s images", len(images))

			if not images:
				self.updateDbEntry(sourceUrl, dlState=-1, seriesName=seriesName, originName=chapterNameRaw, tags="error-404")
				return

			#Write all downloaded files to the archive.
			arch = zipfile.ZipFile(fqFName, "w")
			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			dedupState = processDownload.processDownload(seriesName, fqFName, deleteDups=True, includePHash=False)
			self.log.info( "Done")

			filePath, fileName = os.path.split(fqFName)
			self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, seriesName=seriesName, originName=chapterNameRaw, tags=dedupState)
			return



		except Exception:
			self.log.critical("Failure on retreiving content at %s", sourceUrl)
			self.log.critical("Traceback = %s", traceback.format_exc())
			self.updateDbEntry(sourceUrl, dlState=-1)
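
The collision handling above appends " - (n)" to the archive name until it finds a free one. Extracted as a standalone helper (illustrative only - the real method also records dedup state and DB rows):

	import os

	def next_free_name(dirpath, basename, ext=".zip"):
		candidate = os.path.join(dirpath, basename + ext)
		loop = 1
		while os.path.exists(candidate):
			candidate = os.path.join(dirpath, "%s - (%d)%s" % (basename, loop, ext))
			loop += 1
		return candidate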
Example #55
0
    def getDownloadInfo(self, content_id):

        self.log.info("Retrieving metadata for item: %s", content_id)

        ret = {}

        if not content_id.startswith("http"):
            sourcePage = urllib.parse.urljoin(
                self.urlBase, "/gallery/{gid}".format(gid=content_id))
        else:
            sourcePage = content_id

        soup = self.wg.getSoup(sourcePage)
        if not soup:
            self.log.critical("No download at url %s! SourceUrl = %s",
                              sourcePage, content_id)
            raise PageContentError()

        try:
            dirPath, originName, seriesName = self.getDirAndFName(soup)
        except AttributeError:
            self.log.critical("No download at url %s! SourceUrl = %s",
                              sourcePage, content_id)
            raise PageContentError()

        except ValueError:
            self.log.critical("No download at url %s! SourceUrl = %s",
                              sourcePage, content_id)
            raise PageContentError()

        image_container = soup.find("div", id='image-container')

        ret_link_list = []
        for img_tag in image_container.find_all("img"):
            if img_tag['data-link'] == "/subscribe":
                raise PageContentError("Subscription content!")

            assert img_tag['data-file'], "Missing url for image: %s" % img_tag
            ret_link_list.append((img_tag['data-file'], sourcePage))

        note = soup.find("div", class_="message")
        if note is None or note.string is None:
            note = " "
        else:
            note = nt.makeFilenameSafe(note.string)

        tags = soup.find("li", class_="tag-area")
        tagList = []
        if tags:
            for tag in tags.find_all("a"):
                tag_tmp = tag.get_text()
                tagList.append(
                    tag_tmp.lower().rstrip(", ").lstrip(", ").replace(
                        " ", "-"))

        artist_area = soup.find('div', class_='gallery-artist')
        aList = []
        if artist_area:
            for artist_link in artist_area.find_all("a"):
                a_tag = artist_link.get_text(strip=True)
                aList.append(a_tag)
                a_tag = "artist " + a_tag
                tagList.append(a_tag.lower().rstrip(", ").lstrip(", ").replace(
                    " ", "-"))

        artist = ",".join(aList)

        ret = {
            'artist': artist,
            'dirPath': dirPath,
            'originName': originName,
            'seriesName': seriesName,
            'tagList': tagList,
            'note': note,
            'ret_link_list': ret_link_list,
        }

        # if not os.path.exists(linkDict["dirPath"]):
        # 	os.makedirs(linkDict["dirPath"])
        # else:
        # 	self.log.info("Folder Path already exists?: %s", linkDict["dirPath"])

        # self.log.info("Folderpath: %s", linkDict["dirPath"])
        # #self.log.info(os.path.join())

        # self.log.debug("Linkdict = ")
        # for key, value in list(linkDict.items()):
        # 	self.log.debug("		%s - %s", key, value)

        return ret
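
The tag handling above lower-cases each tag, strips stray commas and spaces, and hyphenates. rstrip(", ").lstrip(", ") is equivalent to a single strip(", "), so the whole normalisation condenses to one helper:

	def normalize_tag(raw):
		return raw.lower().strip(", ").replace(" ", "-")

	print(normalize_tag("Full Color, "))  # prints: full-color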
Example #56
0
    def get_link(self, link_row_id):

        images = None

        with self.row_context(dbid=link_row_id) as row:
            source_url = row.source_id
            row.state = 'fetching'

        try:

            dl_info = self.getDownloadInfo(content_id=source_url)

            # ret = {
            # 	'artist'        : artist,
            # 	'dirPath'       : dirPath,
            # 	'originName'    : originName,      -
            # 	'seriesName'    : seriesName,      -
            # 	'tagList'       : tagList,         -
            # 	'note'          : note,            -
            # 	'ret_link_list' : ret_link_list,   -
            # }

            with self.row_context(dbid=link_row_id) as row:
                self.update_tags(dl_info['tagList'], row=row)

                if dl_info['note']:
                    row.additional_metadata = {'note': dl_info['note']}
                row.series_name = dl_info['seriesName']
                row.origin_name = dl_info['originName']
                row.lastUpdate = datetime.datetime.now()

            images = self.getImages(dl_info['ret_link_list'])

        except WebRequest.WebGetException:
            self.log.info("WebRequest.WebGetException for item ID: %s",
                          link_row_id)
            with self.row_context(dbid=link_row_id) as row:
                row.state = 'error'
                row.err_str = traceback.format_exc()
            return False

        except PageContentError:
            self.log.info("PageContentError for item ID: %s", link_row_id)
            with self.row_context(dbid=link_row_id) as row:
                row.state = 'error'
                row.err_str = traceback.format_exc()
            return False

        if not (images and dl_info['seriesName']):
            with self.row_context(dbid=link_row_id) as row:
                row.state = 'error'
            return False

        fileN = dl_info['seriesName'] + " - " + dl_info['artist'] + ".zip"
        fileN = nt.makeFilenameSafe(fileN)

        container_dir = dl_info['dirPath']

        with self.row_sess_context(dbid=link_row_id) as row_tup:
            row, sess = row_tup

            wholePath = os.path.join(container_dir, fileN)
            fqFName = self.save_image_set(row, sess, wholePath, images)

        with self.row_context(dbid=link_row_id) as row:
            row.state = 'processing'

        # We don't want to upload the file we just downloaded, so specify doUpload as false.
        # As a result of this, the seriesName parameter also no longer matters.
        self.processDownload(seriesName=False,
                             archivePath=fqFName,
                             doUpload=False)

        self.log.info("Done")
        with self.row_context(dbid=link_row_id) as row:
            row.state = 'complete'
            row.downloaded_at = datetime.datetime.now()
            row.last_checked = datetime.datetime.now()

        delay = random.randint(5, 30)
        self.log.info("Sleeping %s", delay)
        time.sleep(delay)

        return True
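
The random 5-30 second sleep at the end of get_link is a politeness delay between gallery fetches, so request timing does not look mechanical. As a standalone helper:

	import random
	import time

	def polite_sleep(lo=5, hi=30):
		delay = random.randint(lo, hi)
		time.sleep(delay)
		return delay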
Example #57
0
    def doDownload(self, linkDict):

        images = []
        title = None
        nextPage = linkDict["dlLink"]

        while nextPage:
            gatewayPage = self.wg.getpage(
                nextPage, addlHeaders={'Referer': linkDict["sourceUrl"]})

            soup = bs4.BeautifulSoup(gatewayPage, "lxml")
            titleCont = soup.find("div", class_="image-menu")

            title = titleCont.h1.get_text()
            title = title.replace("Reading ", "")
            title, dummy = title.rsplit(" Page ", 1)
            title = title.strip()

            imageUrl = soup.find("img", class_="b")
            imageUrl = urllib.parse.urljoin(self.urlBase, imageUrl["src"])

            imagePath = urllib.parse.urlsplit(imageUrl)[2]
            imageFileName = imagePath.split("/")[-1]

            imageData = self.wg.getpage(imageUrl,
                                        addlHeaders={'Referer': nextPage})

            images.append((imageFileName, imageData))
            # Find next page
            nextPageLink = soup.find("a", class_="link-next")
            if not nextPageLink:
                nextPage = None
            elif nextPageLink["href"].startswith(
                    "/finish/"):  # Break on the last image.
                nextPage = None
            else:
                nextPage = urllib.parse.urljoin(self.urlBase,
                                                nextPageLink["href"])

        # self.log.info(len(content))

        if images and title:
            fileN = title + ".zip"
            fileN = nt.makeFilenameSafe(fileN)

            # self.log.info("geturl with processing", fileN)
            wholePath = os.path.join(linkDict["dirPath"], fileN)
            self.log.info("Complete filepath: %s", wholePath)

            #Write all downloaded files to the archive.
            try:
                arch = zipfile.ZipFile(wholePath, "w")
            except OSError:
                title = title.encode('ascii', 'ignore').decode('ascii')
                fileN = title + ".zip"
                fileN = nt.makeFilenameSafe(fileN)
                wholePath = os.path.join(linkDict["dirPath"], fileN)
                arch = zipfile.ZipFile(wholePath, "w")

            for imageName, imageContent in images:
                arch.writestr(imageName, imageContent)
            arch.close()

            self.log.info("Successfully Saved to path: %s", wholePath)

            self.updateDbEntry(linkDict["sourceUrl"],
                               downloadPath=linkDict["dirPath"],
                               fileName=fileN)

            # Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
            dedupState = processDownload.processDownload(None,
                                                         wholePath,
                                                         pron=True,
                                                         deleteDups=True,
                                                         includePHash=True)
            self.log.info("Done")

            if dedupState:
                self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)

            self.updateDbEntry(linkDict["sourceUrl"], dlState=2)
            self.conn.commit()

            return wholePath

        else:

            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=-1,
                               downloadPath="ERROR",
                               fileName="ERROR: FAILED")

            self.conn.commit()
            return False
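
When zipfile.ZipFile raises OSError on the archive name, the code above retries with non-ASCII characters dropped. That encode/decode fallback in isolation:

	title = "Cañon Test"
	ascii_title = title.encode("ascii", "ignore").decode("ascii")
	print(ascii_title)  # prints: Caon Test  (the ñ is silently dropped)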
Example #58
0
	def getLink(self, link):
		sourceUrl  = link["sourceUrl"]
		seriesName = link["seriesName"]
		chapterVol = link["originName"]


		try:
			self.log.info( "Should retreive url - %s", sourceUrl)
			self.updateDbEntry(sourceUrl, dlState=1)

			imageUrls = self.getImageUrls(sourceUrl)
			if not imageUrls:
				self.log.critical("Failure on retreiving content at %s", sourceUrl)
				self.log.critical("Page not found - 404")
				self.updateDbEntry(sourceUrl, dlState=-1)
				return



			self.log.info("Downloading = '%s', '%s' ('%s images)", seriesName, chapterVol, len(imageUrls))
			dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)

			if link["flags"] == None:
				link["flags"] = ""

			if newDir:
				self.updateDbEntry(sourceUrl, flags=" ".join([link["flags"], "haddir"]))
				self.conn.commit()

			chapterName = nt.makeFilenameSafe(chapterVol)

			fqFName = os.path.join(dlPath, chapterName+"["+self.groupName+"].zip")

			loop = 1
			while os.path.exists(fqFName):
				fqFName, ext = os.path.splitext(fqFName)
				fqFName = "%s (%d)%s" % (fqFName, loop,  ext)
				loop += 1
			self.log.info("Saving to archive = %s", fqFName)

			images = []
			for imageName, imgUrl, referrerUrl in imageUrls:
				dummy_imageName, imageContent = self.getImage(imgUrl, referrerUrl)
				images.append([imageName, imageContent])

				if not runStatus.run:
					self.log.info( "Breaking due to exit flag being set")
					self.updateDbEntry(sourceUrl, dlState=0)
					return

			self.log.info("Creating archive with %s images", len(images))

			if not images:
				self.updateDbEntry(sourceUrl, dlState=-1, seriesName=seriesName, originName=chapterVol, tags="error-404")
				return

			#Write all downloaded files to the archive.
			arch = zipfile.ZipFile(fqFName, "w")
			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			filePath, fileName = os.path.split(fqFName)
			self.updateDbEntry(sourceUrl, downloadPath=filePath, fileName=fileName)

			dedupState = processDownload.processDownload(seriesName, fqFName, deleteDups=True)
			self.log.info( "Done")

			self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, seriesName=seriesName, originName=chapterVol, tags=dedupState)
			return



		except Exception:
			self.log.critical("Failure on retreiving content at %s", sourceUrl)
			self.log.critical("Traceback = %s", traceback.format_exc())
			self.updateDbEntry(sourceUrl, dlState=-1)
Example #59
0
	def getLink(self, link):


		seriesName = link["seriesName"]
		seriesName = seriesName.replace("[", "(").replace("]", "(")
		safeBaseName = nt.makeFilenameSafe(link["seriesName"])



		if seriesName in nt.dirNameProxy:
			self.log.info( "Have target dir for '%s' Dir = '%s'", seriesName, nt.dirNameProxy[seriesName]['fqPath'])
			link["targetDir"] = nt.dirNameProxy[seriesName]["fqPath"]
		else:
			self.log.info( "Don't have target dir for: %s Using default for: %s, full name = %s", seriesName, link["seriesName"], link["originName"])
			targetDir = os.path.join(settings.mkSettings["dirs"]['mDlDir'], safeBaseName)
			if not os.path.exists(targetDir):
				try:
					os.makedirs(targetDir)
					link["targetDir"] = targetDir
					self.updateDbEntry(link["sourceUrl"],flags=" ".join([link["flags"], "newdir"]))
					self.conn.commit()

					self.conn.commit()
				except OSError:
					self.log.critical("Directory creation failed?")
					self.log.critical(traceback.format_exc())
			else:
				self.log.warning("Directory not found in dir-dict, but it exists!")
				self.log.warning("Directory-Path: %s", targetDir)
				link["targetDir"] = targetDir

				self.updateDbEntry(link["sourceUrl"],flags=" ".join([link["flags"], "haddir"]))
				self.conn.commit()




		sourceUrl, originFileName = link["sourceUrl"], link["originName"]

		self.log.info( "Should retreive: %s, url - %s", originFileName, sourceUrl)

		self.updateDbEntry(sourceUrl, dlState=1)
		self.conn.commit()


		try:
			content, hName = self.getLinkFile(sourceUrl)
		except Exception:
			self.log.error("Unrecoverable error retrieving content %s", link)
			self.log.error("Traceback: %s", traceback.format_exc())

			self.updateDbEntry(sourceUrl, dlState=-1)
			return

		# print("Content type = ", type(content))


		# Unescape any %xx URL-encoding in the filename
		hName = urllib.parse.unquote(hName)

		fName = "%s - %s" % (originFileName, hName)
		fName = nt.makeFilenameSafe(fName)

		fqFName = os.path.join(link["targetDir"], fName)
		self.log.info( "SaveName = %s", fqFName)


		loop = 1
		while os.path.exists(fqFName):
			fName = "%s - (%d) - %s" % (originFileName, loop,  hName)
			fqFName = os.path.join(link["targetDir"], fName)
			loop += 1
		self.log.info( "Writing file")

		filePath, fileName = os.path.split(fqFName)

		try:
			chop = len(fileName)-4

			wholePath = "ERROR"
			while True:

				try:
					fileName = fileName[:chop]+fileName[-4:]
					# self.log.info("geturl with processing", fileName)
					wholePath = os.path.join(filePath, fileName)
					self.log.info("Complete filepath: %s", wholePath)

					# Write the downloaded content out to disk.
					with open(wholePath, "wb") as fp:
						fp.write(content)
					self.log.info("Successfully Saved to path: %s", wholePath)
					break
				except IOError:
					chop = chop - 1
					if chop < 200:
						raise RuntimeError("Filename truncated below 200 characters and still failing; giving up.")
					self.log.warn("Truncating file length to %s characters.", chop)




		except TypeError:
			self.log.error("Failure trying to retreive content from source %s", sourceUrl)
			self.updateDbEntry(sourceUrl, dlState=-4, downloadPath=filePath, fileName=fileName)
			return
		#self.log.info( filePath)

		ext = os.path.splitext(fileName)[-1]
		imageExts = ["jpg", "png", "bmp"]
		if not any([ext.endswith(ex) for ex in imageExts]):
			# We don't want to upload the file we just downloaded, so specify doUpload as false.
			dedupState = processDownload.processDownload(False, fqFName, deleteDups=True, doUpload=False)
		else:
			dedupState = ""

		self.log.info( "Done")
		self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, tags=dedupState)
		return