def downloadManga(self):
    """Download new chapters for every series listed in the XML file.

    Parses ``self.xmlfile_path``, builds one ``SiteParserThread`` per
    ``<MangaSeries>`` element, runs the threads one after another, and
    finally writes the (possibly updated) DOM back to the same file.

    For each series the ``name`` and ``HostSite`` elements are required;
    ``LastChapterDownloaded`` defaults to ``""`` and ``downloadPath``
    defaults to ``./<formatted name>`` when the element is missing.
    """
    print("Parsing XML File...")
    dom = minidom.parse(self.xmlfile_path)

    threadPool = []
    self.options.auto = True

    # Default OutputDir is the ./MangaName
    SetOutputPathToName_Flag = (self.options.outputDir == 'DEFAULT_VALUE')

    for node in dom.getElementsByTagName("MangaSeries"):
        name = getText(node.getElementsByTagName('name')[0])
        site = getText(node.getElementsByTagName('HostSite')[0])

        # Optional element: an absent tag yields an empty node list.
        try:
            lastDownloaded = getText(
                node.getElementsByTagName('LastChapterDownloaded')[0])
        except IndexError:
            lastDownloaded = ""

        # Optional element: fall back to a directory named after the manga.
        try:
            download_path = getText(
                node.getElementsByTagName('downloadPath')[0])
        except IndexError:
            download_path = ('./' + fixFormatting(name))

        self.options.site = site
        self.options.manga = name
        self.options.downloadPath = download_path
        self.options.lastDownloaded = lastDownloaded
        if SetOutputPathToName_Flag:
            self.options.outputDir = download_path

        # NOTE: the SiteParserThread constructor already parses the site to
        # work out which chapters to download, so starting each thread right
        # here (instead of in the loop below) would be faster.  It is not
        # done because the print output would intermingle with the progress
        # bar and become very hard to follow.
        threadPool.append(SiteParserThread(self.options, dom, node))

    # Run the downloads strictly one series at a time.
    for thread in threadPool:
        thread.start()
        thread.join()

    # Use a context manager so the handle is flushed and closed even if the
    # write fails; the original left the file object open.
    with open(self.xmlfile_path, 'w') as f:
        f.write(dom.toxml())
def downloadManga(self):
    """Download new chapters for every series listed in the XML file.

    Parses ``self.xmlfile_path``, builds one ``SiteParserThread`` per
    ``<MangaSeries>`` element (each with its own shallow copy of
    ``self.options``), runs the threads one after another, and then
    rewrites the XML file from the DOM.

    The original file is renamed to ``<path>_bak`` before the rewrite and
    the backup is removed only after the new file has been written and
    closed, so a failed write leaves the backup in place.
    """
    print("Parsing XML File...")
    if self.verbose_FLAG:
        print("XML Path = %s" % self.xmlfile_path)

    dom = minidom.parse(self.xmlfile_path)

    threadPool = []
    self.options.auto = True

    # Default OutputDir is the ./MangaName
    SetOutputPathToName_Flag = (self.options.outputDir == 'DEFAULT_VALUE')

    for node in dom.getElementsByTagName("MangaSeries"):
        # Per-series copy so one series' settings never leak into another.
        seriesOptions = copy.copy(self.options)
        seriesOptions.manga = getText(node.getElementsByTagName('name')[0])
        seriesOptions.site = getText(
            node.getElementsByTagName('HostSite')[0])

        # Optional element: an absent tag yields an empty node list.
        try:
            lastDownloaded = getText(
                node.getElementsByTagName('LastChapterDownloaded')[0])
        except IndexError:
            lastDownloaded = ""

        # Optional element: fall back to a directory named after the manga.
        try:
            download_path = getText(
                node.getElementsByTagName('downloadPath')[0])
        except IndexError:
            download_path = ('./' + fixFormatting(
                seriesOptions.manga, seriesOptions.spaceToken))

        # A relative per-series path is placed under the user-supplied
        # download directory, when one was given.
        if (self.options.downloadPath != 'DEFAULT_VALUE'
                and not os.path.isabs(download_path)):
            download_path = os.path.join(self.options.downloadPath,
                                         download_path)

        seriesOptions.downloadPath = download_path
        seriesOptions.lastDownloaded = lastDownloaded
        if SetOutputPathToName_Flag:
            seriesOptions.outputDir = download_path

        # NOTE: the SiteParserThread constructor already parses the site to
        # work out which chapters to download, so starting each thread right
        # here (instead of in the loop below) would be faster.  It is not
        # done because the print output would intermingle with the progress
        # bar and become very hard to follow.
        threadPool.append(SiteParserThread(seriesOptions, dom, node))

    # Run the downloads strictly one series at a time.
    for thread in threadPool:
        thread.start()
        thread.join()

    # Serialise first (dropping the blank lines toprettyxml() inserts) so a
    # serialisation failure cannot disturb the existing file.
    outputStr = '\n'.join(
        [line for line in dom.toprettyxml().split('\n') if line.strip()])
    encodedOutput = outputStr.encode('utf-8')

    # Backs up file
    backupFileName = self.xmlfile_path + "_bak"
    os.rename(self.xmlfile_path, backupFileName)

    # The payload is already UTF-8 encoded, so the file must be opened in
    # binary mode (a text-mode handle rejects bytes on Python 3).  The
    # context manager guarantees the data is flushed before the backup goes.
    with open(self.xmlfile_path, 'wb') as f:
        f.write(encodedOutput)

    # The file was successfully saved and now remove backup
    os.remove(backupFileName)
def downloadManga(self):
    """Download new chapters for every series listed in the XML file.

    Parses ``self.xmlfile_path``, builds one ``SiteParserThread`` per
    ``<MangaSeries>`` element, runs the threads one after another, and
    then rewrites the XML file from the DOM.

    Each series works on its own shallow copy of ``self.options``.  The
    previous code aliased ``self.options`` directly, so assigning a
    series' ``downloadPath`` overwrote the base download directory and
    every following series' relative path was joined onto the previous
    series' directory.

    The original file is renamed to ``<path>_bak`` before the rewrite and
    the backup is removed only after the new file has been written and
    closed, so a failed write leaves the backup in place.
    """
    # Local import: the module-level import list is not visible from here.
    import copy

    print("Parsing XML File...")
    if self.verbose_FLAG:
        print("XML Path = %s" % self.xmlfile_path)

    dom = minidom.parse(self.xmlfile_path)

    threadPool = []
    self.options.auto = True

    # Default OutputDir is the ./MangaName
    SetOutputPathToName_Flag = (self.options.outputDir == 'DEFAULT_VALUE')

    for node in dom.getElementsByTagName("MangaSeries"):
        # Per-series copy: self.options must stay untouched because its
        # downloadPath is consulted again for every later series.
        seriesOptions = copy.copy(self.options)
        seriesOptions.manga = getText(node.getElementsByTagName('name')[0])
        seriesOptions.site = getText(
            node.getElementsByTagName('HostSite')[0])

        # Optional element: an absent tag yields an empty node list.
        try:
            lastDownloaded = getText(
                node.getElementsByTagName('LastChapterDownloaded')[0])
        except IndexError:
            lastDownloaded = ""

        # Optional element: fall back to a directory named after the manga.
        try:
            download_path = getText(
                node.getElementsByTagName('downloadPath')[0])
        except IndexError:
            download_path = ('./' + fixFormatting(
                seriesOptions.manga, seriesOptions.spaceToken))

        # A relative per-series path is placed under the user-supplied
        # download directory, when one was given.
        if (self.options.downloadPath != 'DEFAULT_VALUE'
                and not os.path.isabs(download_path)):
            download_path = os.path.join(self.options.downloadPath,
                                         download_path)

        seriesOptions.downloadPath = download_path
        seriesOptions.lastDownloaded = lastDownloaded
        if SetOutputPathToName_Flag:
            seriesOptions.outputDir = download_path

        # NOTE: the SiteParserThread constructor already parses the site to
        # work out which chapters to download, so starting each thread right
        # here (instead of in the loop below) would be faster.  It is not
        # done because the print output would intermingle with the progress
        # bar and become very hard to follow.
        threadPool.append(SiteParserThread(seriesOptions, dom, node))

    # Run the downloads strictly one series at a time.
    for thread in threadPool:
        thread.start()
        thread.join()

    # Serialise first (dropping the blank lines toprettyxml() inserts) so a
    # serialisation failure cannot disturb the existing file.
    outputStr = '\n'.join(
        [line for line in dom.toprettyxml().split('\n') if line.strip()])
    encodedOutput = outputStr.encode('utf-8')

    # Backs up file
    backupFileName = self.xmlfile_path + "_bak"
    os.rename(self.xmlfile_path, backupFileName)

    # The payload is already UTF-8 encoded, so the file must be opened in
    # binary mode (a text-mode handle rejects bytes on Python 3).  The
    # context manager guarantees the data is flushed before the backup goes.
    with open(self.xmlfile_path, 'wb') as f:
        f.write(encodedOutput)

    # The file was successfully saved and now remove backup
    os.remove(backupFileName)
def main():
    """Command-line entry point of the manga downloader.

    Parses the command line and then runs exactly one of three modes:

    * ``--convertDirectory``: convert the images in ``--inputDirectory``.
    * ``-x/--xml``: download updates for every series in an XML file.
    * otherwise: download each manga named on the command line, asking
      interactively for the host site when ``-s/--site`` was not given.

    The progress-bar manager is started before the work begins and is
    always stopped afterwards, even when the work raises.

    Raises:
        InvalidSite: the selected site is not a key of ``siteDict`` or is
            not a number.
    """
    printLicenseInfo()

    # for easier parsing, adds free --help and --version
    # optparse (v2.3-v2.7) was chosen over argparse (v2.7+) for compatibility
    # (and relative similarity) reasons and over getopt for additional
    # functionality
    parser = optparse.OptionParser(
        usage='usage: %prog [options] <manga name>',
        version=('Manga Downloader %s' % VERSION))

    parser.set_defaults(
        all_chapters_FLAG=False,
        auto=False,
        conversion_FLAG=False,
        convert_Directory=False,
        device='Kindle 3',
        downloadFormat='.cbz',
        downloadPath='DEFAULT_VALUE',
        inputDir=None,
        outputDir='DEFAULT_VALUE',
        overwrite_FLAG=False,
        verbose_FLAG=False,
        timeLogging_FLAG=False,
        maxChapterThreads=3,
        useShortName=False,
        # --useShortName stores into useShortName_FLAG; give that
        # destination a real boolean default as well.
        useShortName_FLAG=False,
        spaceToken='.',
        proxy=None,
        siteSelect=0,
        # Explicit form of the None that optparse assigns implicitly.
        xmlfile_path=None)

    parser.add_option(
        '--all',
        action='store_true',
        dest='all_chapters_FLAG',
        help='Download all available chapters.')

    parser.add_option(
        '-d', '--directory',
        dest='downloadPath',
        help='The destination download directory.  Defaults to the directory of the script.')

    parser.add_option(
        '--overwrite',
        action='store_true',
        dest='overwrite_FLAG',
        help='Overwrites previous copies of downloaded chapters.')

    parser.add_option(
        '--verbose',
        action='store_true',
        dest='verbose_FLAG',
        help='Verbose Output.')

    parser.add_option(
        '-x', '--xml',
        dest='xmlfile_path',
        help='Parses the .xml file and downloads all chapters newer than the last chapter downloaded for the listed mangas.')

    parser.add_option(
        '-c', '--convertFiles',
        action='store_true',
        dest='conversion_FLAG',
        help='Converts downloaded files to a Format/Size acceptable to the device specified by the --device parameter.')

    parser.add_option(
        '--device',
        dest='device',
        help='Specifies the conversion device. Omitting this option default to %default.')

    parser.add_option(
        '--convertDirectory',
        action='store_true',
        dest='convert_Directory',
        help=('Converts the image files stored in the directory specified by --inputDirectory. '
              'Stores the converted images in the directory specified by --outputDirectory'))

    parser.add_option(
        '--inputDirectory',
        dest='inputDir',
        help='The directory containing the images to convert when --convertDirectory is specified.')

    parser.add_option(
        '--outputDirectory',
        dest='outputDir',
        help='The directory to store the images when --convertDirectory is specified.')

    parser.add_option(
        '-z', '--zip',
        action='store_const',
        dest='downloadFormat',
        const='.zip',
        help='Downloads using .zip compression.  Omitting this option defaults to %default.')

    parser.add_option(
        '-t', '--threads',
        dest='maxChapterThreads',
        help='Limits the number of chapter threads to the value specified.')

    parser.add_option(
        '--timeLogging',
        action='store_true',
        dest='timeLogging_FLAG',
        help='Output time logging.')

    parser.add_option(
        '--useShortName',
        action='store_true',
        dest='useShortName_FLAG',
        help='To support devices that limit the size of the filename, this parameter uses a short name')

    parser.add_option(
        '--spaceToken',
        dest='spaceToken',
        help='Specifies the character used to replace spaces in the manga name.')

    parser.add_option(
        '--proxy',
        dest='proxy',
        help='Specifies the proxy.')

    parser.add_option(
        '-s', '--site',
        dest='siteSelect',
        help='Specifies the site to download from.')

    (options, args) = parser.parse_args()

    # Unparsable numbers silently fall back to the defaults; only the
    # conversion errors are caught so real failures still surface.
    try:
        options.siteSelect = int(options.siteSelect)
    except (TypeError, ValueError):
        options.siteSelect = 0

    try:
        options.maxChapterThreads = int(options.maxChapterThreads)
    except (TypeError, ValueError):
        options.maxChapterThreads = 2

    if options.maxChapterThreads <= 0:
        options.maxChapterThreads = 2

    if (not args and
            not (options.convert_Directory or options.xmlfile_path is not None)):
        parser.error('Manga not specified.')

    # NOTE: several manga names may be given; a former check that rejected
    # more than one positional argument is intentionally disabled.

    SetDownloadPathToName_Flag = False
    SetOutputPathToDefault_Flag = False
    if args:
        # Default Directory is the ./MangaName
        if options.downloadPath == 'DEFAULT_VALUE':
            SetDownloadPathToName_Flag = True

        # Default outputDir is the ./MangaName
        if options.outputDir == 'DEFAULT_VALUE':
            SetOutputPathToDefault_Flag = True

    PILAvailable = isImageLibAvailable()

    # Check if PIL Library is available if either of convert Flags are set
    if (not PILAvailable) and (options.convert_Directory or options.conversion_FLAG):
        print("\nConversion Functionality Not available.\nMust install the PIL (Python Image Library)")
        sys.exit()

    if PILAvailable:
        from ConvertPackage.ConvertFile import convertFile

    if options.convert_Directory:
        # Report a usage error instead of crashing in abspath(None).
        if options.inputDir is None:
            parser.error('--convertDirectory requires --inputDirectory.')
        # Resolve before the chdir below changes what "relative" means.
        options.inputDir = os.path.abspath(options.inputDir)

    # Changes the working directory to the script location
    if os.path.dirname(sys.argv[0]) != "":
        os.chdir(os.path.dirname(sys.argv[0]))

    options.outputMgr = progressBarManager()
    options.outputMgr.start()
    try:
        if options.convert_Directory:
            if options.outputDir == 'DEFAULT_VALUE':
                options.outputDir = '.'
            print("Converting Files: %s" % options.inputDir)
            convertFile.convert(options.outputMgr, options.inputDir,
                                options.outputDir, options.device,
                                options.verbose_FLAG)

        elif options.xmlfile_path is not None:
            xmlParser = MangaXmlParser(options)
            xmlParser.downloadManga()

        else:
            threadPool = []
            for manga in args:
                print(manga)
                options.manga = manga

                if SetDownloadPathToName_Flag:
                    options.downloadPath = (
                        './' + fixFormatting(options.manga, options.spaceToken))

                if SetOutputPathToDefault_Flag:
                    options.outputDir = options.downloadPath

                options.downloadPath = os.path.realpath(options.downloadPath) + os.sep

                # site selection: prompt only while no site has been chosen
                if options.siteSelect == 0:
                    print('Which site?')
                    for i in siteDict:
                        print(siteDict[i][1])

                    # Python3 fix - removal of raw_input()
                    try:
                        options.siteSelect = raw_input()
                    except NameError:
                        options.siteSelect = input()

                # A non-numeric answer (ValueError) is as invalid as an
                # unknown site number (KeyError).
                try:
                    options.site = siteDict[int(options.siteSelect)][0]
                except (KeyError, ValueError):
                    raise InvalidSite('Site selection invalid.')

                threadPool.append(SiteParserThread(options, None, None))

            # Run the downloads strictly one manga at a time.
            for thread in threadPool:
                thread.start()
                thread.join()
    finally:
        # Must always stop the manager
        options.outputMgr.stop()
def main():
    """Run the manga downloader from the command line.

    After option parsing one of three modes is executed:

    * ``--convertDirectory`` converts the images found in
      ``--inputDirectory`` into ``--outputDirectory`` (``.`` by default).
    * ``-x/--xml`` hands the options to ``MangaXmlParser`` and downloads
      the updates for every series listed in the XML file.
    * Otherwise every positional argument is treated as a manga name; the
      host site is asked for interactively unless ``-s/--site`` chose one.

    The progress-bar manager stored in ``options.outputMgr`` is started
    before the selected mode runs and stopped in a ``finally`` clause.

    Raises:
        InvalidSite: the site selection is not numeric or is not a key of
            ``siteDict``.
    """
    printLicenseInfo()

    # for easier parsing, adds free --help and --version
    # optparse (v2.3-v2.7) was chosen over argparse (v2.7+) for compatibility
    # (and relative similarity) reasons and over getopt for additional
    # functionality
    parser = optparse.OptionParser(usage='usage: %prog [options] <manga name>',
                                   version=('Manga Downloader %s' % VERSION))

    parser.set_defaults(all_chapters_FLAG=False,
                        auto=False,
                        conversion_FLAG=False,
                        convert_Directory=False,
                        device='Kindle 3',
                        downloadFormat='.cbz',
                        downloadPath='DEFAULT_VALUE',
                        inputDir=None,
                        outputDir='DEFAULT_VALUE',
                        overwrite_FLAG=False,
                        verbose_FLAG=False,
                        timeLogging_FLAG=False,
                        maxChapterThreads=3,
                        useShortName=False,
                        # Destination actually written by --useShortName.
                        useShortName_FLAG=False,
                        spaceToken='.',
                        proxy=None,
                        siteSelect=0,
                        # Same value optparse would assign implicitly.
                        xmlfile_path=None)

    parser.add_option('--all',
                      action='store_true',
                      dest='all_chapters_FLAG',
                      help='Download all available chapters.')

    parser.add_option(
        '-d',
        '--directory',
        dest='downloadPath',
        help='The destination download directory.  Defaults to the directory of the script.')

    parser.add_option(
        '--overwrite',
        action='store_true',
        dest='overwrite_FLAG',
        help='Overwrites previous copies of downloaded chapters.')

    parser.add_option('--verbose',
                      action='store_true',
                      dest='verbose_FLAG',
                      help='Verbose Output.')

    parser.add_option(
        '-x',
        '--xml',
        dest='xmlfile_path',
        help='Parses the .xml file and downloads all chapters newer than the last chapter downloaded for the listed mangas.')

    parser.add_option(
        '-c',
        '--convertFiles',
        action='store_true',
        dest='conversion_FLAG',
        help='Converts downloaded files to a Format/Size acceptable to the device specified by the --device parameter.')

    parser.add_option(
        '--device',
        dest='device',
        help='Specifies the conversion device. Omitting this option default to %default.')

    parser.add_option(
        '--convertDirectory',
        action='store_true',
        dest='convert_Directory',
        help=('Converts the image files stored in the directory specified by --inputDirectory. '
              'Stores the converted images in the directory specified by --outputDirectory'))

    parser.add_option(
        '--inputDirectory',
        dest='inputDir',
        help='The directory containing the images to convert when --convertDirectory is specified.')

    parser.add_option(
        '--outputDirectory',
        dest='outputDir',
        help='The directory to store the images when --convertDirectory is specified.')

    parser.add_option(
        '-z',
        '--zip',
        action='store_const',
        dest='downloadFormat',
        const='.zip',
        help='Downloads using .zip compression.  Omitting this option defaults to %default.')

    parser.add_option(
        '-t',
        '--threads',
        dest='maxChapterThreads',
        help='Limits the number of chapter threads to the value specified.')

    parser.add_option('--timeLogging',
                      action='store_true',
                      dest='timeLogging_FLAG',
                      help='Output time logging.')

    parser.add_option(
        '--useShortName',
        action='store_true',
        dest='useShortName_FLAG',
        help='To support devices that limit the size of the filename, this parameter uses a short name')

    parser.add_option(
        '--spaceToken',
        dest='spaceToken',
        help='Specifies the character used to replace spaces in the manga name.')

    parser.add_option('--proxy', dest='proxy', help='Specifies the proxy.')

    parser.add_option('-s',
                      '--site',
                      dest='siteSelect',
                      help='Specifies the site to download from.')

    (options, args) = parser.parse_args()

    # Fall back to the defaults on unparsable numbers, catching only the
    # errors int() can raise for bad input.
    try:
        options.siteSelect = int(options.siteSelect)
    except (TypeError, ValueError):
        options.siteSelect = 0

    try:
        options.maxChapterThreads = int(options.maxChapterThreads)
    except (TypeError, ValueError):
        options.maxChapterThreads = 2

    if options.maxChapterThreads <= 0:
        options.maxChapterThreads = 2

    needsMangaName = not (options.convert_Directory
                          or options.xmlfile_path is not None)
    if len(args) == 0 and needsMangaName:
        parser.error('Manga not specified.')

    # NOTE: more than one manga name is accepted; the check that used to
    # reject multiple positional arguments is intentionally disabled.

    SetDownloadPathToName_Flag = False
    SetOutputPathToDefault_Flag = False
    if len(args) > 0:
        # Default Directory is the ./MangaName
        SetDownloadPathToName_Flag = (options.downloadPath == 'DEFAULT_VALUE')

        # Default outputDir is the ./MangaName
        SetOutputPathToDefault_Flag = (options.outputDir == 'DEFAULT_VALUE')

    PILAvailable = isImageLibAvailable()

    # Check if PIL Library is available if either of convert Flags are set
    conversionRequested = options.convert_Directory or options.conversion_FLAG
    if conversionRequested and not PILAvailable:
        print(
            "\nConversion Functionality Not available.\nMust install the PIL (Python Image Library)"
        )
        sys.exit()

    if PILAvailable:
        from ConvertPackage.ConvertFile import convertFile

    if options.convert_Directory:
        # Without an input directory abspath(None) would raise; report a
        # normal usage error instead.
        if options.inputDir is None:
            parser.error('--convertDirectory requires --inputDirectory.')
        # Must be resolved before the working directory changes below.
        options.inputDir = os.path.abspath(options.inputDir)

    # Changes the working directory to the script location
    scriptDir = os.path.dirname(sys.argv[0])
    if scriptDir != "":
        os.chdir(scriptDir)

    options.outputMgr = progressBarManager()
    options.outputMgr.start()
    try:
        if options.convert_Directory:
            if options.outputDir == 'DEFAULT_VALUE':
                options.outputDir = '.'
            print("Converting Files: %s" % options.inputDir)
            convertFile.convert(options.outputMgr, options.inputDir,
                                options.outputDir, options.device,
                                options.verbose_FLAG)

        elif options.xmlfile_path is not None:
            xmlParser = MangaXmlParser(options)
            xmlParser.downloadManga()

        else:
            threadPool = []
            for manga in args:
                print(manga)
                options.manga = manga

                if SetDownloadPathToName_Flag:
                    options.downloadPath = (
                        './' + fixFormatting(options.manga, options.spaceToken))

                if SetOutputPathToDefault_Flag:
                    options.outputDir = options.downloadPath

                options.downloadPath = os.path.realpath(
                    options.downloadPath) + os.sep

                # site selection: ask only while no site has been chosen yet
                if options.siteSelect == 0:
                    print('Which site?')
                    for i in siteDict:
                        print(siteDict[i][1])

                    # Python3 fix - removal of raw_input()
                    try:
                        options.siteSelect = raw_input()
                    except NameError:
                        options.siteSelect = input()

                # Treat a non-numeric answer like an unknown site number.
                try:
                    options.site = siteDict[int(options.siteSelect)][0]
                except (KeyError, ValueError):
                    raise InvalidSite('Site selection invalid.')

                threadPool.append(SiteParserThread(options, None, None))

            # Downloads run sequentially, one manga after the other.
            for thread in threadPool:
                thread.start()
                thread.join()
    finally:
        # Must always stop the manager
        options.outputMgr.stop()
def parseSite(self):
    """Find the manga on OtakuWorks and decide which chapters to download.

    Searches the site for ``self.manga``, follows the selected search
    result when there is one, and fills ``self.chapters`` with
    ``(read_url, title, number)`` tuples taken from the page source.

    When ``self.auto`` is false the chapters are listed and the choice is
    delegated to ``self.selectChapters``; otherwise every chapter after
    the one whose title equals ``self.lastDownloaded`` is appended to
    ``self.chapters_to_download``.

    Raises:
        self.MangaNotFound: the page carries the "licensed and removed"
            notice.
        self.NoUpdates: automatic mode found nothing after the last
            downloaded chapter.
    """
    print('Beginning OtakuWorks check: %s' % self.manga)

    searchTerms = '+'.join(self.manga.split())
    source = getSourceCode('http://www.otakuworks.com/search/%s' % searchTerms)

    # No matches means either zero search results or that the search already
    # redirected to the manga homepage; in both cases keep the page we have.
    searchResults = OtakuWorks.re_getMangas.findall(source)
    if searchResults:
        source = getSourceCode(self.selectFromResults(searchResults))

    removalNotice = 'has been licensed and as per request all releases under it have been removed.'
    if removalNotice in source:
        raise self.MangaNotFound('It has been removed.')

    # can't pre-compile this because relies on class name
    nameInUrl = '-'.join(fixFormatting(self.manga, '.').replace('_', ' ').split())
    chapterPattern = re.compile('a href="([^>]*%s[^>]*)">([^<]*#([^<]*))</a>' % nameInUrl)
    self.chapters = chapterPattern.findall(source)
    self.chapters.reverse()

    lowerRange = 0
    for index, (link, title, number) in enumerate(self.chapters):
        self.chapters[index] = ('http://www.otakuworks.com' + link + '/read', title, number)
        if not self.auto:
            print('(%i) %s' % (index + 1, title))
        elif self.lastDownloaded == title:
            # Resume with the chapter following the last downloaded one.
            lowerRange = index + 1

    # One past the final valid chapter index.
    upperRange = len(self.chapters)

    if not self.auto:
        self.chapters_to_download = self.selectChapters(self.chapters)
        return

    if lowerRange == upperRange:
        raise self.NoUpdates

    self.chapters_to_download.extend(range(lowerRange, upperRange))
    return