Example #1
0
def getLyrics(track, artist):
    """
    Look a song's lyrics up on Pandora.

    Returns a dict that may contain a 'lyrics' key; an empty dict means
    no match was found.
    """
    # Pandora song pages are keyed by lower-cased, URL-quoted names.
    quotedArtist = urllib.quote_plus(artist.encode('utf-8').lower())
    quotedTrack = urllib.quote_plus(track.encode('utf-8').lower())
    page = getSourceCode(
        'http://www.pandora.com/music/song/%s/%s' % (quotedArtist, quotedTrack))

    try:
        trackUid = regex_trackUid.search(page).group(1)
        idAndSum = regex_lyricIdCheckSum.search(page)
        lyricId = idAndSum.group(1)
        checkSum = idAndSum.group(2)
    except AttributeError:
        # a regex failed to match (search() returned None) -> no lyrics
        return {}

    return __getEncryptedLyrics(trackUid, lyricId, checkSum, 'false', 'null')
Example #2
0
	def downloadChapter(self, downloadThread, max_pages, url, manga_chapter_prefix, current_chapter):
		"""Fetch one chapter's page list from MangaPanda and queue each page image."""
		source = getSourceCode(url, self.proxy)
		for page in MangaPanda.re_getPage.findall(source):
			if self.verbose_FLAG:
				print(self.chapters[current_chapter][1] + ' | ' + 'Page %s / %i' % (page[1], max_pages))

			pageUrl = 'http://www.mangapanda.com' + page[0]
			self.downloadImage(downloadThread, page[1], pageUrl, manga_chapter_prefix)
Example #3
0
	def downloadChapter(self, downloadThread, max_pages, url, manga_chapter_prefix, current_chapter):
		"""Walk the chapter's page links on MangaReader and download each page image."""
		pages = MangaReader.re_getPage.findall(getSourceCode(url, self.proxy))
		for page in pages:
			if self.verbose_FLAG:
				print(self.chapters[current_chapter][1] + ' | ' + 'Page %s / %i' % (page[1], max_pages))
			pageUrl = 'http://www.mangareader.net' + page[0]
			self.downloadImage(downloadThread, page[1], pageUrl, manga_chapter_prefix)
Example #4
0
    def parseSite(self):
        """Look the manga up on MangaPanda and build the chapter download list."""
        print('Beginning MangaPanda check: %s' % self.manga)

        listing = getSourceCode('http://www.mangapanda.com/alphabetical',
                                self.proxy)
        # series links live after the 'series_col' marker in the page
        allSeries = MangaPanda.re_getSeries.findall(
            listing[listing.find('series_col'):])

        keyword = self.selectFromResults(allSeries)

        source = getSourceCode('http://www.mangapanda.com%s' % keyword,
                               self.proxy)
        self.chapters = MangaPanda.re_getChapters.findall(source)

        lowerRange = 0

        for index, raw in enumerate(self.chapters):
            # rewrite each match as (absolute URL, display title, chapter id)
            self.chapters[index] = ('http://www.mangapanda.com%s' % raw[0],
                                    '%s%s' % (raw[1], raw[2]),
                                    raw[1])
            if not self.auto:
                print('(%i) %s' % (index + 1, self.chapters[index][1]))
            elif self.lastDownloaded == self.chapters[index][1]:
                lowerRange = index + 1

        # range() already stops before len(self.chapters), so no +1 is needed
        upperRange = len(self.chapters)
        self.isPrependMangaName = False
        if not self.auto:
            self.chapters_to_download = self.selectChapters(self.chapters)
        else:
            if lowerRange == upperRange:
                raise self.NoUpdates

            for index in range(lowerRange, upperRange):
                self.chapters_to_download.append(index)

        self.isPrependMangaName = True

        return
Example #5
0
	def downloadAnimea(self, manga, chapter_start, chapter_end, download_path, download_format):
		"""Download chapters chapter_start..chapter_end of `manga` from manga.animea.net."""
		for current_chapter in range(chapter_start, chapter_end + 1):
			manga_chapter_prefix = manga.lower().replace('-', '_') + '_' + str(current_chapter).zfill(3)
			cbzPath = download_path + manga_chapter_prefix + '.cbz'
			zipPath = download_path + manga_chapter_prefix + '.zip'
			# skip chapters already on disk unless overwriting was requested
			if (os.path.exists(cbzPath) or os.path.exists(zipPath)) and overwrite_FLAG == False:
				print('Chapter ' + str(current_chapter) + ' already downloaded, skipping to next chapter...')
				continue
			url = 'http://manga.animea.net/'+ manga + '-chapter-' + str(current_chapter) + '-page-1.html'
			source = getSourceCode(url)
			# the page count is embedded in the <title> ("... of N</title>")
			max_pages = int(re.compile('of (.*?)</title>').search(source).group(1))

			for page in range(1, max_pages + 1):
				url = 'http://manga.animea.net/'+ manga + '-chapter-' + str(current_chapter) + '-page-' + str(page) + '.html'
				source = getSourceCode(url)
				img_url = re.compile('img src="(http.*?.[jp][pn]g)"').search(source).group(1)
				print('Chapter ' + str(current_chapter) + ' / ' + 'Page ' + str(page))
				print(img_url)
				downloadImage(img_url, os.path.join('mangadl_tmp', manga_chapter_prefix + '_' + str(page).zfill(3)))

			compress(manga_chapter_prefix, download_path, max_pages, download_format)
Example #6
0
 def downloadChapter(self, downloadThread, max_pages, url, manga_chapter_prefix, current_chapter):
     """We ignore max_pages, because you can't regex-search that under Batoto."""
     html = getSourceCode(url, self.proxy)
     # the page dropdown holds one <option> per page of the chapter
     options = BeautifulSoup(html).find("select", id="page_select")("option")
     for pageNumber, option in enumerate(options, start=1):
         if self.verbose_FLAG:
             print(option['value'])
         self.downloadImage(downloadThread, pageNumber, option['value'], manga_chapter_prefix)
Example #7
0
 def downloadChapter(self, downloadThread, max_pages, url, manga_chapter_prefix, current_chapter):
     """We ignore max_pages, because you can't regex-search that under Batoto."""
     pageOptions = BeautifulSoup(getSourceCode(url, self.proxy)).find("select", id="page_select")("option")
     pageNumber = 1
     # every <option> in the page-select dropdown is one page URL
     for option in pageOptions:
         if self.verbose_FLAG:
             print(option['value'])
         self.downloadImage(downloadThread, pageNumber, option['value'], manga_chapter_prefix)
         pageNumber += 1
	def parseSite(self):
		"""Search OtakuWorks for the manga and populate the chapter list."""
		print('Beginning OtakuWorks check: %s' % self.manga)
		url = 'http://www.otakuworks.com/search/%s' % '+'.join(self.manga.split())

		source = getSourceCode(url)

		searchHits = OtakuWorks.re_getMangas.findall(source)

		# zero hits means we were already redirected to the manga homepage
		if len(searchHits) != 0:
			source = getSourceCode(self.selectFromResults(searchHits))

		if source.find('has been licensed and as per request all releases under it have been removed.') != -1:
			raise self.MangaNotFound('It has been removed.')

		# can't pre-compile this because it relies on the manga name
		chapterPattern = re.compile('a href="([^>]*%s[^>]*)">([^<]*#([^<]*))</a>' % '-'.join(fixFormatting(self.manga, '.').replace('_', ' ').split()))
		self.chapters = chapterPattern.findall(source)
		self.chapters.reverse()

		lowerRange = 0

		for index, raw in enumerate(self.chapters):
			self.chapters[index] = ('http://www.otakuworks.com' + raw[0] + '/read', raw[1], raw[2])
			if not self.auto:
				print('(%i) %s' % (index + 1, self.chapters[index][1]))
			elif self.lastDownloaded == self.chapters[index][1]:
				lowerRange = index + 1

		# range() stops before len(self.chapters), so this needs no +1
		upperRange = len(self.chapters)

		if not self.auto:
			self.chapters_to_download = self.selectChapters(self.chapters)
		else:
			if lowerRange == upperRange:
				raise self.NoUpdates
			for index in range(lowerRange, upperRange):
				self.chapters_to_download.append(index)
		return
Example #9
0
	def parseSite(self):
		"""Look the manga up on MangaPanda and build the chapter download list."""
		print('Beginning MangaPanda check: %s' % self.manga)

		directory = getSourceCode('http://www.mangapanda.com/alphabetical', self.proxy)
		# series links live after the 'series_col' marker in the listing page
		allSeries = MangaPanda.re_getSeries.findall(directory[directory.find('series_col'):])

		keyword = self.selectFromResults(allSeries)

		source = getSourceCode('http://www.mangapanda.com%s' % keyword, self.proxy)

		self.chapters = MangaPanda.re_getChapters.findall(source)

		lowerRange = 0

		for i, chapter in enumerate(self.chapters):
			# rewrite each match as (absolute URL, display title, chapter id)
			self.chapters[i] = ('http://www.mangapanda.com%s' % chapter[0], '%s%s' % (chapter[1], chapter[2]), chapter[1])
			if not self.auto:
				print('(%i) %s' % (i + 1, self.chapters[i][1]))
			elif self.lastDownloaded == self.chapters[i][1]:
				lowerRange = i + 1

		# range() already excludes its stop value, so no +1 is needed here
		upperRange = len(self.chapters)
		self.isPrependMangaName = False
		if not self.auto:
			self.chapters_to_download = self.selectChapters(self.chapters)
		else:
			if lowerRange == upperRange:
				raise self.NoUpdates

			for i in range(lowerRange, upperRange):
				self.chapters_to_download.append(i)

		self.isPrependMangaName = True

		return
Example #10
0
    def downloadChapter(self, downloadThread, max_pages, url,
                        manga_chapter_prefix, current_chapter):
        """Download every unique page of one EatManga chapter."""
        source = getSourceCode(url, self.proxy)

        # OrderedDict.fromkeys de-duplicates while keeping first-seen order
        uniquePages = list(OrderedDict.fromkeys(EatManga.re_getPage.findall(source)))

        for page in uniquePages:
            if self.verbose_FLAG:
                print(self.chapters[current_chapter][1] + ' | ' +
                      'Page %s / %i' % (page[1], max_pages))

            self.downloadImage(downloadThread, page[1],
                               'http://eatmanga.com%s' % page[0],
                               manga_chapter_prefix)
Example #11
0
    def parseSite(self):
        """
        Look the manga up on EatManga and build the chapter download list.

        Populates self.chapters and self.chapters_to_download; raises
        self.MangaNotFound when nothing matches and self.NoUpdates when
        auto mode finds nothing new.
        """
        print('Beginning EatManga check: %s' % self.manga)
        url = 'http://eatmanga.com/Manga-Scan/%s' % self.fixFormatting(
            self.manga)
        if self.verbose_FLAG:
            print(url)

        source = getSourceCode(url, self.proxy)

        self.chapters = EatManga.re_getChapters.findall(source)
        self.chapters.reverse()

        if not self.chapters:
            raise self.MangaNotFound

        # BUG FIX: the original deleted 'upcoming' entries with
        # `del self.chapters[i]` while iterating range(0, len(self.chapters)).
        # Deleting mid-iteration skips the element that shifts into slot i and
        # raises IndexError once i reaches the shortened list's end.  Filter
        # the not-yet-available chapters out up front instead.
        self.chapters = [chapter for chapter in self.chapters
                         if 'upcoming' not in chapter[0]]

        lowerRange = 0

        for i in range(0, len(self.chapters)):
            # rewrite each match as (absolute URL, title, title)
            self.chapters[i] = ('http://eatmanga.com%s' % self.chapters[i][0],
                                self.chapters[i][2], self.chapters[i][2])
            if (not self.auto):
                print('(%i) %s' % (i + 1, self.chapters[i][1]))
            else:
                if (self.lastDownloaded == self.chapters[i][1]):
                    lowerRange = i + 1

        # range() excludes its stop value, so upperRange needs no +1
        upperRange = len(self.chapters)

        if (not self.auto):
            self.chapters_to_download = self.selectChapters(self.chapters)
        else:
            if (lowerRange == upperRange):
                raise self.NoUpdates

            for i in range(lowerRange, upperRange):
                self.chapters_to_download.append(i)

        self.isPrependMangaName = True

        return
Example #12
0
def __getEncryptedLyrics(trackUid, lyricId, checkSum, nonExplicit, authToken):
    """Fetch Pandora's encrypted lyrics payload and return it decrypted."""
    # NOTE(review): lyricId is accepted but never sent in the request --
    # confirm whether the service actually needs it.
    query = urllib.urlencode({
        'method': 'lyrics.getLyrics',
        'trackUid': trackUid,
        'checkSum': checkSum,
        'nonExplicit': nonExplicit,
        'authToken': authToken,
    })
    ret = getSourceCode("http://www.pandora.com/services/ajax/?" + query)

    decryptionKey = re.search('var k="([^"]*)"', ret).group(1)

    # javascript function bodies can contain ", which makes python dictionary
    # parsing throw errors; zero them out, then rewrite \u00.. escapes as \x..
    ret = re.sub('(function[^,]*)', '0', ret).replace('\\u00', r'\x')

    # use ast.literal_eval vs. eval because it's safer
    encrypted = ast.literal_eval(ret)

    return __decryptLyrics(encrypted['lyrics'], decryptionKey)
Example #13
0
def __getEncryptedLyrics(trackUid, lyricId, checkSum, nonExplicit, authToken):
    """Request the encrypted lyrics blob from Pandora and decrypt it."""
    # NOTE(review): lyricId is not included in the request parameters --
    # verify whether the endpoint requires it.
    params = {'method': 'lyrics.getLyrics',
              'trackUid': trackUid,
              'checkSum': checkSum,
              'nonExplicit': nonExplicit,
              'authToken': authToken}
    url = "http://www.pandora.com/services/ajax/?" + urllib.urlencode(params)
    ret = getSourceCode(url)

    decryptionKey = re.search('var k="([^"]*)"', ret).group(1)

    # javascript function bodies can contain ", which breaks python dictionary
    # parsing; replace them with 0, then normalise \u00.. escapes to \x.. form
    ret = re.sub('(function[^,]*)', '0', ret).replace('\\u00', r'\x')

    # use ast.literal_eval vs. eval because it's safer
    encrypted = ast.literal_eval(ret)
    encryptedLyrics = encrypted['lyrics']

    return __decryptLyrics(encryptedLyrics, decryptionKey)
Example #14
0
def getLyrics(track, artist):
    """
    Fetch lyrics for (track, artist) from Pandora.

    Returns a dict possibly containing a 'lyrics' key; an empty dict
    means no match was found.
    """
    url = 'http://www.pandora.com/music/song/%s/%s' % (
        urllib.quote_plus(artist.encode('utf-8').lower()),
        urllib.quote_plus(track.encode('utf-8').lower()))
    ret = getSourceCode(url)

    uidMatch = regex_trackUid.search(ret)
    idSumMatch = regex_lyricIdCheckSum.search(ret)
    # either regex failing to match means no lyrics are available
    if uidMatch is None or idSumMatch is None:
        return {}

    return __getEncryptedLyrics(uidMatch.group(1), idSumMatch.group(1),
                                idSumMatch.group(2), 'false', 'null')
Example #15
0
 def get_next_url(self, c):
     """Return the href of the 'Next Chapter' link on page `c`."""
     soup = BeautifulSoup(getSourceCode(c, self.proxy))
     nextLink = soup.find("img", title="Next Chapter").parent
     return nextLink['href']
Example #16
0
    def parseSite(self):
        """
        Parse the list of chapters and their URLs for self.manga on MangaHere.

        Populates self.chapters and self.chapters_to_download; raises
        self.MangaNotFound / self.NoUpdates on failure.
        """

        print('Beginning MangaHere check: %s' % self.manga)

        # jump straight to expected URL and test if manga removed
        url = 'http://www.mangahere.com/manga/%s/' % self.fixFormatting(
            self.manga)
        if self.verbose_FLAG:
            print(url)
        source = getSourceCode(url, self.proxy)

        if (source is None or 'the page you have requested can' in source):
            # do a 'begins-with' search, then a 'contains' search
            url = 'http://www.mangahere.com/search.php?name=%s' % '+'.join(
                self.manga.split())
            if self.verbose_FLAG:
                print(url)

            try:
                source = getSourceCode(url, self.proxy)
                # BUG FIX: the original called re_getSeries.findall(source)
                # here, before the None guard below.  getSourceCode can return
                # None, and findall(None) raises TypeError -- which the
                # `except AttributeError` does not catch -- and the result was
                # immediately overwritten anyway, so the call is dropped.
                seriesResults = []
                if source is not None:
                    seriesResults = MangaHere.re_getSeries.findall(source)

                if (0 == len(seriesResults)):
                    # NOTE(review): this retry URL is identical to the first
                    # search URL; presumably a 'contains'-style query was
                    # intended (compare the MangaFox implementation) -- confirm.
                    url = 'http://www.mangahere.com/search.php?name=%s' % '+'.join(
                        self.manga.split())
                    if self.verbose_FLAG:
                        print(url)
                    source = getSourceCode(url, self.proxy)
                    if source is not None:
                        seriesResults = MangaHere.re_getSeries.findall(source)

            # 0 results
            except AttributeError:
                raise self.MangaNotFound(
                    'It doesn\'t exist, or cannot be resolved by autocorrect.')
            else:
                keyword = self.selectFromResults(seriesResults)
                if self.verbose_FLAG:
                    print("Keyword: %s" % keyword)
                url = 'http://www.mangahere.com/manga/%s/' % keyword
                if self.verbose_FLAG:
                    print(url)
                source = getSourceCode(url, self.proxy)

        else:
            # The Guess worked
            keyword = self.fixFormatting(self.manga)
            if self.verbose_FLAG:
                print("Keyword: %s" % keyword)

        # other check for manga removal if our initial guess for the name was wrong
        if ('it is not available in.' in source):
            raise self.MangaNotFound('It has been removed.')

        # that's nice of them
        #url = 'http://www.mangahere.com/cache/manga/%s/chapters.js' % keyword
        #source = getSourceCode(url, self.proxy)

        # chapters entries are tuples:
        # [0] = chapter URL fragment(s), [1] = chapter title component

        isChapterOnly = False

        # can't pre-compile this because relies on class name;
        # first try volume+chapter URLs (/vNN/cNNN/)
        re_getChapters = re.compile(
            'a.*?href="http://.*?mangahere.*?/manga/%s/(v[\d]+)/(c[\d]+)/[^"]*?"'
            % keyword)
        self.chapters = re_getChapters.findall(source)
        if not self.chapters:
            if self.verbose_FLAG:
                print("Trying chapter only regex")
            # series without volumes use chapter-only URLs (/cNNN/)
            isChapterOnly = True
            re_getChapters = re.compile(
                'a.*?href="http://.*?mangahere.*?/manga/%s/(c[\d]+)/[^"]*?"' %
                keyword)
            self.chapters = re_getChapters.findall(source)

        self.chapters.reverse()

        # code used to both fix URL from relative to absolute as well as verify last downloaded chapter for XML component
        lowerRange = 0

        if isChapterOnly:
            for i in range(0, len(self.chapters)):
                if self.verbose_FLAG:
                    print("%s" % self.chapters[i])
                if (self.auto):
                    if (self.lastDownloaded == self.chapters[i]):
                        lowerRange = i + 1

                # rewrite entry as (absolute URL, chapter id, chapter id)
                self.chapters[i] = ('http://www.mangahere.com/manga/%s/%s' %
                                    (keyword, self.chapters[i]),
                                    self.chapters[i], self.chapters[i])

        else:
            for i in range(0, len(self.chapters)):
                if self.verbose_FLAG:
                    print("%s %s" % (self.chapters[i][0], self.chapters[i][1]))
                # rewrite entry as (absolute URL, "vNN.cNNN" title, chapter id)
                self.chapters[i] = (
                    'http://www.mangahere.com/manga/%s/%s/%s' %
                    (keyword, self.chapters[i][0], self.chapters[i][1]),
                    self.chapters[i][0] + "." + self.chapters[i][1],
                    self.chapters[i][1])
                if (self.auto):
                    if (self.lastDownloaded == self.chapters[i][1]):
                        lowerRange = i + 1

        # range() excludes its stop value, so upperRange needs no +1
        upperRange = len(self.chapters)

        # Validate whether the last chapter is actually readable yet
        if (self.verbose_FLAG):
            print(self.chapters[upperRange - 1])
            print("Validating chapter: %s" % self.chapters[upperRange - 1][0])
        source = getSourceCode(self.chapters[upperRange - 1][0], self.proxy)

        if ('not available yet' in source):
            # If the last chapter is not available remove it from the list
            del self.chapters[upperRange - 1]
            upperRange = upperRange - 1

        # which ones do we want?
        if (not self.auto):
            for i in range(0, upperRange):
                print('(%i) %s' % (i + 1, self.chapters[i][1]))

            self.chapters_to_download = self.selectChapters(self.chapters)
        # XML component
        else:
            if (lowerRange == upperRange):
                raise self.NoUpdates

            for i in range(lowerRange, upperRange):
                self.chapters_to_download.append(i)
        return
Example #17
0
    def parseSite(self):
        """
        Parse the list of chapters and their URLs for self.manga on MangaFox.
        """

        print('Beginning MangaFox check: %s' % self.manga)

        # jump straight to expected URL and test if manga removed
        url = 'http://mangafox.me/manga/%s/' % self.fixFormatting(self.manga)
        if self.verbose_FLAG:
            print(url)

        # the third argument asks getSourceCode to also report the final
        # (post-redirect) URL so we can detect a redirect away from our guess
        source, redirectURL = getSourceCode(url, self.proxy, True)

        if (redirectURL != url or source is None
                or 'the page you have requested cannot be found' in source):
            # Could not find the manga page by guessing
            # Use the website search (name_method=bw -> 'begins with')
            url = 'http://mangafox.me/search.php?name_method=bw&name=%s&is_completed=&advopts=1' % '+'.join(
                self.manga.split())
            if self.verbose_FLAG:
                print(url)
            try:
                source = getSourceCode(url, self.proxy)
                seriesResults = []
                if source is not None:
                    seriesResults = MangaFox.re_getSeries.findall(source)

                # no 'begins with' hits: retry as a 'contains' search
                # (name_method=cw)
                if (0 == len(seriesResults)):
                    url = 'http://mangafox.me/search.php?name_method=cw&name=%s&is_completed=&advopts=1' % '+'.join(
                        self.manga.split())
                    if self.verbose_FLAG:
                        print(url)
                    source = getSourceCode(url, self.proxy)
                    if source is not None:
                        seriesResults = MangaFox.re_getSeries.findall(source)

            # 0 results
            except AttributeError:
                raise self.MangaNotFound(
                    'It doesn\'t exist, or cannot be resolved by autocorrect.')
            else:
                keyword = self.selectFromResults(seriesResults)
                if self.verbose_FLAG:
                    print("Keyword: %s" % keyword)
                url = 'http://mangafox.me/manga/%s/' % keyword
                if self.verbose_FLAG:
                    print("URL: %s" % url)
                source = getSourceCode(url, self.proxy)

                if (source is None):
                    raise self.MangaNotFound('Search Failed to find Manga.')
        else:
            # The Guess worked
            keyword = self.fixFormatting(self.manga)
            if self.verbose_FLAG:
                print("Keyword: %s" % keyword)

        # licensed series are removed from the site
        if ('it is not available in Manga Fox.' in source):
            raise self.MangaNotFound('It has been removed.')

        # that's nice of them
        #url = 'http://mangafox.me/cache/manga/%s/chapters.js' % keyword
        #source = getSourceCode(url, self.proxy)

        # chapters is a 2-tuple
        # chapters[0] contains the chapter URL
        # chapters[1] contains the chapter title

        isChapterOnly = False

        # can't pre-compile this because relies on class name;
        # first try volume+chapter URLs (/vNN/cNNN/)
        re_getChapters = re.compile(
            'a href="http://.*?mangafox.*?/manga/%s/(v[\d]+)/(c[\d]+)/[^"]*?" title'
            % keyword)
        self.chapters = re_getChapters.findall(source)
        if not self.chapters:
            if self.verbose_FLAG:
                print("Trying chapter only regex")
            # series without volumes use chapter-only URLs (/cNNN/)
            isChapterOnly = True
            re_getChapters = re.compile(
                'a href="http://.*?mangafox.*?/manga/%s/(c[\d]+)/[^"]*?" title'
                % keyword)
            self.chapters = re_getChapters.findall(source)

        self.chapters.reverse()

        # code used to both fix URL from relative to absolute as well as verify last downloaded chapter for XML component
        lowerRange = 0

        if isChapterOnly:
            for i in range(0, len(self.chapters)):
                if self.verbose_FLAG:
                    print("%s" % self.chapters[i])
                if (not self.auto):
                    print('(%i) %s' % (i + 1, self.chapters[i]))
                else:
                    if (self.lastDownloaded == self.chapters[i]):
                        lowerRange = i + 1

                # rewrite entry as (absolute URL, chapter id, chapter id)
                self.chapters[i] = ('http://mangafox.me/manga/%s/%s' %
                                    (keyword, self.chapters[i]),
                                    self.chapters[i], self.chapters[i])

        else:
            for i in range(0, len(self.chapters)):
                if self.verbose_FLAG:
                    print("%s %s" % (self.chapters[i][0], self.chapters[i][1]))
                # rewrite entry as (absolute URL, "vNN.cNNN" title, chapter id)
                self.chapters[i] = (
                    'http://mangafox.me/manga/%s/%s/%s' %
                    (keyword, self.chapters[i][0], self.chapters[i][1]),
                    self.chapters[i][0] + "." + self.chapters[i][1],
                    self.chapters[i][1])
                if (not self.auto):
                    print('(%i) %s' % (i + 1, self.chapters[i][1]))
                else:
                    if (self.lastDownloaded == self.chapters[i][1]):
                        lowerRange = i + 1

        # range() already excludes its stop value, so no +1 is needed here
        upperRange = len(self.chapters)

        # which ones do we want?
        if (not self.auto):
            self.chapters_to_download = self.selectChapters(self.chapters)
        # XML component
        else:
            if (lowerRange == upperRange):
                raise self.NoUpdates

            for i in range(lowerRange, upperRange):
                self.chapters_to_download.append(i)
        return
Example #18
0
    def parseSite(self):
        """Search Batoto for self.manga, keep one English entry per chapter
        number, and record the chapters chosen via selectChapters."""
        print("Beginning Batoto check: {}".format(self.manga))

        url = "http://www.batoto.net/search?name={}&name_cond=c".format('+'.join(self.manga.split()))
        s = getSourceCode(url, self.proxy)
        soup = BeautifulSoup(s)
        a = soup.find("div", id="comic_search_results")
        # skip the first <tr>: it is the results-table header row
        r = a.tbody.find_all("tr")[1:]
        seriesl = []
        try:
            for i in r:
                # collect (series URL, series title) for each search hit
                u = i.td.a['href']
                t = i.td.a.img.next_sibling[1:]
                seriesl.append((u,t.encode('utf-8')))
        except TypeError:
            # signifies no manga found
            raise self.MangaNotFound("Nonexistent.")

        manga = self.selectFromResults(seriesl)
        if self.verbose_FLAG:
            print(manga)
        mname = [i for i in seriesl if i[0] == manga][0][1]
        s = getSourceCode(manga, self.proxy)
        soup = BeautifulSoup(s)
        t = soup.find("table", class_="chapters_list").tbody
        cl = t.find_all("tr", class_="lang_English")
        # self.chapters is built as a list of "buckets": consecutive rows
        # sharing the same chapter number (apparently one row per release
        # group); cnum always points at the bucket being filled
        self.chapters = [[]]
        cnum = self.chapters[0]
        for i in cl:
            u = i.td.a['href']                          # chapter URL
            t = i.td.a.img.next_sibling[1:]             # chapter title
            g = i.find_all("td")[2].get_text().strip()  # third cell: group name
            try:
                # chapter number parsed out of the URL ("...chNN...")
                c = float(re.search("ch([\d.]+)", u).group(1))
            except AttributeError:
                c = 0
            # tuple layout: (url, title, group, chapter-number)
            tu = (u,t,g,c)
            # same chapter number as the current bucket -> same bucket
            if len(cnum) == 0 or cnum[0][3] == c:
                cnum.append(tu)
            else:
                self.chapters.append([])
                cnum = self.chapters[-1]
                cnum.append(tu)
        self.chapters.reverse()
        # sc holds the entry chosen for the previous chapter; the loop prefers
        # to stay with the same group (index 2) from one chapter to the next
        sc = None
        for i in self.chapters:
            if len(i) == 1 or sc == None:
                if sc != None and sc[2] != i[0][2]:
                    if self.verbose_FLAG:
                        print("switched to {} at {}".format(i[0][2], i[0][3]))
                sc = i[0]
                del i[1:]
                continue
            ll = [n for n in i if n[2] == sc[2]]
            if len(ll) != 1:
                # ambiguous: follow the previous chapter's "Next Chapter" link
                # to identify which candidate is the true successor
                c = self.get_next_url(sc[0])
                i[0] = [n for n in i if n[0] == c][0]
                if self.verbose_FLAG:
                    print("Anomaly at chapter {} ({} matches, chose {})".format(i[0][3], len(ll), i[0][2]))
                del i[1:]
                sc = i[0]
                continue
            i[0] = ll[0]
            sc = i[0]
            del i[1:]
        # collapse each bucket down to its single chosen entry
        self.chapters = [i[0] for i in self.chapters]
        for n,c in enumerate(self.chapters):
            print("{:03d}. {}".format(n+1, c[1].encode('utf-8')))
        self.chapters_to_download = self.selectChapters(self.chapters)
Example #19
0
    def parseSite(self):
        """Search Batoto for self.manga, keep one English entry per chapter
        number, and build self.chapters_to_download (interactive or auto)."""
        print("Beginning Batoto check: {0}".format(self.manga))

        url = "http://www.batoto.net/search?name={0}&name_cond=c".format(
            '+'.join(self.manga.split()))
        s = getSourceCode(url, self.proxy)
        soup = BeautifulSoup(s)
        a = soup.find("div", id="comic_search_results")
        # skip the first <tr>: it is the results-table header row
        r = a.tbody.find_all("tr")[1:]
        seriesl = []
        for i in r:
            try:
                # collect (series URL, series title) for each search hit;
                # rows without the expected anchors are silently skipped
                e = i.td.findAll('a')[1]
                u = e['href']
                t = e.img.next_sibling[1:]
                seriesl.append((u, t.encode('utf-8')))
            except:
                pass

        if not seriesl:
            # signifies no manga found
            raise self.MangaNotFound("Nonexistent.")

        manga = self.selectFromResults(seriesl)
        if self.verbose_FLAG:
            print(manga)
        mname = [i for i in seriesl if i[0] == manga][0][1]
        s = getSourceCode(manga, self.proxy)
        soup = BeautifulSoup(s)
        t = soup.find("table", class_="chapters_list").tbody
        cl = t.find_all("tr", class_="lang_English")
        # self.chapters is built as a list of "buckets" of rows that the
        # condition below judges to belong to the same chapter
        self.chapters = [[]]
        cnum = self.chapters[0]

        for i in cl:
            u = i.td.a['href']                          # chapter URL
            t = i.td.a.img.next_sibling[1:]             # chapter title
            g = i.find_all("td")[2].get_text().strip()  # third cell: group name

            try:
                # chapter number from the URL, normalised to a string
                # ("3.0" -> "3"); on no match c stays the int 0 --
                # NOTE(review): str on success vs int on failure is an
                # inconsistent type for the later == comparisons; confirm.
                c = float(re.search("ch([\d.]+)", u).group(1))
                c = str(int(c)) if c.is_integer() else str(c)
            except AttributeError:
                c = 0
            # NOTE(review): tuple layout here is (url, title, chapter, group),
            # while the sibling Batoto implementation uses
            # (url, title, group, chapter).  The bucket test below
            # (cnum[0][3] == c) therefore compares a group name against a
            # chapter number -- this looks wrong; confirm intended layout.
            tu = (u, t, c, g)
            if len(cnum) == 0 or cnum[0][3] == c:
                cnum.append(tu)
            else:
                self.chapters.append([])
                cnum = self.chapters[-1]
                cnum.append(tu)

        self.chapters.reverse()

        #Look for first chapter that should be downloaded in auto mode
        lowerRange = 0
        if (self.auto):
            for i in range(0, len(self.chapters)):
                # [i][0][1] is the title of the bucket's first entry
                if (self.lastDownloaded == self.chapters[i][0][1]):
                    lowerRange = i + 1

        # sc holds the entry chosen for the previous chapter; the loop prefers
        # candidates whose index-2 field matches the previous choice
        sc = None
        for i in self.chapters:
            if len(i) == 1 or sc == None:
                if sc != None and sc[2] != i[0][2]:
                    if self.verbose_FLAG:
                        print("switched to {0} at {1}".format(
                            i[0][2], i[0][3]))
                sc = i[0]
                del i[1:]
                continue
            ll = [n for n in i if n[2] == sc[2]]
            if len(ll) != 1:
                # ambiguous: follow the previous chapter's "Next Chapter" link
                # to identify which candidate is the true successor
                c = self.get_next_url(sc[0])
                i[0] = [n for n in i if n[0] == c][0]
                if self.verbose_FLAG:
                    print("Anomaly at chapter {0} ({1} matches, chose {2})".
                          format(i[0][3], len(ll), i[0][2]))
                del i[1:]
                sc = i[0]
                continue
            i[0] = ll[0]
            sc = i[0]
            del i[1:]
        # collapse each bucket down to its single chosen entry
        self.chapters = [i[0] for i in self.chapters]

        upperRange = len(self.chapters)
        # which ones do we want?
        if (not self.auto):
            for n, c in enumerate(self.chapters):
                print("{0:03d}. {1}".format(n + 1, c[1].encode('utf-8')))
            self.chapters_to_download = self.selectChapters(self.chapters)
        # XML component
        else:
            if (lowerRange == upperRange):
                raise self.NoUpdates

            for i in range(lowerRange, upperRange):
                self.chapters_to_download.append(i)
        return
Example #20
0
    def parseSite(self):
        """Search Batoto for ``self.manga`` and build the chapter list.

        Populates ``self.chapters`` with one ``(url, title, group, chapter)``
        tuple per chapter (collapsing multiple scanlation-group releases of
        the same chapter to one) and fills ``self.chapters_to_download`` with
        the indices to fetch.

        Raises:
            MangaNotFound: the search returned no results.
            NoUpdates: auto mode found nothing newer than ``lastDownloaded``.
        """
        print("Beginning Batoto check: {}".format(self.manga))

        # Name search with the "contains" condition.
        url = "http://www.batoto.net/search?name={}&name_cond=c".format('+'.join(self.manga.split()))
        s = getSourceCode(url, self.proxy)
        soup = BeautifulSoup(s)
        a = soup.find("div", id="comic_search_results")
        r = a.tbody.find_all("tr")[1:]
        seriesl = []
        for i in r:
            try:
                e = i.td.findAll('a')[1]
                u = e['href']
                t = e.img.next_sibling[1:]
                seriesl.append((u, t.encode('utf-8')))
            except (AttributeError, IndexError, TypeError):
                # Row is not a series result (missing anchor/image) -- skip it.
                # (Was a bare ``except:``, which also swallowed real errors.)
                pass

        if not seriesl:
            # signifies no manga found
            raise self.MangaNotFound("Nonexistent.")

        manga = self.selectFromResults(seriesl)
        if self.verbose_FLAG:
            print(manga)
        s = getSourceCode(manga, self.proxy)
        soup = BeautifulSoup(s)
        t = soup.find("table", class_="chapters_list").tbody
        cl = t.find_all("tr", class_="lang_English")

        # Group consecutive rows that share a chapter number: each element of
        # self.chapters is a list of releases of one chapter.
        self.chapters = [[]]
        cnum = self.chapters[0]

        for i in cl:
            u = i.td.a['href']
            t = i.td.a.img.next_sibling[1:]
            g = i.find_all("td")[2].get_text().strip()

            try:
                c = float(re.search(r"ch([\d.]+)", u).group(1))
                c = str(int(c)) if c.is_integer() else str(c)
            except AttributeError:
                c = "0"  # keep the type consistent with the parsed case
            # BUGFIX: tuple order must be (url, title, group, chapter) -- the
            # grouping below keys on index 3 (chapter) and the dedup pass keys
            # on index 2 (group); the old (u, t, c, g) order broke both.
            tu = (u, t, g, c)
            if len(cnum) == 0 or cnum[0][3] == c:
                cnum.append(tu)
            else:
                self.chapters.append([])
                cnum = self.chapters[-1]
                cnum.append(tu)

        # Site lists newest first; we want oldest first.
        self.chapters.reverse()

        # Look for first chapter that should be downloaded in auto mode.
        lowerRange = 0
        if self.auto:
            for i in range(0, len(self.chapters)):
                if self.lastDownloaded == self.chapters[i][0][1]:
                    lowerRange = i + 1

        # Collapse each chapter's release list to a single entry, preferring
        # the same scanlation group as the previously chosen chapter so the
        # download stays visually consistent.
        sc = None
        for i in self.chapters:
            if len(i) == 1 or sc is None:
                if sc is not None and sc[2] != i[0][2]:
                    if self.verbose_FLAG:
                        print("switched to {} at {}".format(i[0][2], i[0][3]))
                sc = i[0]
                del i[1:]
                continue
            ll = [n for n in i if n[2] == sc[2]]
            if len(ll) != 1:
                # Zero or several releases by the current group: follow the
                # site's own "Next Chapter" link to disambiguate.
                c = self.get_next_url(sc[0])
                i[0] = [n for n in i if n[0] == c][0]
                if self.verbose_FLAG:
                    print("Anomaly at chapter {} ({} matches, chose {})".format(i[0][3], len(ll), i[0][2]))
                del i[1:]
                sc = i[0]
                continue
            i[0] = ll[0]
            sc = i[0]
            del i[1:]
        self.chapters = [i[0] for i in self.chapters]

        upperRange = len(self.chapters)
        # which ones do we want?
        if not self.auto:
            for n, c in enumerate(self.chapters):
                print("{:03d}. {}".format(n + 1, c[1].encode('utf-8')))
            self.chapters_to_download = self.selectChapters(self.chapters)
        # XML component
        else:
            if lowerRange == upperRange:
                raise self.NoUpdates

            for i in range(lowerRange, upperRange):
                self.chapters_to_download.append(i)
        return
Example #21
0
    def parseSite(self):
        """Parse the list of chapters and URLs for the given manga on MangaFox.

        First guesses the manga URL directly; if that page redirects or is
        missing, falls back to the site search (a 'begins-with' search, then
        a 'contains' search).  Populates ``self.chapters`` with
        ``(absolute URL, title, chapter)`` tuples, oldest first, and fills
        ``self.chapters_to_download`` with the indices to fetch.

        Raises:
            MangaNotFound: the manga cannot be located or was removed.
            NoUpdates: auto mode found nothing newer than ``lastDownloaded``.
        """

        print("Beginning MangaFox check: %s" % self.manga)

        # jump straight to expected URL and test if manga removed
        url = "http://mangafox.me/manga/%s/" % self.fixFormatting(self.manga)
        if self.verbose_FLAG:
            print(url)

        source, redirectURL = getSourceCode(url, self.proxy, True)

        if redirectURL != url or source is None or "the page you have requested cannot be found" in source:
            # Could not find the manga page by guessing.
            # Use the website search instead.
            url = "http://mangafox.me/search.php?name_method=bw&name=%s&is_completed=&advopts=1" % "+".join(
                self.manga.split()
            )
            if self.verbose_FLAG:
                print(url)
            try:
                source = getSourceCode(url, self.proxy)
                seriesResults = []
                if source is not None:
                    seriesResults = MangaFox.re_getSeries.findall(source)

                # 'begins-with' found nothing: retry with a 'contains' search.
                if 0 == len(seriesResults):
                    url = "http://mangafox.me/search.php?name_method=cw&name=%s&is_completed=&advopts=1" % "+".join(
                        self.manga.split()
                    )
                    if self.verbose_FLAG:
                        print(url)
                    source = getSourceCode(url, self.proxy)
                    if source is not None:
                        seriesResults = MangaFox.re_getSeries.findall(source)

                        # 0 results
            except AttributeError:
                raise self.MangaNotFound("It doesn't exist, or cannot be resolved by autocorrect.")
            else:
                keyword = self.selectFromResults(seriesResults)
                if self.verbose_FLAG:
                    print("Keyword: %s" % keyword)
                url = "http://mangafox.me/manga/%s/" % keyword
                if self.verbose_FLAG:
                    print("URL: %s" % url)
                source = getSourceCode(url, self.proxy)

                if source is None:
                    raise self.MangaNotFound("Search Failed to find Manga.")
        else:
            # The Guess worked
            keyword = self.fixFormatting(self.manga)
            if self.verbose_FLAG:
                print("Keyword: %s" % keyword)

        if "it is not available in Manga Fox." in source:
            raise self.MangaNotFound("It has been removed.")

        # chapters is a 2-tuple:
        # chapters[0] contains the chapter URL fragment
        # chapters[1] contains the chapter title

        isChapterOnly = False

        # can't pre-compile this because it relies on the keyword.
        # BUGFIX: raw strings -- '\d' in a plain string literal is an invalid
        # escape sequence (SyntaxWarning/DeprecationWarning on modern Python).
        re_getChapters = re.compile(r'a href="http://.*?mangafox.*?/manga/%s/(v[\d]+)/(c[\d]+)/[^"]*?" title' % keyword)
        self.chapters = re_getChapters.findall(source)
        if not self.chapters:
            # Some series have no volume component; fall back to chapter-only URLs.
            if self.verbose_FLAG:
                print("Trying chapter only regex")
            isChapterOnly = True
            re_getChapters = re.compile(r'a href="http://.*?mangafox.*?/manga/%s/(c[\d]+)/[^"]*?" title' % keyword)
            self.chapters = re_getChapters.findall(source)

        # Site lists newest first; we want oldest first.
        self.chapters.reverse()

        # code used to both fix URL from relative to absolute as well as
        # verify last downloaded chapter for XML component
        lowerRange = 0

        if isChapterOnly:
            for i in range(0, len(self.chapters)):
                if self.verbose_FLAG:
                    print("%s" % self.chapters[i])
                if not self.auto:
                    print("(%i) %s" % (i + 1, self.chapters[i]))
                else:
                    if self.lastDownloaded == self.chapters[i]:
                        lowerRange = i + 1

                self.chapters[i] = (
                    "http://mangafox.me/manga/%s/%s" % (keyword, self.chapters[i]),
                    self.chapters[i],
                    self.chapters[i],
                )

        else:
            for i in range(0, len(self.chapters)):
                if self.verbose_FLAG:
                    print("%s %s" % (self.chapters[i][0], self.chapters[i][1]))
                self.chapters[i] = (
                    "http://mangafox.me/manga/%s/%s/%s" % (keyword, self.chapters[i][0], self.chapters[i][1]),
                    self.chapters[i][0] + "." + self.chapters[i][1],
                    self.chapters[i][1],
                )
                if not self.auto:
                    print("(%i) %s" % (i + 1, self.chapters[i][1]))
                else:
                    if self.lastDownloaded == self.chapters[i][1]:
                        lowerRange = i + 1

        upperRange = len(self.chapters)

        # which ones do we want?
        if not self.auto:
            self.chapters_to_download = self.selectChapters(self.chapters)
            # XML component
        else:
            if lowerRange == upperRange:
                raise self.NoUpdates

            for i in range(lowerRange, upperRange):
                self.chapters_to_download.append(i)
        return
Example #22
0
	def parseSite(self):
		"""Parse the list of chapters and URLs for the given manga on MangaHere.

		Guesses the manga URL directly; if the page is missing, falls back to
		the site search.  Populates ``self.chapters`` with
		``(absolute URL, title, chapter)`` tuples sorted by volume/chapter and
		fills ``self.chapters_to_download`` with the indices to fetch.

		Raises:
			MangaNotFound: the manga cannot be located or was removed.
			NoUpdates: auto mode found nothing newer than ``lastDownloaded``.
		"""

		print('Beginning MangaHere check: %s' % self.manga)

		# jump straight to expected URL and test if manga removed
		url = 'http://www.mangahere.com/manga/%s/' % self.fixFormatting( self.manga )
		if self.verbose_FLAG:
			print(url)
		source = getSourceCode(url, self.proxy)

		if (source is None or 'the page you have requested can' in source):
			# fall back to the site search
			url = 'http://www.mangahere.com/search.php?name=%s' % '+'.join(self.manga.split())
			if self.verbose_FLAG:
				print(url)

			try:
				source = getSourceCode(url, self.proxy)
				if source is not None and 'Sorry you have just searched, please try 5 seconds later.' in source:
					print('Searched too soon, waiting 5 seconds...')
					time.sleep(5)
					# BUGFIX: actually retry after the wait -- the old code
					# slept but then parsed the throttled response anyway.
					source = getSourceCode(url, self.proxy)

				seriesResults = []
				if source is not None:
					seriesResults = MangaHere.re_getSeries.findall(source)

				if (0 == len(seriesResults) ):
					# One more attempt with the same search URL before giving up.
					url = 'http://www.mangahere.com/search.php?name=%s' % '+'.join(self.manga.split())
					if self.verbose_FLAG:
						print(url)
					source = getSourceCode(url, self.proxy)
					if source is not None:
						seriesResults = MangaHere.re_getSeries.findall(source)

			# 0 results
			except AttributeError:
				raise self.MangaNotFound('It doesn\'t exist, or cannot be resolved by autocorrect.')
			else:
				keyword = self.selectFromResults(seriesResults)
				if self.verbose_FLAG:
					print ("Keyword: %s" % keyword)
				url = 'http://www.mangahere.com/manga/%s/' % keyword
				if self.verbose_FLAG:
					print(url)
				source = getSourceCode(url, self.proxy)

		else:
			# The Guess worked
			keyword = self.fixFormatting( self.manga )
			if self.verbose_FLAG:
				print ("Keyword: %s" % keyword)


		# other check for manga removal if our initial guess for the name was wrong
		if('it is not available in.' in source):
			raise self.MangaNotFound('It has been removed.')

		# chapters is a 2-tuple:
		# chapters[0] contains the chapter URL fragment
		# chapters[1] contains the chapter title

		isChapterOnly = False

		# can't pre-compile this because it relies on the keyword.
		# BUGFIX: raw strings -- '\d' in a plain literal is an invalid escape.
		re_getChapters = re.compile(r'a.*?href="http://.*?mangahere.*?/manga/%s/(v[\d]+)/(c[\d]+(\.[\d]+)?)/[^"]*?"' % keyword)
		self.chapters = re_getChapters.findall(source)
		if not self.chapters:
			# Some series have no volume component; fall back to chapter-only URLs.
			if self.verbose_FLAG:
				print ("Trying chapter only regex")
			isChapterOnly = True
			re_getChapters = re.compile(r'a.*?href="http://.*?mangahere.*?/manga/%s/(c[\d]+(\.[\d]+)?)/[^"]*?"' % keyword)
			self.chapters = re_getChapters.findall(source)

		# Sort chapters by volume and chapter number. Needed because next
		# chapter isn't always accurate.
		# BUGFIX: the cmp= keyword argument was removed in Python 3; wrap the
		# old comparison function with functools.cmp_to_key instead.
		from functools import cmp_to_key
		self.chapters = sorted(self.chapters, key=cmp_to_key(self.chapter_compare))

		# code used to both fix URL from relative to absolute as well as
		# verify last downloaded chapter for XML component
		lowerRange = 0

		if isChapterOnly:
			for i in range(0, len(self.chapters)):
				if self.verbose_FLAG:
					print("%s" % self.chapters[i][0])
				if (self.auto):
					if (self.lastDownloaded == self.chapters[i][0]):
						lowerRange = i + 1

				self.chapters[i] = ('http://www.mangahere.com/manga/%s/%s' % (keyword, self.chapters[i][0]), self.chapters[i][0], self.chapters[i][0])

		else:
			for i in range(0, len(self.chapters)):
				if self.verbose_FLAG:
					print("%s %s" % (self.chapters[i][0], self.chapters[i][1]))
				self.chapters[i] = ('http://www.mangahere.com/manga/%s/%s/%s' % (keyword, self.chapters[i][0], self.chapters[i][1]), self.chapters[i][0] + "." + self.chapters[i][1], self.chapters[i][1])
				if (self.auto):
					if (self.lastDownloaded == self.chapters[i][1]):
						lowerRange = i + 1

		upperRange = len(self.chapters)

		# Validate that the newest listed chapter is actually available and
		# drop it from the list if it has not been released yet.
		if (self.verbose_FLAG):
			print(self.chapters[upperRange - 1])
			print("Validating chapter: %s" % self.chapters[upperRange - 1][0])
		source = getSourceCode(self.chapters[upperRange - 1][0], self.proxy)

		if ('not available yet' in source) or ('Sorry, the page you have requested can’t be found' in source):
			# If the last chapter is not available remove it from the list
			del self.chapters[upperRange - 1]
			upperRange = upperRange - 1


		# which ones do we want?
		if (not self.auto):
			for i in range(0, upperRange):
				if isChapterOnly:
					print('(%i) %s' % (i + 1, self.chapters[i][0]))
				else:
					print('(%i) %s' % (i + 1, self.chapters[i][1]))

			self.chapters_to_download = self.selectChapters(self.chapters)
		# XML component
		else:
			if ( lowerRange == upperRange):
				raise self.NoUpdates

			for i in range (lowerRange, upperRange):
				self.chapters_to_download.append(i)
		return
	def parseSite(self):
		"""
		Parses list of chapters and URLs associated with each one for the given manga and site.

		Older www.mangafox.com variant: guesses the manga URL, checks for a
		removal notice, then runs a 'begins-with' search followed by a plain
		search.  Builds self.chapters as (absolute URL, "vNN.cNN" title,
		chapter) tuples and fills self.chapters_to_download.
		NOTE(review): unlike the other parsers this one never passes
		self.proxy to getSourceCode -- confirm that is intentional.
		"""
		
		print('Beginning MangaFox check: %s' % self.manga)

		# jump straight to expected URL and test if manga removed
		url = 'http://www.mangafox.com/manga/%s/' % self.fixFormatting( self.manga )
		if self.verbose_FLAG:
			print(url)
		source = getSourceCode(url)
		if('it is not available in Manga Fox.' in source):
			raise self.MangaNotFound('It has been removed.')
		
		# do a 'begins-with' search, then a 'contains' search
		url = 'http://www.mangafox.com/search.php?name_method=bw&name=%s' % '+'.join(self.manga.split())
		if self.verbose_FLAG:
			print(url)
		try:
			source = getSourceCode(url)
			seriesResults = MangaFox.re_getSeries.findall(source)
			if (0 == len(seriesResults) ):
				url = 'http://www.mangafox.com/search.php?name=%s' % '+'.join(self.manga.split())
				if self.verbose_FLAG:
					print(url)
				source = getSourceCode(url)
				seriesResults = MangaFox.re_getSeries.findall(source)
				
		# 0 results
		# NOTE(review): presumably raised somewhere inside the fetch/parse
		# above when the search page is unusable -- confirm which call.
		except AttributeError:
			raise self.MangaNotFound('It doesn\'t exist, or cannot be resolved by autocorrect.')
		else:	
			keyword = self.selectFromResults(seriesResults)
			if self.verbose_FLAG:
				print ("Keyword: %s" % keyword)
			url = 'http://www.mangafox.com/manga/%s/' % keyword
			source = getSourceCode(url)
			# other check for manga removal if our initial guess for the name was wrong
			if('it is not available in Manga Fox.' in source):
				raise self.MangaNotFound('It has been removed.')
		
			# that's nice of them
			#url = 'http://www.mangafox.com/cache/manga/%s/chapters.js' % keyword
			#source = getSourceCode(url)
		
			# chapters is a 2-tuple
			# chapters[0] contains the chapter URL
			# chapters[1] contains the chapter title
			
			# can't pre-compile this because relies on class name
			re_getChapters = re.compile('a href="http://.*?mangafox.*?/manga/%s/(v[\d]+)/(c[\d]+)/[^"]*?" title' % keyword)
			self.chapters = re_getChapters.findall(source)
			# site lists newest first; reverse so oldest comes first
			self.chapters.reverse()
			
			# code used to both fix URL from relative to absolute as well as verify last downloaded chapter for XML component
			lowerRange = 0
		
			for i in range(0, len(self.chapters)):
				#print("%s %s" % (self.chapters[i][0], self.chapters[i][1]))
				# rewrite each (volume, chapter) match as (absolute URL, "vNN.cNN", chapter)
				self.chapters[i] = ('http://www.mangafox.com/manga/%s/%s/%s' % (keyword, self.chapters[i][0], self.chapters[i][1]), self.chapters[i][0] + "." + self.chapters[i][1], self.chapters[i][1])
				if (not self.auto):
					print('(%i) %s' % (i + 1, self.chapters[i][1]))
				else:
					if (self.lastDownloaded == self.chapters[i][1]):
						lowerRange = i + 1

			# this might need to be len(self.chapters) + 1, I'm unsure as to whether python adds +1 to i after the loop or not
			upperRange = len(self.chapters)
			
			# which ones do we want?
			if (not self.auto):
				self.chapters_to_download = self.selectChapters(self.chapters)
			# XML component
			else:
				if ( lowerRange == upperRange):
					raise self.NoUpdates
				
				for i in range (lowerRange, upperRange):
					self.chapters_to_download.append(i)
			return
Example #24
0
 def get_next_url(self, c):
     """Return the URL of the chapter that follows the chapter page *c*.

     Fetches *c*, locates the "Next Chapter" arrow image, and returns the
     href of the anchor element wrapping it.
     """
     markup = getSourceCode(c, self.proxy)
     page = BeautifulSoup(markup)
     next_link = page.find("img", title="Next Chapter").parent
     return next_link["href"]