def series_view(self):
    """Render the series (manga detail) page context.

    Looks up the manga by the ``seriesSlug`` route-match value and builds
    the template payload: metadata, tag list, latest-chapter info and the
    full chapter list (newest first, by ``sortorder``).

    :return: dict of template variables, or ``None`` when no manga
        matches the slug.
    """
    _ = self.R
    slug = _.matchdict.get('seriesSlug', "No Title")
    qry = Manga.query
    manga = qry.filter(Manga.slug == slug.strip()).first()
    if manga is not None:
        # Cover thumbnail is stored under <manga.id>/<manga.thumb>.
        filename = '/'.join([manga.id, manga.thumb])
        thumb = _.storage.url(filename)
        aka = utils.HTMLUnscaped(manga.aka)
        artists = utils.HTMLUnscaped(manga.get_artist())
        authors = utils.HTMLUnscaped(manga.get_authors())
        description = utils.HTMLUnscaped(manga.description)
        name = utils.HTMLUnscaped(manga.title)
        last = Manga.last_chapter(manga.id)
        time = manga.chapter_updated.strftime('%b %d, %Y')
        stags = manga.get_genre_tostr()
        tags = [dict(label=tag, value=slugist(tag))
                for tag in stags.split(',')]
        results = []
        # A series may have no chapters yet; fall back to empty strings.
        last_chapter = ''
        last_url = ''
        if last is not None:
            last_chapter = ' '.join([str(last.chapter), last.title])
            last_url = '/'.join([manga.slug, last.slug])
        manga.updated_viewed()  # bump the series view counter
        chapters = Chapter.query.filter_by(
            tb_manga_id=manga.id).order_by(desc(Chapter.sortorder)).all()
        for chapter in chapters:
            results.append(dict(
                # '.0' is stripped so whole-number chapters render as
                # 'Ch. 5' rather than 'Ch. 5.0'.
                name=' '.join(['Ch.',
                               str(chapter.chapter).replace('.0', ''),
                               chapter.title]),
                url='/'.join([manga.slug, chapter.slug, "0"]),
                time=chapter.updated.strftime('%b %d, %Y')
            ))
        return dict(
            aka=aka,
            url='/manga/{slug}'.format(slug=slug),
            thumb_url=thumb,
            artists=artists,
            authors=authors,
            description=description,
            name=name,
            tags=tags,
            time=time,
            last_chapter=last_chapter,
            last_url=last_url,
            chapters=results
        )
    return None
def _aka(self, soup):
    """Collect the unique alternative titles from the 'Alt Names:' row.

    Walks the siblings of the 'Alt Names:' label cell; each alias is
    wrapped in a <span> inside the following <td>.

    :param soup: BeautifulSoup document of the series page.
    :return: list of unique, HTML-unescaped alias strings, in page order.
    """
    aka = soup.find('td', text='Alt Names:').next_siblings
    results = []
    for sibling in aka:
        # isinstance (not type ==) so bs4 Tag subclasses match as well.
        if isinstance(sibling, bs4.element.Tag) and sibling.name == 'td':
            span_tags = sibling.find_all('span')
            for tag in span_tags:
                name = utils.HTMLUnscaped(tag.text.strip())
                if name not in results:  # de-duplicate, keep first seen
                    results.append(name)
    return results
def chapter_view(self):
    """Render the chapter reader page context.

    Resolves the manga by ``seriesSlug`` and the chapter by
    ``chapterSlug`` from the route match dict, then builds the template
    payload (title, description, genres, navigation slugs).

    :return: dict of template variables, or ``{'project': 'moori'}`` when
        no manga matches the slug.
    """
    _ = self.R
    slug = _.matchdict.get('seriesSlug', "No Title")
    chap_slug = _.matchdict.get('chapterSlug', "No Title")
    # find the manga by its slug
    manga = Manga.query.filter(Manga.slug == slug).first()
    if manga is not None:
        filename = '/'.join([manga.id, manga.thumb])
        thumb = _.storage.url(filename)
        aka = utils.HTMLUnscaped(manga.aka)
        artists = utils.HTMLUnscaped(manga.get_artist())
        authors = utils.HTMLUnscaped(manga.get_authors())
        last = Manga.last_chapter(manga.id)
        # Guard against a series with no chapters yet — matches the
        # None-check done in series_view.
        last_chapter = ''
        if last is not None:
            last_chapter = ' '.join([str(last.chapter), last.title])
        # find the requested chapter of this manga
        chapter = manga.get_chapter(manga, chap_slug)
        # Chapter label: just the title when it already equals the bare
        # chapter number, otherwise '<number> <title>'.
        ch = chapter.title if chapter.title == str(chapter.chapter) \
            .replace('.0', '') else '{ch} {chapter}' \
            .format(ch=str(chapter.chapter).replace('.0', ''),
                    chapter=chapter.title)
        return dict(
            aka=aka,
            url='/chapter/{slug}/{chap}'.format(slug=slug, chap=chap_slug),
            thumb_url=thumb,
            artists=artists,
            authors=authors,
            description='Read newest {title} {ch} online'.format(
                ch=ch, title=manga.title
            ),
            name='{title} {ch}'.format(
                ch=ch, title=manga.title
            ),
            genres=manga.get_genre_tostr(),
            last_chapter=last_chapter,
            series_url=manga.slug
        )
    return {'project': 'moori'}
def _description(self, soup):
    """Return the series description as a list of paragraphs.

    Batoto stuffs the whole description inside 1 single <p> tag, using
    <br/> for line breaks. BeautifulSoup's get_text() ignores those br
    tags by default, but get_text('separator') replaces them with the
    provided separator, so we can split the result using that same
    separator and have a proper list of paragraphs.

    :param soup: BeautifulSoup document of the series page.
    :return: list of paragraph strings, or ``['unknown']`` when no
        description cell is found.
    """
    siblings = soup.find('td', text='Description:').next_siblings
    for s in siblings:
        # isinstance (not type ==) so bs4 Tag subclasses match as well.
        if isinstance(s, bs4.element.Tag):
            # NOTE(review): HTMLUnscaped is handed a *list* (the split
            # result) rather than a string here — confirm it accepts
            # sequences.
            return utils.HTMLUnscaped(s.get_text('|||').split('|||'))
    return ['unknown']
def script_to_sys(self, source_url, source_origin, time_str):
    """Scrape one manga (series row, chapters, page images) into the system.

    For the mangaeden API url + manga id[:24], e.g.:

        python scripts/mangascrapper.py \
            -s https://www.mangaeden.com/api/manga/4e70ea1dc092255ef7004d5c/ \
            -o http://www.mangaeden.com/en/en-manga/fairy-tail/ \
            -t "Aug 31, 2015"

    :param self: Manga API scraper instance
    :param source_url: API url of the manga data to scrape
    :param source_origin: origin (public) url of the manga
    :param time_str: last chapter release time, e.g. "Aug 30, 2015"
        (date string, or an epoch number)

    Sample API endpoints:
    : fairy tail https://www.mangaeden.com/api/manga/4e70ea1dc092255ef7004d5c/
    : naruto http://www.mangaeden.com/api/manga/4e70ea03c092255ef70046f0/
    : one piece http://www.mangaeden.com/api/manga/4e70ea10c092255ef7004aa2/
    : bleach http://www.mangaeden.com/api/manga/4e70e9efc092255ef7004274/
    : nanatsu http://www.mangaeden.com/api/manga/5099a865c092254a2000daf4/
    :return: None
    """
    try:
        # print(url)
        # "{url}/api/manga/{id}/".format(url=self.netlocs[2], id=origin_url[:24])
        # https://www.mangaeden.com/api/manga/:id[:24]/
        resp_content = self.get_html(source_url)
        series_info = self.series_info(resp_content)
        # Accept either a human-readable date string or a raw epoch number.
        # NOTE(review): basestring/long are Python 2 only.
        time_long = self._parse_update_date(time_str) if isinstance(
            time_str, basestring) else long(time_str)
        # series == manga
        qry = Manga.query
        manga = qry.filter(Manga.slug == utils.slugist("-".join(
            [self.netlocs[4], series_info.get('name', None)]))).first()
        if manga is None:
            # First time this series is seen: create the Manga row.
            with transaction.manager:
                manga = Manga(
                    self.netlocs[4],
                    utils.HTMLUnscaped(series_info.get('name', u'')),
                    0,
                    ", ".join(series_info.get('tags', [])),
                    series_info.get('authors', u''),
                    series_info.get('artists', u''),
                    utils.HTMLUnscaped(series_info.get('aka', u'')),
                    utils.HTMLUnscaped(series_info.get('description', u'')),
                    # status flag: 1 = ongoing, 2 = completed, 0 = unknown
                    1 if 'ongoing' in series_info.get(
                        'status', '').lower() else 2
                    if 'completed' in series_info.get('status', '').lower()
                    else 0)
                # manga.id = utils.guid()
                manga.origin = source_origin
                manga.chapter_updated = datetime.fromtimestamp(time_long)
                # Cover is stored as cover.<ext> in the series folder.
                ext = series_info.get('thumb_url', '').lower().split('.')[-1]
                manga.thumb = '.'.join(['cover', ext])
                manga.category = 'ja'
                DBSession.add(manga)
                DBSession.flush()
        # Re-query so we hold an attached row after the transaction block.
        manga = qry.filter(Manga.slug == utils.slugist("-".join(
            [self.netlocs[4], series_info.get('name', None)]))).first()
        manga_id, manga_thumb, manga_slug = manga.id, manga.thumb, manga.slug
        # Local storage root for this series: .../rak/manga/<manga_id>
        ini_path = path.join(path.dirname(path.dirname(__file__)),
                             '/'.join(['rak', 'manga', manga_id]))
        # Download the cover image.
        r = requests.get(series_info.get('thumb_url', ''))
        path_img = '/'.join([ini_path, manga_thumb])
        print(path_img)
        if not path.exists(ini_path):
            makedirs(ini_path)
        with open(path_img, "wb") as code:
            code.write(r.content)
        chapters_info = series_info.get('chapters', [])
        for i, ch in enumerate(chapters_info):
            print(ch.get('name', ''))
            # Fall back to the chapter order when the API gives no name.
            ch_name = str(ch.get(
                'order', 0)) if ch.get('name', '') is None else utils.HTMLUnscaped(
                ch.get('name', u''))
            # edenapi slug
            slug_bt = ch_name
            # if ':' in slug_bt:
            #     slug_bt = slug_bt.split(':')
            #     slug_bt.pop(0)
            #     slug_bt = '-'.join(slug_bt)
            slug_chapter = ' '.join([manga_slug, slug_bt])
            # skip chapters that were already downloaded
            chapter = Chapter.query.filter(
                Chapter.slug == utils.slugist(slug_chapter)).first()
            if chapter is None:
                v = utils.parse_number(ch_name, "Vol")
                v = 0 if v is None else v
                c = ch.get('order', 0)
                with transaction.manager:
                    chapter = Chapter(slug_bt, c, v)
                    # NOTE(review): the default here is a datetime object,
                    # which fromtimestamp() cannot take — this assumes the
                    # 'time' key is always present; confirm upstream.
                    time = datetime.fromtimestamp(
                        ch.get('time', datetime.now()))
                    # chapter.id = utils.guid()
                    ch_manga = Manga.query.get(manga_id)
                    ch_manga.chapter_count += 1
                    chapter.lang = ISOLang.query.filter(
                        ISOLang.iso == 'en').first()
                    chapter.updated = time
                    chapter.manga = ch_manga
                    # s = 1000v + c
                    # chapter.sortorder = (1000*float(v)) + float(c)
                    chapter.sortorder = float(c)
                    chapter.slug = slug_chapter
                    DBSession.add(chapter)
                    DBSession.flush()
                chapter = Chapter.query.filter(
                    Chapter.slug == utils.slugist(slug_chapter)).first()
                # eden API wants an explicit JSON content-type.
                headers = {
                    'content-type': 'application/json; charset=utf8'
                }
                html = self.get_html(ch.get('url'), headers=headers)
                # fetch the images and save them locally in the chapter.id
                # folder
                chapter_info = self.chapter_info(html)
                try:
                    # series info
                    # chapter info and images
                    session = FuturesSession(executor=ThreadPoolExecutor(
                        max_workers=10))
                    for n, page in enumerate(chapter_info.get('pages', [])):
                        ini_chapter = '/'.join([ini_path, chapter.id])
                        print(page)
                        r = session.get(page).result()
                        if r.status_code != 200:
                            print('continue chapter')
                            continue
                            # raise HtmlError('cannot fetch')
                        # path_img = '/'.join([ini_chapter, page.split('/')[-1]])
                        # Pages are saved as 000.<ext>, 001.<ext>, ...
                        ext = page.split('/')[-1].rsplit('.', 1)[-1]
                        path_img = '/'.join([
                            ini_chapter,
                            "{num:03d}.{ext}".format(num=n, ext=ext)
                        ])
                        print(path_img)
                        if not path.exists(ini_chapter):
                            makedirs(ini_chapter)
                        with open(path_img, "wb") as code:
                            code.write(r.content)
                except ConnectionError as Conn:
                    # Download failed part-way: drop the chapter row and its
                    # partial folder so a later run retries it.
                    # NOTE(review): ini_chapter is unbound if the failure
                    # happens before the first loop iteration — confirm.
                    print(Conn)
                    chapter = Chapter.query.get(chapter.id)
                    DBSession.delete(chapter)
                    shutil.rmtree(ini_chapter)
    except AttributeError as e:
        # e.message is Python 2 only.
        print(e.message)
    except KeyError as e:
        print(e.message)
    except ValueError as e:
        print(e.message)
def build_to_sys(self, site, source):
    """Scrape one manga described by *source* using the *site* scraper.

    Mirrors script_to_sys, but driven by a source dict: creates the Manga
    row when missing, downloads the cover, then fetches every chapter that
    is not already stored, saving its page images locally.

    :param site: scraper object exposing get_html / series_info /
        chapter_info / netlocs
    :param source: dict expected to carry 'last_url', 'origin' and 'time'
    :return: None
    """
    try:
        url = source.get('last_url')
        # print(url)
        resp_content = site.get_html(url)
        series_info = site.series_info(resp_content)
        # series == manga
        qry = Manga.query
        manga = qry.filter(Manga.slug == utils.slugist("-".join(
            [site.netlocs[4], series_info.get('name', None)]))).first()
        if manga is None:
            # First time this series is seen: create the Manga row.
            with transaction.manager:
                manga = Manga(
                    site.netlocs[4],
                    utils.HTMLUnscaped(series_info.get('name', u'')),
                    0,
                    ", ".join(series_info.get('tags', [])),
                    series_info.get('authors', u''),
                    series_info.get('artists', u''),
                    utils.HTMLUnscaped(series_info.get('aka', u'')),
                    utils.HTMLUnscaped(series_info.get('description', u'')),
                    # status flag: 1 = ongoing, 2 = completed, 0 = unknown
                    1 if 'ongoing' in series_info.get(
                        'status', '').lower() else 2
                    if 'completed' in series_info.get('status', '').lower()
                    else 0)
                # manga.id = utils.guid()
                manga.origin = source.get('origin', '')
                # NOTE(review): the default 'now' is a string, which
                # fromtimestamp() cannot take — this assumes the 'time'
                # key is always present; confirm upstream.
                manga.chapter_updated = datetime.fromtimestamp(
                    source.get('time', 'now'))
                # Cover is stored as cover.<ext> in the series folder.
                ext = series_info.get('thumb_url', '').lower().rsplit('.', 1)[-1]
                manga.thumb = '.'.join(['cover', ext])
                manga.category = 'ja'
                DBSession.add(manga)
                DBSession.flush()
        # Re-query so we hold an attached row after the transaction block.
        manga = qry.filter(Manga.slug == utils.slugist("-".join(
            [site.netlocs[4], series_info.get('name', None)]))).first()
        manga_id, manga_thumb, manga_slug = manga.id, manga.thumb, manga.slug
        # Local storage root for this series: .../rak/manga/<manga_id>
        ini_path = path.join(path.dirname(path.dirname(__file__)),
                             '/'.join(['rak', 'manga', manga_id]))
        # Download the cover image.
        r = requests.get(series_info.get('thumb_url', ''))
        path_img = '/'.join([ini_path, manga_thumb])
        print(path_img)
        if not path.exists(ini_path):
            makedirs(ini_path)
        with open(path_img, "wb") as code:
            code.write(r.content)
        chapters_info = series_info.get('chapters', [])
        for i, ch in enumerate(chapters_info):
            print(ch.get('name', ''))
            # Fall back to the chapter order when the API gives no name.
            ch_name = str(ch.get(
                'order', 0)) if ch.get('name', '') is None else utils.HTMLUnscaped(
                ch.get('name', u''))
            # edenapi slug
            slug_bt = ch_name
            # if ':' in slug_bt:
            #     slug_bt = slug_bt.split(':')
            #     slug_bt.pop(0)
            #     slug_bt = '-'.join(slug_bt)
            slug_chapter = ' '.join([manga_slug, slug_bt])
            # skip chapters that were already downloaded
            chapter = Chapter.query.filter(
                Chapter.slug == utils.slugist(slug_chapter)).first()
            if chapter is None:
                v = utils.parse_number(ch_name, "Vol")
                v = 0 if v is None else v
                c = ch.get('order', 0)
                with transaction.manager:
                    chapter = Chapter(slug_bt, c, v)
                    # NOTE(review): the default here is a datetime object,
                    # which fromtimestamp() cannot take — this assumes the
                    # 'time' key is always present; confirm upstream.
                    time = datetime.fromtimestamp(
                        ch.get('time', datetime.now()))
                    # chapter.id = utils.guid()
                    ch_manga = Manga.query.get(manga_id)
                    ch_manga.chapter_count += 1
                    chapter.lang = ISOLang.query.filter(
                        ISOLang.iso == 'en').first()
                    chapter.updated = time
                    chapter.manga = ch_manga
                    # s = 1000v + c
                    # chapter.sortorder = (1000*float(v)) + float(c)
                    chapter.sortorder = float(c)
                    chapter.slug = slug_chapter
                    DBSession.add(chapter)
                    DBSession.flush()
                chapter = Chapter.query.filter(
                    Chapter.slug == utils.slugist(slug_chapter)).first()
                # eden API wants an explicit JSON content-type.
                headers = {
                    'content-type': 'application/json; charset=utf8'
                }
                html = site.get_html(ch.get('url'), headers=headers)
                # fetch the images and save them locally in the chapter.id
                # folder
                chapter_info = site.chapter_info(html)
                try:
                    # series info
                    # chapter info and images
                    session = FuturesSession(executor=ThreadPoolExecutor(
                        max_workers=10))
                    for n, page in enumerate(chapter_info.get('pages', [])):
                        ini_chapter = '/'.join([ini_path, chapter.id])
                        print(page)
                        r = session.get(page).result()
                        if r.status_code != 200:
                            # raise HtmlError('cannot fetch')
                            continue
                        # path_img = '/'.join([ini_chapter, page.split('/')[-1]])
                        # Pages are saved as 000.<ext>, 001.<ext>, ...
                        ext = page.split('/')[-1].rsplit('.', 1)[-1]
                        path_img = '/'.join([
                            ini_chapter,
                            "{num:03d}.{ext}".format(num=n, ext=ext)
                        ])
                        print(path_img)
                        if not path.exists(ini_chapter):
                            makedirs(ini_chapter)
                        with open(path_img, "wb") as code:
                            code.write(r.content)
                except ConnectionError as Conn:
                    # Download failed part-way: drop the chapter row and its
                    # partial folder so a later run retries it.
                    # NOTE(review): ini_chapter is unbound if the failure
                    # happens before the first loop iteration — confirm.
                    print(Conn)
                    chapter = Chapter.query.get(chapter.id)
                    DBSession.delete(chapter)
                    shutil.rmtree(ini_chapter)
    except AttributeError as e:
        # e.message is Python 2 only.
        print(e.message)
    except KeyError as e:
        print(e.message)
    except ValueError as e:
        print(e.message)
def _description(self, jsoup):
    """Return the series description from the JSON payload, HTML-unescaped."""
    raw = jsoup['description']
    return utils.HTMLUnscaped(raw)
def _alias(self, jsoup):
    """Return the alternative titles joined by ', ', each HTML-unescaped."""
    unescaped = (utils.HTMLUnscaped(alias) for alias in jsoup['aka'])
    return ', '.join(unescaped)
def _name(self, jsoup):
    """Return the series title from the JSON payload, HTML-unescaped."""
    title = jsoup['title']
    return utils.HTMLUnscaped(title)
def _chapter_name(self, soup):
    """Return the currently selected chapter name from the chapter dropdown."""
    dropdown = soup.find('select', attrs={'name': 'chapter_select'})
    selected = dropdown.find('option', selected=True)
    return utils.HTMLUnscaped(selected.text.strip())
def _name(self, soup):
    """Return the page-title heading text, HTML-unescaped."""
    heading = soup.find('h1', class_='ipsType_pagetitle')
    raw_title = heading.contents[0].strip()
    return utils.HTMLUnscaped(raw_title)