コード例 #1
0
ファイル: main.py プロジェクト: suryakencana007/test-git-flow
    def series_view(self):
        """Build the template context for one manga series page.

        The series is looked up by the ``seriesSlug`` route parameter
        (stripped of surrounding whitespace).

        :return: ``None`` when no manga matches the slug, otherwise a dict
            with the keys ``aka``, ``url``, ``thumb_url``, ``artists``,
            ``authors``, ``description``, ``name``, ``tags``, ``time``,
            ``last_chapter``, ``last_url`` and ``chapters``.
        """
        request = self.R
        slug = request.matchdict.get('seriesSlug', "No Title")
        manga = Manga.query.filter(Manga.slug == slug.strip()).first()
        if manga is None:
            return None

        thumb = request.storage.url('/'.join([manga.id, manga.thumb]))
        aka = utils.HTMLUnscaped(manga.aka)
        artists = utils.HTMLUnscaped(manga.get_artist())
        authors = utils.HTMLUnscaped(manga.get_authors())
        description = utils.HTMLUnscaped(manga.description)
        name = utils.HTMLUnscaped(manga.title)
        last = Manga.last_chapter(manga.id)
        updated = manga.chapter_updated.strftime('%b %d, %Y')
        # ''.split(',') yields [''], so blank entries are skipped to avoid
        # emitting a tag with an empty label.
        tags = [dict(label=tag, value=slugist(tag))
                for tag in manga.get_genre_tostr().split(',')
                if tag.strip()]

        results = []
        last_chapter = ''
        last_url = ''
        # The chapter list is only built (and manga.updated_viewed() only
        # called) when the series has a latest chapter.
        if last is not None:
            last_chapter = ' '.join([str(last.chapter), last.title])
            last_url = '/'.join([manga.slug, last.slug])

            manga.updated_viewed()
            chapters = Chapter.query.filter_by(tb_manga_id=manga.id) \
                .order_by(desc(Chapter.sortorder)).all()
            for chapter in chapters:
                number = str(chapter.chapter)
                # Strip only a trailing ".0"; str.replace('.0', '') would
                # also eat an interior one (e.g. '1.05' -> '15').
                if number.endswith('.0'):
                    number = number[:-2]
                results.append(dict(
                    name=' '.join(['Ch.', number, chapter.title]),
                    url='/'.join([manga.slug, chapter.slug, "0"]),
                    time=chapter.updated.strftime('%b %d, %Y')
                ))

        return dict(
            aka=aka,
            url='/manga/{slug}'.format(slug=slug),
            thumb_url=thumb,
            artists=artists,
            authors=authors,
            description=description,
            name=name,
            tags=tags,
            time=updated,
            last_chapter=last_chapter,
            last_url=last_url,
            chapters=results
        )
コード例 #2
0
 def _aka(self, soup):
     """Collect the alternative names listed after the 'Alt Names:' cell.

     Every ``<span>`` inside the ``<td>`` siblings that follow the
     'Alt Names:' cell contributes its stripped text, passed through
     ``utils.HTMLUnscaped``. Duplicates are dropped, first-seen order is kept.

     :param soup: parsed document exposing ``find`` (BeautifulSoup-style)
     :return: list of names; empty when the 'Alt Names:' cell is missing
     """
     label = soup.find('td', text='Alt Names:')
     if label is None:
         # find() returns None when nothing matches; report "no aliases"
         # instead of failing on None.next_siblings.
         return []
     results = []
     for sibling in label.next_siblings:
         # Siblings also include plain text nodes; only <td> tags matter.
         if isinstance(sibling, bs4.element.Tag) and sibling.name == 'td':
             for tag in sibling.find_all('span'):
                 name = utils.HTMLUnscaped(tag.text.strip())
                 if name not in results:
                     results.append(name)
     return results
コード例 #3
0
ファイル: main.py プロジェクト: suryakencana007/test-git-flow
    def chapter_view(self):
        """Build the template context for one chapter page.

        The manga is looked up by the ``seriesSlug`` route parameter and the
        chapter through ``manga.get_chapter`` with ``chapterSlug``.

        :return: a dict with the keys ``aka``, ``url``, ``thumb_url``,
            ``artists``, ``authors``, ``description``, ``name``, ``genres``,
            ``last_chapter`` and ``series_url``; ``{'project': 'moori'}``
            when no manga matches or ``get_chapter`` returns ``None``.
        """
        request = self.R
        slug = request.matchdict.get('seriesSlug', "No Title")
        chap_slug = request.matchdict.get('chapterSlug', "No Title")

        # Look up the manga by slug.
        manga = Manga.query.filter(Manga.slug == slug).first()
        if manga is None:
            return {'project': 'moori'}

        thumb = request.storage.url('/'.join([manga.id, manga.thumb]))
        aka = utils.HTMLUnscaped(manga.aka)
        artists = utils.HTMLUnscaped(manga.get_artist())
        authors = utils.HTMLUnscaped(manga.get_authors())

        # A series may have no latest chapter; fall back to an empty label,
        # as series_view does.
        last = Manga.last_chapter(manga.id)
        last_chapter = ''
        if last is not None:
            last_chapter = ' '.join([str(last.chapter), last.title])

        # Look up the requested chapter of this manga.
        chapter = manga.get_chapter(manga, chap_slug)
        if chapter is None:
            # Unknown chapter slug: same fallback as for an unknown manga.
            return {'project': 'moori'}

        number = str(chapter.chapter)
        # Strip only a trailing ".0"; str.replace('.0', '') would also eat
        # an interior one (e.g. '1.05' -> '15').
        if number.endswith('.0'):
            number = number[:-2]
        # Avoid repeating the number when the title is just the number.
        if chapter.title == number:
            ch = chapter.title
        else:
            ch = '{ch} {chapter}'.format(ch=number, chapter=chapter.title)

        return dict(
            aka=aka,
            url='/chapter/{slug}/{chap}'.format(slug=slug, chap=chap_slug),
            thumb_url=thumb,
            artists=artists,
            authors=authors,
            description='Read newest {title} {ch} online'.format(
                ch=ch,
                title=manga.title
            ),
            name='{title} {ch}'.format(
                ch=ch,
                title=manga.title
            ),
            genres=manga.get_genre_tostr(),
            last_chapter=last_chapter,
            series_url=manga.slug
        )
コード例 #4
0
 def _description(self, soup):
     """Return the series description as a list of paragraphs.

     The text is taken from the first tag that follows the 'Description:'
     cell.

     :param soup: parsed document exposing ``find`` (BeautifulSoup-style)
     :return: list of paragraph strings, or ``['unknown']`` when the
         'Description:' cell or a following tag cannot be found
     """
     label = soup.find('td', text='Description:')
     if label is None:
         # find() returns None when nothing matches; use the same fallback
         # as when no tag follows the label.
         return ['unknown']
     for s in label.next_siblings:
         if isinstance(s, bs4.element.Tag):
             # Batoto stuffs the whole description inside 1 single <p> tag,
             # using <br/> for line breaks. BeautifulSoup's get_text()
             # ignores those br tags by default, but get_text('separator')
             # replaces them with the provided separator, so we can split
             # the result using that same separator and have a proper list
             # of paragraphs.
             # NOTE(review): HTMLUnscaped receives the whole list here, not
             # each paragraph - confirm the helper handles a list.
             return utils.HTMLUnscaped(s.get_text('|||').split('|||'))
     return ['unknown']
コード例 #5
0
    def script_to_sys(self, source_url, source_origin, time_str):
        """
        Scrape one manga from a mangaeden API URL into the database and disk.

        Meant for the mangaeden API URL + manga id[:24], e.g.
        python scripts/mangascrapper.py -s https://www.mangaeden.com/api/manga/4e70ea1dc092255ef7004d5c/ -o http://www.mangaeden.com/en/en-manga/fairy-tail/ -t "Aug 31, 2015"

        A Manga row is created when none exists for the computed slug. The
        cover image is written to ``rak/manga/<manga id>/`` (relative to the
        parent of this file's directory). Every chapter whose slug is not
        stored yet is created and its pages are downloaded into
        ``rak/manga/<manga id>/<chapter id>/``. AttributeError, KeyError and
        ValueError raised along the way are printed and end the run.

        :param self: Manga API
        :param source_url: URL of the manga data to scrape
        :param source_origin: URL of the manga's source page
        :param time_str: latest chapter release time; a string such as
            "Aug 30, 2015" is parsed with ``_parse_update_date``, any other
            value is converted with ``long``
        : fairy tail https://www.mangaeden.com/api/manga/4e70ea1dc092255ef7004d5c/
        : naruto http://www.mangaeden.com/api/manga/4e70ea03c092255ef70046f0/
        : one piece http://www.mangaeden.com/api/manga/4e70ea10c092255ef7004aa2/
        : bleach http://www.mangaeden.com/api/manga/4e70e9efc092255ef7004274/
        : nanatsu http://www.mangaeden.com/api/manga/5099a865c092254a2000daf4/
        :return: None
        """
        try:
            resp_content = self.get_html(source_url)
            series_info = self.series_info(resp_content)
            time_long = self._parse_update_date(time_str) if isinstance(
                time_str, basestring) else long(time_str)
            # series == manga
            qry = Manga.query
            series_slug = utils.slugist("-".join(
                [self.netlocs[4], series_info.get('name', None)]))
            manga = qry.filter(Manga.slug == series_slug).first()
            if manga is None:
                with transaction.manager:
                    status = series_info.get('status', '').lower()
                    manga = Manga(
                        self.netlocs[4],
                        utils.HTMLUnscaped(series_info.get('name', u'')), 0,
                        ", ".join(series_info.get('tags', [])),
                        series_info.get('authors', u''),
                        series_info.get('artists', u''),
                        utils.HTMLUnscaped(series_info.get('aka', u'')),
                        utils.HTMLUnscaped(series_info.get('description',
                                                           u'')),
                        # status code: 1 = ongoing, 2 = completed, 0 = other
                        1 if 'ongoing' in status
                        else 2 if 'completed' in status else 0)
                    manga.origin = source_origin
                    manga.chapter_updated = datetime.fromtimestamp(time_long)
                    ext = series_info.get('thumb_url',
                                          '').lower().split('.')[-1]
                    manga.thumb = '.'.join(['cover', ext])
                    manga.category = 'ja'
                    DBSession.add(manga)
                    DBSession.flush()

            # Re-read the row after the transaction block has finished.
            manga = qry.filter(Manga.slug == series_slug).first()
            manga_id, manga_thumb, manga_slug = manga.id, manga.thumb, manga.slug
            ini_path = path.join(path.dirname(path.dirname(__file__)),
                                 '/'.join(['rak', 'manga', manga_id]))

            # Download the cover image into the manga folder.
            r = requests.get(series_info.get('thumb_url', ''))
            path_img = '/'.join([ini_path, manga_thumb])
            print(path_img)
            if not path.exists(ini_path):
                makedirs(ini_path)
            with open(path_img, "wb") as code:
                code.write(r.content)

            for ch in series_info.get('chapters', []):
                print(ch.get('name', ''))
                # Use the chapter order as its name when the API reports
                # an explicit None name.
                ch_name = str(ch.get(
                    'order',
                    0)) if ch.get('name', '') is None else utils.HTMLUnscaped(
                        ch.get('name', u''))
                # edenapi slug
                slug_bt = ch_name

                slug_chapter = ' '.join([manga_slug, slug_bt])
                # Check whether the chapter has already been downloaded.
                chapter = Chapter.query.filter(
                    Chapter.slug == utils.slugist(slug_chapter)).first()
                if chapter is not None:
                    continue

                v = utils.parse_number(ch_name, "Vol")
                v = 0 if v is None else v
                c = ch.get('order', 0)

                with transaction.manager:
                    chapter = Chapter(slug_bt, c, v)
                    # Fall back to the current time when the API gives no
                    # timestamp; fromtimestamp() rejects a datetime object.
                    ch_time = ch.get('time')
                    updated = datetime.now() if ch_time is None \
                        else datetime.fromtimestamp(ch_time)
                    ch_manga = Manga.query.get(manga_id)
                    ch_manga.chapter_count += 1
                    chapter.lang = ISOLang.query.filter(
                        ISOLang.iso == 'en').first()
                    chapter.updated = updated
                    chapter.manga = ch_manga
                    # Alternative considered: sortorder = 1000 * v + c
                    chapter.sortorder = float(c)
                    chapter.slug = slug_chapter
                    DBSession.add(chapter)
                    DBSession.flush()

                # Re-read the row after the transaction block has finished.
                chapter = Chapter.query.filter(
                    Chapter.slug == utils.slugist(slug_chapter)).first()

                # eden
                headers = {
                    'content-type': 'application/json; charset=utf8'
                }
                html = self.get_html(ch.get('url'), headers=headers)
                # Fetch the images and store them locally in the
                # chapter.id folder.
                chapter_info = self.chapter_info(html)
                try:
                    session = FuturesSession(executor=ThreadPoolExecutor(
                        max_workers=10))

                    for n, page in enumerate(chapter_info.get('pages', [])):
                        ini_chapter = '/'.join([ini_path, chapter.id])
                        print(page)
                        r = session.get(page).result()
                        if r.status_code != 200:
                            # Skip pages that cannot be fetched.
                            print('continue chapter')
                            continue
                        ext = page.split('/')[-1].rsplit('.', 1)[-1]
                        # Pages are stored as zero-padded index + extension.
                        path_img = '/'.join([
                            ini_chapter,
                            "{num:03d}.{ext}".format(num=n, ext=ext)
                        ])
                        print(path_img)
                        if not path.exists(ini_chapter):
                            makedirs(ini_chapter)
                        with open(path_img, "wb") as code:
                            code.write(r.content)
                except ConnectionError as Conn:
                    print(Conn)
                    chapter = Chapter.query.get(chapter.id)
                    DBSession.delete(chapter)
                    # The folder only exists once a page was saved, so a
                    # failure on the first page must not raise here.
                    shutil.rmtree(ini_chapter, ignore_errors=True)

        except (AttributeError, KeyError, ValueError) as e:
            # Print the exception itself (as the ConnectionError handler
            # does) rather than the deprecated ``message`` attribute.
            print(e)
コード例 #6
0
    def build_to_sys(self, site, source):
        """Scrape one manga through ``site`` into the database and disk.

        A Manga row is created when none exists for the computed slug. The
        cover image is written to ``rak/manga/<manga id>/`` (relative to the
        parent of this file's directory). Every chapter whose slug is not
        stored yet is created and its pages are downloaded into
        ``rak/manga/<manga id>/<chapter id>/``. AttributeError, KeyError and
        ValueError raised along the way are printed and end the run.

        :param site: scraper object; this method uses its ``get_html``,
            ``series_info``, ``chapter_info`` and ``netlocs[4]``
        :param source: mapping read with ``.get`` for ``'last_url'`` (the
            page to scrape), ``'origin'`` and ``'time'`` (a timestamp; the
            current time is used when it is missing)
        :return: None
        """
        try:
            url = source.get('last_url')
            resp_content = site.get_html(url)
            series_info = site.series_info(resp_content)

            # series == manga
            qry = Manga.query
            series_slug = utils.slugist("-".join(
                [site.netlocs[4], series_info.get('name', None)]))
            manga = qry.filter(Manga.slug == series_slug).first()
            if manga is None:
                with transaction.manager:
                    status = series_info.get('status', '').lower()
                    manga = Manga(
                        site.netlocs[4],
                        utils.HTMLUnscaped(series_info.get('name', u'')), 0,
                        ", ".join(series_info.get('tags', [])),
                        series_info.get('authors', u''),
                        series_info.get('artists', u''),
                        utils.HTMLUnscaped(series_info.get('aka', u'')),
                        utils.HTMLUnscaped(series_info.get('description',
                                                           u'')),
                        # status code: 1 = ongoing, 2 = completed, 0 = other
                        1 if 'ongoing' in status
                        else 2 if 'completed' in status else 0)
                    manga.origin = source.get('origin', '')
                    # Fall back to the current time when no timestamp is
                    # given; fromtimestamp() rejects the old 'now' default.
                    src_time = source.get('time')
                    manga.chapter_updated = datetime.now() \
                        if src_time is None \
                        else datetime.fromtimestamp(src_time)
                    ext = series_info.get('thumb_url',
                                          '').lower().rsplit('.', 1)[-1]
                    manga.thumb = '.'.join(['cover', ext])
                    manga.category = 'ja'
                    DBSession.add(manga)
                    DBSession.flush()

            # Re-read the row after the transaction block has finished.
            manga = qry.filter(Manga.slug == series_slug).first()
            manga_id, manga_thumb, manga_slug = manga.id, manga.thumb, manga.slug
            ini_path = path.join(path.dirname(path.dirname(__file__)),
                                 '/'.join(['rak', 'manga', manga_id]))

            # Download the cover image into the manga folder.
            r = requests.get(series_info.get('thumb_url', ''))
            path_img = '/'.join([ini_path, manga_thumb])
            print(path_img)
            if not path.exists(ini_path):
                makedirs(ini_path)
            with open(path_img, "wb") as code:
                code.write(r.content)

            for ch in series_info.get('chapters', []):
                print(ch.get('name', ''))
                # Use the chapter order as its name when the API reports
                # an explicit None name.
                ch_name = str(ch.get(
                    'order',
                    0)) if ch.get('name', '') is None else utils.HTMLUnscaped(
                        ch.get('name', u''))
                # edenapi slug
                slug_bt = ch_name

                slug_chapter = ' '.join([manga_slug, slug_bt])
                # Check whether the chapter has already been downloaded.
                chapter = Chapter.query.filter(
                    Chapter.slug == utils.slugist(slug_chapter)).first()
                if chapter is not None:
                    continue

                v = utils.parse_number(ch_name, "Vol")
                v = 0 if v is None else v
                c = ch.get('order', 0)

                with transaction.manager:
                    chapter = Chapter(slug_bt, c, v)
                    # Fall back to the current time when the API gives no
                    # timestamp; fromtimestamp() rejects a datetime object.
                    ch_time = ch.get('time')
                    updated = datetime.now() if ch_time is None \
                        else datetime.fromtimestamp(ch_time)
                    ch_manga = Manga.query.get(manga_id)
                    ch_manga.chapter_count += 1
                    chapter.lang = ISOLang.query.filter(
                        ISOLang.iso == 'en').first()
                    chapter.updated = updated
                    chapter.manga = ch_manga
                    # Alternative considered: sortorder = 1000 * v + c
                    chapter.sortorder = float(c)
                    chapter.slug = slug_chapter
                    DBSession.add(chapter)
                    DBSession.flush()

                # Re-read the row after the transaction block has finished.
                chapter = Chapter.query.filter(
                    Chapter.slug == utils.slugist(slug_chapter)).first()

                # eden
                headers = {
                    'content-type': 'application/json; charset=utf8'
                }
                html = site.get_html(ch.get('url'), headers=headers)
                # Fetch the images and store them locally in the
                # chapter.id folder.
                chapter_info = site.chapter_info(html)
                try:
                    session = FuturesSession(executor=ThreadPoolExecutor(
                        max_workers=10))

                    for n, page in enumerate(chapter_info.get('pages', [])):
                        ini_chapter = '/'.join([ini_path, chapter.id])
                        print(page)
                        r = session.get(page).result()
                        if r.status_code != 200:
                            # Skip pages that cannot be fetched.
                            continue
                        ext = page.split('/')[-1].rsplit('.', 1)[-1]
                        # Pages are stored as zero-padded index + extension.
                        path_img = '/'.join([
                            ini_chapter,
                            "{num:03d}.{ext}".format(num=n, ext=ext)
                        ])
                        print(path_img)
                        if not path.exists(ini_chapter):
                            makedirs(ini_chapter)
                        with open(path_img, "wb") as code:
                            code.write(r.content)

                except ConnectionError as Conn:
                    print(Conn)
                    chapter = Chapter.query.get(chapter.id)
                    DBSession.delete(chapter)
                    # The folder only exists once a page was saved, so a
                    # failure on the first page must not raise here.
                    shutil.rmtree(ini_chapter, ignore_errors=True)

        except (AttributeError, KeyError, ValueError) as e:
            # Print the exception itself (as the ConnectionError handler
            # does) rather than the deprecated ``message`` attribute.
            print(e)
コード例 #7
0
 def _description(self, jsoup):
     """Return ``jsoup['description']`` passed through ``utils.HTMLUnscaped``.

     :param jsoup: object indexable by the key ``'description'``
         (presumably the decoded JSON of a series - confirm with caller)
     """
     raw_description = jsoup['description']
     return utils.HTMLUnscaped(raw_description)
コード例 #8
0
 def _alias(self, jsoup):
     """Join the entries of ``jsoup['aka']`` into one ', '-separated string.

     Each entry is passed through ``utils.HTMLUnscaped`` before joining.

     :param jsoup: object indexable by the key ``'aka'``, whose value is
         iterable
     """
     cleaned = (utils.HTMLUnscaped(entry) for entry in jsoup['aka'])
     return ', '.join(cleaned)
コード例 #9
0
 def _name(self, jsoup):
     """Return ``jsoup['title']`` passed through ``utils.HTMLUnscaped``.

     :param jsoup: object indexable by the key ``'title'``
     """
     raw_title = jsoup['title']
     return utils.HTMLUnscaped(raw_title)
コード例 #10
0
 def _chapter_name(self, soup):
     """Return the text of the selected option of the chapter drop-down.

     The ``<select name="chapter_select">`` element is located, its option
     marked ``selected`` is read, and the stripped text is passed through
     ``utils.HTMLUnscaped``.

     :param soup: parsed document exposing ``find`` (BeautifulSoup-style)
     """
     chapter_select = soup.find('select', attrs={'name': 'chapter_select'})
     selected_option = chapter_select.find('option', selected=True)
     label = selected_option.text.strip()
     return utils.HTMLUnscaped(label)
コード例 #11
0
 def _name(self, soup):
     """Return the series name read from the page title heading.

     Takes the first child of the ``<h1 class="ipsType_pagetitle">``
     element, strips it and passes it through ``utils.HTMLUnscaped``.

     :param soup: parsed document exposing ``find`` (BeautifulSoup-style)
     """
     heading = soup.find('h1', class_='ipsType_pagetitle')
     title = heading.contents[0].strip()
     return utils.HTMLUnscaped(title)