Ejemplo n.º 1
0
    def save_new(self):
        _ = self.R
        with ResponseHTTP(_.response) as resp:
            _in = u'Failed'
            code, status = ResponseHTTP.BAD_REQUEST
            # for key, value in _.params.iteritems():
            #     print(":".join([key, value]))
            if _.params.get('title', None) is not None:
                manga = Manga.query.filter(Manga.id == _.params.get('series', None)).first()
                manga_slug = "-".join([manga.type, manga.title])
                lang = ISOLang.query.filter(ISOLang.iso == _.params.get('lang', 'en')).first()
                v = _.params.get('volume', 0)
                c = _.params.get('chapter', 0)

                chapter = Chapter(
                    _.params.get('title', None),
                    c if str(c).isdigit() else 0,
                    v if str(v).isdigit() else 0
                )
                slug_chapter = ' '.join([manga_slug, _.params.get('title', None)])

                manga.chapter_count += 1
                manga.updated_chapter()
                chapter.lang = lang
                chapter.updated = utils.datetime.now()
                chapter.manga = manga
                # s = 1000v + c
                # chapter.sortorder = (1000*float(v)) + float(c)
                chapter.sortorder = float(_.params.get('chapter', None))
                chapter.slug = slug_chapter
                _.db.add(chapter)
                chp_tmp = Chapter.query.filter(Chapter.slug == slugist(slug_chapter)).first()
                temps_path = _.storage.path('/'.join(['temps', _.params.get('uuid', None)]))
                print(temps_path)
                for root, dirs, files in walk(temps_path):
                    LOG.info(files)
                    for f in files:
                        fpath = '/'.join([temps_path, f])
                        fdest = _.storage.path('/'.join([manga.id, chp_tmp.id]))
                        print(fpath)
                        print(fdest)
                        extract_zip(fpath, fdest)
                _in = u'Success'
                code, status = ResponseHTTP.OK

        return resp.to_json(_in,
                            code=code,
                            status=status)
Ejemplo n.º 2
0
def build_to_sys(site, source):
    try:
        lt = LocalDateTime.now()
        """
            dict(
                thumb=self.netlocs[3] + "/".join([image_thumb.split('/')[-2], image_thumb.split('/')[-1]]),
                origin=origin_url,
                name=title,
                # time=self.parseDate.human_to_date_stamp(time),
                time=time,
                last_chapter=last_title,
                last_url=last_url,
                site=self.netlocs[1]
            )
        """
        # list latest
        # scrap series info
        # url = "/".join([site.netlocs[2], source.get('origin')])
        url = source.get('origin')
        # print(url)
        respcontent = site.get_html(url)
        series_info = site.series_info(respcontent)

        # series == manga
        qry = Manga.query
        manga = qry.filter(Manga.slug == utils.slugist(
            "-".join([site.netlocs[4], source.get('name', None)])
        )).first()
        if manga is None:
            with transaction.manager:
                manga = Manga(
                    site.netlocs[4],
                    series_info.get('name', []),
                    0,
                    ", ".join(series_info.get('tags', [])),
                    ", ".join(series_info.get('authors', [])),
                    ", ".join(series_info.get('artists', [])),
                    ', '.join(series_info.get('aka', [])),
                    ",".join(series_info.get('description', None)),
                    1 if 'ongoing' in series_info.get('status', '').lower()
                    else 2 if 'completed' in series_info.get('status', '').lower() else 0
                )
                # manga.id = utils.guid()
                manga.origin = source.get('origin', '')
                manga.chapter_updated = lt.from_time_stamp(source.get('time', 'now'))
                ext = series_info.get('thumb_url', '').lower().rsplit('.', 1)[-1]
                manga.thumb = '.'.join(['cover', ext])
                manga.category = 'ja'
                DBSession.add(manga)
                DBSession.flush()

        manga = qry.filter(Manga.slug == utils.slugist(
            "-".join([site.netlocs[4], source.get('name', None)])
        )).first()
        manga_id, manga_thumb, manga_slug = manga.id, manga.thumb, manga.slug
        ini_path = path.join(
            path.dirname(
                path.dirname(__file__)
            ),
            '/'.join(['rak', 'manga', manga_id])
        )

        r = requests.get(source.get('thumb'))
        path_img = '/'.join([ini_path, manga_thumb])
        print(path_img)
        if not path.exists(ini_path):
            makedirs(ini_path)
        with open(path_img, "wb") as code:
            code.write(r.content)

        chapters_info = series_info.get('chapters', [])
        for i, ch in enumerate(chapters_info):
            print(ch.get('name', ''))
            # batoto slug
            slug_bt = ch.get('name', '')

            if ':' in slug_bt:
                slug_bt = slug_bt.split(':')
                slug_bt.pop(0)
                slug_bt = '-'.join(slug_bt)

            slug_chapter = ' '.join([manga_slug, slug_bt])
            # cek chapter sudah didownload
            chapter = Chapter.query.filter(Chapter.slug == utils.slugist(slug_chapter)).first()
            if chapter is None:

                v = utils.parse_number(ch.get('name', ''), "Vol")
                v = 0 if v is None else v
                c = utils.parse_number(ch.get('name', ''), "Ch")
                c = 0 if c is None else c

                with transaction.manager:
                    chapter = Chapter(
                        slug_bt,
                        c,
                        v
                    )
                    time = lt.human_to_date(ch.get('time', 'now'))
                    # chapter.id = utils.guid()
                    ch_manga = Manga.query.get(manga_id)
                    ch_manga.chapter_count += 1
                    chapter.lang = ISOLang.query.filter(ISOLang.iso == 'en').first()
                    chapter.updated = time
                    chapter.manga = ch_manga
                    # s = 1000v + c
                    # chapter.sortorder = (1000*float(v)) + float(c)
                    chapter.sortorder = float(c)
                    chapter.slug = slug_chapter
                    DBSession.add(chapter)
                    DBSession.flush()

                chapter = Chapter.query.filter(Chapter.slug == utils.slugist(slug_chapter)).first()

                # batoto
                html = site.get_html(ch.get('url'))
                # # ambil image dan download locally di folder chapter.id
                chapter_info = site.chapter_info(html)
                try:
                    # series info
                    # chapter info and images
                    session = FuturesSession(executor=ThreadPoolExecutor(max_workers=10))

                    for n, page in enumerate(chapter_info.get('pages', [])):
                        ini_chapter = '/'.join([ini_path, chapter.id])
                        print(page)
                        r = session.get(page).result()
                        if r.status_code != 200:
                            raise HtmlError('cannot fetch')
                        ext = page.split('/')[-1].rsplit('.', 1)[-1]
                        path_img = '/'.join([ini_chapter, "{num:03d}.{ext}".format(num=n, ext=ext)])
                        # path_img = '/'.join([ini_chapter, page.split('/')[-1]])
                        print(path_img)
                        if not path.exists(ini_chapter):
                            makedirs(ini_chapter)
                        with open(path_img, "wb") as code:
                            code.write(r.content)
                except ConnectionError as Conn:
                    print(Conn)
                    chapter = Chapter.query.get(chapter.id)
                    DBSession.delete(chapter)
                    shutil.rmtree(ini_chapter)

    except AttributeError as e:
        print(e.message)
    except KeyError as e:
        print(e.message)
    except ValueError as e:
        print(e.message)
Ejemplo n.º 3
0
    def script_to_sys(self, source_url, source_origin, time_str):
        """
        untuk url API mangaeden + id manga[:24]
        python scripts/mangascrapper.py -s https://www.mangaeden.com/api/manga/4e70ea1dc092255ef7004d5c/ -o http://www.mangaeden.com/en/en-manga/fairy-tail/ -t "Aug 31, 2015"

        :param self: Manga API
        :param source_url: url data manga untuk di scrap
        :param source_origin: url sumber manga
        :param time: chapter release terakhir Agust 30, 2015(string time)
        : fairy tail https://www.mangaeden.com/api/manga/4e70ea1dc092255ef7004d5c/
        : naruto http://www.mangaeden.com/api/manga/4e70ea03c092255ef70046f0/
        : one piece http://www.mangaeden.com/api/manga/4e70ea10c092255ef7004aa2/
        : bleach http://www.mangaeden.com/api/manga/4e70e9efc092255ef7004274/
        : nanatsu http://www.mangaeden.com/api/manga/5099a865c092254a2000daf4/
        :return:
        """
        try:
            # print(url)
            # "{url}/api/manga/{id}/".format(url=self.netlocs[2], id=origin_url[:24])
            # https://www.mangaeden.com/api/manga/:id[:24]/
            resp_content = self.get_html(source_url)
            series_info = self.series_info(resp_content)
            time_long = self._parse_update_date(time_str) if isinstance(
                time_str, basestring) else long(time_str)
            # series == manga
            qry = Manga.query
            manga = qry.filter(Manga.slug == utils.slugist("-".join(
                [self.netlocs[4],
                 series_info.get('name', None)]))).first()
            if manga is None:
                with transaction.manager:
                    manga = Manga(
                        self.netlocs[4],
                        utils.HTMLUnscaped(series_info.get('name', u'')), 0,
                        ", ".join(series_info.get('tags', [])),
                        series_info.get('authors', u''),
                        series_info.get('artists', u''),
                        utils.HTMLUnscaped(series_info.get('aka', u'')),
                        utils.HTMLUnscaped(series_info.get('description',
                                                           u'')),
                        1 if 'ongoing' in series_info.get(
                            'status', '').lower() else 2 if 'completed'
                        in series_info.get('status', '').lower() else 0)
                    # manga.id = utils.guid()
                    manga.origin = source_origin
                    manga.chapter_updated = datetime.fromtimestamp(time_long)
                    ext = series_info.get('thumb_url',
                                          '').lower().split('.')[-1]
                    manga.thumb = '.'.join(['cover', ext])
                    manga.category = 'ja'
                    DBSession.add(manga)
                    DBSession.flush()

            manga = qry.filter(Manga.slug == utils.slugist("-".join(
                [self.netlocs[4],
                 series_info.get('name', None)]))).first()
            manga_id, manga_thumb, manga_slug = manga.id, manga.thumb, manga.slug
            ini_path = path.join(path.dirname(path.dirname(__file__)),
                                 '/'.join(['rak', 'manga', manga_id]))

            r = requests.get(series_info.get('thumb_url', ''))
            path_img = '/'.join([ini_path, manga_thumb])
            print(path_img)
            if not path.exists(ini_path):
                makedirs(ini_path)
            with open(path_img, "wb") as code:
                code.write(r.content)

            chapters_info = series_info.get('chapters', [])
            for i, ch in enumerate(chapters_info):
                print(ch.get('name', ''))
                ch_name = str(ch.get(
                    'order',
                    0)) if ch.get('name', '') is None else utils.HTMLUnscaped(
                        ch.get('name', u''))
                # edenapi slug
                slug_bt = ch_name

                # if ':' in slug_bt:
                #     slug_bt = slug_bt.split(':')
                #     slug_bt.pop(0)
                #     slug_bt = '-'.join(slug_bt)

                slug_chapter = ' '.join([manga_slug, slug_bt])
                # cek chapter sudah didownload
                chapter = Chapter.query.filter(
                    Chapter.slug == utils.slugist(slug_chapter)).first()
                if chapter is None:

                    v = utils.parse_number(ch_name, "Vol")
                    v = 0 if v is None else v
                    c = ch.get('order', 0)

                    with transaction.manager:
                        chapter = Chapter(slug_bt, c, v)
                        time = datetime.fromtimestamp(
                            ch.get('time', datetime.now()))
                        # chapter.id = utils.guid()
                        ch_manga = Manga.query.get(manga_id)
                        ch_manga.chapter_count += 1
                        chapter.lang = ISOLang.query.filter(
                            ISOLang.iso == 'en').first()
                        chapter.updated = time
                        chapter.manga = ch_manga
                        # s = 1000v + c
                        # chapter.sortorder = (1000*float(v)) + float(c)
                        chapter.sortorder = float(c)
                        chapter.slug = slug_chapter
                        DBSession.add(chapter)
                        DBSession.flush()

                    chapter = Chapter.query.filter(
                        Chapter.slug == utils.slugist(slug_chapter)).first()

                    # eden
                    headers = {
                        'content-type': 'application/json; charset=utf8'
                    }
                    html = self.get_html(ch.get('url'), headers=headers)
                    # # ambil image dan download locally di folder chapter.id
                    chapter_info = self.chapter_info(html)
                    try:
                        # series info
                        # chapter info and images
                        session = FuturesSession(executor=ThreadPoolExecutor(
                            max_workers=10))

                        for n, page in enumerate(chapter_info.get('pages',
                                                                  [])):
                            ini_chapter = '/'.join([ini_path, chapter.id])
                            print(page)
                            r = session.get(page).result()
                            if r.status_code != 200:
                                print('continue chapter')
                                continue
                                # raise HtmlError('cannot fetch')
                            # path_img = '/'.join([ini_chapter, page.split('/')[-1]])
                            ext = page.split('/')[-1].rsplit('.', 1)[-1]
                            path_img = '/'.join([
                                ini_chapter, "{num:03d}.{ext}".format(num=n,
                                                                      ext=ext)
                            ])
                            print(path_img)
                            if not path.exists(ini_chapter):
                                makedirs(ini_chapter)
                            with open(path_img, "wb") as code:
                                code.write(r.content)
                    except ConnectionError as Conn:
                        print(Conn)
                        chapter = Chapter.query.get(chapter.id)
                        DBSession.delete(chapter)
                        shutil.rmtree(ini_chapter)

        except AttributeError as e:
            print(e.message)
        except KeyError as e:
            print(e.message)
        except ValueError as e:
            print(e.message)
Ejemplo n.º 4
0
    def build_to_sys(self, site, source):
        try:
            url = source.get('last_url')
            # print(url)
            resp_content = site.get_html(url)
            series_info = site.series_info(resp_content)

            # series == manga
            qry = Manga.query
            manga = qry.filter(Manga.slug == utils.slugist("-".join(
                [site.netlocs[4],
                 series_info.get('name', None)]))).first()
            if manga is None:
                with transaction.manager:
                    manga = Manga(
                        site.netlocs[4],
                        utils.HTMLUnscaped(series_info.get('name', u'')), 0,
                        ", ".join(series_info.get('tags', [])),
                        series_info.get('authors', u''),
                        series_info.get('artists', u''),
                        utils.HTMLUnscaped(series_info.get('aka', u'')),
                        utils.HTMLUnscaped(series_info.get('description',
                                                           u'')),
                        1 if 'ongoing' in series_info.get(
                            'status', '').lower() else 2 if 'completed'
                        in series_info.get('status', '').lower() else 0)
                    # manga.id = utils.guid()
                    manga.origin = source.get('origin', '')
                    manga.chapter_updated = datetime.fromtimestamp(
                        source.get('time', 'now'))
                    ext = series_info.get('thumb_url',
                                          '').lower().rsplit('.', 1)[-1]
                    manga.thumb = '.'.join(['cover', ext])
                    manga.category = 'ja'
                    DBSession.add(manga)
                    DBSession.flush()

            manga = qry.filter(Manga.slug == utils.slugist("-".join(
                [site.netlocs[4],
                 series_info.get('name', None)]))).first()
            manga_id, manga_thumb, manga_slug = manga.id, manga.thumb, manga.slug
            ini_path = path.join(path.dirname(path.dirname(__file__)),
                                 '/'.join(['rak', 'manga', manga_id]))

            r = requests.get(series_info.get('thumb_url', ''))
            path_img = '/'.join([ini_path, manga_thumb])
            print(path_img)
            if not path.exists(ini_path):
                makedirs(ini_path)
            with open(path_img, "wb") as code:
                code.write(r.content)

            chapters_info = series_info.get('chapters', [])
            for i, ch in enumerate(chapters_info):
                print(ch.get('name', ''))
                ch_name = str(ch.get(
                    'order',
                    0)) if ch.get('name', '') is None else utils.HTMLUnscaped(
                        ch.get('name', u''))
                # edenapi slug
                slug_bt = ch_name

                # if ':' in slug_bt:
                #     slug_bt = slug_bt.split(':')
                #     slug_bt.pop(0)
                #     slug_bt = '-'.join(slug_bt)

                slug_chapter = ' '.join([manga_slug, slug_bt])
                # cek chapter sudah didownload
                chapter = Chapter.query.filter(
                    Chapter.slug == utils.slugist(slug_chapter)).first()
                if chapter is None:

                    v = utils.parse_number(ch_name, "Vol")
                    v = 0 if v is None else v
                    c = ch.get('order', 0)

                    with transaction.manager:
                        chapter = Chapter(slug_bt, c, v)
                        time = datetime.fromtimestamp(
                            ch.get('time', datetime.now()))
                        # chapter.id = utils.guid()
                        ch_manga = Manga.query.get(manga_id)
                        ch_manga.chapter_count += 1
                        chapter.lang = ISOLang.query.filter(
                            ISOLang.iso == 'en').first()
                        chapter.updated = time
                        chapter.manga = ch_manga
                        # s = 1000v + c
                        # chapter.sortorder = (1000*float(v)) + float(c)
                        chapter.sortorder = float(c)
                        chapter.slug = slug_chapter
                        DBSession.add(chapter)
                        DBSession.flush()

                    chapter = Chapter.query.filter(
                        Chapter.slug == utils.slugist(slug_chapter)).first()

                    # eden
                    headers = {
                        'content-type': 'application/json; charset=utf8'
                    }
                    html = site.get_html(ch.get('url'), headers=headers)
                    # # ambil image dan download locally di folder chapter.id
                    chapter_info = site.chapter_info(html)
                    try:
                        # series info
                        # chapter info and images
                        session = FuturesSession(executor=ThreadPoolExecutor(
                            max_workers=10))

                        for n, page in enumerate(chapter_info.get('pages',
                                                                  [])):
                            ini_chapter = '/'.join([ini_path, chapter.id])
                            print(page)
                            r = session.get(page).result()
                            if r.status_code != 200:
                                # raise HtmlError('cannot fetch')
                                continue
                            # path_img = '/'.join([ini_chapter, page.split('/')[-1]])
                            ext = page.split('/')[-1].rsplit('.', 1)[-1]
                            path_img = '/'.join([
                                ini_chapter, "{num:03d}.{ext}".format(num=n,
                                                                      ext=ext)
                            ])
                            print(path_img)
                            if not path.exists(ini_chapter):
                                makedirs(ini_chapter)
                            with open(path_img, "wb") as code:
                                code.write(r.content)

                    except ConnectionError as Conn:
                        print(Conn)
                        chapter = Chapter.query.get(chapter.id)
                        DBSession.delete(chapter)
                        shutil.rmtree(ini_chapter)

        except AttributeError as e:
            print(e.message)
        except KeyError as e:
            print(e.message)
        except ValueError as e:
            print(e.message)
Ejemplo n.º 5
0
def build_to_sys(site, source):
    try:
        lt = LocalDateTime.now()
        """
            dict(
                thumb=self.netlocs[3] + "/".join([image_thumb.split('/')[-2], image_thumb.split('/')[-1]]),
                origin=origin_url,
                name=title,
                # time=self.parseDate.human_to_date_stamp(time),
                time=time,
                last_chapter=last_title,
                last_url=last_url,
                site=self.netlocs[1]
            )
        """
        # list latest
        # scrap series info
        # url = "/".join([site.netlocs[2], source.get('origin')])
        url = source.get('origin')
        # print(url)
        respcontent = site.get_html(url)
        series_info = site.series_info(respcontent)

        # series == manga
        qry = Manga.query
        manga = qry.filter(Manga.slug == utils.slugist(
            "-".join([site.netlocs[4], source.get('name', None)])
        )).first()
        if manga is None:
            with transaction.manager:
                manga = Manga(
                    site.netlocs[4],
                    series_info.get('name', []),
                    0,
                    ", ".join(series_info.get('tags', [])),
                    ", ".join(series_info.get('authors', [])),
                    ", ".join(series_info.get('artists', [])),
                    ', '.join(series_info.get('aka', [])),
                    ",".join(series_info.get('description', None)),
                    1 if 'ongoing' in series_info.get('status', '').lower()
                    else 2 if 'completed' in series_info.get('status', '').lower() else 0
                )
                # manga.id = utils.guid()
                manga.origin = source.get('origin', '')
                manga.chapter_updated = lt.from_time_stamp(source.get('time', 'now'))
                ext = series_info.get('thumb_url', '').lower().rsplit('.', 1)[-1]
                manga.thumb = '.'.join(['cover', ext])
                manga.category = 'ja'
                DBSession.add(manga)
                DBSession.flush()

        manga = qry.filter(Manga.slug == utils.slugist(
            "-".join([site.netlocs[4], source.get('name', None)])
        )).first()
        manga_id, manga_thumb, manga_slug = manga.id, manga.thumb, manga.slug
        ini_path = path.join(
            path.dirname(
                path.dirname(__file__)
            ),
            '/'.join(['rak', 'manga', manga_id])
        )

        r = requests.get(source.get('thumb'))
        path_img = '/'.join([ini_path, manga_thumb])
        print(path_img)
        if not path.exists(ini_path):
            makedirs(ini_path)
        with open(path_img, "wb") as code:
            code.write(r.content)

        chapters_info = series_info.get('chapters', [])
        for i, ch in enumerate(chapters_info[0:2]):
            print(ch.get('name', ''))
            # batoto slug
            slug_bt = ch.get('name', '')

            if ':' in slug_bt:
                slug_bt = slug_bt.split(':')
                slug_bt.pop(0)
                slug_bt = '-'.join(slug_bt)

            slug_chapter = ' '.join([manga_slug, slug_bt])
            # cek chapter sudah didownload
            chapter = Chapter.query.filter(Chapter.slug == utils.slugist(slug_chapter)).first()
            if chapter is None:

                v = utils.parse_number(ch.get('name', ''), "Vol")
                v = 0 if v is None else v
                c = utils.parse_number(ch.get('name', ''), "Ch")
                c = 0 if c is None else c

                with transaction.manager:
                    chapter = Chapter(
                        slug_bt,
                        c,
                        v
                    )
                    time = lt.human_to_date(ch.get('time', 'now'))
                    # chapter.id = utils.guid()
                    ch_manga = Manga.query.get(manga_id)
                    ch_manga.chapter_count += 1
                    chapter.lang = ISOLang.query.filter(ISOLang.iso == 'en').first()
                    chapter.updated = time
                    chapter.manga = ch_manga
                    # s = 1000v + c
                    # chapter.sortorder = (1000*float(v)) + float(c)
                    chapter.sortorder = float(c)
                    chapter.slug = slug_chapter
                    DBSession.add(chapter)
                    DBSession.flush()

                chapter = Chapter.query.filter(Chapter.slug == utils.slugist(slug_chapter)).first()

                # batoto
                html = site.get_html(ch.get('url'))
                # # ambil image dan download locally di folder chapter.id
                chapter_info = site.chapter_info(html)
                try:
                    # series info
                    # chapter info and images
                    session = FuturesSession(executor=ThreadPoolExecutor(max_workers=10))

                    for page in chapter_info.get('pages', []):
                        ini_chapter = '/'.join([ini_path, chapter.id])
                        print(page)
                        r = session.get(page).result()
                        if r.status_code != 200:
                            raise HtmlError('cannot fetch')
                        path_img = '/'.join([ini_chapter, page.split('/')[-1]])
                        print(path_img)
                        if not path.exists(ini_chapter):
                            makedirs(ini_chapter)
                        with open(path_img, "wb") as code:
                            code.write(r.content)
                except ConnectionError as Conn:
                    print(Conn)
                    chapter = Chapter.query.get(chapter.id)
                    DBSession.delete(chapter)
                    shutil.rmtree(ini_chapter)

    except AttributeError as e:
        print(e.message)
    except KeyError as e:
        print(e.message)
    except ValueError as e:
        print(e.message)