Code example #1
def get_video(url, session, cw):
    print_ = get_print(cw)
    html = downloader.read_html(url, session=session)
    if "document.location.href='https://login." in html:
        raise errors.LoginRequired()
    soup = Soup(html)
    url_thumb = soup.find('meta', {'property': 'og:image'}).attrs['content']
    print_('url_thumb: {}'.format(url_thumb))
    params = re.find('VodParameter *= *[\'"]([^\'"]+)[\'"]', html, err='No VodParameter')
    params += '&adultView=ADULT_VIEW&_={}'.format(int(time()*1000))
    url_xml = 'http://stbbs.afreecatv.com:8080/api/video/get_video_info.php?' + params
    print(url_xml)
    html = downloader.read_html(url_xml, session=session, referer=url)
    soup = Soup(html)
    if '<flag>PARTIAL_ADULT</flag>' in html:
        raise errors.LoginRequired()
    title = soup.find('title').string.strip()
    urls_m3u8 = re.findall(r'https?://[^>]+playlist\.m3u8', html)
    if not urls_m3u8:
        raise Exception('no m3u8')
    streams = []
    for url_m3u8 in urls_m3u8:
        try:
            stream = _get_stream(url_m3u8)
        except Exception as e:
            print(e)
            continue #2193
        streams.append(stream)
    for stream in streams[1:]:
        streams[0] += stream
    stream = streams[0]
    id = url.split('/')[-1].split('?')[0].split('#')[0]
    video = Video(stream, url, id, title, url_thumb)
    return video
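
All of these examples raise errors.LoginRequired and most call a project-specific re.find helper (note the err= keyword, which the standard re module does not accept). A minimal sketch of what those helpers might look like — hypothetical, shown only to make the examples readable:

import re

class LoginRequired(Exception):
    pass  # hypothetical stand-in for the project's errors.LoginRequired

def find(pattern, string, err=None):
    # Plausible behavior of the project's re.find wrapper: return the first
    # capture group (or the whole match), raising when err= is given.
    m = re.search(pattern, string)
    if m:
        return m.group(1) if m.groups() else m.group(0)
    if err:
        raise Exception(err)
    return None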
Code example #2
def get_sd(url, session=None, html=None, cw=None, wait=True):
    print_ = get_print(cw)

    if html:
        soup = Soup(html)
        check_error(soup, cw, wait)
        for script in soup.findAll('script'):
            j = get_j(script)
            if j:
                break
        else:
            raise Exception('no _sharedData!!')
    else:
        for try_ in range(4):
            _wait(cw)
            html = read_html(url, session, cw)
            soup = Soup(html)
            check_error(soup, cw, wait)
            for script in soup.findAll('script'):
                j = get_j(script)
                if j:
                    break
            else:
                continue
            break
        else:
            raise Exception('no _sharedData')
    for script in soup.findAll('script'):
        s = script.string
        if s and 'window.__additionalDataLoaded(' in s:
            s = cut_pair(s)
            j_add = json.loads(s)
            try:
                j['entry_data']['PostPage'][0].update(j_add)
            except Exception:
                j['entry_data']['ProfilePage'][0].update(j_add)  #2900

    # Challenge
    challenge = j['entry_data'].get('Challenge')
    if challenge:
        try:
            for cont in challenge[0]['extraData']['content']:
                title = cont.get('title')
                if title:
                    break
            else:
                raise Exception('no title')
        except Exception:
            title = 'Err'
        raise errors.LoginRequired(title)

    # LoginAndSignupPage
    login = j['entry_data'].get('LoginAndSignupPage')
    if login:
        raise errors.LoginRequired()

    return j
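
Example #2 leans on Python's for/else twice: the else clause of a for loop runs only when the loop finishes without hitting break, which is what makes "retry a few times, then give up" read cleanly. A standalone illustration of the pattern:

def fetch_with_retries(fetch, tries=4):
    for _ in range(tries):
        result = fetch()
        if result is not None:
            break              # success: the else clause is skipped
    else:
        raise Exception('all retries failed')  # runs only if no break happened
    return result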
Code example #3
def get_imgs(url, title, cw=None):
    print_ = get_print(cw)
    imgs = []

    for p in range(1, 1001):
        url = setPage(url, p)
        print_(url)
        soup = read_soup(url)

        view = soup.find('div', class_='photos-list')
        if view is None:
            if p == 1:
                raise errors.LoginRequired()
            else:
                break # Guest user
        for img in view.findAll('img'):
            img = img.attrs['data-src']
            img = Image(img, url, len(imgs))
            imgs.append(img)
        
        pgn = soup.find('ul', class_='pagination')
        ps = [getPage(a.attrs['href']) for a in pgn.findAll('a')] if pgn else []
        if not ps or p >= max(ps):
            print('max p')
            break

        msg = '{} {}  ({} / {})'.format(tr_('읽는 중...'), title, p, max(ps))
        if cw:
            cw.setTitle(msg)
        else:
            print(msg)

    return imgs
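
setPage and getPage are project helpers that rewrite and read the page number in a URL; they are not shown here. A plausible stdlib-only sketch (the helper names and the page query key are assumptions):

from urllib.parse import parse_qs, urlencode, urlparse, urlunparse

def setPage(url, p):
    # Replace (or add) the page number in the URL's query string.
    parts = urlparse(url)
    qs = parse_qs(parts.query)
    qs['page'] = [str(p)]
    return urlunparse(parts._replace(query=urlencode(qs, doseq=True)))

def getPage(url):
    # Read the page number back out; default to 1 when absent.
    return int(parse_qs(urlparse(url).query).get('page', ['1'])[0])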
Code example #4
def get_id(url, cw=None):
    for try_ in range(2):
        try:
            res = clf2.solve(url, cw=cw, f=_get_page_id)
            html = res['html']
            soup = Soup(html)
            if soup.find('div', class_='gn_login'):
                raise errors.LoginRequired()
            oid = _get_page_id(html)
            if not oid:
                raise Exception('no page_id')
            uids = re.findall(r'uid=([0-9]+)', html)
            uid = max(set(uids), key=uids.count)
            name = re.find(r"CONFIG\['onick'\]='(.+?)'", html) or soup.find(
                'div',
                class_=lambda c: c and c.startswith('ProfileHeader_name')
            ).text.strip()
            if not name:
                raise Exception('no name')
            break
        except errors.LoginRequired:
            raise
        except Exception as e:
            e_ = e
            print(e)
    else:
        raise e_
    return uid, oid, name
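
The line uid = max(set(uids), key=uids.count) is the usual "most frequent element" idiom: it picks the uid that occurs most often in the scraped HTML. collections.Counter expresses the same thing more directly:

from collections import Counter

uids = ['123', '456', '123', '123']
uid = Counter(uids).most_common(1)[0][0]   # '123', the modal value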
Code example #5
def get_soup_session(url, cw=None):
    print_ = get_print(cw)
    session = Session()
    res = clf2.solve(url, session=session, cw=cw)
    print_('{} -> {}'.format(url, res['url']))
    if res['url'].rstrip('/') == 'https://welovemanga.one':
        raise errors.LoginRequired()
    return Soup(res['html']), session
Code example #6
    def get(self, url):
        print_ = get_print(self.cw)
        if self._url:
            return self._url
        ydl = ytdl.YoutubeDL(cw=self.cw)
        try:
            info = ydl.extract_info(url)
        except Exception as e:
            ex = type(ytdl.get_extractor(url))(ydl)
            _download_info = getattr(ex, '_download_info', None)
            if _download_info is not None:
                vod_id = ex._match_id(url)
                info = _download_info(vod_id)
                print_(info)
            else:
                if 'HTTPError 403' in str(e):
                    raise errors.LoginRequired()
                raise

        def print_video(video):
            print_('[{}] [{}] [{}] {}'.format(video['format_id'],
                                              video.get('height'),
                                              video.get('tbr'), video['url']))

        videos = [video for video in info['formats'] if video.get('height')]

        videos = sorted(videos,
                        key=lambda video:
                        (video.get('height', 0), video.get('tbr', 0)),
                        reverse=True)

        for video in videos:
            print_video(video)

        for video in videos:
            if video.get('height', 0) <= get_resolution():  #3723
                video_best = video
                break
        else:
            video_best = videos[-1]
        print_video(video_best)

        video = video_best['url']

        ext = get_ext(video)
        self.title = info['title']
        id = info['display_id']

        if ext.lower() == '.m3u8':
            video = M3u8_stream(video, n_thread=4, alter=alter)
            ext = '.mp4'
        self.filename = format_filename(self.title, id, ext)
        self.url_thumb = info['thumbnail']
        self.thumb = BytesIO()
        downloader.download(self.url_thumb, buffer=self.thumb)
        self._url = video
        return self._url
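
The selection loop above walks the formats from best to worst and takes the first one whose height fits under the user's resolution cap, falling back to the smallest format when nothing fits. A self-contained sketch with dummy data (get_resolution is assumed to return a maximum height):

formats = sorted([{'height': 1080, 'tbr': 4000},
                  {'height': 720, 'tbr': 2500},
                  {'height': 480, 'tbr': 1000}],
                 key=lambda f: (f['height'], f['tbr']), reverse=True)

max_height = 720                 # stand-in for get_resolution()
for f in formats:
    if f['height'] <= max_height:
        best = f                 # first (i.e. best) format under the cap
        break
else:
    best = formats[-1]           # nothing fits: take the smallest
print(best)                      # {'height': 720, 'tbr': 2500}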
Code example #7
def get_title(soup):
    html = str(soup)
    name = re.find(r'"__isProfile":"Page","name":(".*?")', html) or re.find(
        r'"name":(".*?")', html)
    if not name:
        gc = soup.find('div', id='globalContainer')
        if gc and gc.find('form', id='login_form'):
            raise errors.LoginRequired()
        raise Exception('no name')
    title = json.loads(name)
    return title
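
The regex deliberately captures the name with its surrounding quotes so that json.loads can decode it, turning \uXXXX escapes (including surrogate pairs) into real characters:

import json

name = '"Caf\\u00e9 \\ud83d\\ude00"'   # captured text, quotes included
print(json.loads(name))               # Café 😀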
Code example #8
    def login_reqd_func(self, *args, **kwargs):
        if isinstance(self, reddit.Reddit):
            user = self.user
            modhash = self.modhash
        else:
            user = self.reddit_session.user
            modhash = self.reddit_session.modhash

        if user is None or modhash is None:
            raise errors.LoginRequired('"%s" requires login.' % func.__name__)
        else:
            return func(self, *args, **kwargs)
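
This snippet is the inner function of a decorator: func is a free variable closed over from the enclosing definition. The surrounding code is presumably shaped roughly like this (a sketch, not the project's actual source):

from functools import wraps

def require_login(func):
    @wraps(func)
    def login_reqd_func(self, *args, **kwargs):
        ...  # body as shown above, ending in return func(self, *args, **kwargs)
    return login_reqd_func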
Code example #9
def get_session(url, cw=None):
    #res = clf2.solve(url, cw=cw)
    #return res['session']
    session = Session()
    sessionid = session.cookies._cookies.get('.instagram.com', {}).get('/', {}).get('sessionid')
    if sessionid is None or sessionid.is_expired():
        raise errors.LoginRequired()
    session.headers['User-Agent'] = downloader.hdr['User-Agent']
    if not session.cookies.get('csrftoken', domain='.instagram.com'):
        csrf_token = generate_csrf_token()
        print('csrf:', csrf_token)
        session.cookies.set("csrftoken", csrf_token, domain='.instagram.com')
    return session
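
session.cookies._cookies is a private requests internal; the example walks it to reach the Cookie object itself so it can call is_expired(). When only the value is needed, requests has a public, domain-scoped accessor:

import requests

session = requests.Session()
# Returns the cookie's value (or None) — but not the Cookie object,
# so no is_expired(); that is why the example above uses the private walk.
sid = session.cookies.get('sessionid', domain='.instagram.com', path='/')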
Code example #10
def check_error(soup, cw, wait):
    print_ = get_print(cw)

    if len(soup.html) < 1000:  #4014
        raise errors.LoginRequired(soup.html)

    err = soup.find('div', class_='error-container')
    if err:
        err = err.text.strip()
        if wait:
            print_('err: {}'.format(err))
            sleep(60 * 30, cw)
        else:
            raise Exception(err)
Code example #11
def extract_info(url, cw=None):
    print_ = get_print(cw)
    ydl = ytdl.YoutubeDL(cw=cw)
    try:
        info = ydl.extract_info(url)
    except Exception as e:
        ex = type(ytdl.get_extractor(url))(ydl)
        _download_info = getattr(ex, '_download_info', None)
        if _download_info is not None:
            vod_id = ex._match_id(url)
            info = _download_info(vod_id)
            print_(info)
        else:
            if 'HTTPError 403' in str(e):
                raise errors.LoginRequired()
            raise
    return info
Code example #12
    def get_albums(page):
        url = 'https://photo.weibo.com/albums/get_all?uid={}&page={}&count=20&__rnd={}'.format(
            uid, page, int(time() * 1000))
        referer = 'https://photo.weibo.com/{}/albums?rd=1'.format(uid)
        html = downloader.read_html(url, referer, session=session)
        if '<title>新浪通行证</title>' in html:
            raise errors.LoginRequired()
        j = json.loads(html)
        data = j['data']
        albums = []
        for album in data['album_list']:
            id = album['album_id']
            type = album['type']
            album = Album(id, type)
            albums.append(album)

        return albums
Code example #13
    def call(self, path, qs, default_qs=True):
        if default_qs:
            qs_new = qs
            qs = self._qs.copy()
            qs.update(qs_new)
        url = self._url_base + path
        url = update_url_query(url, qs)
        r = self.session.get(url, headers=self._hdr)
        data = r.json()
        errs = data.get('errors', [])
        if errs:
            code = int(errs[0]['code'])
            if code == 0:
                raise Exception('Not found')
            elif code == 4012:
                raise errors.LoginRequired(errs[0]['detail'])
        r.raise_for_status()
        return data['response']
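
update_url_query merges the default query parameters with the per-call ones and rebuilds the URL. A plausible stdlib-only sketch of such a helper (the real one may differ; the example URL is made up):

from urllib.parse import parse_qs, urlencode, urlparse, urlunparse

def update_url_query(url, qs):
    # Merge qs into the URL's existing query string.
    parts = urlparse(url)
    merged = parse_qs(parts.query)
    merged.update({k: [str(v)] for k, v in qs.items()})
    return urlunparse(parts._replace(query=urlencode(merged, doseq=True)))

print(update_url_query('https://api.example.com/v1/posts?limit=20', {'page': 2}))
# https://api.example.com/v1/posts?limit=20&page=2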
Code example #14
def get_imgs(url, title, cw=None):
    print_ = get_print(cw)
    imgs = []

    for p in range(1, 1001):
        url = setPage(url, p)
        print_(url)
        for try_ in range(4):
            try:
                html = downloader.read_html(
                    url, user_agent=downloader.hdr['User-Agent'])
                #sleep(1)
                break
            except Exception as e:
                e_ = e
                print(e)
        else:
            raise e_
        soup = Soup(html)

        view = soup.find('div', class_='photos-list')
        if view is None:
            if p == 1:
                raise errors.LoginRequired()
            else:
                break  # Guest user
        for img in view.findAll('img'):
            img = img.attrs['data-src']
            img = Image(img, url, len(imgs))
            imgs.append(img)

        pgn = soup.find('ul', class_='pagination')
        ps = [getPage(a.attrs['href']) for a in pgn.findAll('a')] if pgn else []
        if not ps or p >= max(ps):
            print('max p')
            break

        msg = '{} {}  ({} / {})'.format(tr_('읽는 중...'), title, p, max(ps))
        if cw:
            cw.setTitle(msg)
        else:
            print(msg)

    return imgs
Code example #15
def get_id(url, cw=None):
    for try_ in range(2):
        try:
            res = clf2.solve(url, cw=cw, f=_get_page_id)
            html = res['html']
            soup = Soup(html)
            if soup.find('div', class_='gn_login'):
                raise errors.LoginRequired()
            m = _get_page_id(html)
            if not m:
                raise Exception('no page_id')
            oid = m.groups()[0]
            uids = re.findall('uid=([0-9]+)', html)
            uid = max(set(uids), key=uids.count)
            name = re.findall(r"CONFIG\['onick'\]='(.+?)'", html)[0]
            break
        except errors.LoginRequired:
            raise
        except Exception as e:
            e_ = e
            print(e)
    else:
        raise e_
    return uid, oid, name
Code example #16
    def init(self):
        self.session = Session()  # 1791
        if 'pornhubpremium.com' in self.url.lower() and\
           not is_login(self.session, self.cw):
            raise errors.LoginRequired()
Code example #17
def checkLogin(session):
    c = session.cookies._cookies.get('.weibo.com', {}).get('/', {}).get('SUBP')
    if not c or c.is_expired():
        raise errors.LoginRequired()
Code example #18
def my_id():
    sid = Session().cookies.get('PHPSESSID', domain='.pixiv.net')
    if not sid:
        raise errors.LoginRequired()
    return re.find(r'^([0-9]+)', sid, err='no userid')
Code example #19
def get_info(url, cw=None, depth=0):
    print_ = get_print(cw)
    api = PixivAPI()
    info = {}
    imgs = []

    if utils.ui_setting:
        ugoira_ext = [None, '.gif', '.webp',
                      '.png'][utils.ui_setting.ugoira_convert.currentIndex()]
    else:
        ugoira_ext = None
    if utils.ui_setting:
        format_ = compatstr(utils.ui_setting.pixivFormat.currentText())
    else:
        format_ = 'id_ppage'

    max_pid = get_max_range(cw)

    if api.illust_id(url):  # Single post
        id_ = api.illust_id(url)
        data = api.illust(id_)
        login = '******' not in data
        if FORCE_LOGIN and not login:  #
            raise errors.LoginRequired()
        if data['xRestrict'] and not login:
            raise errors.LoginRequired('R-18')
        info['artist'] = data['userName']
        info['artist_id'] = data['userId']
        info['raw_title'] = data['illustTitle']
        info['title'] = '{} (pixiv_illust_{})'.format(info['raw_title'], id_)
        info['create_date'] = parse_time(data['createDate'])
        tags_illust = set(tag['tag'] for tag in data['tags']['tags'])

        if tags_matched(tags_illust, cw):
            if data['illustType'] == 2:  # ugoira
                data = api.ugoira_meta(id_)
                ugoira = {
                    'ext': ugoira_ext,
                    'delay': [frame['delay'] for frame in data['frames']],
                }
                img = Image(data['originalSrc'],
                            url,
                            id_,
                            0,
                            format_,
                            info,
                            cw,
                            ugoira=ugoira)
                imgs.append(img)
            else:
                data = api.pages(id_)
                for img in data:
                    img = Image(img['urls']['original'], url, id_, len(imgs),
                                format_, info, cw)
                    imgs.append(img)
        else:
            print('tags mismatched')
    elif '/bookmarks/' in url or 'bookmark.php' in url:  # User bookmarks
        id_ = api.user_id(url)
        if id_ is None:  #
            id_ = my_id()
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_bmk_{})'.format(info['artist'],
                                                   info['artist_id'])
        ids = []
        ids_set = set()
        offset = 0
        while len(ids) < max_pid:
            data = api.bookmarks(id_, offset)
            c = 0
            for id in [work['id'] for work in data['works']]:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            offset += LIMIT
            if depth == 0:
                check_alive(cw)
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    elif '/tags/' in url or 'search.php' in url:  # Search
        q = unquote(
            re.find(r'/tags/([^/]+)', url)
            or re.find('[?&]word=([^&]*)', url, err='no tags'))
        info['title'] = '{} (pixiv_search_{})'.format(q, q.replace(' ', '+'))
        qs = query_url(url)
        order = qs.get('order', ['date_d'])[0]
        mode = qs.get('mode', ['all'])[0]
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            data = api.search(q, order, mode, p=p)
            c = 0
            for id in [
                    illust['id'] for illust in data['illustManga']['data']
                    if 'id' in illust
            ]:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    elif 'bookmark_new_illust.php' in url or 'bookmark_new_illust_r18.php' in url:  # Newest works: Following
        r18 = 'bookmark_new_illust_r18.php' in url
        id_ = my_id()
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_following_{}{})'.format(
            info['artist'], 'r18_' if r18 else '', info['artist_id'])
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            c = 0
            for id in api.following(p, r18=r18):
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    elif api.user_id(url):  # User illusts
        id_ = api.user_id(url)
        process_user(id_, info, api)
        data = api.profile(id_)
        info['title'] = '{} (pixiv_{})'.format(info['artist'],
                                               info['artist_id'])
        ids = []
        for illusts in [data['illusts'], data['manga']]:
            if not illusts:
                continue
            ids += list(illusts.keys())
        ids = sorted(ids, key=int, reverse=True)
        process_ids(ids[:max_pid], info, imgs, cw, depth)
    else:
        raise NotImplementedError()
    info['imgs'] = imgs[:max_pid]

    return info
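
Each of the paging loops above follows the same scheme: keep a set of seen ids, count how many ids a page contributed, and stop as soon as a page adds nothing new — which guards against APIs that repeat items across pages. The pattern, distilled (fetch_page is a stand-in for the various api.* calls):

def collect_ids(fetch_page, max_n):
    ids, seen, p = [], set(), 1
    while len(ids) < max_n:
        new = 0
        for id_ in fetch_page(p):       # page p -> iterable of ids
            if id_ in seen:
                continue
            seen.add(id_)
            ids.append(id_)
            new += 1
        if not new:                     # a page with nothing new ends the walk
            break
        p += 1
    return ids[:max_n]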
Code example #20
def get_info(url, cw=None, depth=0, tags_add=None):
    print_ = get_print(cw)
    api = PixivAPI()
    info = {}
    imgs = []

    if utils.ui_setting:
        ugoira_ext = [None, '.gif', '.webp',
                      '.png'][utils.ui_setting.ugoira_convert.currentIndex()]
    else:
        ugoira_ext = None
    if utils.ui_setting:
        format_ = compatstr(utils.ui_setting.pixivFormat.currentText())
    else:
        format_ = 'id_ppage'

    max_pid = get_max_range(cw)

    if api.illust_id(url):  # Single post
        id_ = api.illust_id(url)
        data = api.illust(id_)
        login = '******' not in data
        if FORCE_LOGIN and not login:  #
            raise errors.LoginRequired()
        if data['xRestrict'] and not login:
            raise errors.LoginRequired('R-18')
        info['artist'] = data['userName']
        info['artist_id'] = data['userId']
        info['raw_title'] = data['illustTitle']
        info['title'] = '{} (pixiv_illust_{})'.format(info['raw_title'], id_)
        info['create_date'] = parse_time(data['createDate'])
        tags_illust = set(tag['tag'] for tag in data['tags']['tags'])

        if tags_matched(tags_illust, tags_add, cw):
            if data['illustType'] == 2:  # ugoira
                data = api.ugoira_meta(id_)
                ugoira = {
                    'ext': ugoira_ext,
                    'delay': [frame['delay'] for frame in data['frames']],
                }
                img = Image(data['originalSrc'],
                            url,
                            id_,
                            0,
                            format_,
                            info,
                            cw,
                            ugoira=ugoira)
                imgs.append(img)
            else:
                data = api.pages(id_)
                for img in data:
                    img = Image(img['urls']['original'], url, id_, len(imgs),
                                format_, info, cw)
                    imgs.append(img)
        else:
            print('tags mismatched')
    elif '/bookmarks/' in url or 'bookmark.php' in url:  # User bookmarks
        id_ = api.user_id(url)
        if id_ is None:  #
            id_ = my_id()
        if id_ == my_id():
            rests = ['show', 'hide']
        else:
            rests = ['show']
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_bmk_{})'.format(info['artist'],
                                                   info['artist_id'])
        ids = []
        ids_set = set()
        for rest in rests:
            offset = 0
            while len(ids) < max_pid:
                data = api.bookmarks(id_, offset, rest=rest)
                c = 0
                for id in [work['id'] for work in data['works']]:
                    if id in ids_set:
                        continue
                    ids_set.add(id)
                    ids.append(id)
                    c += 1
                if not c:
                    break
                offset += LIMIT
                if depth == 0:
                    check_alive(cw)
        process_ids(ids, info, imgs, cw, depth)
    elif '/tags/' in url or 'search.php' in url:  # Search
        q = unquote(
            re.find(r'/tags/([^/]+)', url)
            or re.find('[?&]word=([^&]*)', url, err='no tags'))
        info['title'] = '{} (pixiv_search_{})'.format(q, q.replace(' ', '+'))
        qs = query_url(url)
        order = qs.get('order', ['date_d'])[0]
        mode = qs.get('mode', ['all'])[0]
        s_mode = qs.get('s_mode', ['s_tag_full'])[0]
        scd = qs.get('scd', [None])[0]
        ecd = qs.get('ecd', [None])[0]
        type_ = qs.get('type', ['all'])[0]
        wlt = qs.get('wlt', [None])[0]
        wgt = qs.get('wgt', [None])[0]
        hlt = qs.get('hlt', [None])[0]
        hgt = qs.get('hgt', [None])[0]
        blt = qs.get('blt', [None])[0]
        bgt = qs.get('bgt', [None])[0]
        ratio = qs.get('ratio', [None])[0]
        tool = qs.get('tool', [None])[0]
        logs = [
            'order: {}'.format(order),
            'mode: {}'.format(mode),
            's_mode: {}'.format(s_mode),
            'scd / ecd: {} / {}'.format(scd, ecd),
            'type: {}'.format(type_),
            'wlt / wgt: {} / {}'.format(wlt, wgt),
            'hlt / hgt: {} / {}'.format(hlt, hgt),
            'blt / bgt: {} / {}'.format(blt, bgt),
            'ratio: {}'.format(ratio),
            'tool: {}'.format(tool),
        ]
        print_('\n'.join(logs))
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            data = api.search(q,
                              order,
                              mode,
                              p=p,
                              s_mode=s_mode,
                              scd=scd,
                              ecd=ecd,
                              type_=type_,
                              wlt=wlt,
                              wgt=wgt,
                              hlt=hlt,
                              hgt=hgt,
                              blt=blt,
                              bgt=bgt,
                              ratio=ratio,
                              tool=tool)
            c = 0
            for id in [
                    illust['id'] for illust in data['illustManga']['data']
                    if 'id' in illust
            ]:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids, info, imgs, cw, depth)
    elif 'bookmark_new_illust.php' in url or 'bookmark_new_illust_r18.php' in url:  # Newest works: Following
        r18 = 'bookmark_new_illust_r18.php' in url
        id_ = my_id()
        process_user(id_, info, api)
        info['title'] = '{} (pixiv_following_{}{})'.format(
            info['artist'], 'r18_' if r18 else '', info['artist_id'])
        ids = []
        ids_set = set()
        p = 1
        while len(ids) < max_pid:
            data = api.following(p, r18=r18)
            c = 0
            for id in data['page']['ids']:
                if id in ids_set:
                    continue
                ids_set.add(id)
                ids.append(id)
                c += 1
            if not c:
                break
            p += 1
        process_ids(ids, info, imgs, cw, depth)
    elif api.user_id(url):  # User illusts
        m = re.search(r'/users/[0-9]+/([\w]+)/?([^\?#/]*)', url)
        type_ = {
            'illustrations': 'illusts',
            'manga': 'manga'
        }.get(m and m.groups()[0])
        if type_:
            types = [type_]
        else:
            types = ['illusts', 'manga']
        if m:
            tag = unquote(m.groups()[1]) or None
        else:
            tag = None
        print_('types: {}, tag: {}'.format(types, tag))

        id_ = api.user_id(url)
        process_user(id_, info, api)
        data = api.profile(id_)
        info['title'] = '{} (pixiv_{})'.format(info['artist'],
                                               info['artist_id'])

        ids = []
        for type_ in types:
            illusts = data[type_]
            if not illusts:
                continue
            ids += list(illusts.keys())
        ids = sorted(ids, key=int, reverse=True)
        if not ids:
            raise Exception('no imgs')
        process_ids(ids,
                    info,
                    imgs,
                    cw,
                    depth,
                    tags_add=[tag] if tag else None)
    else:
        raise NotImplementedError()
    info['imgs'] = imgs[:max_pid]

    return info
Code example #21
def get_imgs(username, title, cw=None):
    urls = [
        'https://m.facebook.com/{}/photos'.format(username),
        'https://m.facebook.com/profile.php?id={}&sk=photos'.format(
            username),  # no custom URL
    ]

    for url in urls:
        print('get_imgs url:', url)
        try:
            html = read_html(url)
        except Exception:
            continue
        soup = Soup(html)
        if soup.find('a', id='signup-button'):
            raise errors.LoginRequired()

        photo = soup.find('div', class_='_5v64')
        if photo is not None:
            break
    else:
        raise Exception('No photo div')

    cursor = photo.a.attrs['href'].split('/photos/')[1].split('/')[1]
    print('first cursor:', cursor)

    href = re.find(r'(/photos/pandora/\?album_token=.+?)"', html)
    href = urljoin(url, href)
    href = re.sub('&cursor=[0-9]+', '&cursor={}'.format(cursor), href)

    cursors = set([cursor])

    imgs = []

    dups = {}
    dir = os.path.join(get_outdir('facebook'), title)
    try:
        filenames = os.listdir(dir)
    except Exception:
        filenames = []
    for filename in filenames:
        name, ext = os.path.splitext(filename)
        if name.isdigit():
            dups[int(name)] = os.path.join(dir, filename)

    pages = set()

    while True:
        print(href)
        html = read_html(href)
        data_raw = html.replace('for (;;);', '')
        data = json.loads(data_raw)
        actions = data['payload']['actions']
        for action in actions:
            if action['target'] == 'm_more_photos':
                break
        else:
            print('No more photos')
            break
        html = action['html']
        soup = Soup(html)
        photos = soup.findAll('div', class_='_5v64')
        for photo in photos:
            for a in photo.findAll('a'):
                page = a.attrs['href']
                page = urljoin(href, page)

                # remove duplicate pages
                if page in pages:
                    continue
                pages.add(page)

                img = Image(page)
                id = img.id
                if id in dups and getsize(dups[id]) > 0:
                    print('skip', id)
                    imgs.append(dups[id])
                else:
                    imgs.append(img)

        s = u'{} {} - {}'.format(tr_(u'읽는 중...'), title, len(imgs))
        if cw is not None:
            cw.setTitle(s)
            if not cw.alive:
                return []
        else:
            print(s)

        cursor = re.find(PATTERN_CURSOR, data_raw)
        #print(cursor)
        if cursor is None:
            print('no cursor')
            break
        if cursor in cursors:
            print('same cursor')
            break
        cursors.add(cursor)

        href = re.sub('&cursor=[0-9]+', '&cursor={}'.format(cursor), href)

    return imgs
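
The cursors set above is cycle detection: the server hands back an opaque cursor for the next page, and the loop stops if a cursor ever repeats instead of spinning forever. The skeleton of that walk (fetch is a stand-in for the read_html-and-parse step):

def walk_cursors(fetch, cursor):
    # fetch(cursor) -> (items, next_cursor); next_cursor may be None or repeat.
    items, seen = [], set()
    while cursor is not None and cursor not in seen:
        seen.add(cursor)
        batch, cursor = fetch(cursor)
        items.extend(batch)
    return items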
Code example #22
        def wrapped(cls, *args, **kwargs):
            obj = getattr(cls, 'lex_session', cls)

            if login and not obj.is_logged_in():
                raise errors.LoginRequired(function.__name__)