예제 #1
0
def get_video(url, session):
    """Resolve an AfreecaTV VOD page into a Video object.

    Fetches the page, extracts the VodParameter, queries the XML API for
    playlist URLs, merges every stream and returns the combined Video.
    """
    # Trim trailing slashes so the last path segment is the video id.
    while url.strip().endswith('/'):
        url = url[:-1]

    page = downloader.read_html(url, session=session)
    url_thumb = Soup(page).find('meta', {'property': 'og:image'}).attrs['content']
    params = re.findall('VodParameter *= *[\'"]([^\'"]+)[\'"]', page)[0]
    params += '&adultView=ADULT_VIEW&_={}'.format(int(time() * 1000))
    url_xml = 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php?' + params
    print(url_xml)
    xml = downloader.read_html(url_xml, session=session, referer=url)
    title = Soup(xml).find('title').string.strip()
    urls_m3u8 = re.findall('https?://[^>]+playlist.m3u8', xml)
    if not urls_m3u8:
        raise Exception('no m3u8')
    streams = []
    for url_m3u8 in urls_m3u8:
        try:
            streams.append(_get_stream(url_m3u8))
        except Exception as e:
            print(e)  #2193: skip broken playlists instead of failing
    # Merge all playlists into the first stream.
    stream = streams[0]
    for extra in streams[1:]:
        stream += extra
    id = url.split('/')[-1].split('?')[0].split('#')[0]
    return Video(stream, url, id, title, url_thumb)
예제 #2
0
def get_video(url, session, cw):
    """Resolve an AfreecaTV VOD page into a Video object, logging via cw.

    Raises errors.LoginRequired when the page demands a login (redirect
    script or PARTIAL_ADULT flag in the API response).
    """
    print_ = get_print(cw)
    page = downloader.read_html(url, session=session)
    # Logged-out users are redirected to the login page via inline JS.
    if "document.location.href='https://login." in page:
        raise errors.LoginRequired()
    url_thumb = Soup(page).find('meta', {'property': 'og:image'}).attrs['content']
    print_('url_thumb: {}'.format(url_thumb))
    params = re.find('VodParameter *= *[\'"]([^\'"]+)[\'"]', page, err='No VodParameter')
    params += '&adultView=ADULT_VIEW&_={}'.format(int(time()*1000))
    url_xml = 'http://stbbs.afreecatv.com:8080/api/video/get_video_info.php?' + params
    print(url_xml)
    xml = downloader.read_html(url_xml, session=session, referer=url)
    soup = Soup(xml)
    if '<flag>PARTIAL_ADULT</flag>' in xml:
        raise errors.LoginRequired()
    title = soup.find('title').string.strip()
    urls_m3u8 = re.findall('https?://[^>]+playlist.m3u8', xml)
    if not urls_m3u8:
        raise Exception('no m3u8')
    streams = []
    for url_m3u8 in urls_m3u8:
        try:
            streams.append(_get_stream(url_m3u8))
        except Exception as e:
            print(e)  #2193: skip broken playlists instead of failing
    # Merge all playlists into the first stream.
    stream = streams[0]
    for extra in streams[1:]:
        stream += extra
    id = url.split('/')[-1].split('?')[0].split('#')[0]
    return Video(stream, url, id, title, url_thumb)
def _guest_token(session, headers, cache=True):
    """Return a Twitter guest token, reusing the cached one while fresh.

    Parameters:
        session: HTTP session used for the activation POST.
        headers: request headers; any stale 'x-guest-token' is removed
            before requesting a new one.
        cache: when True, reuse CACHE_GUEST_TOKEN if it is younger than
            TIMEOUT_GUEST_TOKEN seconds.
    """
    global CACHE_GUEST_TOKEN
    token = None
    if cache:
        if CACHE_GUEST_TOKEN and time(
        ) - CACHE_GUEST_TOKEN[1] < TIMEOUT_GUEST_TOKEN:
            token = CACHE_GUEST_TOKEN[0]
    if token is None:
        print('!!! get guest_token')
        name = 'x-guest-token'
        if name in headers:
            # A stale guest token on the activation request would be rejected.
            del headers[name]
        r = session.post('https://api.twitter.com/1.1/guest/activate.json',
                         headers=headers)
        data = json.loads(r.text)
        token = data['guest_token']
        if isinstance(token, int):  #3525: API may return the token as a number;
            token = str(token)      # headers/cookies need str (matches sibling impl)
        CACHE_GUEST_TOKEN = token, time()
    return token
예제 #4
0
def get_imgs(page, session, cw):
    """Return the list of Image objects for a Daum webtoon viewer page.

    Raises NotPaidError when the episode is paid and no login cookie is
    present, or when the image data is empty (not purchased).
    """
    print_ = get_print(cw)

    # Paid episodes require the 'PROF' login cookie.  #3314
    if not downloader.cookiejar.get('PROF', domain='.daum.net'):
        if page.serviceType != 'free':
            raise NotPaidError()

    html = downloader.read_html(page.url, session=session)
    header, id = get_id(page.url)
    stamp = int(time())
    soup = Soup(html)
    type_ = header_to_type(header)

    url_data = 'http://webtoon.daum.net/data/pc/{}/viewer/{}?timeStamp={}'.format(
        type_, id, stamp)
    data = json.loads(downloader.read_html(url_data, session=session,
                                           referer=page.url))
    if header == 'league_':
        m_type = None
    else:
        m_type = data['data']['webtoonEpisode']['multiType']
    print_('m_type: {}'.format(m_type))

    if m_type == 'chatting':
        # Chatting-style episodes are only served through the mobile API.
        page.url = page.url.replace('daum.net/', 'daum.net/m/')
        url_data = 'http://webtoon.daum.net/data/mobile/{}/viewer?id={}&{}'.format(
            type_, id, stamp)
        data = json.loads(downloader.read_html(url_data, session=session,
                                               referer=page.url))
        imgs = []
        for chat in data['data']['webtoonEpisodeChattings']:
            image = chat.get('image')
            if image:
                imgs.append(Image(image['url'], page, len(imgs)))
    else:
        url_data = 'http://webtoon.daum.net/data/pc/{}/viewer_images/{}?timeStamp={}'.format(
            type_, id, stamp)
        data = json.loads(downloader.read_html(url_data, session=session,
                                               referer=page.url))
        if not data.get('data'):
            raise NotPaidError()
        imgs = [Image(item['url'], page, i)
                for i, item in enumerate(data['data'])]

    return imgs
예제 #5
0
def get_video(url, session=None):
    """Pick the best available stream for a hanime.tv video page.

    Returns a (Video, session) tuple; creates a session when none is given.
    """
    if session is None:
        session = Session()
        session.headers['User-Agent'] = downloader.hdr['User-Agent']
    session.headers['X-Directive'] = 'api'
    html = downloader.read_html(url, session=session)
    soup = Soup(html)
    # Locate the script that assigns the page state to window.__NUXT__.
    found = False
    for script in soup.findAll('script'):
        text = script.text or script.string or ''
        payload = re.find('window.__NUXT__=(.+)', text)
        if payload is None:
            continue
        payload = payload.strip()
        if payload.endswith(';'):
            payload = payload[:-1]
        data = json.loads(payload)
        found = True
        break
    if not found:
        raise Exception('No __NUXT__')

    info = data['state']['data']['video']['hentai_video']
    query = info['slug']
    #url_api = 'https://members.hanime.tv/api/v3/videos_manifests/{}?'.format(query) # old
    url_api = 'https://hanime.tv/rapi/v7/videos_manifests/{}?'.format(
        query)  # new
    print(url_api)
    # Random 32-hex-digit signature; the API only checks the format.
    signature = ''.join('{:x}'.format(randrange(16)) for _ in range(32))
    hdr = {
        'x-signature': signature,
        'x-signature-version': 'web2',
        'x-time': str(int(time())),
    }
    r = session.get(url_api, headers=hdr)
    print(r)
    data = json.loads(r.text)
    streams = []
    for server in data['videos_manifest']['servers']:
        streams.extend(server['streams'])

    # Keep only streams that have a usable, non-deprecated URL.
    streams_good = [s for s in streams
                    if s['url'] and 'deprecated.' not in s['url']]

    if not streams_good:
        raise Exception('No video available')
    print('len(streams_good):', len(streams_good))
    for s in streams_good:
        print(s['extension'], s['width'], s['filesize_mbs'], s['url'])

    stream = streams_good[0]
    return Video(info, stream), session
    def __init__(self, session, cw=None):
        """Prepare *session* for Twitter API calls.

        Ensures a csrf token ('ct0' cookie + 'x-csrf-token' header agree),
        switches to authenticated mode when an 'auth_token' cookie exists,
        otherwise acquires a guest token, and builds the default query
        parameters used by later API requests.
        """
        self.session = session
        self.cw = cw
        csrf = session.cookies.get('ct0', domain='.twitter.com')
        print('csrf:', csrf)
        if not csrf:
            # No csrf cookie yet; fabricate one — the server only checks that
            # the header and the cookie carry the same value.
            csrf = hashlib.md5(str(time()).encode()).hexdigest()
        hdr = {
            "authorization": AUTH,
            "x-twitter-client-language": "en",
            # NOTE(review): "******" looks redacted; the API normally expects
            # "yes" here — confirm before relying on guest requests.
            "x-twitter-active-user": "******",
            "x-csrf-token": csrf,
            "Origin": "https://twitter.com",
            }
        session.headers.update(hdr)
        session.cookies.set('ct0', csrf, domain='.twitter.com')

        if session.cookies.get("auth_token", domain=".twitter.com"):
            session.headers["x-twitter-auth-type"] = "OAuth2Session"
        else:
            # guest token; _guest_token is defined as (session, headers, ...)
            # elsewhere in this file — the original call passed only the
            # headers, which would raise a TypeError.
            guest_token = _guest_token(session, session.headers)
            session.headers["x-guest-token"] = guest_token
            session.cookies.set("gt", guest_token, domain=".twitter.com")

        # Default query string shared by the API wrappers.
        self.params = {
            "include_profile_interstitial_type": "1",
            "include_blocking": "1",
            "include_blocked_by": "1",
            "include_followed_by": "1",
            "include_want_retweets": "1",
            "include_mute_edge": "1",
            "include_can_dm": "1",
            "include_can_media_tag": "1",
            "skip_status": "1",
            "cards_platform": "Web-12",
            "include_cards": "1",
            "include_composer_source": "true",
            "include_ext_alt_text": "true",
            "include_reply_count": "1",
            "tweet_mode": "extended",
            "include_entities": "true",
            "include_user_entities": "true",
            "include_ext_media_color": "true",
            "include_ext_media_availability": "true",
            "send_error_codes": "true",
            "simple_quoted_tweet": "true",
            #  "count": "20",
            "count": "100",
            #"cursor": None,
            "ext": "mediaStats%2ChighlightedLabel%2CcameraMoment",
            "include_quote_count": "true",
        }
예제 #7
0
def _guest_token(session, headers, cache=True, cw=None):
    """Return a Twitter guest token, reusing the cached one while fresh."""
    global CACHE_GUEST_TOKEN
    print_ = get_print(cw)
    token = None
    if cache and CACHE_GUEST_TOKEN:
        age = time() - CACHE_GUEST_TOKEN[1]
        if age < TIMEOUT_GUEST_TOKEN:
            token = CACHE_GUEST_TOKEN[0]
    if token is None:
        print('!!! get guest_token')
        # Drop any stale guest token before requesting a fresh one.
        headers.pop('x-guest-token', None)
        r = session.post('https://api.twitter.com/1.1/guest/activate.json',
                         headers=headers)
        token = json.loads(r.text)['guest_token']
        print_('token type: {}'.format(type(token)))
        if isinstance(token, int):  #3525
            token = str(token)
        CACHE_GUEST_TOKEN = token, time()
    return token
예제 #8
0
    def get_albums(page):
        """Fetch one page of the user's album list as Album objects."""
        rnd = int(time() * 1000)  # cache-busting parameter
        url = 'https://photo.weibo.com/albums/get_all?uid={}&page={}&count=20&__rnd={}'.format(
            uid, page, rnd)
        referer = 'https://photo.weibo.com/{}/albums?rd=1'.format(uid)
        raw = downloader.read_html(url, referer, session=session)
        data = json.loads(raw)['data']
        return [Album(item['album_id'], item['type'])
                for item in data['album_list']]
예제 #9
0
def get_imgs(page, session, cw):
    """Return Image objects for a Daum webtoon/leaguetoon viewer page."""
    print_ = get_print(cw)
    html = downloader.read_html(page.url, session=session)
    header, id = get_id(page.url)
    stamp = int(time())
    soup = Soup(html)
    type_ = 'leaguetoon' if 'league_' in id else 'webtoon'

    url_data = 'http://webtoon.daum.net/data/pc/{}/viewer/{}?timeStamp={}'.format(
        type_, id, stamp)
    data = json.loads(downloader.read_html(url_data, session=session,
                                           referer=page.url))
    m_type = data['data']['webtoonEpisode']['multiType']
    print_('m_type: {}'.format(m_type))

    if m_type == 'chatting':
        # Chatting-style episodes are only served through the mobile API.
        page.url = page.url.replace('daum.net/', 'daum.net/m/')
        url_data = 'http://webtoon.daum.net/data/mobile/{}/viewer?id={}&{}'.format(
            type_, id, stamp)
        data = json.loads(downloader.read_html(url_data, session=session,
                                               referer=page.url))
        imgs = []
        for chat in data['data']['webtoonEpisodeChattings']:
            image = chat.get('image')
            if image:
                imgs.append(Image(image['url'], page, len(imgs)))
    else:
        url_data = 'http://webtoon.daum.net/data/pc/{}/viewer_images/{}?timeStamp={}'.format(
            type_, id, stamp)
        data = json.loads(downloader.read_html(url_data, session=session,
                                               referer=page.url))
        imgs = [Image(item['url'], page, i)
                for i, item in enumerate(data['data'])]

    return imgs
예제 #10
0
    def get_album_imgs(album, page):
        """Fetch one page of photos from *album* as Image objects."""
        url = 'https://photo.weibo.com/photos/get_all?uid={}&album_id={}&count=30&page={}&type={}&__rnd={}'.format(
            uid, album.id, page, album.type, int(time() * 1000))
        referer = 'https://photo.weibo.com/{}/talbum/index'.format(uid)
        raw = downloader.read_html(url, referer, session=session, timeout=30)
        data = json.loads(raw)['data']
        imgs = []
        for photo in data['photo_list']:
            timestamp = photo['timestamp']
            taken = datetime.fromtimestamp(timestamp)
            # Two-digit YY-MM-DD prefix for the filename.
            prefix = '{:02}-{:02}-{:02}'.format(taken.year % 100, taken.month,
                                                taken.day)
            name = photo['pic_name']
            img_url = '{}/large/{}'.format(photo['pic_host'], name)
            ext = os.path.splitext(name)[1]
            filename = '[{}] {}{}'.format(prefix, photo['photo_id'], ext)
            imgs.append(Image(img_url, filename, timestamp))

        return imgs
예제 #11
0
def get_info(url, session):
    """Collect title, artists and episode pages for a Daum (league)toon.

    Walks up to 10 listing pages (plain webtoons return everything on the
    first page; only leaguetoons are paginated), de-duplicating episodes
    by id.  Returns a dict with keys 'title', 'artists' and 'pages'
    (pages sorted by episode id).
    """
    referer = url
    header, id = get_id(referer)
    type_ = header_to_type(header)

    info = {}
    ids = set()  # episode ids already seen, to drop duplicates across pages
    pages = []
    for p in range(1, 1 + 10):
        if p == 1:
            url = 'http://webtoon.daum.net/data/pc/{}/view/{}?timeStamp={}'.format(
                type_, id, int(time()))
        else:
            # Plain webtoons are not paginated; stop after the first page.
            if type_ == 'webtoon':
                break
            url = 'http://webtoon.daum.net/data/pc/{}/view/{}?page_no={}&timeStamp={}'.format(
                type_, id, p, int(time()))
        print(url)
        info_raw = downloader.read_html(url, referer=referer, session=session)
        _info = json.loads(info_raw)
        # The payload key depends on whether this is a webtoon or a leaguetoon.
        webtoon = _info['data'].get('webtoon') or _info['data'].get(
            'leaguetoon')
        if webtoon is None:
            raise Exception('No webtoon')

        if p == 1:
            # Title and artist list only need to be read once.
            info['title'] = webtoon['title']
            artists = []
            for artist in webtoon['cartoon']['artists']:
                artist = artist['penName']
                if artist in artists:
                    continue
                artists.append(artist)

            # Swap the first two artists — presumably puts the primary author
            # first; TODO confirm against the site's artist ordering.
            if len(artists) > 1:
                artists = [artists[1], artists[0]] + artists[2:]
            info['artists'] = artists

        eps = webtoon.get('webtoonEpisodes') or webtoon.get(
            'leaguetoonEpisodes')
        if not eps:
            # An empty page past the first just means we ran off the end.
            if p > 1:
                eps = []
            else:
                raise Exception('No eps')
        c = 0  # number of previously-unseen episodes found on this page
        for ep in eps:
            id_ = ep.get('articleId') or ep.get('id')
            title = ep['title']
            # Leaguetoon episodes are always free; webtoons report their own type.
            serviceType = 'free' if type_ == 'leaguetoon' else ep['serviceType']
            if type_ == 'leaguetoon':
                url = 'http://webtoon.daum.net/league/viewer/{}'.format(id_)
            else:
                url = 'http://webtoon.daum.net/webtoon/viewer/{}'.format(id_)
            if id_ in ids:
                continue
            c += 1
            ids.add(id_)
            page = Page(id_, url, title, serviceType)
            pages.append(page)
        if c == 0:
            # No new episodes on this page: pagination is exhausted.
            print('c == 0; break')
            break

    info['pages'] = sorted(pages, key=lambda x: x.id)
    return info