Example 1
def enter():
    print('enter')
    session = Session()

    r = session.get(URL_ENTER)

    # 862
    html = r.text
    soup = Soup(html)
    box = soup.find('aside', id='FilterBox')
    data = {}
    for select in box.findAll('select'):
        name = select.attrs['name']
        value = select.findAll('option')[-1].attrs['value']
        print(name, value)
        data[name] = value
    for input_ in box.findAll('input'):  # `input_` avoids shadowing the builtin input()
        name = input_.attrs['name']
        value = input_.attrs['value']
        if name.startswith('rating_') or 'CSRF_TOKEN' in name:
            print(name, value)
            data[name] = value
    data.update({
        'filter_media': 'A',
        'filter_order': 'date_new',
        'filter_type': '0',
    })
    r = session.post(URL_FILTER, data=data, headers={'Referer': r.url})
    print(r)

    return session
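
The function above scrapes a filter form, copies its <select>/<input> state, and POSTs it back with the Referer header set. A minimal sketch of just the harvesting step with plain BeautifulSoup, assuming the same FilterBox markup (Session and Soup above are the project's wrappers; URL_ENTER and URL_FILTER are defined elsewhere):

from bs4 import BeautifulSoup

def harvest_filter_box(html):
    # collect the form state the server expects echoed back in the POST
    box = BeautifulSoup(html, 'html.parser').find('aside', id='FilterBox')
    data = {}
    for select in box.find_all('select'):
        # take the last <option> of each <select>, as the example does
        data[select['name']] = select.find_all('option')[-1]['value']
    for inp in box.find_all('input'):
        name = inp.get('name', '')
        if name.startswith('rating_') or 'CSRF_TOKEN' in name:
            data[name] = inp.get('value', '')
    return data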
def get_videos(url, cw=None):
    print_ = get_print(cw)
    info = {}
    user_id = re.find(r'twitch.tv/([^/?]+)', url, err='no user_id')  # project re wrapper: first group, or raise with err
    print(user_id)
    session = Session()
    r = session.get(url)
    s = cut_pair(re.find(r'headers *: *({.*)', r.text, err='no headers'))  # cut_pair: trims to the balanced {...} block
    print(s)
    headers = json_loads(s)

    payload = [{
        'operationName': 'ClipsCards__User',
        'variables': {
            'login': user_id,
            'limit': 20,
            'criteria': {
                'filter': 'ALL_TIME'
            }
        },
        'extensions': {
            'persistedQuery': {
                'version': 1,
                'sha256Hash': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777'
            }
        },
    }]
    videos = []
    cursor = None
    cursor_new = None
    while True:
        if cursor:
            payload[0]['variables']['cursor'] = cursor
        r = session.post('https://gql.twitch.tv/gql',
                         json=payload,
                         headers=headers)
        #print(r)
        data = r.json()
        for edge in data[0]['data']['user']['clips']['edges']:
            url_video = edge['node']['url']
            info['name'] = edge['node']['broadcaster']['displayName']
            video = Video(url_video)
            video.id = int(edge['node']['id'])
            videos.append(video)
            cursor_new = edge['cursor']
        print_('videos: {} / cursor: {}'.format(len(videos), cursor))
        if cursor == cursor_new:
            print_('same cursor')
            break
        if cursor_new is None:
            break
        cursor = cursor_new
    if not videos:
        raise Exception('no videos')
    info['videos'] = sorted(videos, key=lambda video: video.id, reverse=True)
    return info
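
The loop above is cursor pagination against Twitch's GraphQL endpoint: re-POST the same persisted query, feeding back the last edge's cursor, and stop when the cursor no longer advances. A stripped-down sketch of just that loop, assuming `headers` already holds the Client-Id/token pair the function scrapes from the page:

import requests

def iter_clip_edges(login, headers, limit=20):
    payload = [{
        'operationName': 'ClipsCards__User',
        'variables': {'login': login, 'limit': limit,
                      'criteria': {'filter': 'ALL_TIME'}},
        'extensions': {'persistedQuery': {
            'version': 1,
            'sha256Hash': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
        }},
    }]
    cursor = None
    while True:
        if cursor:
            payload[0]['variables']['cursor'] = cursor
        data = requests.post('https://gql.twitch.tv/gql',
                             json=payload, headers=headers).json()
        edges = data[0]['data']['user']['clips']['edges']
        if not edges:
            return
        yield from edges
        cursor_new = edges[-1].get('cursor')
        if cursor_new in (None, cursor):
            return  # no further page, or the endpoint repeated itself
        cursor = cursor_new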
def read_channel(url_page, cw=None):
    print_ = get_print(cw)
    res = re.find(CHANNEL_PATTERN, url_page)
    if res is None:
        raise Exception('Not channel')
    header, username = res
    print(header, username)
    max_pid = get_max_range(cw)
    info = {}
    info['header'] = header
    info['username'] = username
    session = Session()
    urls = []
    ids = set()
    for p in range(100):
        url_api = urljoin(url_page,
                          '/{}/{}/videos/best/{}'.format(header, username, p))
        print_(url_api)
        r = session.post(url_api)
        data = json.loads(r.text)

        videos = data.get('videos')  #4530
        if not videos:
            print_('empty')
            break

        for video in videos:
            id_ = video['id']
            if id_ in ids:
                print_('duplicate: {}'.format(id_))
                continue
            ids.add(id_)
            info['name'] = video['pn']
            urls.append(urljoin(url_page, video['u']))

        if len(urls) >= max_pid:
            break

        n = data['nb_videos']

        s = '{} {} - {}'.format(tr_('읽는 중...'), info['name'], len(urls))  # tr_ localizes; '읽는 중...' means 'Reading...'
        if cw:
            cw.setTitle(s)
        else:
            print(s)
        if len(ids) >= n:
            break
        sleep(1, cw)
    if not urls:
        raise Exception('no videos')
    info['urls'] = urls[:max_pid]
    return info
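
    # Method of the pandoratv extractor's Video class; the surrounding
    # class definition is not part of this example.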
    def get(self, url):
        if self._url_video:
            return self._url_video
        cw = self.cw
        print_ = get_print(cw)
        html = downloader.read_html(url)
        soup = Soup(html)

        embedUrl = extract('embedUrl', html, cw)
        if embedUrl:
            raise EmbedUrlError('[pandoratv] EmbedUrl: {}'.format(embedUrl))
        
        uid = extract('strLocalChUserId', html, cw)
        pid = extract('nLocalPrgId', html, cw)
        fid = extract('strFid', html, cw)
        resolType = extract('strResolType', html, cw)
        resolArr = extract('strResolArr', html, cw)
        vodSvr = extract('nVodSvr', html, cw)
        resols = extract('nInfo', html, cw)
        runtime = extract('runtime', html, cw)

        url_api = 'http://www.pandora.tv/external/getExternalApi/getVodUrl/'
        data = {
            'userId': uid,
            'prgId': pid,
            'fid': fid,
            'resolType': resolType,
            'resolArr': ','.join(map(str, resolArr)),
            'vodSvr': vodSvr,
            'resol': max(resols),
            'runtime': runtime,
            'tvbox': 'false',
            'defResol': 'true',
            'embed': 'false',
            }
        session = Session()
        r = session.post(url_api, headers={'Referer': url}, data=data)
        data = json.loads(r.text)
        self._url_video = data['src']

        self.title = soup.find('meta', {'property': 'og:description'})['content']
        
        ext = get_ext(self._url_video)
        self.filename = format_filename(self.title, pid, ext)

        self.url_thumb = soup.find('meta', {'property': 'og:image'})['content']
        self.thumb = BytesIO()
        downloader.download(self.url_thumb, buffer=self.thumb)
        
        return self._url_video
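
This method leans on a project helper extract(name, html, cw) that pulls inline JavaScript variables (strLocalChUserId, strResolArr, ...) out of the page source. A plausible minimal version of such a helper, offered purely as an assumption about what it does:

import ast
import re

def extract(name, html):
    # find `name = <literal>` in the page's inline JavaScript and
    # evaluate the literal; numbers, quoted strings and simple arrays
    # all parse with ast.literal_eval
    m = re.search(r'\b{}\s*=\s*([^;]+);'.format(re.escape(name)), html)
    if not m:
        return None
    value = m.group(1).strip()
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return value.strip('\'"')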
def read_channel(url_page, cw=None):
    print_ = get_print(cw)
    res = re.find(CHANNEL_PATTERN, url_page)
    if res is None:
        raise Exception('Not channel')
    header, username = res
    print(header, username)
    max_pid = get_max_range(cw, 2000)
    info = {}
    info['header'] = header
    info['username'] = username
    session = Session()
    urls = []
    urls_set = set()
    for p in range(100):
        url_api = urljoin(url_page, '/{}/{}/videos/best/{}'.format(header, username, p))
        print(url_api)
        r = session.post(url_api, data='main_cats=false')
        soup = Soup(r.text)
        thumbs = soup.findAll('div', class_='thumb-block')
        if not thumbs:
            print_('empty')
            break
        for thumb in thumbs:
            info['name'] = thumb.find('span', class_='name').text.strip()
            href = thumb.find('a')['href']
            href = urljoin(url_page, href)
            if href in urls_set:
                print_('duplicate: {}'.format(href))
                continue
            urls_set.add(href)
            urls.append(href)
        
        if len(urls) >= max_pid:
            break
        
        s = '{} {} - {}'.format(tr_('읽는 중...'), info['name'], len(urls))
        if cw:
            if not cw.alive:
                return
            cw.setTitle(s)
        else:
            print(s)
    if not urls:
        raise Exception('no videos')
    info['urls'] = urls[:max_pid]
    return info
        
Example 6
def get_videos(url, cw=None):
    '''
    get_videos
    '''
    print_ = get_print(cw)

    if '/users/' in url:
        mode = 'users'
        username = url.split('/users/')[1].split('/')[0]
    elif '/pornstar/' in url:
        mode = 'pornstar'
        username = url.split('/pornstar/')[1].split('/')[0]
    elif '/model/' in url:
        mode = 'model'
        username = url.split('/model/')[1].split('/')[0]
    elif '/channels/' in url:
        mode = 'channels'
        username = url.split('/channels/')[1].split('/')[0]
    elif '/playlist/' in url:
        mode = 'playlist'
        username = url.split('/playlist/')[1].split('/')[0]
    else:
        raise Exception('Not supported url')
    username = username.split('?')[0].split('#')[0]

    session = Session()

    domain = utils.domain(url)

    if mode in ['pornstar']:
        url_main = 'https://{}/{}/{}'.format(domain, mode, username)
        html = downloader.read_html(url_main, session=session)
        soup = Soup(html)
        soup = fix_soup(soup, url_main, session, cw)
        for a in soup.findAll('a'):
            if '/{}/{}/videos/upload'.format(mode, username) in a.attrs.get('href', ''):
                free = True
                break
        else:
            free = False
        print_('free: {}'.format(free))

    # Range
    max_pid = get_max_range(cw, 500)
    max_pid = min(max_pid, 2000)

    html = downloader.read_html(url, session=session)
    soup = fix_soup(Soup(html), url, session, cw)

    info = {}

    # get title
    h1 = soup.find('h1')
    if h1:
        header = 'Playlist'
        title = h1.find(id='watchPlaylist')
    else:
        title = None
    if not title:
        header = 'Channel'
        profile = soup.find('div', class_='profileUserName')
        wrapper = soup.find('div', class_='titleWrapper')
        bio = soup.find('div', class_='withBio')
        title = soup.find('h1', {'itemprop':'name'})
        if not title and profile:
            title = profile.a
        if not title and wrapper:
            title = wrapper.h1
        if not title and bio:
            title = bio.h1
    if not title:
        raise Exception('No title')
    #print(title)
    info['title'] = '[{}] {}'.format(header, title.text.strip())
    token = re.find('''token *= *['"](.*?)['"]''', html)
    print_('token: {}'.format(token))

    # get links
    hrefs = []
    fail = 0
    for p in range(1, 1+100):
        try:
            if mode in ['users', 'model']:
                if mode == 'users':
                    url_api = 'https://{}/users/{}/videos/public/'\
                              'ajax?o=mr&page={}'.format(domain, username, p)
                elif mode == 'model':
                    url_api = 'https://{}/model/{}/videos/upload/'\
                              'ajax?o=mr&page={}'.format(domain, username, p)
                r = session.post(url_api)
                soup = Soup(r.text)
                if soup.find('h1'):
                    print('break: h1')
                    break
            elif mode in ['pornstar']:
                if free:
                    url_api = 'https://{}/{}/{}/videos/upload'\
                              '?page={}'.format(domain, mode, username, p)
                    soup = downloader.read_soup(url_api, session=session)
                    soup = fix_soup(soup, url_api, session, cw)
                    soup = soup.find('div', class_='videoUList')
                else:
                    url_api = 'https://{}/{}/{}?page={}'.format(domain, mode, username, p)
                    soup = downloader.read_soup(url_api, session=session)
                    soup = fix_soup(soup, url_api, session, cw)
                    soup = soup.find('ul', class_='pornstarsVideos')
            elif mode in ['channels']:
                url_api = 'https://{}/{}/{}/videos?page={}'.format(domain, mode, username, p)
                soup = downloader.read_soup(url_api, session=session)
                soup = fix_soup(soup, url_api, session, cw)
                try:
                    soup = soup.find('div', {'id': 'channelsBody'}).find('div', class_='rightSide')
                except Exception:  # channel body missing; stop paging
                    break
            elif mode in ['playlist']:
                #url_api = 'https://{}/playlist/viewChunked?id={}&offset={}&itemsPerPage=40'.format(domain, username, len(hrefs))
                if token is None:
                    raise Exception('no token')
                url_api = 'https://{}/playlist/viewChunked?id={}&token={}&page={}'.format(domain, username, token, p)
                soup = downloader.read_soup(url_api, session=session)
            else:
                raise NotImplementedError(mode)
            fail = 0
        except Exception as e:
            print_(e)
            fail += 1
            if fail < 2:
                continue
            else:
                break
        finally:
            print_('{}  ({})'.format(url_api, len(hrefs)))

        if cw and not cw.alive:
            return

        lis = soup.findAll('li', class_='videoblock')
        if not lis:
            print_('break: no lis')
            break

        if getattr(soup.find('title'), 'text', '').strip() == 'Page Not Found':
            print_('Page Not Found')
            break

        c = 0
        for li in lis:
            a = li.find('a')
            href = a.attrs['href']
            href = urljoin(url, href)
            if href in hrefs:
                continue
            c += 1
            if href.startswith('javascript:'): # Remove Pornhub Premium
                print(href)
                continue
            hrefs.append(href)
        if c == 0:
            print('c==0')
            break
        print(c) # 1320

        if len(hrefs) >= max_pid:
            break

    if cw:
        hrefs = filter_range(hrefs, cw.range)

    info['hrefs'] = hrefs

    return info
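
The mode/username dispatch at the top of this example is a chain of substring splits. An equivalent, more compact formulation with urllib.parse (a sketch, assuming the path always begins with /<mode>/<username>/); urlparse also discards the query and fragment, so the trailing split('?')/split('#') cleanup becomes unnecessary:

from urllib.parse import urlparse

MODES = ('users', 'pornstar', 'model', 'channels', 'playlist')

def parse_mode(url):
    # e.g. 'https://host/pornstar/<name>/videos' -> ('pornstar', '<name>')
    parts = urlparse(url).path.strip('/').split('/')
    if len(parts) >= 2 and parts[0] in MODES:
        return parts[0], parts[1]
    raise Exception('Not supported url')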
def get_imgs(user_id, cw=None):
    print_ = get_print(cw)
    url = 'https://{}.bdsmlr.com/'.format(user_id)
    session = Session()
    info = {'c': 0, 'posts': [], 'ids': set()}

    html = downloader.read_html(url, session=session)
    soup = Soup(html)

    sorry = soup.find('div', class_='sorry')
    if sorry:
        raise Exception(sorry.text.strip())

    username = soup.find('title').text.strip()  ###
    print('username:', username)
    info['username'] = username

    token = soup.find('meta', {'name': 'csrf-token'}).attrs['content']
    print_(u'token: {}'.format(token))
    foo(url, soup, info)  # project helper: collects posts from the page into info (ids/posts/last)

    max_pid = get_max_range(cw)  #, 2000)

    n = len(info['ids'])
    for p in range(1000):
        url_api = 'https://{}.bdsmlr.com/infinitepb2/{}'.format(
            user_id, user_id)
        data = {
            'scroll': str(info['c']),
            'timenow': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'last': str(info['last']),
        }
        print_(u'n:{}, scroll:{}, last:{}'.format(len(info['posts']),
                                                  data['scroll'],
                                                  data['last']))
        headers = {
            'Referer': url,
            'X-CSRF-TOKEN': token,
        }
        for try_ in range(4):
            try:
                r = session.post(url_api, data=data, headers=headers)
                if p == 0:
                    r.raise_for_status()
                break
            except Exception as e:
                print(e)
                err = e
        else:
            raise err  # all attempts failed (a bare `raise` here would have no active exception to re-raise)
        soup = Soup(r.text)
        foo(url, soup, info)
        if len(info['ids']) == n:
            print('same; break')
            break
        n = len(info['ids'])

        s = u'{}  {} (tumblr_{}) - {}'.format(tr_(u'읽는 중...'), username,
                                              user_id, len(info['posts']))
        if cw is not None:
            if not cw.valid or not cw.alive:
                return []
            cw.setTitle(s)
        else:
            print(s)

        if len(info['posts']) > max_pid:
            break

    return info
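
The inner for/else loop above is a retry-then-give-up pattern: tolerate transient POST failures, re-raising the last error once every attempt is spent. Factored into a reusable helper (a sketch; tries=4 mirrors the example):

import time

def post_with_retry(session, url, tries=4, **kwargs):
    # retry transient failures; re-raise the last error when exhausted
    last_err = None
    for _ in range(tries):
        try:
            r = session.post(url, **kwargs)
            r.raise_for_status()
            return r
        except Exception as e:
            print(e)
            last_err = e
            time.sleep(1)
    raise last_err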
def _guest_token(headers):
    session = Session()
    r = session.post('https://api.twitter.com/1.1/guest/activate.json',
                     headers=headers)
    data = json.loads(r.text)
    return data['guest_token']
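
Possible usage (an assumption, not shown in the example): the caller supplies the web client's public Bearer authorization header, then attaches the activated token as x-guest-token for later API calls. The Bearer value below is a placeholder, not a real credential.

headers = {'authorization': 'Bearer <web client token>'}
headers['x-guest-token'] = _guest_token(headers)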