def open(self, url: URL) -> bytes:
    """Fetch ``url`` with ``requests`` and return the raw response body.

    The request carries the URL's user agent and, when the URL has one, its
    referer.  A GET request also sends ``url.coockies``; a POST request sends
    ``url.post_data`` as the request body.  Both go through ``self.proxies``.

    Raises:
        LoaderError: if ``url.method`` is neither 'GET' nor 'POST', if the
            server answers with an HTTP error status, on connect/read
            timeouts, on connection failures, or on any other exception
            raised while preparing or performing the request.
    """
    try:
        headers = {'user-agent': url.user_agent}
        if url.referer:
            headers['Referer'] = url.referer.get()

        if url.method == 'GET':
            response = requests.get(url.get(), cookies=url.coockies,
                                    proxies=self.proxies, headers=headers)
        elif url.method == 'POST':
            response = requests.post(url.get(), data=url.post_data,
                                     proxies=self.proxies, headers=headers)
        else:
            raise LoaderError('Unknown method:' + url.method)

        response.raise_for_status()
    except LoaderError:
        # Our own "unknown method" error must reach the caller unchanged
        # instead of being rewritten by the generic handler below.
        raise
    except requests.exceptions.HTTPError as err:
        raise LoaderError(
            'HTTP error: {0}'.format(err.response.status_code)) from err
    except requests.exceptions.ConnectTimeout as err:
        # Checked before ConnectionError: ConnectTimeout is a subclass of it.
        raise LoaderError('Connection timeout') from err
    except requests.exceptions.ReadTimeout as err:
        raise LoaderError('Read timeout') from err
    except requests.exceptions.ConnectionError as err:
        raise LoaderError('Connection error') from err
    except Exception as err:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not converted into LoaderError.
        raise LoaderError('Unknown error in loader') from err
    else:
        return response.content
class HistoryView(QWidget):
    """Widget with a URL combo box and "go" / "back" buttons.

    Row 0 of the combo box shows the current URL.  Rows 1..n mirror
    ``self.history_items``, which holds the entries of the last history
    passed to :meth:`update_history`, in reversed order.
    """

    def __init__(self, parent: QWidget, view_manager: ViewManagerFromViewInterface):
        super().__init__(parent)
        self.view_manager = view_manager
        self.history_items = list()
        self.current_url = URL()
        self.history = None

        self.ui = Ui_HistoryView()
        # setupUi is run with 'view/qt_ui' as the working directory.  The
        # previous directory is restored even if setupUi raises, so a failed
        # widget construction cannot leave the process in another directory.
        savecwd = os.getcwd()
        os.chdir('view/qt_ui')
        try:
            self.ui.setupUi(self)
        finally:
            os.chdir(savecwd)

        self.ui.combo_history.addItem(self.current_url.get())
        self.binding()

    def binding(self):
        """Connect the button signals to their handlers."""
        self.ui.bn_go.clicked.connect(self.on_go_clicked)
        self.ui.bn_back.clicked.connect(self.on_back_clicked)

    def update_history(self, history: HistoryFromViewInterface):
        """Remember ``history`` and rebuild the combo box from it."""
        self.history = history
        self.ui.combo_history.clear()
        self.history_items.clear()

        # Row 0 is always the current URL; history rows follow it.
        self.ui.combo_history.addItem(self.current_url.get())
        for item in reversed(history.get_history()):
            self.ui.combo_history.addItem(item.url.get())
            self.history_items.append(item)

    def set_current_url(self, url: URL):
        """Store ``url`` as the current URL and show it in row 0."""
        self.current_url = url
        self.ui.combo_history.setItemText(0, self.current_url.get())

    def on_go_clicked(self):
        """Navigate to the URL selected or typed in the combo box.

        If the text still equals the URL of the selected row, the stored URL
        object is reused (with the history item's context for history rows).
        Otherwise a new ``URL`` is built from the text.
        """
        index = self.ui.combo_history.currentIndex()
        text = self.ui.combo_history.currentText()
        typed = text.strip()

        if index == 0:
            if typed == self.current_url.get():
                self.view_manager.goto_url(self.current_url)
                return
        elif 0 < index <= len(self.history_items):
            # Only rows that really map onto a history item are looked up.
            # Any other index (e.g. -1, or a row without a matching item)
            # falls through to the plain-text case instead of raising
            # IndexError or silently wrapping around to the wrong item.
            item = self.history_items[index - 1]
            if typed == item.url.get():
                self.view_manager.goto_url(item.url, {'context': item.context})
                return

        self.view_manager.goto_url(URL(text))

    def on_back_clicked(self):
        """Go to the item returned by ``history.back()``, without re-recording it."""
        if self.history:
            item = self.history.back()
            if item:
                self.view_manager.goto_url(item.url, {
                    'context': item.context,
                    'no_history': True
                })
def open(self, url: URL, trick=None) -> bytes:
    """Fetch ``url`` over a raw connection using a "trick" request template.

    Args:
        url: address to load; only its ``get()`` text is used.
        trick: key into ``self.trick_headers``.  ``None`` selects
            ``self.default_trick``.

    Returns:
        Everything after the first blank line (``\\r\\n\\r\\n``) of the data
        returned by ``self._send``, or ``b''`` when no trick is selected.

    Raises:
        LoaderError: if ``self._send`` (or formatting the request) fails; the
            message is the ``repr`` of the underlying exception.  Errors from
            host name resolution are raised before that and are not wrapped.
    """
    trick = self.default_trick if trick is None else trick
    if trick is None:
        return b''

    parts = urllib.parse.urlsplit(url.get())
    hostname = parts.netloc
    addr = socket.gethostbyname(hostname)

    # Compare by value: ``is not ''`` tests object identity, which is an
    # implementation detail for strings and a SyntaxWarning on Python 3.8+.
    uri = parts.path if parts.path != '' else '/'
    if parts.query != '':
        uri += '?' + parts.query

    try:
        # NOTE(review): the port is fixed to 80 here.
        request = self.trick_headers[trick].format(uri, hostname)
        result = self._send(addr, 80, request)
    except Exception as e:
        raise LoaderError(repr(e)) from e

    _head, _sep, body = result.partition(b'\r\n\r\n')
    return body
def parse_pictures(self, soup: BeautifulSoup, url: URL):
    """Register every picture linked from the ``ad-gallery`` div.

    Each ``<a>`` with a ``data-image`` attribute contributes one picture.
    Its file name is the URL's ``get_path()`` followed by the text after the
    last '/' of the URL.
    """
    gallery = soup.find('div', {'class': 'ad-gallery'})
    if not gallery:
        return

    for anchor in _iter(gallery.find_all('a', {'data-image': True})):
        picture = URL(anchor.attrs['data-image'])
        folder = picture.get_path()
        basename = picture.get().rpartition('/')[2]
        self.add_picture(folder + basename, picture)
def parse_pictures(self, soup: BeautifulSoup, url: URL):
    """Register the image of every ``<a class="gal">`` in the slideshow div.

    The picture URL is the ``src`` of the anchor's ``<img>``.  Its file name
    is the URL's ``get_path()`` followed by the last path component with any
    ``?query`` part removed.  Anchors without an ``<img>`` child, or whose
    image has no ``src``, are skipped.
    """
    slideshow = soup.find('div', {'id': 'slideshow'})
    if not slideshow:
        return

    for slide in _iter(slideshow.find_all('a', {'class': 'gal'})):
        image = slide.img
        # One malformed slide must not abort parsing of the whole gallery
        # with AttributeError / KeyError.
        if image is None or 'src' not in image.attrs:
            continue

        src = URL(image.attrs['src'])
        basename = src.get().rpartition('/')[2].partition('?')[0]
        self.add_picture(src.get_path() + basename, src)
def parse_pictures(self, soup: BeautifulSoup, url: URL):
    """Register every image found in the ``gallery-block`` div.

    The picture URL is the ``src`` of each ``<img>`` with every '/thumbs/'
    replaced by '/'.  The file name is that URL's ``get_path()`` plus the
    text after its last '/'.
    """
    block = soup.find('div', {'class': 'gallery-block'})
    if not block:
        return

    for thumb in _iter(block.find_all('img', src=True)):
        full_src = str(thumb.attrs['src']).replace('/thumbs/', '/')
        picture = URL(full_src)
        folder = picture.get_path()
        basename = picture.get().rpartition('/')[2]
        self.add_picture(folder + basename, picture)
def _parse_pagination(self, soup: BeautifulSoup, url: URL):
    """Register the page links found in the ``pagination-holder`` div.

    Anchors with an ordinary href are added as-is, resolved against ``url``.
    Anchors whose href starts with '#' are described by their
    ``data-block-id`` and ``data-parameters`` ("key:value;key:value")
    attributes instead.  For those, the page is registered under the current
    URL, and a second URL carrying the query pairs is attached to it as
    ``any_data['xhr']``.  '#' anchors missing either attribute are skipped.
    """
    container = soup.find('div', {'class': 'pagination-holder'})
    if not container:
        return

    for page in _iter(container.find_all('a', {'href': True})):
        href = page.attrs['href']
        if not href.startswith('#'):
            self.add_page(page.string, URL(href, base_url=url))
            continue

        block_id = page.attrs.get('data-block-id')
        raw_parameters = page.attrs.get('data-parameters')
        if block_id is None or raw_parameters is None:
            # A '#' link without the data attributes cannot be turned into
            # a request; skip it rather than fail with KeyError.
            continue

        pair_list = [('mode', 'async'),
                     ('function', 'get_block'),
                     ('block_id', block_id)]
        for item in raw_parameters.split(';'):
            if not item.strip():
                # Blank fragments (e.g. from a trailing ';') would otherwise
                # add an empty key/value pair to the query.
                continue
            key, _sep, value = item.partition(':')
            pair_list.append((key, value))

        xhr_url = URL(url.get())
        xhr_url.add_query(pair_list)

        page_url = URL(url.get())
        page_url.any_data = dict(xhr=xhr_url)
        self.add_page(page.string, page_url)
def parse_pagination(self, soup: BeautifulSoup, url: URL):
    """Register one page per ``<li class="page">`` of the ``pager`` div.

    Only entries containing a ``<span>`` with both ``data-query-key`` and
    ``data-query-value`` are used.  The page URL is the current URL with
    that key/value pair added to its query, and the label is the stripped
    text of the span.
    """
    pager = soup.find('div', {'class': 'pager'})
    if not pager:
        return

    for entry in _iter(pager.find_all('li', {'class': 'page'})):
        span = entry.find('span', {
            'data-query-key': True,
            'data-query-value': True
        })
        if not span:
            continue

        caption = str(span.string).strip()
        target = URL(url.get())
        query_pair = (span.attrs['data-query-key'],
                      span.attrs['data-query-value'])
        target.add_query([query_pair])
        self.add_page(caption, target)
def set_url(self, url: URL):
    """Make ``url`` the media source of ``self.media_player``.

    The request carries the URL's user agent and, when the URL has a
    referer, a ``Referer`` header.
    """
    self.url = url
    request = QNetworkRequest(QUrl(url.get()))
    request.setHeader(QNetworkRequest.UserAgentHeader, url.user_agent)

    if url.referer:
        # Raw headers are byte data: pass bytes, not str.  PyQt5 on
        # Python 3 rejects str for QByteArray arguments with TypeError.
        referer = url.referer.get()
        if isinstance(referer, str):
            referer = referer.encode('utf-8')
        request.setRawHeader(b'Referer', referer)

    # TODO: add cookie support and proxy setup
    self.media_player.setMedia(QMediaContent(request))
def parse_pagination(self, soup: BeautifulSoup, url: URL):
    """Register page '1' and, if the document links one, the next page.

    The next page comes from ``<link rel="next">``.  Its href is rewritten
    so that 'page=' becomes 'format=json&number_pages=1&page='.  The page is
    registered under a copy of ``url`` whose ``any_data`` holds that
    rewritten address as ``json_file_url``.  The label is whatever follows
    the last 'page=' in the rewritten href.
    """
    self.add_page('1', URL(url.get()))

    next_link = soup.find('link', {'rel': 'next'})
    if not next_link:
        return

    json_href = next_link.attrs['href'].replace(
        'page=', 'format=json&number_pages=1&page=')
    json_url = URL(json_href, base_url=url)
    label = json_href.rpartition('page=')[2]

    carrier = copy(url)
    carrier.any_data = dict(json_file_url=json_url)
    self.add_page(label, carrier)
def get_redirect_location(self, url: URL) -> URL:
    """Request ``url`` without following redirects and return its target.

    The request is sent with ``allow_redirects=False`` and ``stream=True``,
    so the body is not downloaded.  Only the ``Location`` response header
    is inspected.

    Returns:
        A ``URL`` built from the ``Location`` header, or ``None`` when the
        response has no (or an empty) ``Location`` header.

    Raises:
        LoaderError: if ``url.method`` is neither 'GET' nor 'POST', if the
            server answers with an HTTP error status, on connect/read
            timeouts, on connection failures, or on any other exception
            raised while preparing or performing the request.
    """
    try:
        headers = {'user-agent': url.user_agent}

        if url.method == 'GET':
            response = requests.get(url.get(), cookies=url.coockies,
                                    proxies=self.proxies, headers=headers,
                                    stream=True, allow_redirects=False)
        elif url.method == 'POST':
            response = requests.post(url.get(), data=url.post_data,
                                     proxies=self.proxies, headers=headers,
                                     stream=True, allow_redirects=False)
        else:
            raise LoaderError('Unknown method:' + url.method)

        response.raise_for_status()
    except LoaderError:
        # Keep our own "unknown method" error intact instead of letting the
        # generic handler below rewrite it.
        raise
    except requests.exceptions.HTTPError as err:
        raise LoaderError(
            'HTTP error: {0}'.format(err.response.status_code)) from err
    except requests.exceptions.ConnectTimeout as err:
        # Checked before ConnectionError: ConnectTimeout is a subclass of it.
        raise LoaderError('Connection timeout') from err
    except requests.exceptions.ReadTimeout as err:
        raise LoaderError('Read timeout') from err
    except requests.exceptions.ConnectionError as err:
        raise LoaderError('Connection error') from err
    except Exception as err:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not converted into LoaderError.
        raise LoaderError('Unknown error in loader') from err

    location = response.headers.get('Location', None)
    # With stream=True the body is never consumed, so the connection is only
    # released once the response is closed explicitly.
    response.close()

    if location:
        return URL(location)
    return None
def parse_pagination(self, soup: BeautifulSoup, url: URL):
    """Register the numbered page links of the ``pagination-holder`` div.

    Only anchors whose text is all digits are used.  An href containing
    '#videos' is not followed.  Instead the page address is built from the
    current URL (minus a trailing numeric segment, if any) followed by
    '/<page number>/'.  Any other href is resolved against ``url``.
    """
    container = soup.find('div', {'class': 'pagination-holder'})
    if container is None:
        return

    for anchor in container.find_all('a', {'href': True}):
        number = anchor.string
        if not (number and number.isdigit()):
            continue

        if '#videos' not in anchor.attrs['href']:
            self.add_page(anchor.string,
                          URL(anchor.attrs['href'], base_url=url))
            continue

        # Ajax-style link: derive the page address from the current URL.
        base = url.get().strip('/')
        head, _sep, tail = base.rpartition('/')
        if tail.isdigit():
            base = head
        self.add_page(anchor.string, URL(base + '/' + anchor.string + '/'))
def parse_video_tags(self, soup: BeautifulSoup, url: URL):
    """Register a tag for every link inside the ``content-container`` divs.

    Links containing '/users/' are coloured blue and get '/videos/public/'
    appended to their address.  Links containing '/pornstar/' are coloured
    red.  All other links have colour ``None``.  Each anchor's markup and
    its label/URL pair are also written out via ``psp`` and ``print``.
    """
    containers = soup.find_all('div', {'class': 'content-container'})
    for box in _iter(containers):
        for anchor in _iter(box.find_all('a', href=True)):
            psp(anchor.prettify())
            caption = collect_string(anchor)
            target = URL(anchor.attrs['href'], base_url=url)
            print(caption, target)

            colour = None
            if target.contain('/users/'):
                colour = 'blue'
                target = URL(target.get() + '/videos/public/')
            # Checked against the (possibly rewritten) target on purpose:
            # this mirrors the order of the two tests.
            if target.contain('/pornstar/'):
                colour = 'red'

            self.add_tag(caption, target, style={'color': colour})
def parse_video(self, soup: BeautifulSoup, url: URL):
    """Register every ``<source>`` of the ``video`` div as a video.

    Sources are named 'source_0', 'source_1', ... in document order.  Each
    address is resolved against ``url`` and uses the page address as its
    referer.  Afterwards the default video is set to index -1.  Nothing
    happens when the page has no ``video`` div.
    """
    player = soup.find('div', {'class': 'video'})
    if player is None:
        return

    psp(player.prettify())
    index = 0
    for source in _iter(player.find_all('source')):
        video_url = URL(source.attrs['src'], base_url=url, referer=url.get())
        self.add_video('source_' + str(index), video_url)
        index += 1
    self.set_default_video(-1)
def parse_video_title(self, soup: BeautifulSoup, url: URL) -> str:
    """Return the second-to-last '/'-separated piece of the URL text.

    ``soup`` is not used; the title is taken from the address alone.
    """
    before_last_slash, _sep, _last = url.get().rpartition('/')
    _head, _sep, title = before_last_slash.rpartition('/')
    return title
def get_image_filename(self, url: URL) -> str:
    """Return ``url.get_path()`` followed by the text after the URL's last '/'."""
    folder = url.get_path()
    _head, _sep, basename = url.get().rpartition('/')
    return folder + basename
def get_full_label(url: URL) -> str:
    """Return the last path piece of the URL text, cut at its first '.'.

    Leading and trailing '/' are stripped from the address first.
    """
    trimmed = url.get().strip('/')
    _head, _sep, last_piece = trimmed.rpartition('/')
    stem, _dot, _rest = last_piece.partition('.')
    return stem
def get_thumb_label(url: URL) -> str:
    """Return the part of the URL text after its domain, without outer '/'.

    If the domain does not occur in the URL text the result is ''.
    """
    address = url.get()
    _before, _domain, remainder = address.partition(url.domain())
    return remainder.strip('/')
class VideoPlayerWidget(QWidget):
    """Video player widget built around a ``QMediaPlayer``.

    Wires the play / pause / stop / mute buttons, the progress slider, the
    volume control and a quality menu of the generated UI to the player.
    """

    def __init__(self, QWidget_parent=None, Qt_WindowFlags_flags=0):
        QWidget.__init__(self, QWidget_parent)

        self.ui = Ui_VideoPlayerWidget()
        # setupUi is run with 'view/qt_ui' as the working directory.  The
        # previous directory is restored even if setupUi raises.
        savecwd = os.getcwd()
        os.chdir('view/qt_ui')
        try:
            self.ui.setupUi(self)
        finally:
            os.chdir(savecwd)

        self.duration = 0
        self.urls = list()
        self.default = -1
        self.url = URL()
        self.saved_position = None

        self.media_player = QMediaPlayer(None, QMediaPlayer.VideoSurface)
        self.media_player_widget = VideoWidget(self.ui.top_frame)
        self.ui.top_frame_layout.addWidget(self.media_player_widget)
        self.media_player.setVideoOutput(self.media_player_widget)
        self.media_player_widget.show()

        # Player -> UI
        self.media_player.bufferStatusChanged.connect(lambda x: self.ui.buffer.setValue(x))
        self.media_player.positionChanged.connect(self.positionChanged)
        self.media_player.durationChanged.connect(self.durationChanged)
        self.media_player.mediaStatusChanged.connect(self.media_status_changed)
        self.media_player.error.connect(self.handleError)

        # UI -> player
        self.ui.bn_play.clicked.connect(self.media_player.play)
        self.ui.bn_pause.clicked.connect(self.media_player.pause)
        self.ui.bn_stop.clicked.connect(self.stop)
        self.ui.bn_mute.clicked.connect(self.media_player.setMuted)
        self.ui.progress.sliderMoved.connect(self.media_player.setPosition)
        self.ui.volume.valueChanged.connect(self.media_player.setVolume)

    def set_url_list(self, list_of_dict: list, default: int):
        """Load the entry at index ``default`` and fill the quality menu.

        Each entry of ``list_of_dict`` is read through its ``'text'`` (menu
        caption) and ``'url'`` keys.
        """
        self.urls = list_of_dict
        self.default = default
        self.set_url(self.urls[self.default]['url'])

        menu = QMenu(self)
        for item in self.urls:
            menu_action = QAction(item['text'], self,
                                  triggered=get_menu_handler(self.re_open, item['url']))
            menu.addAction(menu_action)
        self.ui.bn_quality.setMenu(menu)

    def re_open(self, url: URL):
        """Switch to ``url`` and start playing, remembering the position.

        The remembered position is re-applied in :meth:`media_status_changed`.
        """
        self.saved_position = self.media_player.position()
        self.set_url(url)
        self.media_player.play()

    def set_url(self, url: URL):
        """Make ``url`` the media source of the player.

        The request carries the URL's user agent and, when the URL has a
        referer, a ``Referer`` header.
        """
        self.url = url
        request = QNetworkRequest(QUrl(url.get()))
        request.setHeader(QNetworkRequest.UserAgentHeader, url.user_agent)

        if url.referer:
            # Raw headers are byte data: pass bytes, not str.  PyQt5 on
            # Python 3 rejects str for QByteArray arguments with TypeError.
            referer = url.referer.get()
            if isinstance(referer, str):
                referer = referer.encode('utf-8')
            request.setRawHeader(b'Referer', referer)

        # TODO: add cookie support and proxy setup
        self.media_player.setMedia(QMediaContent(request))

    def media_status_changed(self, media_status):
        """Restore the position saved by :meth:`re_open` once media is buffered."""
        if media_status == QMediaPlayer.BufferedMedia:
            if self.saved_position:
                self.media_player.setPosition(self.saved_position)
                self.saved_position = None

    def positionChanged(self, position):
        """Update the progress slider and the 'position / duration' label."""

        def time_format(ms):
            # Milliseconds -> 'm:ss', or 'h:mm:ss' once an hour is reached.
            hours, remainder = divmod(ms // 1000, 3600)
            minutes, secundes = divmod(remainder, 60)
            if hours == 0:
                return '%2d:%02d' % (minutes, secundes)
            else:
                return '%d:%02d:%02d' % (hours, minutes, secundes)

        self.ui.progress.setValue(position)
        self.ui.lb_time.setText(time_format(position) + ' / ' + time_format(self.duration))

    def durationChanged(self, duration):
        """Store the new duration and resize the progress slider range."""
        self.ui.progress.setRange(0, duration)
        self.duration = duration

    def little_forward(self, second: int):
        """Seek ``second`` seconds ahead, unless that reaches the duration."""
        current_position = self.media_player.position()
        new_position = current_position + second * 1000
        if new_position < self.duration:
            self.media_player.setPosition(new_position)

    def play(self):
        self.media_player.play()

    def stop(self):
        self.media_player.stop()

    def pause(self):
        self.media_player.pause()

    def set_volume(self, volume: int):
        """Set the volume on both the UI control and the player."""
        self.ui.volume.setValue(volume)
        self.media_player.setVolume(volume)

    def get_volume(self) -> int:
        return self.ui.volume.value()

    def mute(self, on: bool):
        """Mute or unmute the player and sync the mute button state."""
        self.media_player.setMuted(on)
        self.ui.bn_mute.setChecked(on)

    def is_muted(self):
        return self.ui.bn_mute.isChecked()

    def set_error_handler(self, on_error=lambda error_text: None):
        """Install the callable that receives player error messages."""
        self.on_error = on_error

    def handleError(self):
        """Print the player error and pass it to the error handler, if any."""
        print("Error in " + self.url.get() + ': ' + self.media_player.errorString())
        # A player error may arrive before set_error_handler() was ever
        # called; in that case there is simply nobody to notify.
        handler = getattr(self, 'on_error', None)
        if handler is not None:
            handler("Error in " + self.url.link() + ': ' + self.media_player.errorString())

    def destroy(self, bool_destroyWindow=True, bool_destroySubWindows=True):
        """Schedule the media player for deletion, then destroy the widget."""
        self.media_player.deleteLater()
        super().destroy(bool_destroyWindow, bool_destroySubWindows)